initial commit

This commit is contained in:
NA-Wen 2025-09-23 20:31:19 +08:00
commit b24e6e6bd0
85 changed files with 8843 additions and 0 deletions

206
.gitignore vendored Normal file
View File

@ -0,0 +1,206 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

191
README.md Normal file
View File

@ -0,0 +1,191 @@
# Multi-Agent Collaboration via Evolving Orchestration
**Puppeteer** introduces a new way for large language models (LLMs) to collaborate efficiently on complex tasks.
Instead of static structures, our framework uses a centralized orchestrator (“puppeteer”) that dynamically directs multiple agents (“puppets”) based on evolving task states. The orchestrator is trained with reinforcement learning to sequence and prioritize agents, enabling flexible and adaptive collective reasoning.
![framework](assets/framework.png "Multi-Agent Collaboration via Evolving Orchestration")
# Quick Start
## Prerequisites
- Python 3.11 or higher
- CUDA-compatible GPU (optional, for policy training)
- API keys for desired LLM providers
## Installation
1. **Clone the repository**
```bash
git clone -b puppeteer https://github.com/OpenBMB/ChatDev
cd ChatDev
cd puppeteer
```
2. **Set up environment and install dependencies**
```bash
# Create conda environment
conda create -n puppeteer_env python=3.11
conda activate puppeteer_env
# Install dependencies
pip install -r requirements.txt
```
3. **Download the pre-trained puppeteer model base**
We use a 70B reward model as the untrained Puppeteer base, so we first need to download this model. The Hugging Face repository is `nvidia/Llama-3.1-Nemotron-70B-Reward`.
4. **Configure the system**
```bash
# Edit configurations with your settings
vim config/global.yaml # Add your API keys
```
- Global Configuration (`config/global.yaml`): configure API access, file paths, and system behavior:
```yaml
# API Configuration
logging:
level: INFO # Logging level, options: DEBUG, INFO, WARNING, ERROR
logpath: ./logs # Folder path to store log files
# Path to the folder containing model weights of the Puppeteer base model
# (downloaded in step 3, or directly loading)
model_weight_path: nvidia/Llama-3.1-Nemotron-70B-Reward
api_keys:
openai_api_key: "" # Your OpenAI API key
openai_base_url: "https://api.openai.com/v1/" # OpenAI base URL
bing_api_key: "" # Bing API key for web search (optional)
# System retry settings
max_retry_times: 10 # Maximum number of times to retry API calls
max_json_reformat_turns: 10 # Maximum retries for JSON parsing/reformatting
# Enable external tools (like web search, file read, etc.)
external_tools_enabled: True
# File paths that agents may need
file_path:
root_file_path: ./data # Root folder containing all necessary files for agents
# Graph exploration parameters for multi-agent reasoning
graph:
max_parallel_paths: 4 # Maximum number of parallel paths to explore (recommended 2-6)
max_step_num: 5 # Maximum number of steps (nodes) in each path (recommended 4-6)
```
⚠️ **Note:** Replace placeholders with your actual API keys and url, all the places are needed.
5. **Quick start with the predefined settings**
The agents are initialized from `puppeteer/personas/personas.jsonl`, which includes all currently supported reasoning patterns and tool modes. The default model backbone is GPT-4o.
```bash
cd puppeteer
python main.py <task> <mode> [--level LEVEL] [--index INDEX] [--data_limit LIMIT] [--personas PATH]
```
Example:
```bash
# Run MMLU-Pro validation set with a data limit of 10
python main.py MMLU-Pro validation --data_limit 10
```
If the run is successful, you will see output similar to [EXAMPLE](puppeteer/logs/example).
# Customization
Puppeteer provides multiple ways to tailor the system to your needs
## Agents
### 🔎 Agent Categories
In this framework, agents are divided into two main categories based on whether they have access to external tools:
1. Agents with Tools
- Description: These agents can interact with external systems to gather data, execute code, or access files.
- Supported Actions: `TOOL_ACTION_LIST`
- search_arxiv Search for academic papers on arXiv
- search_bing Query the Bing search engine
- access_website Access websites and extract information
- run_python Execute Python code
- read_file Read and extract content from files
2. Agents without Tools
- Description: These agents focus on internal reasoning, critique, reflection, and summarization. They do not interact with external systems.
- Supported Actions: `REASONING_ACTION_LIST`
- reasoning Logical reasoning
- critique Evaluate and critique reasoning
- question Generate clarifying sub-questions
- reflect Provide reflective analysis
- conclude Generate final conclusions
- summarize Summarize information concisely
- planning Create structured plans
- modify Correct errors and refine results
3. Termination Agent
- Description: A special agent responsible for determining when the reasoning process should stop.
- Supported Actions: `TERMINATION_ACTION_LIST`
- terminate End the reasoning process and deliver the final output
### ⚙️ Customize
You can extend this framework by creating new agents, adding actions, or integrating new base models.
#### 1. Multiple Actions per Agent
- Currently, each agent is designed to perform **a single action** (see [`reasoning_agent.py`](puppeteer/agent/reasoning_agent.py)).
- To create an agent that supports **multiple actions**, implement your own custom agent by inheriting from [`agent.py`](puppeteer/agent/agent.py).
#### 2. Adding New Actions
- To introduce a **new action**, you need to:
1. Define the corresponding **prompt or tool**.
2. Modify [`reasoning_agent.py`](puppeteer/agent/reasoning_agent.py) to integrate the new action into the reasoning workflow.
#### 3. Supporting New Base Models
- If you want to use a **new base model** for agents:
- Extend the configuration in [`model_config.py`](puppeteer/model/model_config.py).
- Ensure that the new model is properly registered and compatible with the agent framework.
## 🎭 Puppeteer Training
The training parameters are defined in [`policy.json`](puppeteer/config/policy.json). Key parameters include:
### 🔹 Optimization
- `learning_rate`: `0.0001`
Controls the learning speed of the policy network.
- `sample_size`: `1`
Number of samples used per training step.
### 🔹 Agent Scale Control
- `max_num_agents`: `3`
Maximum number of agents allowed in the system.
- `next_num_agents`: `3`
Number of agents spawned in the next step.
- `max_path`: `6`
Maximum trajectory length for agent exploration.
### 🔹 Reward Configuration
- `gamma`: `0.99`
Discount factor for future rewards.
- `reward_factors`: Shaping factors for different actions:
- `default`: `-1.0` → Penalty for invalid/neutral actions.
- `terminator`: `0.5` → Reward for correct termination.
- `web_search`: `-1.5` → Penalty for costly web-search actions.
### 🔹 Cost Control
- `scale`: `0.1`
Base cost scaling factor.
- `growth_rate`: `1.0`
Linear growth rate of cost per step.
- `inverse`: `false`
If set to `true`, applies inverse cost scaling.
### 🔹 Training Paradigm
The current training paradigm uses the hidden state of the last token from the Reward Model. This hidden state is passed through an MLP-based policy network to generate action probabilities.
You can switch the Reward Model or design a new training paradigm by modifying the policy network input/output structure.
# Citation
If you use Puppeteer in your work, please cite our NeurIPS 2025 paper:
```bibtex
@inproceedings{dang2025multiagentcollaboration,
title={Multi-Agent Collaboration via Evolving Orchestration},
author={Yufan Dang and Chen Qian and Xueheng Luo and Jingru Fan and Zihao Xie and Ruijie Shi and Weize Chen and Cheng Yang and Xiaoyin Che and Ye Tian and Xuantang Xiong and Lei Han and Zhiyuan Liu and Maosong Sun},
booktitle={The Thirty-ninth Annual Conference on Neural Information Processing Systems (NeurIPS)},
year={2025},
url={https://arxiv.org/abs/2505.19591}
}
```

BIN
assets/framework.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 270 KiB

View File

132
puppeteer/agent/agent.py Normal file
View File

@ -0,0 +1,132 @@
import json
import yaml
import hashlib
import re
import time
from utils.other_utils import JsonFormat
from copy import deepcopy
from abc import ABC, abstractmethod
from model.query_manager import query_manager
from agent.agent_info.global_info import GlobalInfo
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
class Agent(ABC):
def __init__(self, role, role_prompt, index, model="gpt", actions=[], policy=None, global_info:GlobalInfo =None, initial_dialog_history=None) -> None:
"""
Initialize the Agent object.
:param role: The name of the agent's role
:param role_prompt: The role prompt information
:param index: The index to distinguish different agent instances
:param global_info: Global configuration info, default is None
:param model: The model to be used (either 'gpt' or 'gpt4'), default is 'gpt'
:param actions: List of actions available to the agent, default is empty
:param initial_dialog_history: Initial dialog history, default is None
"""
super().__init__()
# Initialize model query function
self.model = model
self.query_func = None
self.query_func = self._get_query_function()
if not self.query_func:
raise ValueError(f"Model '{model}' not implemented")
# Other basic settings
self.json_format = JsonFormat(query_func=self.query_func)
self.role = role
self.role_prompt = role_prompt
self.system_prompt = self.role_prompt # Initial system prompt
self.policy = policy
self.index = index
self.hash = hashlib.md5(f"{index}{role}{role_prompt}{model}{time.ctime()}".encode()).hexdigest()
# Tools and file path settings
self.actions = actions
self.root_file_path = global_config["file_path"]["root_file_path"]
if global_info:
self.workspace_path = global_info.workpath
# Activation state and dialog history
self._activated = False
self.initial_dialog_history = initial_dialog_history or []
self.dialog_history = deepcopy(self.initial_dialog_history)
@property
def simplified_dialog_history(self):
self._simplified_dialog_history = []
for h in self.dialog_history:
if h.get("role") == "user":
# Mask user input
# "*Your previous reasoning was {}*”
masked_text = re.sub(r'\*.*?\*', '', h["content"])
self._simplified_dialog_history.append({"role": h["role"], "content": masked_text})
else:
self._simplified_dialog_history.append(h)
return self._simplified_dialog_history
@property
def unique_identifier(self):
"""Return a unique identifier for the Agent instance."""
return {
"index": self.index,
"role": self.role,
"hash": self.hash
}
def _get_query_function(self):
def query_func(messages, system_prompt=None):
return query_manager.query(self.model, messages, system_prompt)
return query_func
@abstractmethod
def activate(self, global_info, initial_dialog_history=None):
"""Activate the agent, enabling it to perform actions."""
pass
@abstractmethod
def deactivate(self):
"""Deactivate the agent."""
self._activated = False
def reset(self):
"""Reset the agent's state, clearing dialog history and deactivating it."""
self.dialog_history = []
self.initial_dialog_history = []
self.deactivate()
@abstractmethod
def _build_current_action(self, format_action, flag, answer, step_data):
"""Build the current workflow guiding the agent's actions."""
pass
@abstractmethod
def take_action(self, global_info, external_tools_enabled=True):
"""Let the agent take an action based on the current state."""
pass
@abstractmethod
def _execute_action(self, action, global_info):
"""Execute a specific action."""
pass
@abstractmethod
def _reasoning_operation(self, action, global_info) -> str:
"""Perform a reasoning operation."""
pass
@abstractmethod
def _answer_operation(self, global_info) -> str:
"""Generate an answer based on the current state."""
pass
@abstractmethod
def _tool_operation(self, action: json, global_info) -> str:
"""Perform an operation involving external tools."""
pass
@abstractmethod
def _interaction_operation(self, code, env, global_info) -> str:
"""Handle operations related to agent interaction."""
pass

View File

@ -0,0 +1,3 @@
REASONING_ACTION_LIST = ["reasoning","critique","question","reflect","conclude","summarize","planning","modify"]
TOOL_ACTION_LIST = ["search_arxiv", "search_bing", "access_website", "run_python", "read_file"]
TERMINATION_ACTION_LIST = ["terminate"]

View File

@ -0,0 +1,91 @@
import os
import re
import logging
from agent.agent_info.workflow import Workflow
from agent.agent_info.workflow import Action
class GlobalInfo:
def __init__(self, path_id: int, workpath: str, task: dict, logger: logging.Logger=None, env=None, env_name=None):
self.path_id = path_id
self.logger = logger
self.workpath = workpath
self.task = task
self.workflow = Workflow(path_id=self.path_id, workpath=self.workpath)
self.url = self._extract_url(task.get("Question"))
self.file_name = task.get("file_name")
self.file_extension = self._extract_file_extension(self.file_name)
self.answers = []
self.code_path = ""
self.env_exists = env is not None
self.env_name = env_name
self.env = env
self.supervisor = self._extract_supervisor(env, env_name)
@property
def state_answers(self):
"""Returns the list of answers in the state
Return: list of answers
"""
ret = []
for index, a in enumerate(self.answers):
ret.append("{}".format(a))
if len(ret) == 0:
return []
return ret
@property
def total_tokens(self):
return self.workflow.total_tokens
@property
def total_cost(self):
return self.workflow.total_cost
def _extract_url(self, question):
"""Extracts the URL from the task question, if any."""
url_regex = r"(https?://[^\s]+)"
urls = re.findall(url_regex, question or "")
return urls[0] if urls else None
def _extract_file_extension(self, file_name):
"""Extracts the file extension from the file name."""
if file_name:
return os.path.splitext(file_name)[1]
return None
def _extract_supervisor(self, env, env_name):
if env_name == "appworld" and env is not None:
return env.task.supervisor
return None
def update(self, action: Action):
"""Updates the workflow with the given action and logs the update."""
self.workflow.path_id = self.path_id
self.workflow.add_action(action)
action.write_code()
self.workflow.write_down()
self.logger.info(f"Updated workflow: {self.workflow}")
def add_answer(self, answer):
"""Adds the answers to the workflow and logs the update."""
self.answers.append(answer)
def agent_role_list(self):
return self.workflow.get_agent_role_list()
def to_dict(self):
return {
"task": self.task,
"url": self.url,
"file_name": self.file_name,
"file_extension": self.file_extension,
"answer": self.answer,
"workflow": self.workflow,
"workspace_path": self.workpath,
"env_exists": self.env_exists,
"env_name": self.env_name,
"supervisor": self.supervisor
}

View File

@ -0,0 +1,187 @@
import os
import json
import networkx as nx
import matplotlib.pyplot as plt
from agent.agent_info.actions import REASONING_ACTION_LIST, TOOL_ACTION_LIST, TERMINATION_ACTION_LIST
from model.model_config import model_registry
class Action:
def __init__(self, action:dict, result:dict, success:str, agent_role:str, agent_model:str):
self.action = action # format action, e.g., {"action": "", "parameters": ""}
self.result = result # action result, e.g., {"step_data": "", "answer": ""}
self.success = success # Success or Failure of the action
self.agent_role = agent_role # Role of the agent
self.agent_model = agent_model # Model of the agent
self.model_parameter = model_registry.get_model_size(agent_model) if model_registry.get_model_size(agent_model) else 0
self.cost = 0
self.tokens = 0
def to_dict(self):
return {
"agent": self.agent_role,
"action": self.action,
"cost": self.cost,
"tokens": self.tokens,
"model_size": self.model_parameter,
"result": self.result,
"success": self.success
}
def to_str(self):
return "Agent: {}\nAction: {}\nResult: {}\nSuccess: {}".format(self.agent_role, self.action, self.result, self.success)
def set_workpath(self, workpath:str):
self.workpath = workpath
def write_code(self):
if self.result.get("code") is None:
return
else:
path = os.path.join(self.workpath, "code_{}.py".format(self.path_id))
with open(path, 'w', encoding='utf-8') as file:
json.dump(self.workflow, file, ensure_ascii=False, indent=4)
file.close()
def set_cost(self, tokens:int):
self.cost = 2 * self.model_parameter * tokens
self.tokens = tokens
print("[Action Cost]: {}".format(self.cost))
class Workflow:
def __init__(self, path_id:int, workpath:str):
self.path_id: int = path_id
self.workpath: str = workpath
self.workflow: list = []
@property
def total_cost(self):
cost = 0
for a in self.workflow:
cost += a.cost
return cost
@property
def total_tokens(self):
tokens = 0
for a in self.workflow:
tokens += a.tokens
return tokens
def to_dict(self):
return [action.to_dict() for action in self.workflow]
def write_down(self):
path = os.path.join(self.workpath, "path_{}.jsonl".format(self.path_id))
with open(path, 'w', encoding='utf-8') as file:
json.dump(self.to_dict(), file, ensure_ascii=False, indent=4)
file.close()
def add_action(self, action:Action):
action.set_workpath(self.workpath)
self.workflow.append(action)
def get_agent_role_list(self):
agent_role_list = []
for action in self.workflow:
role = action.agent_role
agent_role_list.append(role)
return agent_role_list
@property
def language_state(self):
state = []
for index, action in enumerate(self.workflow):
step_str = "{}({}) - {} - {}".format(
action.action.get("action"),
action.action.get("parameter"),
action.result.get("step_data"),
action.result.get("answer")
)
state.append(step_str)
if len(state) == 0:
return "None"
return "\n".join(state)
@property
def state(self):
state = []
for action in self.workflow:
flag = 1 if action.success == "Success" else 0
state.append((action.agent_role, action.action.get("action"), flag))
if len(state) == 0:
return tuple([(None, None, -1)])
return tuple(state)
@property
def valid_code(self):
data = []
for action in self.workflow:
if action.success == "Success":
data.append(action.result.get("code"))
return data
@property
def all_actions(self):
data = []
for action in self.workflow:
data.append(action.action.get("action"))
return data
@property
def valid_actions(self):
data = []
for action in self.workflow:
if action.success == "Success":
data.append(action.action.get("action"))
return data
@property
def valid_results(self):
data = []
for action in self.workflow:
if action.success == "Success":
data.append("Result: {}".format(action.result.get("step_data")))
return data
@property
def valid_reasoning_results(self):
data = []
for action in self.workflow:
if action.action.get("action") in REASONING_ACTION_LIST and action.success == "Success":
data.append("Successful Action: {}\nResult: {}".format(action.action.get("action"), action.result.get("step_data")))
return data
@property
def valid_tool_results(self):
data = []
for action in self.workflow:
if action.action.get("action") not in REASONING_ACTION_LIST and action.success == "Success":
data.append("Successful Action: {}\nResult: {}".format(action.action.get("action"), action.result.get("step_data")))
return data
@property
def unvalid_tool_results(self):
data = []
for action in self.workflow:
if action.action.get("action") not in REASONING_ACTION_LIST and action.success == "Failure":
data.append("Successful Action: {}\nResult: {}".format(action.action.get("action"), action.result.get("step_data")))
return data
def visualize(self):
G = nx.MultiDiGraph()
node_colors = []
for i, w in enumerate(self.workflow):
G.add_node(i, label=w.action.get("action"), result=w.result, status=w.success)
node_colors.append("green" if w.success == "Success" else "red")
if i > 0:
G.add_edge(i-1, i)
pos = nx.kamada_kawai_layout(G)
labels = nx.get_node_attributes(G, 'label')
nx.draw(G, pos, with_labels=True, labels=labels, node_size=3000, font_size=10, arrows=True, node_color=node_colors)
path = os.path.join(self.workpath, "workflow_path_{}.png".format(self.path_id))
plt.savefig(path)
plt.clf()

View File

@ -0,0 +1,375 @@
import json
import yaml
import os
from tenacity import retry, stop_after_attempt, wait_exponential
import re
from copy import deepcopy
from tools.base.register import global_tool_registry
from tools.web_search import Web_Search
from tools.code_interpreter import CodeInterpreter
from tools.file_read import FileRead
from agent.agent import Agent
from agent.agent_info.global_info import GlobalInfo
from agent.agent_info.workflow import Action
from agent.agent_info.actions import REASONING_ACTION_LIST, TOOL_ACTION_LIST, TERMINATION_ACTION_LIST
from utils.file_utils import format_code_with_prints, extract_code_from_text, write_code, write_text, read_code
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
class Reasoning_Agent(Agent):
def __init__(self, role, role_prompt, index, model="gpt", actions=[], policy=None, global_info=None,initial_dialog_history=None) -> None:
super().__init__(role, role_prompt, index, model, actions, policy, global_info, initial_dialog_history)
def activate(self, global_info:GlobalInfo, initial_dialog_history=None):
if self._activated:
return
self._activated = True
system_step_data = global_info.workflow.valid_tool_results
prompt_filepath = "prompts/general/system_prompt.json"
with open(prompt_filepath, "r") as f:
system_prompt = json.load(f)
system_step_data = [self._compress_data(d) for d in system_step_data]
self.system_prompt = "\n".join(system_prompt['system_prompt']).format(self.role_prompt,
str(global_info.task.get("Question")),
str(system_step_data))
self.workspace_path = global_info.workpath
if initial_dialog_history is None or initial_dialog_history == []:
self.dialog_history = [{"role": "system", "content": self.system_prompt}]
else:
self.dialog_history = deepcopy(initial_dialog_history)
self.dialog_history[0] = {"role": "system", "content": self.system_prompt}
def deactivate(self):
self.initial_dialog_history = deepcopy(self.dialog_history)
self._activated = False
def _generate_action_prompt(self, global_info, previous_results, external_tools_enabled):
prompt_filepath = "prompts/general/action_decide.json"
with open(prompt_filepath, "r") as f:
select_prompt = json.load(f)
if external_tools_enabled:
query_prompt = "\n".join(select_prompt['action_query']).format(
global_info.workflow.valid_actions,
global_info.url,
global_info.file_name,
previous_results
)
else:
query_prompt = "\n".join(select_prompt['action_query_without_tools']).format(global_info.workflow.valid_actions, previous_results)
return query_prompt
def query_action(self, action, external_tools_enabled):
if external_tools_enabled:
results = self.action_collection.query(
query_texts=action,
n_results=1
)
else:
results = self.action_collection.query(
query_texts=action,
n_results=1,
where={"category": "reasoning"}
)
return results
def process_tool_parameters(self, results, global_info):
parameter = ""
parameter_type = results.get("metadatas")[0][0].get("input_type")
if "query" in parameter_type:
pass
elif "file" in parameter_type and global_info.file_name is not None:
parameter = global_info.file_name
elif "url" in parameter_type and global_info.url is not None:
parameter = global_info.url
if parameter is None:
parameter = ""
return parameter
def _compress_data(self, data):
if len(data) > 5000:
data = data[:5000]
return data
def _execute_action(self, format_action, global_info):
answer = ""
total_tokens = 0
print("\033[1;33mAgent {} Execute Action: {}\033[0m".format(self.role, format_action.get("action")))
code_generated_type = True if global_info.task.get("req")=="code" else False
text_generated_type = True if global_info.task.get("req")=="text" else False
if format_action.get("action") not in REASONING_ACTION_LIST and format_action.get("action") is not None:
flag, step_data = self._tool_operation(format_action, global_info)
step_data = self._compress_data(step_data)
print("\033[1;33m{} {}\033[0m".format(format_action.get("action"),"Success" if flag else "Failure"))
# for code generation task, correct step data as the result
if flag and code_generated_type:
if len(step_data) > 10:
code_path = write_code(self.workspace_path, step_data, global_info.code_path)
global_info.add_answer({"code_path": code_path, "code": step_data})
global_info.code_path = code_path
elif flag and text_generated_type:
# for text generation task, store valid step data directly as the answer
if len(step_data) > 10:
global_info.add_answer(step_data)
code_path = write_text(self.workspace_path, step_data, global_info.code_path)
global_info.add_answer({"code_path": code_path, "code": step_data})
global_info.code_path = code_path
# for code generation task, error code should get corrected
if flag or code_generated_type:
tool_result = {"role": "user", "content": "You have get results from {}: {}".format(format_action.get("action"), step_data)}
self.dialog_history.append(tool_result)
answer, total_tokens = self._answer_operation(global_info)
print("\033[1;33mAgent {} answered: {}\033[0m".format(self.role, answer))
if format_action.get("action") in REASONING_ACTION_LIST:
step_data, total_tokens = self._reasoning_operation(format_action, global_info)
flag = True
print("\033[1;33m{} {}\033[0m".format(format_action.get("action"),"Success" if flag else "Failure"))
if len(global_info.answers) > 0:
answer = global_info.answers[-1]
return step_data, answer, flag, total_tokens
def _build_current_action(self, format_action, flag=True, answer=None, step_data=None, tokens=0):
result = {
"step_data": step_data,
"answer": answer
}
current_action = Action(action=format_action, result=result,
success="Success" if flag else "Failure",
agent_role=self.role, agent_model=self.model)
if answer is None and step_data is None:
current_action.set_cost(tokens=0)
else:
current_action.set_cost(tokens=tokens)
return current_action
def take_action(self, global_info, external_tools_enabled=True, env=None, env_name=None):
logger = global_info.logger
total_tokens = 0
code_generated_type = True if global_info.task.get("req")=="code" else False
text_generated_type = True if global_info.task.get("req")=="text" else False
if self.actions[0] in TERMINATION_ACTION_LIST:
action_json = {"action": self.actions[0], "parameter": ""}
current_action = self._build_current_action(action_json, flag=True, answer=None, step_data=None)
terminated = True
return current_action, terminated
if self.actions[0] in TOOL_ACTION_LIST:
# only format the action json, without executing it
prompt_filepath = "prompts/general/actions_external_tools.jsonl"
prompt = ""
with open(prompt_filepath, 'r', encoding='utf-8') as f:
for line in f:
json_obj = json.loads(line)
if json_obj.get("action") == self.actions[0]:
prompt = json_obj.get("prompt")
break
if global_info.file_name is not None:
prompt = "You can access to file named {}.".format(global_info.file_name) + prompt
elif global_info.url is not None:
prompt = "You can access to the url {}.".format(global_info.url) + prompt
elif code_generated_type:
prompt = "Your previous code {}".format(read_code(global_info.code_path)) + prompt
elif text_generated_type:
prompt = "Your previous text {}".format(read_code(global_info.code_path)) + prompt
response, tokens = self._query(prompt)
total_tokens += tokens
action_json = self.json_format.json_reformat(response, global_config.get("max_json_reformat_turns"))
if not isinstance(action_json, dict):
action_json = {"action": self.actions[0], "parameter": ""}
else:
action_json["action"] = self.actions[0]
message = {"role": "assistant", "content": str(action_json)}
self.dialog_history[-1] = message
logger.info("[Action] {}\n".format(action_json))
elif self.actions[0] in REASONING_ACTION_LIST:
action_json = {"action": self.actions[0], "parameter": ""}
logger.info("[Action] {}\n".format(action_json))
step_data, answer, flag, tokens = self._execute_action(action_json, global_info)
total_tokens += tokens
current_action = self._build_current_action(action_json, flag, answer, step_data, total_tokens)
logger.info("-"*40)
terminated = False
self.deactivate()
return current_action, terminated
def _reasoning_operation(self, action, global_info) -> str:
logger = global_info.logger
prompt_filepath = "prompts/general/actions_reasoning.jsonl"
code_generated_type = True if global_info.task.get("req")=="code" else False
text_generated_type = True if global_info.task.get("req")=="text" else False
prompt = ""
with open(prompt_filepath, "r") as f:
for line in f:
json_obj = json.loads(line)
if json_obj.get("action") == action.get("action"):
prompt = json_obj.get("prompt")
break
if code_generated_type or text_generated_type:
query_prompt = prompt.format(read_code(global_info.code_path))
else:
query_prompt = prompt.format(global_info.workflow.valid_reasoning_results)
logger.info("[System Prompt] {}\n[Query] {}\n".format(self.system_prompt, query_prompt))
raw_response, total_tokens = self._query(query_prompt)
logger.info("[Reasoning]: "+ raw_response)
if code_generated_type:
answer = extract_code_from_text(raw_response)
logger.info("[Final Answer]: " + answer)
if len(answer) > 10:
code_path = write_code(self.workspace_path, answer, global_info.code_path)
global_info.add_answer(json.dumps({"code_path": code_path, "code": answer}, ensure_ascii=False))
global_info.code_path = code_path
reasoning_result = action.get("parameter") + raw_response
logger.info("[Reasoning Path]: " + reasoning_result)
return reasoning_result, total_tokens
elif text_generated_type:
regex_answer = r"FINAL ANSWER:([\s\S]*)"
matches = re.findall(regex_answer, raw_response)
if len(matches) > 0:
logger.info("[Final Answer]: "+matches[0])
code_path = write_text(self.workspace_path, matches[0], global_info.code_path)
global_info.add_answer(json.dumps({"code_path": code_path, "code": matches[0]}, ensure_ascii=False))
global_info.code_path = code_path
print("\033[1;33mAgent {} answered: {}\033[0m".format(self.role, matches[0]))
reasoning_result = action.get("parameter") + raw_response
logger.info("[Reasoning Path]: " + reasoning_result)
return reasoning_result, total_tokens
else:
regex_answer = r"FINAL ANSWER:([\s\S]*)"
matches = re.findall(regex_answer, raw_response)
if len(matches) > 0:
logger.info("[Final Answer]: "+matches[0])
global_info.add_answer(matches[0])
print("\033[1;33mAgent {} answered: {}\033[0m".format(self.role, matches[0]))
reasoning_result = action.get("parameter") + raw_response
logger.info("[Reasoning Path]: " + reasoning_result)
return reasoning_result, total_tokens
@retry(wait=wait_exponential(min=1, max=3), stop=stop_after_attempt(3))
def _answer_operation(self, global_info) -> str:
logger = global_info.logger
prompt_filepath = "prompts/general/answer_prompt.json"
code_generated_type = True if global_info.task.get("req")=="code" else False
text_generated_type = True if global_info.task.get("req")=="text" else False
with open(prompt_filepath, "r") as f:
select_prompt = json.load(f)
if global_info.task.get("type") == "MMLU" or global_info.task.get("type") == "MMLU-Pro":
query_prompt = "\n".join(select_prompt['MMLU_answer'])
elif global_info.task.get("type") == "GAIA":
query_prompt = "\n".join(select_prompt['GAIA_answer'])
elif global_info.task.get("type") == "GSM-Hard" or global_info.task.get("type") == "gsm-hard" or global_info.task.get("type") == "GSM8K":
query_prompt = "\n".join(select_prompt['gsm_answer'])
elif code_generated_type:
query_prompt = "\n".join(select_prompt['code_answer'])
elif text_generated_type:
query_prompt = "\n".join(select_prompt['text_answer'])
else:
query_prompt = "\n".join(select_prompt['answer'])
logger.info("[System Prompt] {}\n[Query] {}\n".format(self.system_prompt, query_prompt))
raw_response, total_tokens = self._query(query_prompt)
logger.info("[Format to Final Answer]: "+ raw_response)
if code_generated_type:
answer = extract_code_from_text(raw_response)
logger.info("[Final Answer]: " + answer)
if len(answer) > 10:
code_path = write_code(self.workspace_path, answer, global_info.code_path)
global_info.add_answer(json.dumps({"code_path": code_path, "code": answer}, ensure_ascii=False))
global_info.code_path = code_path
return answer, total_tokens
elif text_generated_type:
regex_answer = r"FINAL ANSWER: ([\s\S]*)"
matches = re.findall(regex_answer, raw_response)
if len(matches) > 0:
logger.info("[Final Answer]: "+matches[0])
code_path = write_text(self.workspace_path, matches[0], global_info.code_path)
global_info.add_answer(json.dumps({"code_path": code_path, "code": matches[0]}, ensure_ascii=False))
global_info.code_path = code_path
return matches[0], total_tokens
else:
return "", total_tokens
else:
regex_answer = r"FINAL ANSWER: ([\s\S]*)"
matches = re.findall(regex_answer, raw_response)
if len(matches) > 0:
logger.info("[Final Answer]: "+matches[0])
global_info.add_answer(matches[0])
return matches[0], total_tokens
else:
logger.info("[Error] No final answer found in the response: {}\n".format(raw_response))
return "", total_tokens
@retry(wait=wait_exponential(min=3, max=5), stop=stop_after_attempt(2))
def _query(self, query) -> str:
prompt = {"role": "user", "content": str(query)}
if self.dialog_history[-1] != prompt and self.dialog_history[-1]['role'] != 'user':
self.dialog_history.append(prompt)
elif self.dialog_history[-1] != prompt and self.dialog_history[-1]['role'] == 'user':
self.dialog_history[-1]['content'] += str(query)
self.last_prompt = prompt['content']
messages = list(self.dialog_history)
response = self.query_func(messages)
message = {"role": "assistant", "content": str(response)}
self.dialog_history.append(dict(message))
return response
def _tool_operation(self, action:json, global_info) ->str:
logger = global_info.logger
name = action.get("action")
parameter = action.get("parameter")
logger.info("[Action Execution] {}({})\n".format(name, parameter))
if 1:
if name == "read_file":
file_path = os.path.join(self.root_file_path, str(parameter))
flag, step_data = global_tool_registry.execute_tool(name, file_path=file_path, file_extension=global_info.file_extension)
logger.info("[Read File] {}: {}".format(("Success"if flag else "Failure"), step_data))
elif name == "run_python":
if global_info.task.get("type") != "SRDD" or global_info.task.get("type") != "human-eval":
parameter = format_code_with_prints(parameter)
timeout_detected = True
else:
timeout_detected = False
if global_info.file_name is not None:
file_path = os.path.join(self.root_file_path, global_info.file_name )
else:
file_path = ""
flag, step_data = global_tool_registry.execute_tool(name, work_path=self.workspace_path, code=parameter, file_path=file_path, timeout_detected=timeout_detected)
logger.info("[Run Python] {}: {}".format(("Success"if flag else "Failure"), step_data))
else:
flag, step_data = global_tool_registry.execute_tool(name, query=parameter, work_path=self.workspace_path)
logger.info("[Web Broswing] {}: {}".format(("Success"if flag else "Failure"), step_data))
return flag, step_data
else:
logger.info("Tool {} not registered for agent {}".format(name, self.role))
print("Tool {} not registered for agent {}".format(name, self.role))
return None, None
def _interaction_operation(self, code, env, global_info) -> str:
pass

View File

@ -0,0 +1,83 @@
from typing import Any
from agent.reasoning_agent import Reasoning_Agent
from utils.file_utils import iter_jsonl
class AgentRegister:
def __init__(self):
self.agents = {}
self.unique_agents = {}
def _register_agent(self, name, agent):
if agent.hash in self.unique_agents:
return
self.agents[name] = agent
self.unique_agents[agent.hash] = agent
def __call__(self, *args: Any, **kwds: Any):
def decorator(cls):
agent = cls(*args, **kwds)
self._register_agent(agent.role, agent)
return cls
return decorator
@property
def agent_config(self):
return self._agent_personas
@property
def agent_num(self):
return len(self.unique_agents)
@property
def agent_names(self):
return self.agents.keys()
@property
def agent_identifiers(self):
return self.unique_agents.keys()
def get_agent_from_name(self, name):
return self.agents.get(name)
def get_agent_from_idx(self, idx):
return self.unique_agents.get(idx)
def create_agent(self, name):
agent = self.get_agent_from_name(name).reinitialize()
if agent.hash in self.unique_agents:
raise ValueError(f"Agent {name} with hash {agent.hash} already registered")
self.unique_agents[agent.hash] = agent
if agent is None:
raise ValueError(f"Agent {name} not registered")
return agent
def register_all_agents(self, personas_path):
self._agent_personas = list(iter_jsonl(personas_path))
self._total_agent_num = len(self._agent_personas)
for index in range(self._total_agent_num):
self._initialize_agent(index)
def reset_all_agents(self):
for agent in self.unique_agents.values():
agent.reset()
def _initialize_agent(self, index):
agent_role_name = self._agent_personas[index].get("name")
agent_role_prompt = self._agent_personas[index].get("role_prompt")
agent_model_type = self._agent_personas[index].get("model_type", None)
agent_actions = self._agent_personas[index].get("actions", None)
agent_policy = self._agent_personas[index].get("policy", None)
if self._agent_personas[index].get("agent_type") == "reasoning":
agent = Reasoning_Agent(role=agent_role_name,
role_prompt=agent_role_prompt,
index=index,
model=agent_model_type,
actions=agent_actions,
policy=agent_policy)
self._register_agent(agent_role_name, agent)
def __getattribute__(self, name: str) -> Any:
return super().__getattribute__(name)
agent_global_registry = AgentRegister()

View File

@ -0,0 +1,19 @@
logging:
level: # logging level, e.g., INFO
logpath: # the path to the folder for storing log files
model_weight_path: # the path to the folder containing model weights of the puppeteer model base
api_keys:
openai_api_key: # openai api key
openai_base_url: # openai base url, e.g., https://api.openai.com/v1/
bing_api_key: # bing api key for web search
max_retry_times: # maximum number of times to retry API calls
max_json_reformat_turns: # maximum number of times to retry JSON parsing/reformatting
external_tools_enabled: True # whether to enable external tools like web search, file search, etc.
file_path:
root_file_path: # the path to the folder containing all the files which agents might need
graph:
max_parallel_paths: # maximum number of parallel paths to explore in the graph
max_step_num: # maximum number of steps (nodes) in each path

View File

@ -0,0 +1,56 @@
{
"device": {
"type": "cuda"
},
"paths": {
"checkpoint_path": "checkpoint/MMLU-Pro_test",
"model_path": "checkpoint/gsm-hard_validation/policy_net_20250414_105845.pt"
},
"training": {
"loading": false,
"training": true,
"learning_rate": 0.0001,
"sample_size": 1,
"gamma": 0.99,
"lambda_kl_loss": 0.0
},
"agent": {
"max_num_agents": 3,
"next_num_agents": 3,
"max_path": 6,
"threshold": 0.5,
"reward_factors": {
"default": -1.0,
"terminator": 0.5,
"web_search": -1.5
}
},
"llm": {
"prior": false,
"prior_redistribution": false,
"redistribution_weight": 0.1
},
"cost": {
"scale": 0.1,
"growth_rate": 1.0,
"inverse": false
},
"visualization": {
"base_path": "results",
"dpi": 300,
"figure_sizes": {
"policy_loss": [
12,
6
],
"entropy": [
10,
6
],
"action_probs": [
20,
10
]
}
}
}

View File

@ -0,0 +1,200 @@
{"concepts":["vest","snow","remove","silhouette","dunk","step","item","toddler","plaster","wall","piece","tool","circle","pull","wrap","headphone","deck","suit","press","mother","pit","cupcake","priest","rink","counter","rest","piano","wine","serve","winter"]}
{"concepts":["push","club","scramble","goat","sausage","wrap","chest","fix","stop","tattoo","plaster","tube","report","scooter","bird","pan","flip","animal","flute","tennis","concrete","boat","bed","phone","tooth","house","bartender"]}
{"concepts":["fruit","motorcycle","perform","jacket","vehicle","place","mat","walk","world","area","kiss","mother","pass","report","club","axis","tricep","patient","listen","owner","uniform","floor","hamburger","use","wine","cross","bull","sell","lawn","friend"]}
{"concepts":["shirt","dunk","bang","soap","court","float","swimmer","coat","sleigh","groom","station","pitcher","artist","enjoy","headphone","clear","drill","street","pig","bathroom","clap","orange","television","ride","treadmill","brick","policeman","building"]}
{"concepts":["individual","oil","item","sweep","mow","rink","hill","gear","leash","arrow","weight","yolk","stage","motorcycle","groom","ocean","fly","doctor","smash","newspaper","page","toe","employee","goat","smoke","wrap","mower"]}
{"concepts":["oxidation","cup","mower","space","window","arch","pineapple","spatula","smoke","tie","gun","insert","boat","use","tomato","chest","vest","bartender","move","microphone","axis"]}
{"concepts":["male","word","gear","dish","day","stove","home","ski","eyelash","pile","lawn","counter","toe","bike","cardboard","technique","goldfish","baseball","pitch","sink"]}
{"concepts":["drive","vest","performer","edge","wand","check","hat","clap","tooth","brush","box","chainsaw","engage","wheelbarrow","ear","sleep","sword","wetsuit","hose","trash","oil","fire","dry","card","bathroom","technique","tennis"]}
{"concepts":["goalie","stare","stage","team","piano","bike","shake","teammate","dinner","foot","garment","jersey","liquid","beach","audience","shine","wave","gas","winter","diver","curl","dig","razor"]}
{"concepts":["enjoy","faucet","headphone","eat","curl","demonstrate","surfboard","bat","rodeo","chainsaw","vault","technique","mortar","instruction","chisel","gong","roll","side","skater","spark"]}
{"concepts":["scooter","driveway","line","needle","card","treat","raft","shirt","vegetable","cereal","clip","cain","outfit","park","pair","javelin","pose","stuff","distance","jacket","spark"]}
{"concepts":["chip","lace","wrap","begin","bat","soap","crack","beverage","lie","stroll","outfit","horse","car","color","forest","suit","candle","costume","skater","chainsaw","wash","break","drill","block","hoop","court","presentation","carve"]}
{"concepts":["bowl","bridesmaid","card","beat","lens","screen","walk","orange","buck","stone","spark","swallow","suck","check","rope","shuffle","button","park","pet","singe","pancake","horse","arrow","flight"]}
{"concepts":["goldfish","microphone","pancake","squeegee","portrait","clipper","engage","spaghetti","faucet","glass","end","book","climb","present","sweep","laugh","point","bride","soldier","student","serve","smooth","yolk","sharpener","music","clear","bank","clothe"]}
{"concepts":["rest","singe","bench","oil","policeman","racket","team","basketball","alley","chase","clear","shot","sail","crosswalk","lasso","cake","backseat","fish","indoor","break","mallet","mortar","door","engage","dunk","burn","talk","microphone","shake","wear"]}
{"concepts":["drum","clip","window","cover","graffiti","wave","vomit","hurdle","dye","sidewalk","animal","slice","swing","pottery","pineapple","fix","decorate","tire","tag","bicycle","watermelon","sign","mask","plant"]}
{"concepts":["animal","pit","tourist","arch","shoe","tube","chair","card","lay","store","knit","sheep","trim","kid","camel","cardboard","remove","part","game","cart","hockey","rink","sail","vest","beverage","examine","market","tool","branch","burn"]}
{"concepts":["house","surgery","turn","singer","harness","hair","patient","bride","attire","bicycle","insert","crack","air","blanket","needle","grill","skater","pump","stone","candle","listen","put","truck","snow","type","toddler","solve","collar","rodeo"]}
{"concepts":["use","goat","wine","frisbee","leap","pole","tell","pencil","spin","birdie","catcher","fence","world","step","chop","sword","march","stage","axe","bat","place","roller","tomato"]}
{"concepts":["lift","fill","boat","bathtub","whistle","slice","bank","day","lap","oil","pole","stuff","mower","pin","flag","shirt","singe","discuss","pond","grape","oxidation","gong","spark","receive","store","lot","spray","ramp"]}
{"concepts":["plant","word","remove","toddler","hamburger","potato","set","line","place","control","eat","banner","singe","hurdle","bed","bar","alley","stove","top","air","cowboy","sharpen","pair","beat","arena","wave","singer","jacket"]}
{"concepts":["control","court","help","pop","wing","lean","peeler","continue","attempt","baseball","band","goal","bicycle","turn","rope","dry","bubble","cliff","cheer","part","glass","break","grape","roll","shuffle","bridge"]}
{"concepts":["fall","run","prevent","hurdle","gym","soldier","accept","suit","end","deal","cow","jersey","counter","food","snow","fold","dunk","wetsuit","oyster","shot","balloon","cream","leg","score","trick","orange","beat","direct","carry","lift"]}
{"concepts":["surgery","treadmill","coat","rider","swing","scissor","groom","sander","castle","keyboard","pierce","horse","sleigh","crack","hookah","dryer","meat","graffiti","goldfish","lake","spaghetti","bronco","lie"]}
{"concepts":["word","build","celebrate","attempt","win","calf","sock","wetsuit","family","stunt","garment","kid","lay","restaurant","hang","chef","bench","silhouette","board","roast","goalie","bridesmaid","end","decorate","accept","pen","color","portrait","ocean"]}
{"concepts":["stand","chew","ground","toe","time","vault","liquid","cream","wool","eat","button","fireplace","braid","prepare","stair","dealer","gutter","arrow","castle","batter","score","report"]}
{"concepts":["scramble","give","eyelash","carve","serve","hill","pet","drape","unicycle","skip","winter","ceremony","groom","roof","discuss","chainsaw","dive","gutter","tattoo","lumber","fill","speech","teach","trash","raft","singer","sun"]}
{"concepts":["priest","view","path","sand","worker","shoot","building","weight","number","shift","volleyball","horse","violin","court","puff","bathroom","bowl","sharpen","award","club","flap","jump","student","tell","drive","technique","surf","catcher"]}
{"concepts":["piano","engage","car","cast","hold","garment","spin","batter","lasso","toe","bench","football","roll","rodeo","gun","sand","press","cheer","middle","runner","event","dough","pond"]}
{"concepts":["pet","orange","gun","bang","crash","frisbee","laugh","suspend","stare","score","help","traffic","field","add","counter","beverage","path","examine","employee","side","plaster","beat","peeler","face","metal","foot","snake","harness","hammer"]}
{"concepts":["weight","walk","owner","engage","mallet","bull","bath","concert","dealer","ladder","home","shovel","motorcycle","tricep","concrete","dig","toe","move","sander","range","explain","barber","float","wing","hill","lady","cheer","discuss"]}
{"concepts":["street","comb","sofa","bikini","bat","towel","water","pierce","present","sle","lumber","court","tattoo","dive","carry","pitcher","volleyball","ceremony","ride","uniform","iron","chest","juice","mower","wine","separate","screen","sunglass","teammate"]}
{"concepts":["brush","land","bar","skater","tourist","track","hat","examine","officer","cain","routine","seat","lane","performance","suit","snowmobile","tank","speech","paper","water","collect","hurdle","follow"]}
{"concepts":["roll","pineapple","goldfish","pan","scissor","equipment","stroll","eat","give","course","axis","peel","collect","arch","stand","foot","tie","march","clear","climb","pottery","thread","room","knife","jacket"]}
{"concepts":["swimsuit","lady","lay","teach","push","nozzle","drop","saw","backseat","dock","swallow","target","spoon","type","gauge","trampoline","lot","clean","football","canoe","mat","vegetable","pineapple"]}
{"concepts":["sausage","football","joke","seat","blanket","concert","customer","talk","teacher","beer","kite","chest","bounce","shoe","jump","head","cellphone","map","drill","throw","fish","gong","leg","beam"]}
{"concepts":["light","market","cake","band","brush","distance","come","cymbal","fork","pond","mower","fruit","punch","music","report","towel","lie","umbrella","shine","hedge"]}
{"concepts":["lean","pump","stir","sle","clothing","blower","podium","ski","bathroom","animal","blow","student","cook","wand","background","house","purse","presentation","cowboy","towel","front","point","tell","dance"]}
{"concepts":["draw","body","time","saw","course","axis","curl","plant","massage","animal","ski","push","walk","tractor","scissor","gun","eye","jersey","bull","market"]}
{"concepts":["bottle","sell","stir","trash","edge","spaghetti","lean","body","instructor","bed","space","front","band","middle","pair","decorate","dish","technique","doctor","shake","portrait","ice","bride","kick"]}
{"concepts":["flip","short","suspend","smash","enjoy","playground","score","plate","audience","deal","class","gong","cast","down","tell","marker","car","part","club","night","carriage","discuss","tricep","throw","jean"]}
{"concepts":["sheep","flash","hamburger","tricycle","arm","golfer","collar","ground","song","body","worker","drill","stroll","dribble","bowl","ball","television","jump","sink","slice","book","demonstrate","spray","smooth","button","soap"]}
{"concepts":["vegetable","chess","keyboard","motorcycle","sun","sip","ski","part","shoe","mask","use","hose","racket","paint","shake","wheel","hang","hookah","dive","hole","lawn","wand","base","calf","crash","game"]}
{"concepts":["house","wool","suit","jersey","cellphone","shift","rip","bull","pressure","sip","rest","dance","fly","cart","roof","braid","way","mouth","water","dryer"]}
{"concepts":["log","chew","waterfall","pitch","squeegee","leash","coin","place","separate","cream","chest","stove","use","presentation","saw","net","eyelash","plate","rope","sword","ladder","market","egg","hill","lady","flag","solve","house"]}
{"concepts":["bow","balloon","cliff","wait","sausage","bench","object","chew","massage","treat","flute","work","mow","roll","cart","cup","crowd","hurdle","car","light","song","dribble","football","garage","pull","bathtub","backseat","board"]}
{"concepts":["look","dryer","forest","male","sew","puzzle","bubble","stone","purse","teacher","oil","beach","banner","pool","yard","piano","portrait","axe","playground","clean","report"]}
{"concepts":["chef","orange","ceremony","consist","branch","lock","engage","end","razor","face","paint","target","leash","stretch","pin","outfit","gym","country","backseat","shoulder","costume","ground","vomit","dive","fold"]}
{"concepts":["artist","bartender","bird","peace","line","dress","dunk","goldfish","collar","snake","insert","dice","dog","down","tooth","sle","leap","soldier","cross","hat","close","yolk","individual","draw","shift"]}
{"concepts":["receive","concrete","stool","air","sail","cheer","paint","help","unicycle","microphone","fish","part","flag","plate","vomit","pile","toothbrush","room","short","dryer","gauge","counter","direct","way","swing","candle","cup","object","bird","sit"]}
{"concepts":["middle","arena","style","driveway","policeman","silhouette","cover","snake","beam","mow","ingredient","pond","equipment","key","goal","gas","dry","give","groom","competition","spark","pump","short","slope"]}
{"concepts":["drink","rock","vendor","scooter","employee","cain","bride","top","kite","alligator","rope","patient","box","racket","stretch","arena","sharpen","pet","braid","down","dart","mortar","technique","couch","goldfish","rain","roof","hoop","skate","mix"]}
{"concepts":["leash","beach","gun","space","beverage","wound","punch","wall","teach","playground","field","sleigh","couch","stretch","mitt","trampoline","animal","boat","juggle","bowl","event","deal","camel","arrow"]}
{"concepts":["joke","feed","frisbee","glass","massage","toss","snow","soldier","rink","puff","sle","tank","bull","desert","tricep","pierce","design","gauge","lunch","cowboy","player","lace","mat","hit","band","table"]}
{"concepts":["scooter","end","indoor","number","mascara","axis","owner","hamburger","pottery","beat","color","serve","plate","guitarist","stone","cream","bubble","blower","ladder","wing","wool"]}
{"concepts":["duck","room","owner","suck","shoulder","fry","birthday","performer","blower","beam","cigarette","eye","basketball","bird","park","yard","separate","spark","drummer","bowl","smash","worker","ride","river"]}
{"concepts":["drop","audience","horse","sheep","water","ear","sharpener","pick","engage","wave","podium","swim","pumpkin","mother","axe","surgery","mower","presentation","lay","log","trim","color","short","pop","page","blow","suspend","racket","forest","attire"]}
{"concepts":["shine","sink","raise","pop","oil","surf","football","remove","flap","look","cellphone","equipment","liquid","officer","chip","motorcycle","read","gymnast","stuff","dinner","bag","tank","athlete"]}
{"concepts":["guitarist","driveway","restaurant","peeler","crosswalk","building","hold","park","beer","puck","celebrate","beam","frisbee","seat","leave","shoulder","fish","portrait","cliff","cat"]}
{"concepts":["pig","football","silhouette","sing","gym","carriage","attempt","leave","pitcher","cup","drop","goldfish","snowmobile","axis","pit","tag","turn","blower","wine","gun","bubble","barber"]}
{"concepts":["tie","stop","turn","dinner","world","protect","pose","axis","stage","braid","tennis","weld","skateboard","hockey","surfer","stool","peace","toe","plant","bunch","hand","smash","brush","suit","kick","yarn","mow","swimsuit"]}
{"concepts":["trim","screen","stir","swim","compete","golfer","hamburger","drummer","treat","needle","surfer","card","tie","shoe","paint","metal","stare","give","dye","perform","stove","ride","stone","graphic","mix"]}
{"concepts":["frisbee","catcher","screen","cigarette","gun","bat","customer","punch","hedge","rink","lasso","gum","glass","building","sleigh","suspend","lawn","smooth","ground","use","dive","dice","roof"]}
{"concepts":["pose","roller","scramble","night","individual","snake","smooth","drill","place","bottle","piece","gym","hold","smash","leash","goldfish","stop","washer","serve","axis","bath","microphone","burn","stare"]}
{"concepts":["rub","mower","fall","surf","fix","microphone","lock","ingredient","playground","student","decorate","lie","walk","toothbrush","chainsaw","dry","part","cream","roof","stop","snowball","cellphone","flag","turn","goggle"]}
{"concepts":["slope","gymnast","buck","shore","graffiti","hair","mat","winter","mitt","pitch","mirror","diver","clothe","peeler","bathroom","rock","stool","area","puff","climb","close","leash","turn","down"]}
{"concepts":["report","wheelbarrow","country","gas","dish","lake","lumber","needle","sell","field","remove","donut","discuss","drive","concrete","roast","skateboard","score","hedge","customer","speech","base"]}
{"concepts":["beard","area","gun","speech","audience","pitcher","friend","lock","birdie","room","motorcycle","ball","scrub","home","water","discuss","push","drill","pedestrian","tooth","policeman","cow","equipment","pose","tank","sheep","jog","broom","wax","gym"]}
{"concepts":["goldfish","wax","dock","door","roller","guitarist","basketball","surgery","birthday","blanket","dress","runner","tractor","background","cupcake","toe","treadmill","beverage","put","vendor","doctor"]}
{"concepts":["sand","rope","chip","drink","sharpener","building","tricep","sing","comb","employee","clear","female","board","faucet","teacher","part","razor","slide","silhouette","race","artist","give","stove","fish","cow","shift","dinner"]}
{"concepts":["pancake","event","goal","broom","basketball","stretch","athlete","point","stove","direct","soap","lap","wave","swimsuit","harness","family","object","car","chess","cliff"]}
{"concepts":["officer","rain","female","prepare","towel","blanket","sign","burger","fall","build","cain","fetch","bride","garment","dig","shake","instructor","pedestrian","music","sle","calf","set","say","saw","helmet","cowboy","beach","fill","ice"]}
{"concepts":["razor","paddle","rinse","pig","golfer","baton","cymbal","slope","crowd","run","jump","teach","graphic","skip","lotion","sheep","student","attach","hurdle","wound"]}
{"concepts":["cow","treadmill","wheel","worker","tongue","guitar","spaghetti","lot","fall","lake","washer","building","axe","singer","hamburger","course","swimsuit","station","shore","suit","cain","card","squeegee","motorcycle","grass"]}
{"concepts":["top","dice","vault","bath","chainsaw","jacket","punch","rock","stone","roast","arena","pass","stove","technique","swimsuit","shore","hit","walk","tattoo","lock","racket","drop","washer","pressure","listen"]}
{"concepts":["pool","sing","trash","tie","shake","flute","treat","patient","wall","word","surfboard","shoulder","oyster","talk","chip","massage","clean","bench","top","teacher","dock","cigarette"]}
{"concepts":["chisel","kite","shift","tire","leap","garage","fire","white","cardboard","spin","tag","ocean","short","surf","tomato","wash","feed","wave","blanket","routine","exercise","bed","talk","shoot","arm","roof","family","read","food","scrub"]}
{"concepts":["pencil","towel","door","priest","cream","makeup","policeman","garage","bike","birthday","crowd","student","peace","practice","whistle","wire","blower","burger","piano","accept","dress"]}
{"concepts":["pole","goal","mouth","castle","item","cart","teacher","chip","wing","rider","kiss","drive","runner","stop","lotion","attempt","guitarist","lawn","fire","card","goggle","body"]}
{"concepts":["peeler","hookah","newspaper","bench","serve","pile","sun","chase","desert","goggle","brick","drop","thread","raise","pot","prevent","sit","land","orange","sew","dance","hedge","spray"]}
{"concepts":["dancer","hat","chisel","fork","toy","control","lumber","brush","pedestrian","spoon","faucet","look","eye","stove","wave","batter","carve","tell","flute","tricep","helmet","bike","mountain","pet","raise","watch","flag","pipe","graphic"]}
{"concepts":["dye","follow","drape","solve","meat","store","give","unicycle","beach","building","down","station","pierce","castle","beer","razor","volleyball","number","desert","rest","chase","dribble","bartender","butter","flute","spin","harness","clear","cow","prevent"]}
{"concepts":["wear","roll","mountain","dribble","paddle","waterfall","bridesmaid","watch","newspaper","treadmill","bathtub","roof","bed","cigarette","scrub","lift","sock","board","crowd","pose","toy","key","paper","close","marker","use"]}
{"concepts":["guitarist","toothbrush","tooth","turn","cupcake","dice","tool","bartender","flash","blanket","kiss","puff","blow","arena","collect","win","skip","watermelon","celebrate","costume","room"]}
{"concepts":["roller","egg","donut","guitarist","dancer","athlete","move","silhouette","pile","pancake","purse","team","kid","continue","set","read","fill","goat","hold","individual","cat","slope","shake","tree","football","hand","birthday"]}
{"concepts":["couple","motorcycle","chew","clap","duck","tag","pressure","fill","family","owner","feed","shoulder","broom","customer","rock","gather","yard","singe","piano","bat","crack","cellphone","suspend","shuffle","clipper","brick","keyboard","helmet","compete","rip"]}
{"concepts":["violin","country","win","vault","drum","bridesmaid","ground","seat","spin","drummer","number","fish","dinner","fetch","goalie","fry","circle","insert","pen","graffiti","hammer","base","map","stroll"]}
{"concepts":["machine","alley","microphone","routine","base","design","joke","pop","motorcycle","squeegee","practice","hookah","dart","item","candle","doctor","suspend","spoon","wave","cube","event","raft","game","guitar","drum","bikini","pig","top"]}
{"concepts":["watermelon","word","beer","look","treat","puzzle","sit","fish","ear","bronco","river","family","fireplace","smoke","teammate","pan","sleep","game","swimmer","plate","volleyball","guitar","shovel","garage","mirror","bed"]}
{"concepts":["toe","band","hoop","cardboard","comb","base","jersey","contact","marker","run","tooth","short","potato","tricep","game","silhouette","ear","ski","slide","athlete","clean","suit","garage","tomato","jog","path","lap","pressure"]}
{"concepts":["driveway","sing","bar","tag","award","sand","harness","dive","student","mat","tomato","gong","oil","barber","bicycle","object","fork","lock","pile","hold","close","employee"]}
{"concepts":["area","river","gauge","way","dunk","station","continue","umbrella","axis","shot","front","stir","microphone","board","braid","lady","pierce","mask","chest","dry","sew","skateboard","orange","wire","range","flag","stage","trim"]}
{"concepts":["prevent","compete","home","wire","land","fireplace","drummer","use","family","garment","wound","scooter","horse","machine","direct","wing","white","distance","goat","puff","perform","tie","skateboard","spark","sip"]}
{"concepts":["raft","sip","hair","close","pocket","stretch","catch","pit","chase","hat","stop","yolk","dancer","apple","horse","tire","carry","drop","clip","deal","tell","bridesmaid","boat","cover","frisbee","line","sidewalk","animal"]}
{"concepts":["pair","couch","mat","drill","shoe","present","compete","trick","performer","pineapple","body","fish","crowd","build","towel","stroll","juice","banner","smoke","oyster","come","sleep","scooter","view","bike"]}
{"concepts":["cymbal","couch","sew","counter","cupcake","fly","puck","dish","cast","mat","pole","rock","goalie","sheep","bunch","marker","dryer","market","pond","prevent","lace","puff","toilet","wire","listen"]}
{"concepts":["apply","clothing","vomit","gymnast","tattoo","stove","fly","wheelbarrow","ladder","lot","cake","wetsuit","performance","event","bathtub","cereal","page","fork","yolk","hair","barbell","coat","leg","priest","drop","bunch"]}
{"concepts":["treadmill","laugh","perform","world","tie","shoe","yolk","tap","dog","pedestrian","leg","soldier","report","surgery","drink","side","headphone","cube","design","page","waterfall","drive","kite","singe"]}
{"concepts":["light","truck","examine","mask","treat","rain","drum","toss","shuffle","guitarist","prepare","cart","ocean","marker","podium","rink","pitcher","worker","head","look","washer","singe","background","concrete","wheelbarrow","instrument"]}
{"concepts":["needle","wall","tie","drink","building","pool","animal","sle","lady","jacket","sweep","bat","cymbal","eyelash","jersey","umbrella","body","vehicle","podium","room","tourist","bronco","tap","door","grill","canoe","paint","hookah"]}
{"concepts":["wall","wear","court","mitt","flag","dress","machine","donut","paint","burger","tooth","contact","part","treadmill","runner","pet","beer","vault","butter","hill","peel","compete","castle","decorate","rock","stuff","wing","piece","swim"]}
{"concepts":["side","horse","rodeo","goldfish","punch","joke","mother","surf","backseat","protect","eat","watermelon","mouth","drink","rock","rider","stroll","microphone","skater","meal","officer","flight","barber"]}
{"concepts":["paint","flash","bridesmaid","cain","block","cut","car","customer","draw","highchair","swimsuit","instruction","hit","down","beard","sander","dribble","field","song","clipper","area","sail","eyelash"]}
{"concepts":["skip","swing","accept","talk","bowl","tomato","wear","pedestrian","doctor","sidewalk","build","space","roof","march","glove","eat","listen","lens","crosswalk","bull","skate","orange","clipper","trampoline","toddler","award","arena","rub","playground"]}
{"concepts":["down","oyster","pile","continue","hockey","prevent","puff","bikini","drill","rodeo","weld","knife","lace","flight","sink","distance","scramble","bank","chew","birthday","egg","use","golfer"]}
{"concepts":["attach","instruction","cube","spatula","bank","clip","picture","pool","birthday","prepare","graffiti","come","cheer","edge","goat","continue","beam","garage","country","crash","room","rip","pile","arena"]}
{"concepts":["athlete","tank","groom","meat","head","toe","bowl","pet","pond","bike","braid","wine","hose","toss","indoor","control","press","garment","smoke","mower","student","look","egg","fire","country"]}
{"concepts":["roast","sand","vest","prepare","stick","pit","road","cube","boat","lotion","hoop","puff","ramp","part","player","smoke","pick","watermelon","yolk","tourist","smash","wax","win","mirror","axis"]}
{"concepts":["read","braid","dryer","sand","check","lake","soap","deal","hair","instruction","style","policeman","receive","knit","equipment","press","laugh","shot","mat","card","pan","design"]}
{"concepts":["consist","stir","engage","mouth","headphone","goal","cowboy","dice","number","tomato","shirt","cain","dealer","peace","leap","chip","bow","traffic","portrait","cross","walk"]}
{"concepts":["meat","clip","jean","come","day","indoor","catch","window","singer","javelin","step","knit","pocket","eyelash","wheelie","cereal","word","eye","toothbrush","pour","court","gas","garment","birdie","contact","stone","separate","wait"]}
{"concepts":["headphone","blow","serve","athlete","raise","hat","stair","hand","pass","point","alligator","egg","ice","deal","country","apple","dunk","piece","store","puck","instructor"]}
{"concepts":["technique","suck","artist","lace","bag","area","massage","box","score","wire","perform","ice","deal","flash","candle","gauge","individual","blow","restaurant","tank","protect","alley","silhouette"]}
{"concepts":["sleigh","race","drape","down","chef","fill","hockey","screen","song","newspaper","pan","building","team","cup","nozzle","wire","beat","fork","cowboy","market"]}
{"concepts":["washer","bounce","indoor","roll","hat","jersey","alley","dig","sew","wheelie","check","compete","truck","fork","sleigh","gather","sidewalk","mascara","side","stool","mask","eat","paddle","tooth"]}
{"concepts":["owner","tube","thread","teach","waterfall","shoe","street","bow","cereal","tag","skip","bride","shot","chess","plate","team","lake","suck","beer","bus","solve","slide","forest","bathroom","spark","remove","rip"]}
{"concepts":["side","castle","flip","country","clothe","field","wand","house","cliff","saw","report","fireplace","clean","lunch","bird","hair","bank","picture","read","close","outfit","male","teacher","pen","mask","owner","skateboard"]}
{"concepts":["mat","cake","tank","remove","dress","raise","bank","pineapple","arrow","rope","bar","beard","arena","toe","shave","golfer","gas","court","present","sofa","brick","stroll","bubble","station","award","skate"]}
{"concepts":["instruction","river","clothing","seat","birdie","jog","pipe","give","bridge","lace","bar","unicycle","tourist","bounce","button","block","pedestrian","alligator","purse","lumber","apple","scramble","truck","rock","hole","stand","faucet"]}
{"concepts":["sail","press","pit","balloon","bike","stir","lock","joke","air","apply","way","sword","ceremony","raise","scramble","pour","game","collar","concert","performance","gather","male"]}
{"concepts":["thread","goal","performance","garage","sausage","playground","turn","type","swing","yarn","pedestrian","market","hat","jacket","soldier","shore","jump","needle","use","field","tooth","bike","continue","concrete"]}
{"concepts":["water","beat","punch","alley","face","portrait","clip","piano","headphone","suit","continue","court","bow","house","officer","artist","kid","sail","course","friend","skateboarder","brick","pin","basketball","bathtub","snake","weld","fire","donut"]}
{"concepts":["bathroom","paper","clap","wetsuit","peel","fold","purse","face","pepper","wear","sand","smooth","duck","type","television","raise","podium","plate","sew","leave","yard","broom","vomit"]}
{"concepts":["sand","hat","clap","gutter","help","color","bathtub","direct","clothing","lace","front","teach","tooth","dish","equipment","contact","makeup","outfit","scrub","draw","beverage","chest"]}
{"concepts":["gum","spaghetti","field","sharpen","deck","chair","roof","towel","seat","world","kitchen","pineapple","traffic","kick","clipper","smoke","carriage","blow","report","brush","fruit","arena","sweep","goalie","fish","skateboarder","street","pressure","routine"]}
{"concepts":["pressure","clothing","gutter","bronco","spoon","swing","smash","rider","hair","fry","picture","float","plaster","mortar","circle","toy","line","chip","sleigh","track","snowball","wool","blanket","suspend","cut","needle","wave","suit","drop","ice"]}
{"concepts":["oxidation","concert","dance","protect","mascara","fall","barbell","lake","side","object","restaurant","bridge","station","report","item","class","peeler","street","table","dress","desert","dough","move","separate","flap"]}
{"concepts":["lady","broom","spoon","arch","sweep","tie","ski","song","point","apply","shave","chase","swing","number","band","scramble","policeman","solve","pump","report","golfer","board","rinse","turn"]}
{"concepts":["point","coin","alley","flash","skip","open","dart","beer","singe","put","yard","wire","dance","drape","gym","seat","score","side","knock","washer","pepper","catch","patient","cellphone","sand","spin","path","tool","dice","insert"]}
{"concepts":["priest","guitarist","shift","lot","floor","shake","dance","pool","engage","cowboy","cereal","brick","item","gym","slide","land","jump","driveway","pressure","add","card","bow","fish","chip","ice","beer","attach","drill"]}
{"concepts":["mallet","paper","white","wear","dribble","picture","wine","boat","point","joke","brush","stuff","distance","smooth","control","uniform","costume","paddle","toilet","store","cake","basketball","help","lift","bathtub","turn","home","sunglass"]}
{"concepts":["iron","pencil","wax","leash","marker","cymbal","lap","coat","score","tank","win","peace","smile","wheelie","fill","shine","examine","dish","female","flag","snow"]}
{"concepts":["tricycle","arena","braid","brush","slice","tennis","bottle","jean","stare","sign","balloon","stair","restaurant","way","officer","scooter","shake","male","beat","take","wire","hand","alley","store","footprint","gun","pocket"]}
{"concepts":["bull","stuff","help","hoop","piece","water","exercise","pop","house","goat","eat","grill","celebrate","soap","draw","speech","leg","bounce","clipper","swimmer","smooth","kick","policeman","dive","write"]}
{"concepts":["tractor","family","path","hedge","gutter","train","vehicle","vault","pick","bat","country","desert","perform","plaster","boat","toy","couch","knock","potato","chase","cellphone","camel","hamburger","ladder","fire","pile","skateboard","drop","mascara"]}
{"concepts":["tongue","plate","cellphone","jean","give","chase","lock","fetch","cereal","cart","umbrella","athlete","pull","fall","deal","sidewalk","skateboard","rip","bench","fence","player","begin","bike","ground","wound","grape","mascara","white","mower"]}
{"concepts":["sit","chew","card","clipper","buck","tricep","indoor","ground","basketball","background","track","costume","ladder","body","lace","toy","television","towel","soldier","makeup","bird","sail"]}
{"concepts":["coin","trick","direct","compete","pit","meal","carry","cowboy","station","throw","braid","footprint","shoulder","tourist","dough","skateboarder","sharpener","hold","winter","create","duck","silhouette","jersey","indoor","body","jog","chase"]}
{"concepts":["pottery","song","hose","grape","glove","cube","cellphone","street","wear","couple","button","harness","word","kite","unicycle","press","scissor","plaster","shuffle","clip","garment","umbrella","duck","family","gear","jean","circle","instructor","rip"]}
{"concepts":["floor","write","shine","front","roller","footprint","sle","pot","weight","trash","wing","item","stair","block","present","wheelbarrow","pepper","drop","hill","jog","sink","country","create"]}
{"concepts":["cube","use","demonstrate","fill","weld","number","collar","block","lay","treadmill","football","wine","mortar","crack","dress","routine","cliff","garment","wall","catch","ladder","bottle","perform","pot"]}
{"concepts":["employee","cream","traffic","dinner","beverage","gym","suit","pair","prevent","club","open","event","bowl","ceremony","puff","follow","presentation","pipe","lens","crosswalk","tricep","control","sausage","attire"]}
{"concepts":["highchair","yard","press","hedge","bathroom","carry","street","comb","run","award","stick","perform","trim","snake","hang","sign","insert","apple","fold","blower","chef"]}
{"concepts":["raise","bed","microphone","kite","dryer","baseball","weight","road","front","mortar","piano","gauge","calf","canoe","suit","bubble","spoon","short","skateboarder","feed","control","lunch","trash","surf","policeman","ski","fence"]}
{"concepts":["hedge","treadmill","smash","country","weld","spray","break","flight","roast","mortar","gymnast","punch","marker","smoke","fry","sail","cover","castle","sign","foot","wrap","ground","head"]}
{"concepts":["enjoy","chew","cut","lens","plaster","blanket","skateboard","cigarette","vomit","rock","leap","meat","tomato","help","individual","engage","goldfish","cake","cover","sle","piano"]}
{"concepts":["board","roller","male","break","vendor","light","collar","wing","plant","lie","suck","bikini","backseat","lace","juggle","push","tennis","direct","hedge","television","garment","lay"]}
{"concepts":["fall","distance","pineapple","cheerleader","metal","tie","cat","talk","examine","priest","driveway","peeler","pick","keyboard","dry","fly","wait","pet","tongue","arena","cain","vegetable","wand","carry","crowd","glove","hat","brick","joke","point"]}
{"concepts":["fire","concert","spoon","bronco","mortar","helmet","lean","dealer","couch","unicycle","cross","fetch","toss","shot","performance","plate","faucet","trampoline","camel","stop","bride"]}
{"concepts":["yard","mallet","goal","vendor","leave","rain","braid","card","wine","puck","skateboarder","swimmer","boat","indoor","wing","toe","club","shoot","cow","tooth"]}
{"concepts":["mask","fix","boat","sharpen","smile","swim","chair","piano","drink","building","pig","cow","wheel","scrub","tennis","bench","check","dunk","bikini","pin","slice","axis","drum","lawn","mirror","top","air","remove"]}
{"concepts":["piece","down","climb","concert","dig","rock","wine","spatula","bang","key","hammer","music","teammate","tie","stunt","costume","feed","football","class","pipe","rip","deck","accept","hedge","decorate","pedestrian","liquid","gear","leave","gong"]}
{"concepts":["broom","key","balloon","grape","picture","mirror","brick","clap","costume","cigarette","microphone","stool","blanket","customer","playground","instruction","slice","world","piece","fly","presentation","store","concert","dunk","fold","mouth","set","chew","lotion","type"]}
{"concepts":["pineapple","burger","piece","treadmill","serve","practice","cliff","distance","fork","bar","pen","peel","roast","object","say","climb","garage","cereal","surf","type"]}
{"concepts":["exercise","fold","drill","engage","cheer","saw","crash","puff","bikini","pineapple","dryer","cover","event","dart","dance","sand","band","clothing","sun","side","pierce","race","doctor"]}
{"concepts":["pipe","unicycle","sofa","female","scramble","puzzle","chop","mother","consist","contact","competition","ground","sail","accept","spray","smooth","washer","write","guitarist","edge","dye","pick","gather"]}
{"concepts":["serve","throw","stir","bottle","routine","juice","suit","flute","draw","ramp","truck","design","say","crosswalk","pet","fall","lap","weld","pit","meat","kid","view","purse"]}
{"concepts":["rub","prepare","move","worker","trim","candle","dance","bank","television","catch","design","shuffle","sharpen","work","take","decorate","massage","purse","road","coin","arrow","stuff","fix","home","hoop","family","fireplace","singe","skater"]}
{"concepts":["thread","attire","podium","dinner","hair","wheelie","oil","climb","stretch","microphone","beam","band","vehicle","beat","skip","cast","stop","puff","surgery","smash","routine"]}
{"concepts":["kitchen","policeman","read","ingredient","help","vomit","violin","pop","hold","speech","take","screen","stroll","crack","baton","soap","plaster","lace","hookah","concert","prepare","pencil","trim"]}
{"concepts":["drill","say","beam","hole","basketball","fall","attach","swim","stunt","purse","style","map","tractor","cain","shoulder","sleep","tube","bathtub","drummer","donut","shift","horse","jog"]}
{"concepts":["shovel","teach","mitt","key","deck","leap","cross","celebrate","fetch","needle","vault","toy","silhouette","juice","win","market","tree","gum","soldier","apply","sofa","paddle","arena","sip","collar","stroll","beer","park","circle"]}
{"concepts":["chew","rock","serve","spin","building","look","crack","hole","wear","drive","leap","goalie","bird","tap","eye","hang","pan","snake","garage","cliff","mascara","float","dice","suspend"]}
{"concepts":["drop","groom","deck","pepper","check","mouth","gather","attire","arrow","mother","goat","backseat","garment","graphic","hat","wood","clean","toddler","artist","score","mascara","uniform","background","sander","candle"]}
{"concepts":["work","write","sausage","bird","race","whistle","country","design","give","rub","sand","bottle","cake","chess","table","button","top","down","seat","hit","stretch","artist","mother","cupcake","groom","batter","shore","head","baseball"]}
{"concepts":["gum","butter","trim","chest","watermelon","put","sink","block","style","surgery","swim","vehicle","bottle","goggle","look","patient","shot","bike","worker","policeman","pass","tattoo","gauge","comb","coin"]}
{"concepts":["box","scrub","pile","buck","lunch","bronco","pop","dig","compete","ground","gum","watch","step","surfer","push","down","phone","edge","range","front","snowmobile","singer"]}
{"concepts":["shirt","spray","wine","cube","smile","rub","duck","volleyball","cupcake","work","drum","tank","pile","snake","dye","key","wound","bathtub","equipment","roast","surfboard","hammer"]}
{"concepts":["dough","competition","ball","lot","hamburger","pile","shirt","accept","egg","spatula","oil","floor","crack","pool","faucet","catcher","runner","frisbee","bar","spray","apple","crowd","shake","pipe","serve"]}
{"concepts":["protect","table","phone","roller","unicycle","traffic","pass","beer","perform","barber","plant","jersey","listen","dinner","fish","alligator","snowmobile","batter","wire","bank","tricycle","instrument"]}
{"concepts":["traffic","arm","box","purse","stick","suit","beach","point","gum","way","vehicle","hold","line","clip","hoop","dry","pin","cook","makeup","pass","bathtub","pick"]}
{"concepts":["puzzle","beat","wave","male","tennis","color","oil","blow","tire","umbrella","surfboard","yarn","ingredient","catch","object","hose","seat","pressure","vegetable","spoon","knit","hockey","circle"]}
{"concepts":["walk","ski","metal","tap","spin","scooter","suspend","stair","dog","joke","dance","student","roll","slice","hair","yard","toss","tourist","blow","oil","smash","river","eye","helmet"]}
{"concepts":["owner","cup","burn","toy","win","store","gear","board","cheer","punch","class","sand","prepare","female","beat","graffiti","scooter","brush","lumber","pan","consist","protect"]}
{"concepts":["target","pineapple","park","helmet","music","white","thread","plant","wetsuit","cube","birthday","officer","vomit","tooth","mitt","burger","tree","garage","chisel","teammate","object","help","pump","food","foot","chip","background","dough","bathroom"]}
{"concepts":["football","donut","card","tooth","cross","move","butter","employee","surgery","spaghetti","iron","pen","guitarist","hand","circle","mascara","screen","field","house","knock","separate","ramp","crosswalk"]}
{"concepts":["design","banner","plate","music","serve","glove","cream","lotion","surgery","clap","shine","attach","backseat","home","cake","pass","bridesmaid","thread","pan","treat"]}
{"concepts":["chew","bubble","chainsaw","object","lawn","roll","hit","fetch","separate","pair","sew","fruit","drink","ground","puck","pole","policeman","bathtub","examine","surfboard","ingredient","equipment","wetsuit","class"]}
{"concepts":["batter","award","slide","crash","pepper","throw","castle","station","outfit","world","sidewalk","alley","sock","lie","flash","class","shirt","fork","tag","cain","cut","chess","part","bathroom","skater"]}
{"concepts":["exercise","bike","artist","prevent","calf","pig","crash","base","mallet","book","number","pedestrian","lean","dive","background","march","dart","check","crack","duck","dryer","horse","spoon","vehicle","counter","canoe","event"]}
{"concepts":["pit","field","tricep","juggle","drink","hold","land","say","tourist","point","win","put","bunch","swim","game","catcher","pan","bang","iron","front","stretch","talk","whistle","crowd","ingredient","listen","object"]}
{"concepts":["tank","paddle","jersey","help","stair","class","car","puzzle","read","batter","insert","sail","bunch","mortar","background","cheerleader","cover","practice","jump","cat","fence","leave","base"]}
{"concepts":["silhouette","card","washer","door","concrete","jump","press","fireplace","gauge","help","restaurant","banner","field","arrow","plaster","diver","grape","worker","peace","rink","paddle","point","lawn","saw","eyelash","face","frisbee","mix","liquid","tattoo"]}
{"concepts":["swimmer","pitcher","lay","bank","treadmill","routine","car","eyelash","chess","lotion","teammate","hat","pancake","metal","book","blow","pen","drink","lace","tie","suspend","close","arena","stunt"]}
{"concepts":["speech","mascara","toddler","branch","yolk","lot","object","spark","sign","net","goalie","pencil","beam","highchair","joke","officer","team","bird","stove","alley","dog","smooth","cymbal","night","bartender","scooter"]}
{"concepts":["sleep","piece","iron","barbell","lake","begin","smoke","accept","bubble","cut","pencil","uniform","step","mow","edge","dealer","bathtub","feed","violin","carriage","snowmobile","wheelie","rip","tell","makeup","chair","yolk"]}
{"concepts":["pump","curl","mouth","trick","dish","climb","drink","cup","gather","eyelash","protect","vegetable","female","rock","dart","instrument","puzzle","present","chase","individual","clear","add","dice","portrait","ceremony","solve","razor","put","skateboard"]}
{"concepts":["check","train","dinner","canoe","floor","jog","gong","calf","ladder","ball","guitar","athlete","pin","gym","hedge","book","flight","chew","hit","clean","spin","lens","bike","trampoline","cliff","goldfish","waterfall","grass","wall","chef"]}
{"concepts":["punch","ground","pan","stare","guitar","blanket","chew","bag","shine","collect","dough","map","mat","eyelash","machine","balloon","javelin","ramp","mower","pedestrian","award","pin","plant"]}
{"concepts":["instrument","ladder","bird","chess","hoop","book","drive","world","practice","guitarist","racket","bathroom","range","style","push","build","bridge","juggle","couple","routine","barber","hit"]}
{"concepts":["swimmer","stool","highchair","banner","apple","point","walk","frisbee","clip","batter","room","slice","fork","shoulder","report","juggle","stick","jump","bunch","wine","toe","pick","open","beam","hole","sail"]}
{"concepts":["dealer","walk","pass","worker","ramp","float","sun","pond","design","rock","white","tricycle","grape","word","lock","rip","saw","cellphone","home","close","counter","lawn","spin","gear"]}
{"concepts":["butter","attire","country","puzzle","arena","arm","bride","attempt","key","podium","coat","wash","eat","end","presentation","trail","team","cut","goggle","customer","rodeo","doctor","market","stretch","grape","purse","instruction","pass"]}
{"concepts":["hug","stand","dryer","swallow","animal","pierce","horse","backseat","pop","bride","machine","picture","help","skateboard","celebrate","swimsuit","slice","weld","mat","sail","ocean","hurdle","paddle","dog"]}
{"concepts":["attire","shake","lasso","street","pipe","suit","say","instrument","begin","cellphone","drill","lift","dryer","decorate","oxidation","room","head","flap","lunch","costume","glove","mortar","castle","net","block"]}
{"concepts":["dye","fork","pull","way","lean","slope","shake","night","oyster","court","bird","drum","dive","face","guitarist","trim","ceremony","glove","collar","kite","fold","desert"]}
{"concepts":["bubble","singe","concert","vehicle","slope","nozzle","sock","gong","teach","sharpener","practice","rodeo","beach","marker","music","coat","design","fruit","weld","shovel","walk","guitarist","distance","mix"]}
{"concepts":["hurdle","wall","tomato","oxidation","market","cymbal","toilet","vegetable","spatula","building","cook","pit","teacher","song","trash","purse","throw","flash","hat","eat","club","horse","stove","step","stroll"]}
{"concepts":["phone","pair","end","tooth","concrete","shoulder","cube","lane","ball","sip","egg","clear","come","close","equipment","clip","grass","blanket","book","truck","branch","sleigh","hose","baseball","smash","flip","fruit","kid","band","squeegee"]}
{"concepts":["stair","bubble","team","dryer","bath","set","vegetable","step","bowl","bathtub","grill","soldier","metal","roll","rest","graphic","take","cellphone","scooter","driveway","pencil","presentation","uniform","gym","dance","stick","explain","bike"]}

Binary file not shown.

Binary file not shown.

Binary file not shown.

1201
puppeteer/data/SRDD/SRDD.csv Normal file

File diff suppressed because it is too large Load Diff

View File

View File

View File

@ -0,0 +1,6 @@
class Edge:
def __init__(self, u, v, index):
self.u = u
self.v = v
self.index = index
self.color = "black"

View File

@ -0,0 +1,46 @@
from typing import List, Any
from abc import ABC, abstractmethod
from inference.base.edge import Edge
class Graph(ABC):
def __init__(self) -> None:
self._nodes = []
self._edges = []
self._nodes_num = 0
self._edges_num = 0
def initialize_nodes(self, nodes: List[Any]):
self._nodes = nodes
def _add_node(self, node):
self._nodes.append(node)
def _add_edge(self, node1, node2, index):
edge = Edge(node1, node2, index)
self._edges.append(edge)
def _get_edge(self, node1, node2):
idx_list = []
for edge in self._edges:
if edge.u == node1 and edge.v == node2:
idx_list.append(edge.index)
if len(idx_list) > 0:
return idx_list
else:
return None
def _remove_edges(self, node1, node2):
self._edges = [edge for edge in self._edges if edge != (node1, node2)]
def adjacency_matrix(self):
matrix = [[0 for _ in range(len(self._nodes))] for _ in range(len(self._nodes))]
for edge in self._edges:
matrix[edge[0].index][edge[1].index] = 1
return matrix
@abstractmethod
def visualize(self):
pass

View File

View File

@ -0,0 +1,48 @@
import yaml
import networkx as nx
from inference.base.graph import Graph
from pyvis.network import Network
from agent.agent_info.actions import REASONING_ACTION_LIST, TOOL_ACTION_LIST, TERMINATION_ACTION_LIST
class ActionGraph(Graph):
def __init__(self):
super().__init__()
self.REASONING_ACTION_LIST = REASONING_ACTION_LIST
self.TOOL_ACTION_LIST = TOOL_ACTION_LIST
self.TERMINATION_ACTION_LIST = TERMINATION_ACTION_LIST
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
external_tools_enabled = global_config.get("external_tools_enabled")
if external_tools_enabled:
self.actions_collection = REASONING_ACTION_LIST + TOOL_ACTION_LIST + TERMINATION_ACTION_LIST
else:
self.actions_collection = REASONING_ACTION_LIST + TERMINATION_ACTION_LIST
def add_action(self, action_id, action_data, agent_data):
self._add_node({"id": action_id, "action": action_data, "agent": agent_data})
def add_dependency(self, from_action_id, to_action_id):
self._add_edge(from_action_id, to_action_id, len(self._edges))
def visualize(self, path="action_graph.html"):
G = nx.DiGraph()
nodes_colors = []
for node in self._nodes:
G.add_node(node["id"], label=node["action"]["action"]["action"] + "\n" + node["agent"],
status=node["action"]["success"],
color="green" if node["action"]["success"] == "Success" else "red")
nodes_colors.append("green" if node["action"]["success"] == "Success" else "red")
for edge in self._edges:
G.add_edge(edge.u, edge.v)
net = Network(notebook=True, height="750px", width="100%", bgcolor="#FFFFFF", font_color="black", directed=True)
net.from_nx(G)
net.show(path)
def get_action_data(self, action_id):
for node in self._nodes:
if node["id"] == action_id:
return node
return None
def get_dependencies(self, action_id):
return [edge.v for edge in self._edges if edge.u == action_id]

View File

@ -0,0 +1,119 @@
import networkx as nx
from pyvis.network import Network
import seaborn as sns
import yaml
from inference.base.graph import Graph
from agent.register.register import agent_global_registry
import logging
main_logger = logging.getLogger('global')
class AgentGraph(Graph):
def __init__(self):
super().__init__()
self._nodes_num = agent_global_registry.agent_num
self._edges_num = 0
for agent in agent_global_registry.unique_agents.values():
self._add_node(agent)
print("-"*10+"\033[31mAgent Graph Initialized\033[0m"+"-"*10)
@property
def hash_nodes(self):
return [node.hash for node in self._nodes]
@property
def role_nodes(self):
return [node.role for node in self._nodes]
def get_agent_from_index(self, index):
return self._nodes[index]
def get_agent_from_role(self, role):
for agent in self._nodes:
if agent.role == role:
return agent
return None
def get_agent_from_hash(self, hash):
for agent in self._nodes:
if agent.hash == hash:
return agent
return None
def get_agent_dialog_history(self, agent_role_list: list, **kwargs):
"""get agent dialog history
Keyword arguments:
idx -- agent idx
Return: corresponding agent dialog history. If idx is illegal, return []
"""
question = kwargs.get("question", None)
history = []
for role in agent_role_list:
agent = self.get_agent_from_role(role)
for h in agent.simplified_dialog_history:
history.append(h)
if len(agent_role_list) == 0 and question is not None:
history = [{'role': 'system', 'content': 'You are an assistant. Your task is to {}'.format(question)}]
assert len(history)!=0, "Dialog history can not be empty"
return history
@property
def agent_prompt(self):
agent_prompt = []
for agent in self._nodes:
if agent.role != "TerminatorAgent":
agent_prompt.append(f"Agent {agent.role} using model {agent.model}' hash: {agent.hash}")
agent_prompt = "\n".join(agent_prompt)
return agent_prompt
@property
def terminator_agent_index(self):
for agent in self._nodes:
if agent.role == "TerminatorAgent":
return agent.index
return None
@property
def search_agent_indices(self):
indices = []
for agent in self._nodes:
if agent.role == "WebsiteAgent" or agent.role == "BingAgent" or agent.role == "ArxivAgent":
indices.append(agent.index)
return indices
def agent_list(self):
agent_info_list = [
f"index:{agent.index}, role:{agent.role}, model:{agent.model}, hash:{agent.hash}, tool:{agent.tools}"
for agent in self._nodes
]
return '\n'.join(agent_info_list)
def visualize(self, path="agent_graph.html"):
def generate_color_map(node_ids):
color_palette = sns.color_palette("husl", len(node_ids)).as_hex()
color_map = {node_id: color_palette[i % len(color_palette)] for i, node_id in enumerate(node_ids)}
return color_map
node_color_map = generate_color_map(self.hash_nodes)
edge_color_map = generate_color_map([edge.index for edge in self._edges])
G = nx.MultiDiGraph()
edge_labels = {}
for node in self._nodes:
G.add_node(node.index, label=f"{node.role}\nbase model: {node.model}\nindex: {node.index}",color = node_color_map[node.hash])
for edge in self._edges:
G.add_edge(edge.v.index, edge.u.index, color = edge_color_map[edge.index])
edge_labels[(edge.v.index, edge.u.index)] = f"Reasoning..."
net = Network(notebook=True, height="750px", width="100%", bgcolor="#FFFFFF", font_color="black", directed=True)
net.from_nx(G)
net.show(path)
@property
def num(self):
return self._nodes_num
def add_agent(self):
pass
def delete_agent(self):
pass

View File

@ -0,0 +1,593 @@
import torch
import atexit
import os
import datetime
import json
import numpy as np
import torch.nn as nn
import yaml
import logging
from utils.other_utils import Singleton
from inference.policy.base_policy import LLMPolicy, LearningPolicy
from model.embedding import RewardModelTokenRepresentation
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
logger = logging.getLogger("train")
@Singleton
class MLP_PolicyNetwork(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.fc1 = torch.nn.Linear(input_dim, 512)
self.fc2 = torch.nn.Linear(512, 128)
self.fc3 = torch.nn.Linear(128, 32)
self.fc4 = torch.nn.Linear(32, output_dim)
self.relu = torch.nn.ReLU()
self.softmax = torch.nn.Softmax(dim=1)
self.input_dim = input_dim
self.output_dim = output_dim
def forward(self, x):
x = x.to(torch.float32)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
x = self.relu(x)
x = self.fc4(x)
x = self.softmax(x)
return x
@Singleton
class ContinuousREINFORCE(LearningPolicy):
def __init__(self, agent_graph, action_graph, config_path="config/policy.json"):
super().__init__(agent_graph, action_graph)
with open(config_path, 'r') as f:
self.config = json.load(f)
# Set parameters from config
self.device = self.config["device"]["type"]
# Training parameters
self.model_path = self.config["paths"]["model_path"]
self.training = self.config["training"]["training"]
self.loading = self.config["training"]["loading"]
self.learning_rate = self.config["training"]["learning_rate"]
self.gamma = self.config["training"]["gamma"]
self.sample_size = self.config["training"]["sample_size"]
self.lambda_kl_loss = self.config["training"]["lambda_kl_loss"]
# Agent parameters
self.max_num_agents = self.config["agent"]["max_num_agents"]
self.next_num_agents = self.config["agent"]["next_num_agents"]
self.max_path = self.config["agent"]["max_path"]
self.threshold = self.config["agent"]["threshold"]
# LLM parameters
self.llm_prior = self.config["llm"]["prior"]
self.llm_prior_redistribution = self.config["llm"]["prior_redistribution"]
self.redistribution_weight = self.config["llm"]["redistribution_weight"]
# Initialize state representation and policy network
self.state_representation = RewardModelTokenRepresentation()
self.policy_network = MLP_PolicyNetwork(self.state_representation.dim, self.actions_dim)
self.policy_network = self.policy_network.to(self.device)
if not self.training:
self.load_model(self.get_latest_model_path())
if self.loading:
self.load_model(self.model_path)
# Agent setup
self.agent_hash_list = agent_graph.hash_nodes
self.agent_role_list = agent_graph.role_nodes
# Initialize tracking variables
self.executed_trajectories = []
self.execution_count = 0
self.current_trajectories = []
self.current_trajectory_idx = 0
self.policy_losses = []
self.rewards_history = []
self.action_probs_history = []
self.llm_action_probs_history = []
self.reward_from_rm = []
self.accumulated_acc = []
self.entropy_history = []
# Setup actions and rewards
self.end_action = torch.tensor(self.agent_graph.terminator_agent_index, device=self.device)
self.web_actions = torch.tensor(self.agent_graph.search_agent_indices, device=self.device)
# Initialize reward factors from config
reward_factors = self.config["agent"]["reward_factors"]
self.agent_reward_factor = [reward_factors["default"]] * self.actions_dim
self.agent_reward_factor[self.end_action.item()] = reward_factors["terminator"]
for web_idx in self.web_actions:
self.agent_reward_factor[web_idx.item()] = reward_factors["web_search"]
self.current_task = None
self.previous_task = None
self.global_step = 0
self.prob_step=0
# Initialize optimizer
self.optimizer = torch.optim.Adam(self.policy_network.parameters(), lr=self.learning_rate)
self.max_step_num = global_config.get("graph").get("max_step_num")
self.llm_policy = LLMPolicy(self.agent_graph, self.action_graph)
atexit.register(self.save_model)
def logarithmic_cost(self, step):
"""Calculate logarithmic cost using config parameters"""
scale = self.config["cost"]["scale"]
growth_rate = self.config["cost"]["growth_rate"]
# Normalize step to [0,1] range
normalized_step = (step + 1) / (self.max_step_num + 1)
if self.config["cost"]["inverse"]:
step_cost = scale * (1 - torch.log(torch.tensor(1 + growth_rate * normalized_step, device=self.device))
/ torch.log(torch.tensor(1 + growth_rate, device=self.device)))
else:
step_cost = scale * (torch.log(torch.tensor(1 + growth_rate * normalized_step, device=self.device))
/ torch.log(torch.tensor(1 + growth_rate, device=self.device)))
print("\033[1;33mstep cost: {}\033[0m".format(step_cost))
return step_cost
def save_model(self, path=None, tag=None):
"""Save model with config"""
path = self.config["paths"]["checkpoint_path"]
os.makedirs(path, exist_ok=True)
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'policy_net_{timestamp}' + (f'_{tag}' if tag else '') + '.pt'
save_path = os.path.join(path, filename)
checkpoint = {
'model_state_dict': self.policy_network.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict() if hasattr(self, 'optimizer') else None,
'input_dim': self.policy_network.input_dim,
'output_dim': self.policy_network.output_dim,
'timestamp': timestamp,
'config': self.config, # Save the config with the model
'metadata': {
'tag': tag,
'version': '1.0',
}
}
try:
torch.save(checkpoint, save_path)
print(f"Model saved successfully to {save_path}")
return save_path
except Exception as e:
print(f"Error saving model: {str(e)}")
return None
def update_executed_trajectories(self):
if self.current_task != self.previous_task:
self.previous_task = self.current_task
self.execution_count += 1
num_to_add = self.execution_count - len(self.executed_trajectories)
if num_to_add > 0:
self.executed_trajectories.extend([[] for _ in range(num_to_add)])
self.current_trajectories = self.executed_trajectories[self.execution_count-1]
def init_forward(self, global_info):
print("\033[1;33mInit Policy Forward\033[0m")
logger.info("[Init Policy Forward]")
self.current_task = global_info.task
self.update_executed_trajectories()
state, rew = self.get_state_representation(global_info)
action_probs = self.policy_network(state) #shape (1,action_dim)
self.action_probs_history.append(action_probs.T.squeeze(1))
self.reward_from_rm.append(rew)
logger.info("Action probs: {}".format(action_probs))
entropy = -(action_probs * torch.log(action_probs + 1e-10)).sum()
self.entropy_history.append(entropy)
m = torch.distributions.Categorical(action_probs)
agent_indices = self.select_agents_by_threshold(action_probs).T.squeeze(1)
self.current_trajectory_idx = 0
length = len(self.current_trajectories) + agent_indices.shape[0]
while len(self.current_trajectories) < length:
self.current_trajectories.append([])
for i, agent_idx in enumerate(agent_indices):
prob_value = action_probs[0, agent_idx.item()]
if i == 0:
self.append_to_trajectory(self.current_trajectory_idx, agent_idx, prob_value, global_info, None, m, rew)
else:
trajectory_idx = len(self.current_trajectories) - len(agent_indices) + i
self.append_to_trajectory(trajectory_idx, agent_idx, prob_value, global_info, None, m, rew)
return agent_indices
def iter_forward(self, global_info):
print("\033[1;33mFollowing Policy Forward\033[0m")
logger.info("Following Policy Forward")
self.current_task = global_info.task
if self.llm_prior:
prior_action_probs = self.llm_policy.forward_prior(global_info)
else:
prior_action_probs = None
print("Prior action probs: {}".format(prior_action_probs))
logger.info("Prior action probs: {}".format(prior_action_probs))
state, rew = self.get_state_representation(global_info)
action_probs = self.policy_network(state) #shape (1,action_dim)
if self.llm_prior_redistribution:
action_probs = (1 - self.redistribution_weight) * action_probs + \
self.redistribution_weight * prior_action_probs
action_probs /= action_probs.sum()
self.action_probs_history.append(action_probs.T.squeeze(1))
self.reward_from_rm.append(rew)
logger.info("Action probs: {}".format(action_probs))
entropy = -(action_probs * torch.log(action_probs + 1e-10)).sum()
self.entropy_history.append(entropy)
m = torch.distributions.Categorical(action_probs)
agent_indices = self.select_agents_by_threshold(action_probs).T.squeeze(1)
self.current_trajectory_idx = global_info.path_id
length = len(self.current_trajectories) + len(agent_indices) - 1
original_length = len(self.current_trajectories)
while len(self.current_trajectories) < length:
self.current_trajectories.append([])
for i, agent_idx in enumerate(agent_indices):
prob_value = action_probs[0, agent_idx.item()]
if i == 0:
self.append_to_trajectory(self.current_trajectory_idx, agent_idx, prob_value, global_info, prior_action_probs, m, rew)
else:
trajectory_idx = original_length + i - 1
self.current_trajectories[trajectory_idx] = self.clone_trajectory(self.current_trajectory_idx)
self.append_to_trajectory(trajectory_idx, agent_idx, prob_value, global_info, prior_action_probs, m, rew)
return agent_indices
def append_to_trajectory(self, trajectory_idx, agent_idx, prob_value, global_info, prior_action_probs, m, rew=0):
cost = self.logarithmic_cost(len(self.current_trajectories[trajectory_idx])) * self.agent_reward_factor[agent_idx.item()]
self.current_trajectories[trajectory_idx].append({
'prob': prob_value,
'log_prob': m.log_prob(agent_idx),
'state_identifier': global_info.workflow.state,
'action': self.agent_role_list[agent_idx.item()],
'reward': cost,
'reward_model': rew,
'prior_prob': prior_action_probs[agent_idx.item()] if prior_action_probs is not None else None
})
print(trajectory_idx, self.current_trajectories[trajectory_idx])
def clone_trajectory(self, source_idx):
return [{
'prob': t['prob'].clone(),
'log_prob': t['log_prob'].clone(),
'state_identifier': t['state_identifier'],
'action': t['action'],
'reward': t['reward'],
'reward_model': t['reward_model'],
'prior_prob': t['prior_prob'].clone() if t['prior_prob'] is not None else None
} for t in self.current_trajectories[source_idx][:-1]]
def forward(self, global_info):
if global_info.path_id == -1:
agent_indices = self.init_forward(global_info)
else:
agent_indices = self.iter_forward(global_info)
print("Agent Indices: {}".format(agent_indices))
selected_agents = [self.agent_hash_list[i] for i in agent_indices]
return selected_agents
def calculate_returns(self, trajectory):
returns = []
R = 0
for t in reversed(trajectory):
R = t.get('reward', 0) + self.gamma * R
returns.insert(0, R)
return torch.tensor(returns, device=self.device)
def get_state_representation(self, global_info):
role_list = global_info.agent_role_list()
print(role_list)
state_context = self.agent_graph.get_agent_dialog_history(role_list, question=global_info.task.get("Question"))
print(state_context)
print(type(state_context))
state, reward = self.state_representation(state_context)
print(state, reward)
return state, reward
def update(self):
logger.info("Update")
logger.info("Executed trajectories: {}".format(self.executed_trajectories))
if not self.training:
metrics = {
'reasoning/action_probs': torch.sum(torch.stack(self.action_probs_history), dim=0),
"training/entropy": np.mean([e.detach().cpu().item() for e in self.entropy_history])
}
logger.info("metrics: {}".format(metrics))
self.current_trajectories = []
self.executed_trajectories = []
self.entropy_history = []
self.execution_count = 0
return {}
if len(self.executed_trajectories) >= self.sample_size:
episode_returns = []
episode_lengths = []
episode_last_rewards = []
episode_acc = []
episode_tokens = []
episode_cost = []
episode_metrics = {}
kl_losses = []
logger.info("Update with sample size {}".format(self.sample_size))
policy_loss = []
episode_loss = []
for trajectories in self.executed_trajectories[:self.sample_size]:
task_avg_length = []
task_avg_reward = []
task_last_reward = []
task_acc = []
task_avg_tokens = []
task_avg_cost = []
task_avg_metrics = []
for trajectory in trajectories:
if trajectory[-1].get('finalized', False):
logger.info("Trajectory: {}".format(trajectory))
returns = self.calculate_returns(trajectory)
# episode_returns.append(sum(returns))
task_avg_reward.append(sum(returns))
task_avg_length.append(len(trajectory))
task_last_reward.append(trajectory[-1].get('reward', 0))
task_avg_tokens.append(trajectory[-1].get('total_tokens', 0))
task_avg_cost.append(trajectory[-1].get('total_cost', 0))
task_avg_metrics.append(trajectory[-1].get('metrics', {}))
if task_last_reward[-1] > 0:
task_acc.append(1)
else:
task_acc.append(0)
# task_acc.append(task_last_reward[-1].cpu().item())
# episode_lengths.append(len(trajectory))
print("returns: {}".format(returns))
logger.info("Trajectory returns: {}".format(returns))
for t, R in zip(trajectory, returns):
if t.get('prob', None) is not None and t.get('prior_prob', None) is not None:
kl_loss = t.get('prior_prob', 0) * torch.log(t['prior_prob'] / (t['prob']+1e-10))
logger.info("Add KL loss: {}".format(kl_loss))
else:
kl_loss = 0
logger.info("No KL loss: {}".format(kl_loss))
kl_loss = torch.tensor(kl_loss).to(self.device)
kl_losses.append(kl_loss)
loss = (-t['log_prob'] * R + self.lambda_kl_loss * kl_loss).to(self.device)
if loss.dim() == 0: # scalar loss, convert to shape [1]
loss = loss.view(1)
elif loss.dim() == 1: # already [1], keep it
pass
policy_loss.append(loss)
logger.info("loss for one sample: {}".format(policy_loss))
if len(task_avg_length) == 0:
continue
else:
episode_lengths.append(sum(task_avg_length)/len(task_avg_length))
if len(task_avg_reward) == 0:
continue
else:
episode_returns.append(sum(task_avg_reward)/len(task_avg_reward))
if len(task_last_reward) == 0:
continue
else:
episode_last_rewards.append(sum(task_last_reward)/len(task_last_reward))
if len(task_avg_tokens) == 0:
continue
else:
episode_tokens.append(sum(task_avg_tokens)/len(task_avg_tokens))
if len(task_avg_cost) == 0:
continue
else:
episode_cost.append(sum(task_avg_cost)/len(task_avg_cost))
if len(task_acc) == 0:
continue
else:
episode_acc.append(sum(task_acc)/len(task_acc))
if len(task_avg_metrics) == 0:
continue
elif task_avg_metrics[0] == {}:
continue
else:
for key in task_avg_metrics[0].keys():
if key not in episode_metrics:
episode_metrics[key] = []
episode_metrics[key].append(sum([m[key] for m in task_avg_metrics])/len(task_avg_metrics))
if policy_loss:
logger.info("Policy loss: {}".format(policy_loss))
policy_loss = torch.stack(policy_loss).sum()/(self.sample_size)
logger.info("Policy loss stack: {}".format(policy_loss))
policy_loss -= sum(self.entropy_history)
logger.info("Policy loss with entropy: {}".format(policy_loss))
self.optimizer.zero_grad()
policy_loss.backward()
self.optimizer.step()
metrics = {
'reasoning/action_probs': torch.sum(torch.stack(self.action_probs_history), dim=0),
'reasoning/reward_from_rm': sum(self.reward_from_rm),
'reasoning/acc': np.mean([a for a in episode_acc]),
'reasoning/tokens': np.mean([t for t in episode_tokens]),
'reasoning/cost': np.mean([c for c in episode_cost]),
'training/policy_loss': policy_loss.item(),
'reasoning/mean_return': np.mean([r.detach().cpu().item() for r in episode_returns]),
'reasoning/mean_episode_length': np.mean(episode_lengths),
'reasoning/mean_last_reward': np.mean([r.detach().cpu().item() for r in episode_last_rewards]),
'training/mean_kl_loss': np.mean([kl.detach().cpu().item() for kl in kl_losses]),
"training/entropy": np.mean([e.detach().cpu().item() for e in self.entropy_history]),
}
metrics.update({f'reasoning/{key}': np.mean([r.cpu().item() for r in episode_metrics[key]]) for key in episode_metrics})
logger.info("metrics: {}".format(metrics))
self.global_step += 1
self.policy_losses.append(policy_loss.item())
self.current_trajectories = []
self.executed_trajectories = []
self.entropy_history = []
self.execution_count = 0
self.reward_from_rm = []
self.action_probs_history = []
self.llm_action_probs_history = []
return {
'policy_loss': policy_loss.item(),
'mean_reward': torch.tensor(returns, device=self.device).mean().item()
}
return {}
def finalize_task(self, transition, global_info):
print("\033[1;33mtransition reward: {}\033[0m".format(transition.get('reward', 0)))
self.current_trajectories = self.executed_trajectories[self.execution_count-1]
idx = transition.get('path_id', 0)
if self.current_trajectories and idx < len(self.current_trajectories):
state, rew = self.get_state_representation(global_info)
action_probs = self.policy_network(state)
prob_value = action_probs[0, self.end_action.item()]
m = torch.distributions.Categorical(action_probs)
current_trajectory = self.current_trajectories[idx]
for index, action in enumerate(global_info.workflow.workflow):
cost = action.cost
print("\033[1;33mtoken cost: {}\033[0m".format(cost))
print("\033[1;33mcost factor: {}\033[0m".format(cost/100000))
current_trajectory[index]["reward"] *= cost/100000
print("\033[1;33mReward: {}\033[0m".format(current_trajectory[index]['reward']))
if current_trajectory:
step_reward = self.logarithmic_cost(len(current_trajectory))
total_tokens = global_info.total_tokens
total_cost = global_info.total_cost
if transition.get('reward', 0) > 0:
reward = transition.get('reward', 0) + self.agent_reward_factor[self.end_action.item()] * step_reward
else:
reward = transition.get('reward', 0) - self.agent_reward_factor[self.end_action.item()] * step_reward
if current_trajectory[-1].get("action") == self.agent_role_list[self.end_action.item()]:
current_trajectory[-1]["reward"] = reward
current_trajectory[-1]['total_tokens'] = total_tokens
current_trajectory[-1]['total_cost'] = total_cost
current_trajectory[-1]['finalized'] = True
current_trajectory[-1]['reward_model'] = rew
current_trajectory[-1]['metrics'] = transition.get('metrics', {})
print("\033[1;33mLast Reward: {}\033[0m".format(current_trajectory[-1]['reward']))
else:
current_trajectory.append({
'prob': prob_value,
'log_prob': m.log_prob(self.end_action),
'state_identifier': transition.get('state', global_info.workflow.state),
'action': self.agent_role_list[self.end_action.item()],
'reward': reward,
'reward_model': rew,
'finalized': True,
'total_tokens': total_tokens,
'total_cost': total_cost,
'metrics': transition.get('metrics', {})
})
print("\033[1;33mLast Reward: {}\033[0m".format(current_trajectory[-1]['reward']))
self.rewards_history.append(transition.get('reward', 0))
def select_agents_by_probability(self, action_probs):
num_agents_to_select = torch.randint(1, self.max_num_agents+1, (1,)).item()
selected_indices = torch.multinomial(action_probs, num_agents_to_select, replacement=False)
return selected_indices
def select_agents_by_threshold(self, action_probs, threshold=0.1):
threshold = 2/self.agent_graph.num
selected_indices = torch.nonzero(action_probs[0] > threshold).squeeze(1)
if len(selected_indices) == 0:
num_to_select = min(self.max_path, self.max_num_agents)
selected_indices = torch.multinomial(action_probs, num_to_select, replacement=False)
return selected_indices
else:
probs = action_probs[0][selected_indices]
sorted_idx = torch.argsort(probs, descending=True)
selected_indices = selected_indices[sorted_idx]
num_agents_to_select = min(len(selected_indices), self.max_path, self.max_num_agents)
selected_indices = selected_indices[:num_agents_to_select]
return selected_indices.unsqueeze(0)
def load_model(self, path, strict=True):
try:
if not os.path.exists(path):
logger.error(f"Model file not found: {path}")
return False
checkpoint = torch.load(path, map_location=self.device)
# Validate model architecture
if (checkpoint['input_dim'] != self.policy_network.input_dim or
checkpoint['output_dim'] != self.policy_network.output_dim):
if strict:
raise ValueError(f"Model architecture mismatch. Expected input_dim={self.policy_network.input_dim}, "
f"output_dim={self.policy_network.output_dim} but got input_dim={checkpoint['input_dim']}, "
f"output_dim={checkpoint['output_dim']}")
logger.warning("Model architecture mismatch, but continuing due to non-strict mode")
# Load model state
self.policy_network.load_state_dict(checkpoint['model_state_dict'], strict=strict)
self.policy_network = self.policy_network.to(self.device)
# Load optimizer state if available
if checkpoint['optimizer_state_dict'] and hasattr(self, 'optimizer'):
self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Move optimizer state to correct device
for state in self.optimizer.state.values():
for k, v in state.items():
if isinstance(v, torch.Tensor):
state[k] = v.to(self.device)
# Load config if available
if 'config' in checkpoint:
# Merge loaded config with current config, prioritizing current config
self.config.update({k: v for k, v in checkpoint['config'].items()
if k not in self.config})
logger.info(f"Model loaded successfully from {path}")
logger.info(f"Model timestamp: {checkpoint['timestamp']}")
if checkpoint['metadata'].get('tag'):
logger.info(f"Model tag: {checkpoint['metadata']['tag']}")
return True
except Exception as e:
logger.error(f"Error loading model: {str(e)}")
return False
def get_latest_model_path(self):
"""Get the path of the latest model checkpoint"""
try:
path = self.model_path
if os.path.exists(path) and os.path.isfile(path):
return path
path = self.config["paths"]["checkpoint_path"]
if not os.path.exists(path):
return None
model_files = [f for f in os.listdir(path) if f.endswith('.pt')]
if not model_files:
return None
latest_model = max(model_files, key=lambda x: os.path.getctime(os.path.join(path, x)))
return os.path.join(path, latest_model)
except Exception as e:
print(f"Error finding latest model: {str(e)}")
return None

View File

@ -0,0 +1,84 @@
import torch
import json
from model import query_gpt
import re
from abc import ABC
from tenacity import retry, stop_after_attempt, wait_exponential
class Policy(ABC):
def __init__(self, agent_graph, action_graph) -> None:
super().__init__()
self.agent_graph = agent_graph
self.action_graph = action_graph
self.actions_dim = agent_graph.num
class LearningPolicy(Policy):
def __init__(self, agent_graph, action_graph):
super().__init__(agent_graph, action_graph)
self.actions_dim = agent_graph.num
self.agent_hash_list = agent_graph.hash_nodes
self.training = True
def train(self):
self.training = True
def eval(self):
self.training = False
def update(self):
pass
def save_model(self, path):
raise NotImplementedError
def load_model(self, path):
raise NotImplementedError
def finalize_task(self, transition, global_info):
raise NotImplementedError
class LLMPolicy(Policy):
def __init__(self, agent_graph, action_graph) -> None:
super().__init__(agent_graph, action_graph)
self.agent_hash_list = agent_graph.hash_nodes
self.agent_role_list = agent_graph.role_nodes
@retry(wait=wait_exponential(min=5, max=300), stop=stop_after_attempt(10))
def forward(self, global_info, max_num:int =1) -> list:
system_prompt_filepath = "prompts/general/agent_selection.json"
with open(system_prompt_filepath, "r") as f:
select_prompt = json.load(f)
select_prompt = "\n".join(select_prompt['simple_select']).format(global_info.task.get("Question"),
global_info.workflow.all_actions,
self.agent_graph.agent_prompt,
max_num,
max_num,
self.agent_hash_list[0])
response, _ = query_gpt(select_prompt)
regex = r"\b(\w{32})\b"
matches = re.findall(regex, response)
if len(matches) <= 0:
raise Exception("No agent found")
if len(matches) > max_num:
matches = matches[:max_num]
elif len(matches) < max_num:
matches += [matches[-1]]*(max_num-len(matches))
for index, m in enumerate(matches[1:]):
if m is None:
matches[index] = matches[index-1]
for m in matches:
assert m in self.agent_hash_list
return matches
def forward_prior(self, global_info, max_num:int = 1) -> list:
matches = self.forward(global_info, max_num)
probs = [0.0] * self.actions_dim
if matches:
prob_per_agent = 1.0
for agent_hash in matches:
idx = self.agent_hash_list.index(agent_hash)
probs[idx] = prob_per_agent
temprature = 0.1
probs = torch.softmax(torch.tensor(probs)/temprature, dim=0,)
return probs

View File

@ -0,0 +1,146 @@
from enum import Enum
import yaml
import uuid
from agent.register.register import agent_global_registry
from inference.graph.action_graph import ActionGraph
import os
import copy
from agent.agent_info.global_info import GlobalInfo
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
class ReasoningState(Enum):
INITIALIZED = 1
SPLITING = 2
ANSWERING = 3
FINALIZING = 4
DISCARDING = 5
AGGREGATING = 6
class GraphReasoningPath:
def __init__(self, start_agent, max_parallel_paths, global_logger, workspace_path, action_graph:ActionGraph, frontier=[], agent_sequence = [], index = None, global_info:GlobalInfo=None, state = ReasoningState.INITIALIZED, env=None, env_name=None, policy=None):
self.state = state
self.index = index
self.global_logger = global_logger
self.workspace_path = workspace_path
self.action_graph = action_graph
self.frontier = frontier
global_logger.create_logger('path{}_logger'.format(index), os.path.join(global_logger.folder_path, "path{}.log".format(index)), "INFO")
self.logger = global_logger.get_logger('path{}_logger'.format(index))
self.workflow_path = os.path.join(workspace_path, "path_{}.jsonl".format(index))
self.workcode_path = os.path.join(workspace_path, "code_{}.py".format(index))
self.start_agent = start_agent
self.agent_sequence = agent_sequence
if self.agent_sequence == []:
self.agent_sequence.append(start_agent.unique_identifier)
self.max_parallel_paths = max_parallel_paths
self.max_step_num = global_config.get("graph").get("max_step_num")
self.current_agent = start_agent
self.next_agents = []
self.env = env
self.env_name = env_name
self.policy = policy
self.global_info = global_info
self.global_info.logger = self.logger
self.global_info.workpath = self.workspace_path
self.global_info.path_id = self.index
self.logger.info("{}[Reasoning Path{} Start]{}".format("-"*30,self.index, "-"*30))
self.logger.info("Reasoning Path{}:{}".format(self.index, state))
self.logger.info("Start agent: {}".format(start_agent.role))
self.logger.info("Previous Agent sequence: {}".format(self.print_agent_sequence()))
def update_global_info(self, current_action):
self.global_info.update(current_action)
self.logger.info("Updated global_info: {}".format(self.global_info.__dict__))
def step(self):
external_tools_enabled = global_config.get("external_tools_enabled")
current_action, terminated = self.current_agent.take_action(self.global_info, external_tools_enabled, self.env, self.env_name)
self.current_agent.deactivate()
self.update_global_info(current_action)
node_id = str(uuid.uuid4())
self.action_graph.add_action(node_id, current_action.to_dict(), self.current_agent.role)
for successor in self.frontier:
self.action_graph.add_dependency(successor, node_id)
self.frontier = [node_id]
# Deal with the case meeting the termination condition:
# 1. The current agent is the terminator
# 2. The maximum number of steps is reached
if terminated or len(self.agent_sequence) >= self.max_step_num:
self.state = ReasoningState.FINALIZING
self.last_agent = self.current_agent
self.last_query_func = self.current_agent.query_func
return self.state
# Deal with the case where the current agent is the terminator
next_agents_idx = self.policy.forward(self.global_info)
self.next_agents = [agent_global_registry.get_agent_from_idx(idx) for idx in next_agents_idx]
# Deal with the case where there is only one next agent
if len(self.next_agents) == 1:
self.current_agent = self.next_agents[0]
self.current_agent.activate(global_info=self.global_info, initial_dialog_history=self.current_agent.initial_dialog_history)
self.agent_sequence.append(self.current_agent.unique_identifier)
self.state = ReasoningState.ANSWERING
return self.state
# Deal with the case where there are multiple next agents
else:
for agent in self.next_agents:
agent.activate(global_info=self.global_info, initial_dialog_history=agent.initial_dialog_history)
self.state = ReasoningState.SPLITING
return self.state
def split(self, current_path_num):
split_reasoning_paths = []
if current_path_num >= self.max_parallel_paths:
self.current_agent = self.next_agents[0]
self.agent_sequence.append(self.current_agent.unique_identifier)
self.state = ReasoningState.ANSWERING
return split_reasoning_paths
for index, agent in enumerate(self.next_agents[1:self.max_parallel_paths-current_path_num+1]):
agent_sequence = copy.deepcopy(self.agent_sequence)
if self.env is not None:
env = copy.deepcopy(self.env)
else:
env = None
path_index = current_path_num + index
reasoning_path = GraphReasoningPath(
start_agent=agent,
max_parallel_paths=self.max_parallel_paths,
action_graph=self.action_graph,
agent_sequence = agent_sequence,
index=path_index,
global_info=copy.deepcopy(self.global_info),
state=ReasoningState.ANSWERING,
global_logger=self.global_logger,
workspace_path=self.workspace_path,
env=env,
frontier=self.frontier,
policy=self.policy
)
reasoning_path.agent_sequence.append(agent.unique_identifier)
reasoning_path.current_agent = agent
reasoning_path.next_agents = []
split_reasoning_paths.append(reasoning_path)
print("\033[1;36mPath {} Initialized (split from path {})\033[0m".format(path_index,self.index))
self.current_agent = self.next_agents[0]
self.agent_sequence.append(self.current_agent.unique_identifier)
self.state = ReasoningState.ANSWERING
return split_reasoning_paths
def print_agent_sequence(self):
agent_sequence = "".join([agent.get("role") + "->" for agent in self.agent_sequence[:-1]] + [self.agent_sequence[-1].get("role")])
return agent_sequence

View File

@ -0,0 +1,300 @@
from typing import List
import json
import yaml
import os
import copy
import logging
from inference.reasoning.path import ReasoningState, GraphReasoningPath
from inference.graph.agent_graph import AgentGraph
from inference.graph.action_graph import ActionGraph
from inference.policy.REINFORCE_continuous import ContinuousREINFORCE
from utils.logging import LogManager
from agent.register.register import agent_global_registry
from agent.agent_info.global_info import GlobalInfo
from tasks.evaluator import BenchmarkEvaluator
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
main_logger = logging.getLogger('global')
class GraphReasoning:
def __init__(self, task:json, graph: AgentGraph, env=None, env_name=None):
self.task = task
self.agent_graph = graph
self.action_graph = ActionGraph()
self.reasoning_paths: List[GraphReasoningPath] = []
self.max_parallel_paths = global_config.get("graph").get("max_parallel_paths")
self.final_answer = ""
self.answers = []
self.global_logger = LogManager("./config/global.yaml", self.task.get("type"))
self.workspace_path = self.global_logger.folder_path
self.policy = ContinuousREINFORCE(agent_graph=self.agent_graph, action_graph=self.action_graph)
self.env = env
self.env_name = env_name
main_logger.info("{}[Graph Reasoning Initialized]{}".format("-"*30, "-"*30))
main_logger.info(global_config)
main_logger.info(self.agent_graph.role_nodes)
def save_checkpoint(self, save_data):
main_logger.info("{}[Save Checkpoint]{}".format("-"*30, "-"*30))
cur_acc = save_data["best_acc"]
cur_data_len = save_data["best_data_len"]
main_logger.info("best acc: {}, data len: {}".format(cur_acc, cur_data_len))
tag = "acc_{}-data_{}".format(cur_acc, cur_data_len)
self.policy.save_model(path=None, tag=tag)
def start(self, save_data):
if save_data != None:
self.save_checkpoint(save_data)
print("-"*10+"\033[1;31mGraph Reasoning Start\033[0m"+"-"*10)
main_logger.info("{}[Graph Reasoning Start]{}".format("-"*30, "-"*30))
main_logger.info("Task:\n{}".format(self.task.get("Question")))
# -1 is the default path id for intialization
global_info = GlobalInfo(path_id=-1,
workpath=self.workspace_path,
task=self.task,
env=self.env,
env_name=self.env_name)
matches = self.policy.forward(global_info)
for index, match in enumerate(matches):
global_info = GlobalInfo(path_id=index,
workpath=self.workspace_path,
task=self.task,
env=self.env,
env_name=self.env_name)
agent = agent_global_registry.get_agent_from_idx(match)
agent.activate(global_info)
main_logger.info("[Path {} Initialized".format(index))
print("\033[1;36mPath {} Initialized\033[0m".format(index))
reasoning_path = GraphReasoningPath(start_agent=agent,
max_parallel_paths=self.max_parallel_paths,
action_graph=self.action_graph,
agent_sequence=[],
index=index,
global_info = copy.deepcopy(global_info),
global_logger = self.global_logger,
workspace_path=self.workspace_path,
state=copy.deepcopy(ReasoningState.INITIALIZED),
env=self.env,
env_name=self.env_name,
policy=self.policy
)
self.reasoning_paths.append(reasoning_path)
main_logger.info("Reasoning Path: {}\nAgent Sequence: {}\n".format(index, reasoning_path.print_agent_sequence()))
def n_step(self, n:int):
for i in range(n):
self.step()
if self.check_finalize():
break
return self.finalize()
def step(self):
main_logger.info("{}[STEP]{}".format("-"*30, "-"*30))
for reasoning_path in self.reasoning_paths[:self.max_parallel_paths]:
# Deal with the case where the reasoning path is not finalizing and not spliting
if reasoning_path.state != ReasoningState.FINALIZING and reasoning_path.state != ReasoningState.SPLITING:
main_logger.info("{}[Reasoning Path{} STEP]{}".format("-"*30, reasoning_path.index, "-"*30))
print("\033[1;36mPath {} Step\033[0m".format(reasoning_path.index))
reasoning_path.step()
main_logger.info("{}[DONE]: Reasoning Path{} STEP{}".format("-"*30, reasoning_path.index, "-"*30))
buffer_reasoning_paths = []
for reasoning_path in self.reasoning_paths[:self.max_parallel_paths]:
# Deal with the case where the reasoning path is spliting
if reasoning_path.state == ReasoningState.SPLITING :
current_path_count = len(self.reasoning_paths) + len(buffer_reasoning_paths)
print("\033[1;36mPath {} Split\033[0m".format(reasoning_path.index))
split_reasoning_paths = reasoning_path.split(current_path_count)
if len(split_reasoning_paths) > 0:
main_logger.info("Split Reasoning Paths: {} From Path {}".format([path.index for path in split_reasoning_paths], reasoning_path.index))
buffer_reasoning_paths.extend(split_reasoning_paths)
# Deal with the case where the reasoning path is finalizing
elif reasoning_path.state == ReasoningState.FINALIZING:
print("\033[1;36mPath {} Finalize\033[0m".format(reasoning_path.index))
main_logger.info("{}[Reasoning Path{} FINALIZING]{}".format("-"*30, reasoning_path.index, "-"*30))
print(p for p in self.reasoning_paths)
self.reasoning_paths.extend(buffer_reasoning_paths)
self.format_index()
self.print_paths()
self.update_graph()
return self.answers
def aggregate_answers(self, global_info, answers:list, query_func=None) -> str:
# only choose the last result without any format or extract
if query_func is None:
if len(answers) == 0:
return None
else:
main_logger.info("[Aggregation] {}".format(answers[-1]))
return answers[-1]
# only choose the last result without any format or extract
if self.task.get("type") == "SRDD" or self.task.get("type") == "CW":
main_logger.info("[Aggregation] {}".format(global_info.code_path))
return global_info.code_path
prompt_filepath = "prompts/general/answer_prompt.json"
with open(prompt_filepath, "r") as f:
prompt = json.load(f)
if self.task.get("type") == "MMLU" or self.task.get("type") == "MMLU-Pro":
answer_prompt = "\n".join(prompt["MMLU_aggregation"]).format(str(["{}\n".format(answer) for answer in answers]))
elif self.task.get("type") == "GAIA":
answer_prompt = "\n".join(prompt["GAIA_aggregation"]).format(str(["{}\n".format(answer) for answer in answers]))
elif self.task.get("type") == "GSM-Hard" or self.task.get("type") == "gsm-hard" or self.task.get("type") == "GSM8K":
answer_prompt = "\n".join(prompt["gsm_aggregation"]).format(str(["{}\n".format(answer) for answer in answers]))
else:
answer_prompt = "\n".join(prompt["answer_aggregation"]).format(str(["{}\n".format(answer) for answer in answers]))
main_logger.info("[Aggregating] {}".format(answer_prompt))
raw_response, _ = query_func(messages=answer_prompt)
main_logger.info("[Aggregation Answer] {}".format(raw_response))
return raw_response if len(raw_response)!=0 else answers[-1]
def majority_vote(self, answers: List) -> str:
if self.task.get("type") == "MMLU" or self.task.get("type") == "MMLU-Pro":
answers = [BenchmarkEvaluator.extract_choice_answer(answer) for answer in answers]
main_logger.info("[Majority Vote] Answers: {}".format(answers))
elif self.task.get("type") == "gsm-hard" or self.task.get("type") == "GSM8K":
answers = [BenchmarkEvaluator.extract_math_answer(answer) for answer in answers]
main_logger.info("[Majority Vote] Answers: {}".format(answers))
else:
main_logger.info("[Majority Vote] Answers: {}".format(answers))
answer_counts = {}
for answer in answers:
answer = str(answer).strip() # Convert to string and remove whitespace
answer_counts[answer] = answer_counts.get(answer, 0) + 1
if not answer_counts:
return "" # Return empty string if no answers
max_count = max(answer_counts.values())
most_common = [ans for ans, count in answer_counts.items() if count == max_count]
main_logger.info("[Majority Vote] Most Common: {}".format(most_common))
return most_common[-1]
def finalize(self):
print("-"*10+"\033[1;31mGraph Reasoning Finalize\033[0m"+"-"*10)
print(p for p in self.reasoning_paths)
for idx, reasoning_path in enumerate(self.reasoning_paths):
if hasattr(reasoning_path, "last_query_func"):
aggregated_answer = self.aggregate_answers(reasoning_path.global_info, reasoning_path.global_info.state_answers, reasoning_path.last_query_func)
else:
aggregated_answer = self.aggregate_answers(reasoning_path.global_info, reasoning_path.global_info.state_answers)
if self.task.get("type") == "MMLU-Pro":
transition = {
'state': reasoning_path.global_info.workflow.state,
'reward': 1 if BenchmarkEvaluator.check_mmlu(aggregated_answer, self.task.get("Answer")) else -1,
'action': None,
'next_state': None,
'done': True,
'path_id': idx
}
print(transition)
self.policy.finalize_task(transition, reasoning_path.global_info)
elif self.task.get("type") == "GSM-Hard":
transition = {
'state': reasoning_path.global_info.workflow.state,
'reward': 1 if BenchmarkEvaluator.check_gsm8k(aggregated_answer, self.task.get("Answer")) else -1,
'action': None,
'next_state': None,
'done': True,
'path_id': idx
}
print(transition)
self.policy.finalize_task(transition, reasoning_path.global_info)
elif self.task.get("type") == "SRDD":
reward, metrics = BenchmarkEvaluator.check_srdd(aggregated_answer, reasoning_path.global_info.task.get("Question"))
transition = {
'state': reasoning_path.global_info.workflow.state,
'reward': reward,
'action': None,
'next_state': None,
'done': True,
'path_id': idx ,
"metrics":metrics
}
main_logger.info(metrics)
self.policy.finalize_task(transition, reasoning_path.global_info)
elif self.task.get("type") == "CW":
reward, metrics = BenchmarkEvaluator.check_commongen(concepts=reasoning_path.global_info.task.get("concepts"), text_path=aggregated_answer)
transition = {
'state': reasoning_path.global_info.workflow.state,
'reward': reward,
'action': None,
'next_state': None,
'done': True,
'path_id': idx ,
"metrics":metrics
}
main_logger.info(metrics)
self.policy.finalize_task(transition, reasoning_path.global_info)
if aggregated_answer is not None:
self.answers.append(aggregated_answer)
main_logger.info("[Aggregated Answer From Path {}]: {}".format(idx, aggregated_answer))
self.policy.update()
for agent in agent_global_registry.agents.values():
agent.reset()
if len(self.answers) == 1 or self.task.get("type") == "SRDD" or self.task.get("type") == "CW":
if len(self.answers) == 0:
self.final_answer = ""
else:
self.final_answer = self.answers[-1]
else:
self.final_answer = self.majority_vote(self.answers)
main_logger.info("[Final Answer]: {}".format(self.final_answer))
print("-"*10+"\033[1;31mGraph Reasoning Finalized\033[0m"+"-"*10)
return self.final_answer, self.task.get("Answer")
def visualize_path(self):
for reasoning_path in self.reasoning_paths:
reasoning_path.global_info.workflow.visualize()
def visualize_graph(self):
self.agent_graph.visualize(os.path.join(self.workspace_path, "agent_graph.html"))
self.action_graph.visualize(os.path.join(self.workspace_path, "action_graph.html"))
def print_paths(self):
for reasoning_path in self.reasoning_paths:
main_logger.info("Reasoning Path: {}\nAgent Sequence: {}\n".format(reasoning_path.index, reasoning_path.print_agent_sequence()))
def format_index(self):
for index, reasoning_path in enumerate(self.reasoning_paths):
reasoning_path.index = index
def update_graph(self):
for index, reasoning_path in enumerate(self.reasoning_paths):
for successor, predecessor in zip(reasoning_path.agent_sequence[:-1], reasoning_path.agent_sequence[1:]):
successor = agent_global_registry.get_agent_from_idx(successor.get("hash"))
predecessor = agent_global_registry.get_agent_from_idx(predecessor.get("hash"))
res = self.agent_graph._get_edge(predecessor, successor)
if res is None or index not in res:
self.agent_graph._add_edge(predecessor, successor, index)
def check_finalize(self):
for reasoning_path in self.reasoning_paths[:self.max_parallel_paths]:
if reasoning_path.state != ReasoningState.FINALIZING and reasoning_path.state != ReasoningState.DISCARDING:
return False
return True

View File

@ -0,0 +1,155 @@
<html>
<head>
<meta charset="utf-8">
<script src="lib/bindings/utils.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<center>
<h1></h1>
</center>
<!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
<script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
<link
href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
rel="stylesheet"
integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
crossorigin="anonymous"
/>
<script
src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
crossorigin="anonymous"
></script>
<center>
<h1></h1>
</center>
<style type="text/css">
#mynetwork {
width: 100%;
height: 750px;
background-color: #FFFFFF;
border: 1px solid lightgray;
position: relative;
float: left;
}
</style>
</head>
<body>
<div class="card" style="width: 100%">
<div id="mynetwork" class="card-body"></div>
</div>
<script type="text/javascript">
// initialize global variables.
var edges;
var nodes;
var allNodes;
var allEdges;
var nodeColors;
var originalNodes;
var network;
var container;
var options, data;
var filter = {
item : '',
property : '',
value : []
};
// This method is responsible for drawing the graph, returns the drawn network
function drawGraph() {
var container = document.getElementById('mynetwork');
// parsing and collecting nodes and edges from the python
nodes = new vis.DataSet([{"color": "green", "font": {"color": "black"}, "id": "ad54f8ec-782b-457c-a8a2-6c4ef453dccc", "label": "critique\nCriticAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "8912ab4d-3e84-4150-92a7-5a5b502e153e", "label": "planning\nPlannerAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "8dd841e4-f7c9-4cc7-a268-d514ef0f112d", "label": "summarize\nSummarizerAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "cbd174fb-802e-41c8-a7f0-9f01c3093cd8", "label": "modify\nModifier_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "7f17cdf4-5327-4d30-9812-48d3ead86d7b", "label": "conclude\nConcluderAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "b5493311-d168-4049-a679-22c24bd22e3f", "label": "run_python\nPythonAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "ee87f1fe-669f-4e65-9c2f-16469535aa47", "label": "critique\nCriticAgent_gpt4o", "shape": "dot", "size": 10, "status": "Success"}, {"color": "green", "font": {"color": "black"}, "id": "ea4c2232-8661-452e-aeff-334a39428c7c", "label": "terminate\nTerminatorAgent", "shape": "dot", "size": 10, "status": "Success"}]);
edges = new vis.DataSet([{"arrows": "to", "from": "ad54f8ec-782b-457c-a8a2-6c4ef453dccc", "to": "8912ab4d-3e84-4150-92a7-5a5b502e153e", "width": 1}, {"arrows": "to", "from": "ad54f8ec-782b-457c-a8a2-6c4ef453dccc", "to": "8dd841e4-f7c9-4cc7-a268-d514ef0f112d", "width": 1}, {"arrows": "to", "from": "ad54f8ec-782b-457c-a8a2-6c4ef453dccc", "to": "cbd174fb-802e-41c8-a7f0-9f01c3093cd8", "width": 1}, {"arrows": "to", "from": "7f17cdf4-5327-4d30-9812-48d3ead86d7b", "to": "b5493311-d168-4049-a679-22c24bd22e3f", "width": 1}, {"arrows": "to", "from": "7f17cdf4-5327-4d30-9812-48d3ead86d7b", "to": "ee87f1fe-669f-4e65-9c2f-16469535aa47", "width": 1}]);
nodeColors = {};
allNodes = nodes.get({ returnType: "Object" });
for (nodeId in allNodes) {
nodeColors[nodeId] = allNodes[nodeId].color;
}
allEdges = edges.get({ returnType: "Object" });
// adding nodes and edges to the graph
data = {nodes: nodes, edges: edges};
var options = {
"configure": {
"enabled": false
},
"edges": {
"color": {
"inherit": true
},
"smooth": {
"enabled": true,
"type": "dynamic"
}
},
"interaction": {
"dragNodes": true,
"hideEdgesOnDrag": false,
"hideNodesOnDrag": false
},
"physics": {
"enabled": true,
"stabilization": {
"enabled": true,
"fit": true,
"iterations": 1000,
"onlyDynamicEdges": false,
"updateInterval": 50
}
}
};
network = new vis.Network(container, data, options);
return network;
}
drawGraph();
</script>
</body>
</html>

View File

@ -0,0 +1,23 @@
def handle_advertising_regulation():
# Guidelines for typical advertising regulation
advert_rules = {
'encourage': 'illegal activities',
'cause_unnecessary': ['fear', 'offense'],
'cause': 'harm'
}
# Printing out the general context
print("This function demonstrates typical advertising regulation guidelines.\n")
# Printing out the full dictionary
print("Advertising regulation guidelines:")
print(advert_rules)
# Printing out the rules with clear explanations
print("\nTypical advertising regulatory bodies suggest, for example, that adverts must not:")
print("- Encourage:", advert_rules['encourage'])
print("- Cause unnecessary:", ', '.join(advert_rules['cause_unnecessary']))
print("- Must not cause:", advert_rules['cause'])
# Run the function
handle_advertising_regulation()

View File

@ -0,0 +1,155 @@
<html>
<head>
<meta charset="utf-8">
<script src="lib/bindings/utils.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<center>
<h1></h1>
</center>
<!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
<script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
<link
href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
rel="stylesheet"
integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
crossorigin="anonymous"
/>
<script
src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
crossorigin="anonymous"
></script>
<center>
<h1></h1>
</center>
<style type="text/css">
#mynetwork {
width: 100%;
height: 750px;
background-color: #FFFFFF;
border: 1px solid lightgray;
position: relative;
float: left;
}
</style>
</head>
<body>
<div class="card" style="width: 100%">
<div id="mynetwork" class="card-body"></div>
</div>
<script type="text/javascript">
// initialize global variables.
var edges;
var nodes;
var allNodes;
var allEdges;
var nodeColors;
var originalNodes;
var network;
var container;
var options, data;
var filter = {
item : '',
property : '',
value : []
};
// This method is responsible for drawing the graph, returns the drawn network
function drawGraph() {
var container = document.getElementById('mynetwork');
// parsing and collecting nodes and edges from the python
nodes = new vis.DataSet([{"color": "#37aabb", "font": {"color": "black"}, "id": 8, "label": "CriticAgent_gpt4o\nbase model: gpt4o\nindex: 8", "shape": "dot", "size": 10}, {"color": "#34af8a", "font": {"color": "black"}, "id": 6, "label": "PlannerAgent_gpt4o\nbase model: gpt4o\nindex: 6", "shape": "dot", "size": 10}, {"color": "#c180f4", "font": {"color": "black"}, "id": 11, "label": "SummarizerAgent_gpt4o\nbase model: gpt4o\nindex: 11", "shape": "dot", "size": 10}, {"color": "#f669ba", "font": {"color": "black"}, "id": 13, "label": "Modifier_gpt4o\nbase model: gpt4o\nindex: 13", "shape": "dot", "size": 10}, {"color": "#f45deb", "font": {"color": "black"}, "id": 12, "label": "ConcluderAgent_gpt4o\nbase model: gpt4o\nindex: 12", "shape": "dot", "size": 10}, {"color": "#32b24e", "font": {"color": "black"}, "id": 5, "label": "PythonAgent_gpt4o\nbase model: gpt4o\nindex: 5", "shape": "dot", "size": 10}, {"color": "#f77189", "font": {"color": "black"}, "id": 0, "label": "FileAgent\nbase model: gpt-3.5\nindex: 0", "shape": "dot", "size": 10}, {"color": "#ef7d32", "font": {"color": "black"}, "id": 1, "label": "ArxivAgent\nbase model: gpt-3.5\nindex: 1", "shape": "dot", "size": 10}, {"color": "#c69432", "font": {"color": "black"}, "id": 2, "label": "BingAgent\nbase model: gpt-3.5\nindex: 2", "shape": "dot", "size": 10}, {"color": "#a79f31", "font": {"color": "black"}, "id": 3, "label": "WebsiteAgent\nbase model: gpt-3.5\nindex: 3", "shape": "dot", "size": 10}, {"color": "#82a931", "font": {"color": "black"}, "id": 4, "label": "TerminatorAgent\nbase model: gpt-3.5\nindex: 4", "shape": "dot", "size": 10}, {"color": "#36ada4", "font": {"color": "black"}, "id": 7, "label": "ReasoningAgent_gpt4o\nbase model: gpt4o\nindex: 7", "shape": "dot", "size": 10}, {"color": "#3aa6da", "font": {"color": "black"}, "id": 9, "label": "ReflectAgent_gpt4o\nbase model: gpt4o\nindex: 9", "shape": "dot", "size": 10}, {"color": "#8197f4", "font": {"color": "black"}, "id": 10, "label": "QuestionAgent_gpt4o\nbase model: gpt4o\nindex: 10", "shape": "dot", "size": 10}]);
edges = new vis.DataSet([{"arrows": "to", "color": "#f77189", "from": 8, "to": 6, "width": 1}, {"arrows": "to", "color": "#33b07a", "from": 8, "to": 11, "width": 1}, {"arrows": "to", "color": "#38a9c5", "from": 8, "to": 13, "width": 1}, {"arrows": "to", "color": "#ae9d31", "from": 12, "to": 5, "width": 1}, {"arrows": "to", "color": "#cc7af4", "from": 12, "to": 8, "width": 1}]);
nodeColors = {};
allNodes = nodes.get({ returnType: "Object" });
for (nodeId in allNodes) {
nodeColors[nodeId] = allNodes[nodeId].color;
}
allEdges = edges.get({ returnType: "Object" });
// adding nodes and edges to the graph
data = {nodes: nodes, edges: edges};
var options = {
"configure": {
"enabled": false
},
"edges": {
"color": {
"inherit": true
},
"smooth": {
"enabled": true,
"type": "dynamic"
}
},
"interaction": {
"dragNodes": true,
"hideEdgesOnDrag": false,
"hideNodesOnDrag": false
},
"physics": {
"enabled": true,
"stabilization": {
"enabled": true,
"fit": true,
"iterations": 1000,
"onlyDynamicEdges": false,
"updateInterval": 50
}
}
};
network = new vis.Network(container, data, options);
return network;
}
drawGraph();
</script>
</body>
</html>

View File

@ -0,0 +1,209 @@
[10:09:45 INFO]
------------------------------[Graph Reasoning Initialized]------------------------------
[10:09:45 INFO]
['FileAgent', 'ArxivAgent', 'BingAgent', 'WebsiteAgent', 'TerminatorAgent', 'PythonAgent_gpt4o', 'PlannerAgent_gpt4o', 'ReasoningAgent_gpt4o', 'CriticAgent_gpt4o', 'ReflectAgent_gpt4o', 'QuestionAgent_gpt4o', 'SummarizerAgent_gpt4o', 'ConcluderAgent_gpt4o', 'Modifier_gpt4o']
[10:09:45 INFO]
------------------------------[Graph Reasoning Start]------------------------------
[10:09:45 INFO]
Task:
The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
[10:09:53 INFO]
[Path 0 Initialized
[10:09:53 INFO]
Reasoning Path: 0
Agent Sequence: TerminatorAgent
[10:09:53 INFO]
[Path 1 Initialized
[10:09:53 INFO]
Reasoning Path: 1
Agent Sequence: CriticAgent_gpt4o
[10:09:53 INFO]
[Path 2 Initialized
[10:09:53 INFO]
Reasoning Path: 2
Agent Sequence: ConcluderAgent_gpt4o
[10:09:53 INFO]
------------------------------[STEP]------------------------------
[10:09:53 INFO]
------------------------------[Reasoning Path0 STEP]------------------------------
[10:09:53 INFO]
------------------------------[DONE]: Reasoning Path0 STEP------------------------------
[10:09:53 INFO]
------------------------------[Reasoning Path1 STEP]------------------------------
[10:09:56 INFO]
------------------------------[DONE]: Reasoning Path1 STEP------------------------------
[10:09:56 INFO]
------------------------------[Reasoning Path2 STEP]------------------------------
[10:09:59 INFO]
------------------------------[DONE]: Reasoning Path2 STEP------------------------------
[10:09:59 INFO]
------------------------------[Reasoning Path0 FINALIZING]------------------------------
[10:09:59 INFO]
Split Reasoning Paths: [3, 4] From Path 1
[10:09:59 INFO]
Split Reasoning Paths: [5] From Path 2
[10:09:59 INFO]
Reasoning Path: 0
Agent Sequence: TerminatorAgent
[10:09:59 INFO]
Reasoning Path: 1
Agent Sequence: CriticAgent_gpt4o->PlannerAgent_gpt4o
[10:09:59 INFO]
Reasoning Path: 2
Agent Sequence: ConcluderAgent_gpt4o->PythonAgent_gpt4o
[10:09:59 INFO]
Reasoning Path: 3
Agent Sequence: CriticAgent_gpt4o->SummarizerAgent_gpt4o
[10:09:59 INFO]
Reasoning Path: 4
Agent Sequence: CriticAgent_gpt4o->Modifier_gpt4o
[10:09:59 INFO]
Reasoning Path: 5
Agent Sequence: ConcluderAgent_gpt4o->CriticAgent_gpt4o
[10:09:59 INFO]
------------------------------[STEP]------------------------------
[10:09:59 INFO]
------------------------------[Reasoning Path1 STEP]------------------------------
[10:10:06 INFO]
------------------------------[DONE]: Reasoning Path1 STEP------------------------------
[10:10:06 INFO]
------------------------------[Reasoning Path2 STEP]------------------------------
[10:10:22 INFO]
------------------------------[DONE]: Reasoning Path2 STEP------------------------------
[10:10:22 INFO]
------------------------------[Reasoning Path3 STEP]------------------------------
[10:10:27 INFO]
------------------------------[DONE]: Reasoning Path3 STEP------------------------------
[10:10:27 INFO]
------------------------------[Reasoning Path4 STEP]------------------------------
[10:10:34 INFO]
------------------------------[DONE]: Reasoning Path4 STEP------------------------------
[10:10:34 INFO]
------------------------------[Reasoning Path5 STEP]------------------------------
[10:10:39 INFO]
------------------------------[DONE]: Reasoning Path5 STEP------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path0 FINALIZING]------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path1 FINALIZING]------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path2 FINALIZING]------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path3 FINALIZING]------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path4 FINALIZING]------------------------------
[10:10:39 INFO]
------------------------------[Reasoning Path5 FINALIZING]------------------------------
[10:10:39 INFO]
Reasoning Path: 0
Agent Sequence: TerminatorAgent
[10:10:39 INFO]
Reasoning Path: 1
Agent Sequence: CriticAgent_gpt4o->PlannerAgent_gpt4o
[10:10:39 INFO]
Reasoning Path: 2
Agent Sequence: ConcluderAgent_gpt4o->PythonAgent_gpt4o
[10:10:39 INFO]
Reasoning Path: 3
Agent Sequence: CriticAgent_gpt4o->SummarizerAgent_gpt4o
[10:10:39 INFO]
Reasoning Path: 4
Agent Sequence: CriticAgent_gpt4o->Modifier_gpt4o
[10:10:39 INFO]
Reasoning Path: 5
Agent Sequence: ConcluderAgent_gpt4o->CriticAgent_gpt4o
[10:10:39 INFO]
[Aggregating] You have several answer candidates. They are:
[]
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:41 INFO]
[Aggregation Answer] Sure! Please provide the answer candidates so I can evaluate them for you.
[10:10:41 INFO]
[Aggregated Answer From Path 0]: Sure! Please provide the answer candidates so I can evaluate them for you.
[10:10:41 INFO]
[Aggregating] You have several answer candidates. They are:
[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\n']
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:44 INFO]
[Aggregation Answer] A
[10:10:44 INFO]
[Aggregated Answer From Path 1]: A
[10:10:44 INFO]
[Aggregating] You have several answer candidates. They are:
[' **I: Unsafe practices, Distress, Fear, Serious**\n', 'C\n']
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:48 INFO]
[Aggregation Answer] C
[10:10:48 INFO]
[Aggregated Answer From Path 2]: C
[10:10:48 INFO]
[Aggregating] You have several answer candidates. They are:
[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\n', ' The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.\n']
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:49 INFO]
[Aggregation Answer] B
[10:10:49 INFO]
[Aggregated Answer From Path 3]: B
[10:10:49 INFO]
[Aggregating] You have several answer candidates. They are:
[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\n', '** \nThe correct answer is: \n**I: Unsafe practices, Distress, Fear, Serious.** \n\nThis answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.\n']
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:50 INFO]
[Aggregation Answer] B
[10:10:51 INFO]
[Aggregated Answer From Path 4]: B
[10:10:51 INFO]
[Aggregating] You have several answer candidates. They are:
[' **I: Unsafe practices, Distress, Fear, Serious**\n', ' The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.\n']
Choose the most common answer among the candidates.
If responses conflict, prioritize answers that provide clear reasoning or appear later
Format the answer as a single letter.
Do not include any other units or punctuation.
Return your answer with only the single letter.
[10:10:53 INFO]
[Aggregation Answer] I
[10:10:53 INFO]
[Aggregated Answer From Path 5]: I
[10:10:57 INFO]
[Majority Vote] Answers: ['Sure! Please provide the answer candidates so I can evaluate them for you.', 'A', 'C', 'B', 'B', 'I']
[10:10:57 INFO]
[Majority Vote] Most Common: ['B']
[10:10:57 INFO]
[Final Answer]: B

View File

@ -0,0 +1,259 @@
[10:09:53 INFO]
[Model Query] Querying gpt4o
[10:09:53 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in critiquing. Your task is to critique the reasoning and solutions provided by others., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [].* Consider the following when critiquing the reasoning: 1. Plausibility:'}]
[10:09:56 INFO]
[Model Query] Token Usage:
Completion Tokens: 150
Prompt Tokens: 295
Total Tokens: 445
[10:09:56 INFO]
[Model Query] gpt4o response: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.
However, I will critique the absence or missing reasoning content:
REASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.
FINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.
[10:09:56 INFO]
[Model Query] Querying gpt4o
[10:09:56 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in concluding. Your task is to provide final conclusions based on the reasoning process., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You need to conclude the task and provide a final answer. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [].* You need to conclude the task and provide a final answer: '}]
[10:09:59 INFO]
[Model Query] Token Usage:
Completion Tokens: 156
Prompt Tokens: 294
Total Tokens: 450
[10:09:59 INFO]
[Model Query] gpt4o response: Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense.
Analyzing the provided options while aligning with these stated guidelines:
1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense.
2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.
REASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.
FINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**
[10:09:59 INFO]
[Model Query] Querying gpt4o
[10:09:59 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in planning. Your task is to create detailed plans for achieving specific goals., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'Decompose the question and plan the next steps to address the question. You should complete your planning using the following template: REASONING RESULT: [YOUR REASONING RESULT]. *Your previous reasoning was: [\'Successful Action: critique\\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\\n\\nHowever, I will critique the absence or missing reasoning content: \\n\\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\\n\\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\'].* Your planning should include: '}]
[10:10:06 INFO]
[Model Query] Token Usage:
Completion Tokens: 332
Prompt Tokens: 434
Total Tokens: 766
[10:10:06 INFO]
[Model Query] gpt4o response: REASONING RESULT: The question aims to test understanding of ethical advertising practices as guided by regulatory bodies. The multiple choices provide various options related to recommended behaviors and prohibited actions in advertising. The objective of this task is to identify which answer aligns most closely with widely accepted principles of advertising ethics and regulations. To achieve this, it is essential to break down and evaluate the key components of the question and choices based on the logical alignment of terms like "safe practices," "distress," "jealousy," "fear," "offense," and their corresponding severity.
**PLANNING STEPS:**
1. **Understand Regulatory Advertising Standards:**
- Analyze the general expectations from advertising regulatory bodies, such as avoiding harm, promoting safety, and refraining from causing unnecessary offense or emotional responses.
2. **Break Down the Key Terms in the Question:**
- Identify the implications of each term in the question: "safe practices," "distress," "jealousy," "fear," and "offense."
- Assess how these terms contribute to ethical advertising.
3. **Evaluate Each Answer Choice:**
- Examine each choice systematically and determine whether it aligns with the principles of advertising standards.
- Eliminate options that deviate from these principles.
4. **Identify the Correct Answer:**
- Select the choice that best fits the principles after careful analysis.
5. **Finalize Conclusion:**
- Present the reasoning for the selected answer clearly and concisely.
By following these steps, it will be possible to determine the correct answer with a logical and well-supported explanation.
[10:10:06 INFO]
[Model Query] Querying gpt4o
[10:10:06 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in Python programming. Your task is to run Python code and provide the results., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You have chosen to write and run Python code. Please write generic Python code in the parameter to solve this type of problems using only standard python libraries. Make sure you use the \'print\' function for all output when relevant. Return in json format. Example: {"action": "run_python", "parameter": "print(\'Hello, World!\')"}'}]
[10:10:08 INFO]
[Model Query: ChatCompletion] query failed: Error code: 429 - {'error': {'message': '当前分组上游负载已饱和,请稍后再试 (request id: 20250909101008489440162dhklTW8w)', 'type': 'openai_error', 'param': '', 'code': 'model_not_found'}}
[10:10:13 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in Python programming. Your task is to run Python code and provide the results., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You have chosen to write and run Python code. Please write generic Python code in the parameter to solve this type of problems using only standard python libraries. Make sure you use the \'print\' function for all output when relevant. Return in json format. Example: {"action": "run_python", "parameter": "print(\'Hello, World!\')"}'}]
[10:10:16 INFO]
[Model Query] Token Usage:
Completion Tokens: 169
Prompt Tokens: 168
Total Tokens: 337
[10:10:16 INFO]
[Model Query] gpt4o response: {"action": "run_python", "parameter": "def handle_advertising_regulation():\n # Guidelines for typical advertising regulation\n advert_rules = {\n 'encourage': 'illegal activities',\n 'cause_unnecessary': ['fear', 'offense'],\n 'cause': 'harm' \n }\n\n # Printing out the rules\n print(\"Typical advertising regulatory bodies suggest, for example, that adverts must not:\")\n print(\"- Encourage:\", advert_rules['encourage'])\n print(\"- Cause unnecessary:\", ', '.join(advert_rules['cause_unnecessary']))\n print(\"- Must not cause:\", advert_rules['cause'])\n\n# Run the function\nhandle_advertising_regulation()"}
[10:10:16 INFO]
[Model Query] Querying gpt-3.5
[10:10:16 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': 'Add \'print\' statements to print all necessary information.\nCode:\ndef handle_advertising_regulation():\n # Guidelines for typical advertising regulation\n advert_rules = {\n \'encourage\': \'illegal activities\',\n \'cause_unnecessary\': [\'fear\', \'offense\'],\n \'cause\': \'harm\' \n }\n\n # Printing out the rules\n print("Typical advertising regulatory bodies suggest, for example, that adverts must not:")\n print("- Encourage:", advert_rules[\'encourage\'])\n print("- Cause unnecessary:", \', \'.join(advert_rules[\'cause_unnecessary\']))\n print("- Must not cause:", advert_rules[\'cause\'])\n\n# Run the function\nhandle_advertising_regulation()'}]
[10:10:20 INFO]
[Model Query] Token Usage:
Completion Tokens: 295
Prompt Tokens: 159
Total Tokens: 454
[10:10:20 INFO]
[Model Query] gpt-3.5 response: You can add print statements to display all necessary information during the function execution, such as printing the dictionary `advert_rules` and other key points. Here's an enhanced version of your code:
```python
def handle_advertising_regulation():
# Guidelines for typical advertising regulation
advert_rules = {
'encourage': 'illegal activities',
'cause_unnecessary': ['fear', 'offense'],
'cause': 'harm'
}
# Printing out the general context
print("This function demonstrates typical advertising regulation guidelines.\n")
# Printing out the full dictionary
print("Advertising regulation guidelines:")
print(advert_rules)
# Printing out the rules with clear explanations
print("\nTypical advertising regulatory bodies suggest, for example, that adverts must not:")
print("- Encourage:", advert_rules['encourage'])
print("- Cause unnecessary:", ', '.join(advert_rules['cause_unnecessary']))
print("- Must not cause:", advert_rules['cause'])
# Run the function
handle_advertising_regulation()
```
### Explanation:
1. I added a print statement at the start to clarify what the function demonstrates.
2. I printed the full `advert_rules` dictionary to show the structure before breaking it down.
3. The rest of the print statements remain to display each specific guideline.
This will help visualize not only the specific rules but also the entire dictionary of regulations in the output.
[10:10:21 INFO]
[Model Query] Querying gpt4o
[10:10:21 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in Python programming. Your task is to run Python code and provide the results., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ '}, {'role': 'user', 'content': 'You have chosen to write and run Python code. Please write generic Python code in the parameter to solve this type of problems using only standard python libraries. Make sure you use the \'print\' function for all output when relevant. Return in json format. Example: {"action": "run_python", "parameter": "print(\'Hello, World!\')"}'}, {'role': 'assistant', 'content': '{\'action\': \'run_python\', \'parameter\': \'def handle_advertising_regulation():\\n # Guidelines for typical advertising regulation\\n advert_rules = {\\n \\\'encourage\\\': \\\'illegal activities\\\',\\n \\\'cause_unnecessary\\\': [\\\'fear\\\', \\\'offense\\\'],\\n \\\'cause\\\': \\\'harm\\\' \\n }\\n\\n # Printing out the rules\\n print("Typical advertising regulatory bodies suggest, for example, that adverts must not:")\\n print("- Encourage:", advert_rules[\\\'encourage\\\'])\\n print("- Cause unnecessary:", \\\', \\\'.join(advert_rules[\\\'cause_unnecessary\\\']))\\n print("- Must not cause:", advert_rules[\\\'cause\\\'])\\n\\n# Run the function\\nhandle_advertising_regulation()\'}'}, {'role': 'user', 'content': "You have get results from run_python: This function demonstrates typical advertising regulation guidelines.\n\nAdvertising regulation guidelines:\n{'encourage': 'illegal activities', 'cause_unnecessary': ['fear', 'offense'], 'cause': 'harm'}\n\nTypical advertising regulatory bodies suggest, for example, that adverts must not:\n- Encourage: illegal activities\n- Cause unnecessary: fear, offense\n- Must not cause: harm\nChoose the most common one and then format it.\nFormat it using the fewest words possible, it should be a single letter, but do not alter the original answer.\nDo not include any other units or punctuation. \nAnd finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]\nReturn your answer with only the single letter."}]
[10:10:22 INFO]
[Model Query] Token Usage:
Completion Tokens: 5
Prompt Tokens: 500
Total Tokens: 505
[10:10:22 INFO]
[Model Query] gpt4o response: FINAL ANSWER: C
[10:10:22 INFO]
[Model Query] Querying gpt4o
[10:10:22 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in summarizing. Your task is to summarize the information and provide concise conclusions., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You need to summarize those previous results and provide some intermediate conclusions. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [\'Successful Action: critique\\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\\n\\nHowever, I will critique the absence or missing reasoning content: \\n\\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\\n\\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\'].* You need to summarize the reasoning paths and provide a final conclusion:'}]
[10:10:27 INFO]
[Model Query] Token Usage:
Completion Tokens: 164
Prompt Tokens: 458
Total Tokens: 622
[10:10:27 INFO]
[Model Query] gpt4o response: SUMMARY OF REASONING PATH:
The previous reasoning critiques the absence of prior analysis or logical conclusions related to the multiple-choice question about advertising ethics and regulations. It highlights that no reasoning was provided to understand or apply principles of advertising ethics, making it impossible to verify, trace, or evaluate the answer. This lack of explanation results in an incomplete reasoning process where plausibility and support for the chosen answer are absent.
REASONING RESULT: The reasoning process lacked substantive content to connect advertising ethical guidelines with the correct multiple-choice answer. This incomplete pathway does not adequately justify the derived conclusion, leaving room for doubt about its accuracy and logical flow.
FINAL ANSWER: The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.
[10:10:27 INFO]
[Model Query] Querying gpt4o
[10:10:27 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in error correction and modification. Your task is to identify errors in previous reasoning, explain why they are incorrect, and provide accurate corrections., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You need to identify and correct errors in the previous reasoning. You should finish your reasoning with the following template: REASONING RESULT: [Clearly state: 1. Which part of the previous reasoning was incorrect 2. Why it was incorrect 3. What is the correct understanding]. Finish your answer with the following template: FINAL ANSWER: [Provide the complete corrected answer]. *Your previous reasoning was: [\'Successful Action: critique\\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\\n\\nHowever, I will critique the absence or missing reasoning content: \\n\\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\\n\\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\'].* Please explicitly point out and correct any errors, misconceptions, or inaccuracies in the previous reasoning.'}]
[10:10:34 INFO]
[Model Query] Token Usage:
Completion Tokens: 307
Prompt Tokens: 500
Total Tokens: 807
[10:10:34 INFO]
[Model Query] gpt4o response: **REASONING RESULT:**
1. **Which part of the previous reasoning was incorrect**: The previous reasoning pointed out the absence of logical analysis and reasoning for the multiple-choice question, acknowledging that no explanation was provided. While this was accurate, the critique itself failed to evaluate the central question and hypothesize or provide an answer based on the principle of advertising ethics. The previous reasoning did not attempt to outline potential answers or explain the ethical standards relevant to the question context.
2. **Why it was incorrect**: Although the absence of reasoning was correctly identified, the response missed an opportunity to analyze the question itself and assess it against foundational advertising ethical principles. This omission makes the critique incomplete because the intention of identifying gaps is not only to recognize them but also to address them with accurate substitutions.
3. **What is the correct understanding**: The correct approach would involve interpreting the question based on advertising regulatory principles. Most advertising codes of conduct, such as those outlined by organizations like the Advertising Standards Authority (ASA), emphasize that adverts must not encourage unsafe practices, cause unnecessary distress, fear, or exploitation of emotions such as jealousy. They should avoid causing serious or widespread offense. The correct answer to the question reflects a synthesis of these ethical guidelines.
**FINAL ANSWER:**
The correct answer is:
**I: Unsafe practices, Distress, Fear, Serious.**
This answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.
[10:10:34 INFO]
[Model Query] Querying gpt4o
[10:10:34 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an expert in critiquing. Your task is to critique the reasoning and solutions provided by others., and You work as a helpful AI assistant. \nI will ask you a question. Answer this question using your coding and language skills.\nNow your question is: The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious\nPreviously, you collected the some information about this question from some actions: []'}, {'role': 'user', 'content': 'You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [].* Consider the following when critiquing the reasoning: 1. Plausibility:'}, {'role': 'assistant', 'content': '(\'Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\\n\\nHowever, I will critique the absence or missing reasoning content: \\n\\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\\n\\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\', 445)'}, {'role': 'user', 'content': "You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: ['Successful Action: conclude\\nResult: Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense. \\n\\nAnalyzing the provided options while aligning with these stated guidelines:\\n1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense. \\n2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.\\n\\nREASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.\\n\\nFINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**'].* Consider the following when critiquing the reasoning: 1. Plausibility:"}]
[10:10:39 INFO]
[Model Query] Token Usage:
Completion Tokens: 180
Prompt Tokens: 699
Total Tokens: 879
[10:10:39 INFO]
[Model Query] gpt4o response: REASONING RESULT: The previous reasoning effectively addresses the given problem. It aligns well with standard advertising principles, emphasizing ethical concerns such as avoiding unsafe practices, unnecessary distress, fear, jealousy, and serious offense. The reasoning evaluates each aspect of the regulatory guidelines and validates the option based on these principles. The conclusion, selecting **Option I** ("Unsafe practices, Distress, Fear, Serious"), is plausible and consistent with the ethical guidelines for advertising practices. The reasoning demonstrates a logical flow, examining the requirements and directly relating them to the selected option.
However, the reasoning could be improved by briefly comparing other options and explicitly eliminating them by showing why they do not meet the outlined principles. This would make the argument more robust.
FINAL ANSWER: The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.
[10:10:39 INFO]
[Model Query] Querying gpt-3.5
[10:10:39 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': 'You have several answer candidates. They are:\n[]\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter.'}]
[10:10:41 INFO]
[Model Query] Token Usage:
Completion Tokens: 15
Prompt Tokens: 74
Total Tokens: 89
[10:10:41 INFO]
[Model Query] gpt-3.5 response: Sure! Please provide the answer candidates so I can evaluate them for you.
[10:10:41 INFO]
[Model Query] Querying gpt4o
[10:10:41 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': "You have several answer candidates. They are:\n[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\\n']\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter."}]
[10:10:44 INFO]
[Model Query] Token Usage:
Completion Tokens: 1
Prompt Tokens: 92
Total Tokens: 93
[10:10:44 INFO]
[Model Query] gpt4o response: A
[10:10:44 INFO]
[Model Query] Querying gpt4o
[10:10:44 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': "You have several answer candidates. They are:\n[' **I: Unsafe practices, Distress, Fear, Serious**\\n', 'C\\n']\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter."}]
[10:10:48 INFO]
[Model Query] Token Usage:
Completion Tokens: 1
Prompt Tokens: 83
Total Tokens: 84
[10:10:48 INFO]
[Model Query] gpt4o response: C
[10:10:48 INFO]
[Model Query] Querying gpt4o
[10:10:48 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': "You have several answer candidates. They are:\n[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\\n', ' The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.\\n']\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter."}]
[10:10:49 INFO]
[Model Query] Token Usage:
Completion Tokens: 1
Prompt Tokens: 126
Total Tokens: 127
[10:10:49 INFO]
[Model Query] gpt4o response: B
[10:10:49 INFO]
[Model Query] Querying gpt4o
[10:10:49 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': "You have several answer candidates. They are:\n[' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.\\n', '** \\nThe correct answer is: \\n**I: Unsafe practices, Distress, Fear, Serious.** \\n\\nThis answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.\\n']\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter."}]
[10:10:50 INFO]
[Model Query] Token Usage:
Completion Tokens: 1
Prompt Tokens: 157
Total Tokens: 158
[10:10:50 INFO]
[Model Query] gpt4o response: B
[10:10:51 INFO]
[Model Query] Querying gpt4o
[10:10:51 INFO]
[Model Query] [{'role': 'system', 'content': 'You are an assistant'}, {'role': 'user', 'content': "You have several answer candidates. They are:\n[' **I: Unsafe practices, Distress, Fear, Serious**\\n', ' The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.\\n']\nChoose the most common answer among the candidates.\nIf responses conflict, prioritize answers that provide clear reasoning or appear later\nFormat the answer as a single letter.\nDo not include any other units or punctuation.\nReturn your answer with only the single letter."}]
[10:10:53 INFO]
[Model Query] Token Usage:
Completion Tokens: 1
Prompt Tokens: 112
Total Tokens: 113
[10:10:53 INFO]
[Model Query] gpt4o response: I

View File

@ -0,0 +1,12 @@
[10:09:53 INFO]
------------------------------[Reasoning Path0 Start]------------------------------
[10:09:53 INFO]
Reasoning Path0:ReasoningState.INITIALIZED
[10:09:53 INFO]
Start agent: TerminatorAgent
[10:09:53 INFO]
Previous Agent sequence: TerminatorAgent
[10:09:53 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea56b4ff90>
[10:09:53 INFO]
Updated global_info: {'path_id': 0, 'logger': <Logger path0_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea56b4ff90>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,108 @@
[10:09:53 INFO]
------------------------------[Reasoning Path1 Start]------------------------------
[10:09:53 INFO]
Reasoning Path1:ReasoningState.INITIALIZED
[10:09:53 INFO]
Start agent: CriticAgent_gpt4o
[10:09:53 INFO]
Previous Agent sequence: CriticAgent_gpt4o
[10:09:53 INFO]
[Action] {'action': 'critique', 'parameter': ''}
[10:09:53 INFO]
[System Prompt] You are an expert in critiquing. Your task is to critique the reasoning and solutions provided by others., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [].* Consider the following when critiquing the reasoning: 1. Plausibility:
[10:09:56 INFO]
[Reasoning]: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.
However, I will critique the absence or missing reasoning content:
REASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.
FINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.
[10:09:56 INFO]
[Final Answer]: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.
[10:09:56 INFO]
[Reasoning Path]: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.
However, I will critique the absence or missing reasoning content:
REASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.
FINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.
[10:09:56 INFO]
----------------------------------------
[10:09:56 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea57d0d910>
[10:09:56 INFO]
Updated global_info: {'path_id': 1, 'logger': <Logger path1_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea57d0d910>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}
[10:09:59 INFO]
[Action] {'action': 'planning', 'parameter': ''}
[10:09:59 INFO]
[System Prompt] You are an expert in planning. Your task is to create detailed plans for achieving specific goals., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] Decompose the question and plan the next steps to address the question. You should complete your planning using the following template: REASONING RESULT: [YOUR REASONING RESULT]. *Your previous reasoning was: ['Successful Action: critique\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.'].* Your planning should include:
[10:10:06 INFO]
[Reasoning]: REASONING RESULT: The question aims to test understanding of ethical advertising practices as guided by regulatory bodies. The multiple choices provide various options related to recommended behaviors and prohibited actions in advertising. The objective of this task is to identify which answer aligns most closely with widely accepted principles of advertising ethics and regulations. To achieve this, it is essential to break down and evaluate the key components of the question and choices based on the logical alignment of terms like "safe practices," "distress," "jealousy," "fear," "offense," and their corresponding severity.
**PLANNING STEPS:**
1. **Understand Regulatory Advertising Standards:**
- Analyze the general expectations from advertising regulatory bodies, such as avoiding harm, promoting safety, and refraining from causing unnecessary offense or emotional responses.
2. **Break Down the Key Terms in the Question:**
- Identify the implications of each term in the question: "safe practices," "distress," "jealousy," "fear," and "offense."
- Assess how these terms contribute to ethical advertising.
3. **Evaluate Each Answer Choice:**
- Examine each choice systematically and determine whether it aligns with the principles of advertising standards.
- Eliminate options that deviate from these principles.
4. **Identify the Correct Answer:**
- Select the choice that best fits the principles after careful analysis.
5. **Finalize Conclusion:**
- Present the reasoning for the selected answer clearly and concisely.
By following these steps, it will be possible to determine the correct answer with a logical and well-supported explanation.
[10:10:06 INFO]
[Reasoning Path]: REASONING RESULT: The question aims to test understanding of ethical advertising practices as guided by regulatory bodies. The multiple choices provide various options related to recommended behaviors and prohibited actions in advertising. The objective of this task is to identify which answer aligns most closely with widely accepted principles of advertising ethics and regulations. To achieve this, it is essential to break down and evaluate the key components of the question and choices based on the logical alignment of terms like "safe practices," "distress," "jealousy," "fear," "offense," and their corresponding severity.
**PLANNING STEPS:**
1. **Understand Regulatory Advertising Standards:**
- Analyze the general expectations from advertising regulatory bodies, such as avoiding harm, promoting safety, and refraining from causing unnecessary offense or emotional responses.
2. **Break Down the Key Terms in the Question:**
- Identify the implications of each term in the question: "safe practices," "distress," "jealousy," "fear," and "offense."
- Assess how these terms contribute to ethical advertising.
3. **Evaluate Each Answer Choice:**
- Examine each choice systematically and determine whether it aligns with the principles of advertising standards.
- Eliminate options that deviate from these principles.
4. **Identify the Correct Answer:**
- Select the choice that best fits the principles after careful analysis.
5. **Finalize Conclusion:**
- Present the reasoning for the selected answer clearly and concisely.
By following these steps, it will be possible to determine the correct answer with a logical and well-supported explanation.
[10:10:06 INFO]
----------------------------------------
[10:10:06 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea57d0d910>
[10:10:06 INFO]
Updated global_info: {'path_id': 1, 'logger': <Logger path1_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea57d0d910>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,103 @@
[10:09:53 INFO]
------------------------------[Reasoning Path2 Start]------------------------------
[10:09:53 INFO]
Reasoning Path2:ReasoningState.INITIALIZED
[10:09:53 INFO]
Start agent: ConcluderAgent_gpt4o
[10:09:53 INFO]
Previous Agent sequence: ConcluderAgent_gpt4o
[10:09:56 INFO]
[Action] {'action': 'conclude', 'parameter': ''}
[10:09:56 INFO]
[System Prompt] You are an expert in concluding. Your task is to provide final conclusions based on the reasoning process., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] You need to conclude the task and provide a final answer. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: [].* You need to conclude the task and provide a final answer:
[10:09:59 INFO]
[Reasoning]: Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense.
Analyzing the provided options while aligning with these stated guidelines:
1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense.
2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.
REASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.
FINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**
[10:09:59 INFO]
[Final Answer]: **I: Unsafe practices, Distress, Fear, Serious**
[10:09:59 INFO]
[Reasoning Path]: Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense.
Analyzing the provided options while aligning with these stated guidelines:
1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense.
2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.
REASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.
FINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**
[10:09:59 INFO]
----------------------------------------
[10:09:59 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea58599850>
[10:09:59 INFO]
Updated global_info: {'path_id': 2, 'logger': <Logger path2_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea58599850>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' **I: Unsafe practices, Distress, Fear, Serious**'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}
[10:10:16 INFO]
[Action] {'action': 'run_python', 'parameter': 'def handle_advertising_regulation():\n # Guidelines for typical advertising regulation\n advert_rules = {\n \'encourage\': \'illegal activities\',\n \'cause_unnecessary\': [\'fear\', \'offense\'],\n \'cause\': \'harm\' \n }\n\n # Printing out the rules\n print("Typical advertising regulatory bodies suggest, for example, that adverts must not:")\n print("- Encourage:", advert_rules[\'encourage\'])\n print("- Cause unnecessary:", \', \'.join(advert_rules[\'cause_unnecessary\']))\n print("- Must not cause:", advert_rules[\'cause\'])\n\n# Run the function\nhandle_advertising_regulation()'}
[10:10:16 INFO]
[Action Execution] run_python(def handle_advertising_regulation():
# Guidelines for typical advertising regulation
advert_rules = {
'encourage': 'illegal activities',
'cause_unnecessary': ['fear', 'offense'],
'cause': 'harm'
}
# Printing out the rules
print("Typical advertising regulatory bodies suggest, for example, that adverts must not:")
print("- Encourage:", advert_rules['encourage'])
print("- Cause unnecessary:", ', '.join(advert_rules['cause_unnecessary']))
print("- Must not cause:", advert_rules['cause'])
# Run the function
handle_advertising_regulation())
[10:10:21 INFO]
[Run Python] Success: This function demonstrates typical advertising regulation guidelines.
Advertising regulation guidelines:
{'encourage': 'illegal activities', 'cause_unnecessary': ['fear', 'offense'], 'cause': 'harm'}
Typical advertising regulatory bodies suggest, for example, that adverts must not:
- Encourage: illegal activities
- Cause unnecessary: fear, offense
- Must not cause: harm
[10:10:21 INFO]
[System Prompt] You are an expert in Python programming. Your task is to run Python code and provide the results., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] Choose the most common one and then format it.
Format it using the fewest words possible, it should be a single letter, but do not alter the original answer.
Do not include any other units or punctuation.
And finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Return your answer with only the single letter.
[10:10:22 INFO]
[Format to Final Answer]: FINAL ANSWER: C
[10:10:22 INFO]
[Final Answer]: C
[10:10:22 INFO]
----------------------------------------
[10:10:22 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea58599850>
[10:10:22 INFO]
Updated global_info: {'path_id': 2, 'logger': <Logger path2_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea58599850>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' **I: Unsafe practices, Distress, Fear, Serious**', 'C'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,44 @@
[10:09:59 INFO]
------------------------------[Reasoning Path3 Start]------------------------------
[10:09:59 INFO]
Reasoning Path3:ReasoningState.ANSWERING
[10:09:59 INFO]
Start agent: SummarizerAgent_gpt4o
[10:09:59 INFO]
Previous Agent sequence: CriticAgent_gpt4o
[10:10:22 INFO]
[Action] {'action': 'summarize', 'parameter': ''}
[10:10:22 INFO]
[System Prompt] You are an expert in summarizing. Your task is to summarize the information and provide concise conclusions., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] You need to summarize those previous results and provide some intermediate conclusions. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: ['Successful Action: critique\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.'].* You need to summarize the reasoning paths and provide a final conclusion:
[10:10:27 INFO]
[Reasoning]: SUMMARY OF REASONING PATH:
The previous reasoning critiques the absence of prior analysis or logical conclusions related to the multiple-choice question about advertising ethics and regulations. It highlights that no reasoning was provided to understand or apply principles of advertising ethics, making it impossible to verify, trace, or evaluate the answer. This lack of explanation results in an incomplete reasoning process where plausibility and support for the chosen answer are absent.
REASONING RESULT: The reasoning process lacked substantive content to connect advertising ethical guidelines with the correct multiple-choice answer. This incomplete pathway does not adequately justify the derived conclusion, leaving room for doubt about its accuracy and logical flow.
FINAL ANSWER: The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.
[10:10:27 INFO]
[Final Answer]: The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.
[10:10:27 INFO]
[Reasoning Path]: SUMMARY OF REASONING PATH:
The previous reasoning critiques the absence of prior analysis or logical conclusions related to the multiple-choice question about advertising ethics and regulations. It highlights that no reasoning was provided to understand or apply principles of advertising ethics, making it impossible to verify, trace, or evaluate the answer. This lack of explanation results in an incomplete reasoning process where plausibility and support for the chosen answer are absent.
REASONING RESULT: The reasoning process lacked substantive content to connect advertising ethical guidelines with the correct multiple-choice answer. This incomplete pathway does not adequately justify the derived conclusion, leaving room for doubt about its accuracy and logical flow.
FINAL ANSWER: The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.
[10:10:27 INFO]
----------------------------------------
[10:10:27 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aeab0dc4dd0>
[10:10:27 INFO]
Updated global_info: {'path_id': 3, 'logger': <Logger path3_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aeab0dc4dd0>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.', ' The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,60 @@
[10:09:59 INFO]
------------------------------[Reasoning Path4 Start]------------------------------
[10:09:59 INFO]
Reasoning Path4:ReasoningState.ANSWERING
[10:09:59 INFO]
Start agent: Modifier_gpt4o
[10:09:59 INFO]
Previous Agent sequence: CriticAgent_gpt4o
[10:10:27 INFO]
[Action] {'action': 'modify', 'parameter': ''}
[10:10:27 INFO]
[System Prompt] You are an expert in error correction and modification. Your task is to identify errors in previous reasoning, explain why they are incorrect, and provide accurate corrections., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] You need to identify and correct errors in the previous reasoning. You should finish your reasoning with the following template: REASONING RESULT: [Clearly state: 1. Which part of the previous reasoning was incorrect 2. Why it was incorrect 3. What is the correct understanding]. Finish your answer with the following template: FINAL ANSWER: [Provide the complete corrected answer]. *Your previous reasoning was: ['Successful Action: critique\nResult: Since no reasoning was provided in this context with "Your previous reasoning was: []", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.'].* Please explicitly point out and correct any errors, misconceptions, or inaccuracies in the previous reasoning.
[10:10:34 INFO]
[Reasoning]: **REASONING RESULT:**
1. **Which part of the previous reasoning was incorrect**: The previous reasoning pointed out the absence of logical analysis and reasoning for the multiple-choice question, acknowledging that no explanation was provided. While this was accurate, the critique itself failed to evaluate the central question and hypothesize or provide an answer based on the principle of advertising ethics. The previous reasoning did not attempt to outline potential answers or explain the ethical standards relevant to the question context.
2. **Why it was incorrect**: Although the absence of reasoning was correctly identified, the response missed an opportunity to analyze the question itself and assess it against foundational advertising ethical principles. This omission makes the critique incomplete because the intention of identifying gaps is not only to recognize them but also to address them with accurate substitutions.
3. **What is the correct understanding**: The correct approach would involve interpreting the question based on advertising regulatory principles. Most advertising codes of conduct, such as those outlined by organizations like the Advertising Standards Authority (ASA), emphasize that adverts must not encourage unsafe practices, cause unnecessary distress, fear, or exploitation of emotions such as jealousy. They should avoid causing serious or widespread offense. The correct answer to the question reflects a synthesis of these ethical guidelines.
**FINAL ANSWER:**
The correct answer is:
**I: Unsafe practices, Distress, Fear, Serious.**
This answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.
[10:10:34 INFO]
[Final Answer]: **
The correct answer is:
**I: Unsafe practices, Distress, Fear, Serious.**
This answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.
[10:10:34 INFO]
[Reasoning Path]: **REASONING RESULT:**
1. **Which part of the previous reasoning was incorrect**: The previous reasoning pointed out the absence of logical analysis and reasoning for the multiple-choice question, acknowledging that no explanation was provided. While this was accurate, the critique itself failed to evaluate the central question and hypothesize or provide an answer based on the principle of advertising ethics. The previous reasoning did not attempt to outline potential answers or explain the ethical standards relevant to the question context.
2. **Why it was incorrect**: Although the absence of reasoning was correctly identified, the response missed an opportunity to analyze the question itself and assess it against foundational advertising ethical principles. This omission makes the critique incomplete because the intention of identifying gaps is not only to recognize them but also to address them with accurate substitutions.
3. **What is the correct understanding**: The correct approach would involve interpreting the question based on advertising regulatory principles. Most advertising codes of conduct, such as those outlined by organizations like the Advertising Standards Authority (ASA), emphasize that adverts must not encourage unsafe practices, cause unnecessary distress, fear, or exploitation of emotions such as jealousy. They should avoid causing serious or widespread offense. The correct answer to the question reflects a synthesis of these ethical guidelines.
**FINAL ANSWER:**
The correct answer is:
**I: Unsafe practices, Distress, Fear, Serious.**
This answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.
[10:10:34 INFO]
----------------------------------------
[10:10:34 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea56b7fad0>
[10:10:34 INFO]
Updated global_info: {'path_id': 4, 'logger': <Logger path4_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea56b7fad0>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.', '** \nThe correct answer is: \n**I: Unsafe practices, Distress, Fear, Serious.** \n\nThis answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,40 @@
[10:09:59 INFO]
------------------------------[Reasoning Path5 Start]------------------------------
[10:09:59 INFO]
Reasoning Path5:ReasoningState.ANSWERING
[10:09:59 INFO]
Start agent: CriticAgent_gpt4o
[10:09:59 INFO]
Previous Agent sequence: ConcluderAgent_gpt4o
[10:10:34 INFO]
[Action] {'action': 'critique', 'parameter': ''}
[10:10:34 INFO]
[System Prompt] You are an expert in critiquing. Your task is to critique the reasoning and solutions provided by others., and You work as a helpful AI assistant.
I will ask you a question. Answer this question using your coding and language skills.
Now your question is: The following are multiple choice questions (with answers) about business.
Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
A: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious
Previously, you collected the some information about this question from some actions: []
[Query] You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: ['Successful Action: conclude\nResult: Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense. \n\nAnalyzing the provided options while aligning with these stated guidelines:\n1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense. \n2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.\n\nREASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.\n\nFINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**'].* Consider the following when critiquing the reasoning: 1. Plausibility:
[10:10:39 INFO]
[Reasoning]: REASONING RESULT: The previous reasoning effectively addresses the given problem. It aligns well with standard advertising principles, emphasizing ethical concerns such as avoiding unsafe practices, unnecessary distress, fear, jealousy, and serious offense. The reasoning evaluates each aspect of the regulatory guidelines and validates the option based on these principles. The conclusion, selecting **Option I** ("Unsafe practices, Distress, Fear, Serious"), is plausible and consistent with the ethical guidelines for advertising practices. The reasoning demonstrates a logical flow, examining the requirements and directly relating them to the selected option.
However, the reasoning could be improved by briefly comparing other options and explicitly eliminating them by showing why they do not meet the outlined principles. This would make the argument more robust.
FINAL ANSWER: The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.
[10:10:39 INFO]
[Final Answer]: The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.
[10:10:39 INFO]
[Reasoning Path]: REASONING RESULT: The previous reasoning effectively addresses the given problem. It aligns well with standard advertising principles, emphasizing ethical concerns such as avoiding unsafe practices, unnecessary distress, fear, jealousy, and serious offense. The reasoning evaluates each aspect of the regulatory guidelines and validates the option based on these principles. The conclusion, selecting **Option I** ("Unsafe practices, Distress, Fear, Serious"), is plausible and consistent with the ethical guidelines for advertising practices. The reasoning demonstrates a logical flow, examining the requirements and directly relating them to the selected option.
However, the reasoning could be improved by briefly comparing other options and explicitly eliminating them by showing why they do not meet the outlined principles. This would make the argument more robust.
FINAL ANSWER: The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.
[10:10:39 INFO]
----------------------------------------
[10:10:39 INFO]
Updated workflow: <agent.agent_info.workflow.Workflow object at 0x2aea5558ec50>
[10:10:39 INFO]
Updated global_info: {'path_id': 5, 'logger': <Logger path5_logger (INFO)>, 'workpath': 'logs/MMLU-Pro/2025-09-09-10-09-43', 'task': {'type': 'MMLU-Pro', 'Question': 'The following are multiple choice questions (with answers) about business.\nTypical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.\nA: Safe practices, Fear, Jealousy, Trivial B: Unsafe practices, Distress, Joy, Trivial C: Safe practices, Wants, Jealousy, Trivial D: Safe practices, Distress, Fear, Trivial E: Unsafe practices, Wants, Jealousy, Serious F: Safe practices, Distress, Jealousy, Serious G: Safe practices, Wants, Fear, Serious H: Unsafe practices, Wants, Fear, Trivial I: Unsafe practices, Distress, Fear, Serious', 'Answer': 'I', 'id': 70}, 'workflow': <agent.agent_info.workflow.Workflow object at 0x2aea5558ec50>, 'url': None, 'file_name': None, 'file_extension': None, 'answers': [' **I: Unsafe practices, Distress, Fear, Serious**', ' The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.'], 'code_path': '', 'env_exists': False, 'env_name': None, 'env': None, 'supervisor': None}

View File

@ -0,0 +1,17 @@
[
{
"agent": "TerminatorAgent",
"action": {
"action": "terminate",
"parameter": ""
},
"cost": 0,
"tokens": 0,
"model_size": 175,
"result": {
"step_data": null,
"answer": null
},
"success": "Success"
}
]

View File

@ -0,0 +1,32 @@
[
{
"agent": "CriticAgent_gpt4o",
"action": {
"action": "critique",
"parameter": ""
},
"cost": 178000,
"tokens": 445,
"model_size": 200,
"result": {
"step_data": "Since no reasoning was provided in this context with \"Your previous reasoning was: []\", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.",
"answer": " The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed."
},
"success": "Success"
},
{
"agent": "PlannerAgent_gpt4o",
"action": {
"action": "planning",
"parameter": ""
},
"cost": 306400,
"tokens": 766,
"model_size": 200,
"result": {
"step_data": "REASONING RESULT: The question aims to test understanding of ethical advertising practices as guided by regulatory bodies. The multiple choices provide various options related to recommended behaviors and prohibited actions in advertising. The objective of this task is to identify which answer aligns most closely with widely accepted principles of advertising ethics and regulations. To achieve this, it is essential to break down and evaluate the key components of the question and choices based on the logical alignment of terms like \"safe practices,\" \"distress,\" \"jealousy,\" \"fear,\" \"offense,\" and their corresponding severity.\n\n**PLANNING STEPS:**\n\n1. **Understand Regulatory Advertising Standards:**\n - Analyze the general expectations from advertising regulatory bodies, such as avoiding harm, promoting safety, and refraining from causing unnecessary offense or emotional responses.\n\n2. **Break Down the Key Terms in the Question:**\n - Identify the implications of each term in the question: \"safe practices,\" \"distress,\" \"jealousy,\" \"fear,\" and \"offense.\"\n - Assess how these terms contribute to ethical advertising.\n\n3. **Evaluate Each Answer Choice:**\n - Examine each choice systematically and determine whether it aligns with the principles of advertising standards.\n - Eliminate options that deviate from these principles.\n\n4. **Identify the Correct Answer:**\n - Select the choice that best fits the principles after careful analysis.\n\n5. **Finalize Conclusion:**\n - Present the reasoning for the selected answer clearly and concisely.\n\nBy following these steps, it will be possible to determine the correct answer with a logical and well-supported explanation.",
"answer": " The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed."
},
"success": "Success"
}
]

View File

@ -0,0 +1,32 @@
[
{
"agent": "ConcluderAgent_gpt4o",
"action": {
"action": "conclude",
"parameter": ""
},
"cost": 180000,
"tokens": 450,
"model_size": 200,
"result": {
"step_data": "Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense. \n\nAnalyzing the provided options while aligning with these stated guidelines:\n1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense. \n2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.\n\nREASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.\n\nFINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**",
"answer": " **I: Unsafe practices, Distress, Fear, Serious**"
},
"success": "Success"
},
{
"agent": "PythonAgent_gpt4o",
"action": {
"action": "run_python",
"parameter": "def handle_advertising_regulation():\n # Guidelines for typical advertising regulation\n advert_rules = {\n 'encourage': 'illegal activities',\n 'cause_unnecessary': ['fear', 'offense'],\n 'cause': 'harm' \n }\n\n # Printing out the rules\n print(\"Typical advertising regulatory bodies suggest, for example, that adverts must not:\")\n print(\"- Encourage:\", advert_rules['encourage'])\n print(\"- Cause unnecessary:\", ', '.join(advert_rules['cause_unnecessary']))\n print(\"- Must not cause:\", advert_rules['cause'])\n\n# Run the function\nhandle_advertising_regulation()"
},
"cost": 336800,
"tokens": 842,
"model_size": 200,
"result": {
"step_data": "This function demonstrates typical advertising regulation guidelines.\n\nAdvertising regulation guidelines:\n{'encourage': 'illegal activities', 'cause_unnecessary': ['fear', 'offense'], 'cause': 'harm'}\n\nTypical advertising regulatory bodies suggest, for example, that adverts must not:\n- Encourage: illegal activities\n- Cause unnecessary: fear, offense\n- Must not cause: harm\n",
"answer": "C"
},
"success": "Success"
}
]

View File

@ -0,0 +1,32 @@
[
{
"agent": "CriticAgent_gpt4o",
"action": {
"action": "critique",
"parameter": ""
},
"cost": 178000,
"tokens": 445,
"model_size": 200,
"result": {
"step_data": "Since no reasoning was provided in this context with \"Your previous reasoning was: []\", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.",
"answer": " The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed."
},
"success": "Success"
},
{
"agent": "SummarizerAgent_gpt4o",
"action": {
"action": "summarize",
"parameter": ""
},
"cost": 248800,
"tokens": 622,
"model_size": 200,
"result": {
"step_data": "SUMMARY OF REASONING PATH:\n\nThe previous reasoning critiques the absence of prior analysis or logical conclusions related to the multiple-choice question about advertising ethics and regulations. It highlights that no reasoning was provided to understand or apply principles of advertising ethics, making it impossible to verify, trace, or evaluate the answer. This lack of explanation results in an incomplete reasoning process where plausibility and support for the chosen answer are absent.\n\nREASONING RESULT: The reasoning process lacked substantive content to connect advertising ethical guidelines with the correct multiple-choice answer. This incomplete pathway does not adequately justify the derived conclusion, leaving room for doubt about its accuracy and logical flow.\n\nFINAL ANSWER: The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer.",
"answer": " The reasoning was incomplete due to a lack of prior explanation or analysis, leaving gaps in the logical and ethical framework necessary to determine the correct multiple-choice answer."
},
"success": "Success"
}
]

View File

@ -0,0 +1,32 @@
[
{
"agent": "CriticAgent_gpt4o",
"action": {
"action": "critique",
"parameter": ""
},
"cost": 178000,
"tokens": 445,
"model_size": 200,
"result": {
"step_data": "Since no reasoning was provided in this context with \"Your previous reasoning was: []\", there isnt any substantive content to critique regarding the given multiple-choice question.\n\nHowever, I will critique the absence or missing reasoning content: \n\nREASONING RESULT: The absence of prior reasoning means there was no provided logic or analysis related to the question. This represents a gap in thought since the question requires understanding and applying principles related to advertising ethics and regulations. Without reasoning, it is impossible to verify the plausibility, trace the logical progression, or evaluate the correctness of the reasoning leading to an answer.\n\nFINAL ANSWER: The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed.",
"answer": " The reasoning is incomplete because no explanation was given, leaving crucial aspects of plausibility, support, and logical flow unaddressed."
},
"success": "Success"
},
{
"agent": "Modifier_gpt4o",
"action": {
"action": "modify",
"parameter": ""
},
"cost": 322800,
"tokens": 807,
"model_size": 200,
"result": {
"step_data": "**REASONING RESULT:**\n\n1. **Which part of the previous reasoning was incorrect**: The previous reasoning pointed out the absence of logical analysis and reasoning for the multiple-choice question, acknowledging that no explanation was provided. While this was accurate, the critique itself failed to evaluate the central question and hypothesize or provide an answer based on the principle of advertising ethics. The previous reasoning did not attempt to outline potential answers or explain the ethical standards relevant to the question context.\n\n2. **Why it was incorrect**: Although the absence of reasoning was correctly identified, the response missed an opportunity to analyze the question itself and assess it against foundational advertising ethical principles. This omission makes the critique incomplete because the intention of identifying gaps is not only to recognize them but also to address them with accurate substitutions.\n\n3. **What is the correct understanding**: The correct approach would involve interpreting the question based on advertising regulatory principles. Most advertising codes of conduct, such as those outlined by organizations like the Advertising Standards Authority (ASA), emphasize that adverts must not encourage unsafe practices, cause unnecessary distress, fear, or exploitation of emotions such as jealousy. They should avoid causing serious or widespread offense. The correct answer to the question reflects a synthesis of these ethical guidelines.\n\n**FINAL ANSWER:** \nThe correct answer is: \n**I: Unsafe practices, Distress, Fear, Serious.** \n\nThis answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense.",
"answer": "** \nThe correct answer is: \n**I: Unsafe practices, Distress, Fear, Serious.** \n\nThis answer aligns with standard advertising ethical guidelines, ensuring that adverts discourage unsafe behaviors, refrain from evoking unnecessary distress or fear, and prevent causing serious or widespread offense."
},
"success": "Success"
}
]

View File

@ -0,0 +1,32 @@
[
{
"agent": "ConcluderAgent_gpt4o",
"action": {
"action": "conclude",
"parameter": ""
},
"cost": 180000,
"tokens": 450,
"model_size": 200,
"result": {
"step_data": "Typical advertising regulatory bodies aim to ensure that advertisements are ethical and do not promote or encourage harmful behavior or emotions. They stress the importance of not encouraging unsafe practices, causing unnecessary distress, or promoting jealousy, and further ensuring that advertisements avoid causing serious offense. \n\nAnalyzing the provided options while aligning with these stated guidelines:\n1. The right combination should discourage unsafe practices, eliminate distress or jealousy, and avoid serious offense. \n2. Therefore, Option **I (Unsafe practices, Distress, Fear, Serious)** fits perfectly with the context of advertising regulations.\n\nREASONING RESULT: The correct choice must ensure advertising standards do not promote unsafe practices, distress, fear, or serious offense.\n\nFINAL ANSWER: **I: Unsafe practices, Distress, Fear, Serious**",
"answer": " **I: Unsafe practices, Distress, Fear, Serious**"
},
"success": "Success"
},
{
"agent": "CriticAgent_gpt4o",
"action": {
"action": "critique",
"parameter": ""
},
"cost": 351600,
"tokens": 879,
"model_size": 200,
"result": {
"step_data": "REASONING RESULT: The previous reasoning effectively addresses the given problem. It aligns well with standard advertising principles, emphasizing ethical concerns such as avoiding unsafe practices, unnecessary distress, fear, jealousy, and serious offense. The reasoning evaluates each aspect of the regulatory guidelines and validates the option based on these principles. The conclusion, selecting **Option I** (\"Unsafe practices, Distress, Fear, Serious\"), is plausible and consistent with the ethical guidelines for advertising practices. The reasoning demonstrates a logical flow, examining the requirements and directly relating them to the selected option.\n\nHowever, the reasoning could be improved by briefly comparing other options and explicitly eliminating them by showing why they do not meet the outlined principles. This would make the argument more robust.\n\nFINAL ANSWER: The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**.",
"answer": " The reasoning is overall plausible, logical, and correctly concludes that the answer is **Option I: Unsafe practices, Distress, Fear, Serious**."
},
"success": "Success"
}
]

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

53
puppeteer/main.py Normal file
View File

@ -0,0 +1,53 @@
import argparse
import os
import json
import yaml
from tasks.runner import BenchmarkRunner
from tasks.evaluator import BenchmarkEvaluator
from tasks import mmlu_pro, gsm_hard, srdd, creative_writing
def main():
parser = argparse.ArgumentParser(description="Run benchmark tasks")
parser.add_argument("task", choices=["MMLU-Pro", "gsm-hard", "SRDD", "CW"])
parser.add_argument("mode", choices=["validation", "test"])
parser.add_argument("--level", type=int, default=1)
parser.add_argument("--index", type=int, default=-1)
parser.add_argument("--data_limit", type=int, default=1)
parser.add_argument("--personas", type=str, default="personas/personas.jsonl")
args = parser.parse_args()
# load global config
with open("config/global.yaml", "r") as f:
global_config = yaml.safe_load(f)
runner = BenchmarkRunner(args.personas, global_config)
evaluator = BenchmarkEvaluator()
results_dir = os.path.join(os.getcwd(), "results", f"{args.task}_{args.mode}")
os.makedirs(results_dir, exist_ok=True)
# change policy.json
config_path = "config/policy.json"
with open(config_path, 'r') as f:
config = json.load(f)
config["dataset_name"] = args.task
config["dataset_mode"] = args.mode
config['paths']["checkpoint_path"] = f"checkpoint/{args.task}_{args.mode}"
with open(config_path, 'w') as f:
json.dump(config, f, indent=4)
task_map = {
"MMLU-Pro": mmlu_pro.run,
"gsm-hard": gsm_hard.run,
"SRDD": srdd.run,
"CW": creative_writing.run,
}
if args.task in task_map:
task_map[args.task](runner, evaluator, results_dir, args.mode, args.data_limit)
else:
print(f"Unknown task: {args.task}")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,18 @@
from model.api_config import api_config
global_openai_client = api_config.global_openai_client()
from model.model_config import model_registry
from model.query_manager import query_manager
def _create_query_function(model_key: str):
def query_func(messages, system_prompt=None):
return query_manager.query(model_key, messages, system_prompt)
return query_func
_generated_functions = {}
for model_key, config in model_registry.get_all_models().items():
func = _create_query_function(model_key)
func.__name__ = config.function_name
_generated_functions[config.function_name] = func
globals()[config.function_name] = func
__all__ = ['ModelQueryManager', 'query_manager'] + list(_generated_functions.keys())

View File

@ -0,0 +1,28 @@
import yaml
class APIConfig:
def __init__(self):
self._config = self._init_config()
def _init_config(self):
global_config = yaml.safe_load(open("config/global.yaml", "r"))
key_config = {
"openai":{
"openai_api_key": global_config.get("api_keys").get("openai_api_key"),
"openai_base_url": global_config.get("api_keys").get("openai_base_url", None),
},
"retry_times": global_config.get("max_retry_times", 10),
"weight_path": global_config.get("model_weight_path")
}
return key_config
def get(self, provider: str) -> dict:
return self._config.get(provider, {})
def global_openai_client(self):
from openai import OpenAI
api_key = self._config.get("openai").get("openai_api_key", None)
base_url = self._config.get("openai").get("openai_base_url", None)
return OpenAI(api_key=api_key, base_url=base_url)
api_config = APIConfig()

View File

@ -0,0 +1,103 @@
import os
import openai
import yaml
from chromadb import EmbeddingFunction, Embeddings
from model.model_utils import model_log_and_print
from tenacity import retry, stop_after_attempt, wait_exponential
from typing import List
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
try:
with open("./config/global.yaml", "r", encoding="utf-8") as f:
GLOBAL_CONFIG = yaml.safe_load(f)
except FileNotFoundError:
raise FileNotFoundError("Global config file './config/global.yaml' not found!")
OPENAI_API_KEY = GLOBAL_CONFIG.get("api_keys", {}).get("openai_api_key")
BASE_URL = GLOBAL_CONFIG.get("api_keys", {}).get("openai_base_url", None)
MAX_RETRY_TIMES = GLOBAL_CONFIG.get("max_retry_times", 10)
MODEL_WEIGHT_PATH = GLOBAL_CONFIG.get("model_weight_path")
if BASE_URL:
client = openai.OpenAI(api_key=OPENAI_API_KEY, base_url=BASE_URL)
else:
client = openai.OpenAI(api_key=OPENAI_API_KEY)
class OpenAIEmbedding(EmbeddingFunction):
@staticmethod
@retry(wait=wait_exponential(min=5, max=10), stop=stop_after_attempt(MAX_RETRY_TIMES))
def get_embedding(text) -> Embeddings:
embedding_model = "text-embedding-ada-002"
model_log_and_print(f"[Embedding] embedding from {embedding_model}")
if isinstance(text, str):
text = [text.replace("\n", " ")]
else:
text = [t.replace("\n", " ") for t in text]
text = [t[:8191] if len(t) > 8191 else t for t in text]
text = ["none"] if all(len(t) == 0 for t in text) else text
model_log_and_print(f"[Embedding] {text}")
response = client.embeddings.create(input=text, model=embedding_model)
embeddings = [data.embedding for data in response.data]
prompt_tokens = response.usage.prompt_tokens
total_tokens = response.usage.total_tokens
model_log_and_print(f"[Embedding] Token Usage\nPrompt Tokens: {prompt_tokens}\nTotal Tokens: {total_tokens}")
return embeddings
@property
def dim(self):
return 1536
class RewardModelTokenRepresentation():
def __init__(self):
self.model_name = "nvidia/Llama-3.1-Nemotron-70B-Reward-HF"
self.model = AutoModelForCausalLM.from_pretrained(MODEL_WEIGHT_PATH, torch_dtype=torch.bfloat16, device_map="auto")
self.tokenizer = AutoTokenizer.from_pretrained(MODEL_WEIGHT_PATH)
print("device: {}".format(self.model.device))
def truncate(self, messages):
length = sum(len(message["content"]) for message in messages)
while length > 12000:
for message in messages:
message["content"] = message["content"][-int(len(message["content"]) * 0.75):]
length = sum(len(message["content"]) for message in messages)
return messages
def __call__(self, messages:List):
with torch.no_grad():
messages = self.truncate(messages)
model_log_and_print("tokenizing")
model_log_and_print(messages)
tokenized_message = self.tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=False, return_tensors="pt", return_dict=True, max_length=4096)
model_log_and_print("tokenized done")
input_ids = tokenized_message['input_ids'].to('cuda')
attention_mask = tokenized_message['attention_mask'].to('cuda')
response_token_ids = self.model.generate(input_ids=input_ids,
attention_mask=attention_mask,
max_new_tokens=1,
return_dict_in_generate=True,
output_scores=True,
output_logits=True,
output_hidden_states=True)
reward = response_token_ids['scores'][0][0][0].item()
hidden_states = response_token_ids.hidden_states
state = hidden_states[0][-1]
last_state = state[:,-1,:]
print(reward)
return last_state, reward
@property
def dim(self):
return 8192

View File

@ -0,0 +1,87 @@
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
@dataclass
class ModelConfig:
name: str
function_name: str
api_model_name: str
provider: str
max_tokens: int
model_size: int # for open-source models, this is the number of parameters in millions; but for API models, this is just an estimate
url: Optional[str] = None
temperature: float = 0.1
description: str = ""
MODEL_REGISTRY: Dict[str, ModelConfig] = {
"gpt-3.5": ModelConfig(
name = "gpt-3.5",
function_name="query_gpt",
api_model_name="gpt-3.5-turbo",
provider="openai",
model_size=175,# which is estimated
max_tokens=4096,
description="OpenAI GPT-3.5 Turbo model"
),
"gpt-4o": ModelConfig(
name = "gpt-4o",
function_name="query_gpt4o",
api_model_name="gpt-4o",
provider="openai",
model_size=200,# which is estimated
max_tokens=128000,
description="OpenAI GPT-4o model"
),
"qwen-2.5-14b": ModelConfig(
name = "qwen-2.5-14b",
function_name="query_qwen2_5_14b",
api_model_name="Qwen/Qwen2.5-14B-Instruct",
provider="local",
model_size=14,
max_tokens=8192,
url="http://",
description="Qwen 2.5 14B Instruct model deployed locally"
),
}
class ModelRegistry:
def __init__(self):
self.registry = MODEL_REGISTRY.copy()
def register_model(self, key: str, config: ModelConfig) -> None:
self.registry[key] = config
def get_model_config(self, key: str) -> Optional[ModelConfig]:
return self.registry.get(key)
def get_model_size(self, key: str) -> Optional[int]:
config = self.get_model_config(key)
return config.model_size if config else None
def get_all_models(self) -> Dict[str, ModelConfig]:
return self.registry.copy()
def get_models_by_provider(self, provider: str) -> Dict[str, ModelConfig]:
return {k: v for k, v in self.registry.items() if v.provider == provider}
def get_function_name(self, key: str) -> Optional[str]:
config = self.get_model_config(key)
return config.function_name if config else None
def get_api_model_name(self, key: str) -> Optional[str]:
config = self.get_model_config(key)
return config.api_model_name if config else None
def list_available_models(self) -> List[str]:
return list(self.registry.keys())
def search_models(self, keyword: str) -> Dict[str, ModelConfig]:
keyword = keyword.lower()
return {
k: v for k, v in self.registry.items()
if keyword in k.lower() or keyword in v.display_name.lower()
}
model_registry = ModelRegistry()

View File

@ -0,0 +1,96 @@
from typing import Dict
import logging
from tenacity import retry
from tenacity.stop import stop_after_attempt
from tenacity.wait import wait_exponential
logger = logging.getLogger("model")
class APIConfig:
SLOW_FLAG = False
TRUNCATE_FACTOR = 0
def model_log_and_print(content):
if content is not None:
logger.info(content)
print(content)
def truncate_messages(messages):
max_length = 0
max_index = 0
for i, msg in enumerate(messages):
if len(msg.get('content', '')) > max_length:
max_length = len(msg['content'])
max_index = i
content = messages[max_index]['content']
factor = 1/(2**APIConfig.TRUNCATE_FACTOR)
messages[max_index]['content'] = content[:int(len(content)*factor)]
return messages
def calc_max_token(messages, max_tokens):
string = "\n".join([str(message["content"]) for message in messages])
num_prompt_tokens = int(len(string)//1.8) # approximation of tokens number
gap_between_send_receive = 15 * len(messages)
num_prompt_tokens += gap_between_send_receive
num_max_completion_tokens = max_tokens - num_prompt_tokens
logger.info(f"num_prompt_tokens: {num_prompt_tokens}, num_max_completion_tokens: {num_max_completion_tokens}")
if num_max_completion_tokens < 0:
logger.warning(f"num_max_completion_tokens is negative: {num_max_completion_tokens}")
return 0
return num_max_completion_tokens
@retry(wait=wait_exponential(min=5, max=10), stop=stop_after_attempt(10))
def chat_completion_request(messages, model, new_client, model_config_dict: Dict = None):
if model_config_dict is None:
model_config_dict = {
"temperature": 0.1,
"top_p": 1.0,
"n": 1,
"stream": False,
"frequency_penalty": 0.0,
"presence_penalty": 0.0,
"logit_bias": {},
}
json_data = {
"model": model,
"messages": messages,
"max_tokens": 4096,
"temperature": model_config_dict["temperature"],
"top_p": model_config_dict["top_p"],
"n": model_config_dict["n"],
"stream": model_config_dict["stream"],
"frequency_penalty": model_config_dict["frequency_penalty"],
"presence_penalty": model_config_dict["presence_penalty"],
"logit_bias": model_config_dict["logit_bias"],
}
try:
model_log_and_print("[Model Query] {}".format(messages))
if APIConfig.SLOW_FLAG:
messages = truncate_messages(messages=messages)
response = new_client.chat.completions.create(**json_data)
completion_tokens = response.usage.completion_tokens
prompt_tokens = response.usage.prompt_tokens
total_tokens = response.usage.total_tokens
if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens
if total_tokens == 0:
total_tokens = len(response.choices[0].message.content)//1.8
model_log_and_print(f"[Model Query] Token Usage: \nCompletion Tokens: {completion_tokens} \nPrompt Tokens: {prompt_tokens} \nTotal Tokens: {total_tokens}")
APIConfig.SLOW_FLAG = False
APIConfig.TRUNCATE_FACTOR = 0
return response, total_tokens
except Exception as e:
print("Unable to generate ChatCompletion response. " + f"OpenAI calling Exception: {e}")
APIConfig.SLOW_FLAG = True
APIConfig.TRUNCATE_FACTOR += 1
model_log_and_print(f"[Model Query: ChatCompletion] query failed: {str(e)}")
raise Exception()

View File

@ -0,0 +1,83 @@
from typing import List, Dict, Any, Optional, Tuple
import yaml
from model.model_config import model_registry, ModelConfig
from model.api_config import api_config
from model.model_utils import chat_completion_request, model_log_and_print
class ModelQueryManager:
def __init__(self):
self.registry = model_registry
self.config_manager = api_config
self.clients = {}
self._setup_clients()
def _setup_clients(self):
from openai import OpenAI
for key, config in self.registry.get_all_models().items():
# set up client for openai models
if config.provider == "openai" and config.url is None:
api_key = self.config_manager.get("openai").get("openai_api_key", None)
base_url = self.config_manager.get("openai").get("openai_base_url", None)
self.clients[key] = OpenAI(api_key=api_key, base_url=base_url)
# set up client for local models
elif config.url:
self.clients[key] = OpenAI(api_key="none", base_url=config.url)
def query(self, model_key: str, messages: List[Dict[str, str]],
system_prompt: Optional[str] = None) -> Tuple[str, int]:
config = self.registry.get_model_config(model_key)
if not config:
available_models = ", ".join(self.registry.list_available_models())
raise ValueError(f"Unknown model: {model_key}. Available models: {available_models}")
return self._query_with_config(messages, config, system_prompt)
def _query_with_config(self, messages, config: ModelConfig, system_prompt=None):
model_config_dict = {
"temperature": config.temperature,
"top_p": 1.0,
"n": 1,
"stream": False,
"frequency_penalty": 0.0,
"presence_penalty": 0.0,
"logit_bias": {},
"max_tokens": config.max_tokens
}
if not isinstance(messages, list):
system_prompt = "You are an assistant" if system_prompt is None else system_prompt
messages = [
{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': messages}
]
response, total_tokens = chat_completion_request(
messages=messages,
model=config.api_model_name,
new_client=self.clients.get(config.name),
model_config_dict=model_config_dict
)
if isinstance(response, str):
return response, 1
response_message = response.choices[0].message.content
return response_message, total_tokens
def get_available_models(self) -> List[str]:
return self.registry.list_available_models()
def get_model_info(self, model_key: str) -> Optional[Dict[str, Any]]:
config = self.registry.get_model_config(model_key)
if config:
return {
"function_name": config.function_name,
"api_model_name": config.api_model_name,
"provider": config.provider,
"max_tokens": config.max_tokens,
"description": config.description
}
return None
query_manager = ModelQueryManager()

View File

@ -0,0 +1,14 @@
{"name": "FileAgent", "role_prompt":"You are an expert in file handling. Your task is to read files and extract relevant information.", "model_type":"gpt-3.5","actions":["read_file"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "ArxivAgent", "role_prompt":"You are an expert in academic research. Your task is to search for relevant papers on arXiv.", "model_type":"gpt-3.5","actions":["search_arxiv"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "BingAgent", "role_prompt":"You are an expert in web search. Your task is to search for relevant information using Bing.", "model_type":"gpt-3.5","actions":["search_bing"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "WebsiteAgent", "role_prompt":"You are an expert in accessing and extracting information from websites. Your task is to access specific websites and gather relevant information.", "model_type":"gpt-3.5","actions":["access_website"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "TerminatorAgent", "role_prompt":"You are an expert in terminating processes. Your task is to determine when the reasoning process should be terminated and provide the final answer.", "model_type":"gpt-3.5","actions":["terminate"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "PythonAgent_gpt4o", "role_prompt":"You are an expert in Python programming. Your task is to run Python code and provide the results.", "model_type":"gpt-4o","actions":["run_python"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "PlannerAgent_gpt4o", "role_prompt":"You are an expert in planning. Your task is to create detailed plans for achieving specific goals.", "model_type":"gpt-4o","actions":["planning"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "ReasoningAgent_gpt4o", "role_prompt":"You are an expert in logical reasoning. Your task is to reason through complex problems and provide well-thought-out solutions.", "model_type":"gpt-4o","actions":["reasoning"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "CriticAgent_gpt4o", "role_prompt":"You are an expert in critiquing. Your task is to critique the reasoning and solutions provided by others.", "model_type":"gpt-4o","actions":["critique"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "ReflectAgent_gpt4o", "role_prompt":"You are an expert in reflection. Your task is to reflect on the reasoning process and provide insights for improvement.", "model_type":"gpt-4o","actions":["reflect"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "QuestionAgent_gpt4o", "role_prompt":"You are an expert in questioning. Your task is to propose relevant sub-questions that help in solving the main problem.", "model_type":"gpt-4o","actions":["question"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "SummarizerAgent_gpt4o", "role_prompt":"You are an expert in summarizing. Your task is to summarize the information and provide concise conclusions.", "model_type":"gpt-4o","actions":["summarize"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "ConcluderAgent_gpt4o", "role_prompt":"You are an expert in concluding. Your task is to provide final conclusions based on the reasoning process.", "model_type":"gpt-4o","actions":["conclude"],"agent_type":"reasoning","policy":"autonomous"}
{"name": "Modifier_gpt4o", "role_prompt":"You are an expert in error correction and modification. Your task is to identify errors in previous reasoning, explain why they are incorrect, and provide accurate corrections.", "model_type":"gpt-4o","actions":["modify"],"agent_type":"reasoning","policy":"autonomous"}

View File

@ -0,0 +1,5 @@
{"action": "search_arxiv", "prompt": "You have chosen to search for academic papers on arXiv. Please provide specific terms related to academic research, such as the title of a paper, keywords, or topics in fields like physics, mathematics, computer science, or machine learning. Return in json format. Example: {\"action\": \"search_arxiv\", \"parameter\": \"quantum computing\"}"}
{"action": "search_bing", "prompt": "You have chosen to search for information using Bing. Please provide descriptive phrases or keywords related to your query, including concepts, names, events, or specific questions to get a broad range of results, including news, articles, and websites. Return in json format. Example: {\"action\": \"search_bing\", \"parameter\": \"latest advancements in AI\"}"}
{"action": "access_website", "prompt": "You have chosen to access a website. Please provide the URL you want to access or the URL most relevant to the current question. Return in json format. Example: {\"action\": \"access_website\", \"parameter\": \"https://www.example.com\"}"}
{"action": "run_python", "prompt": "You have chosen to write and run Python code. Please write generic Python code in the parameter to solve this type of problems using only standard python libraries. Make sure you use the 'print' function for all output when relevant. Return in json format. Example: {\"action\": \"run_python\", \"parameter\": \"print('Hello, World!')\"}"}
{"action": "read_file", "prompt": "You have chosen to read a file. Please provide the filename you want to read. Return in json format. Example: {\"action\": \"read_file\", \"parameter\": \"data.txt\"}"}

View File

@ -0,0 +1,8 @@
{"action": "planning","prompt": "Decompose the question and plan the next steps to address the question. You should complete your planning using the following template: REASONING RESULT: [YOUR REASONING RESULT]. *Your previous reasoning was: {}.* Your planning should include: "}
{"action": "reasoning","prompt": "Now, you need to continue the reasoning to get closer to the correct answer. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* You need to follow the direction of the reasoning path and go forward:"}
{"action": "critique","prompt": "You need to critique the previous reasoning. You should complete your reasoning using this format: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this format: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* Consider the following when critiquing the reasoning: 1. Plausibility:"}
{"action": "reflect","prompt": "You will be provided with a previous reasoning attempt where you had access to relevant context and were tasked with answering a question. The attempt was unsuccessful either due to an incorrect answer or a phrasing mismatch with the answer key. In a few sentences, diagnose the potential cause of failure or phrasing discrepancy, and outline a new, concise, high-level plan to prevent the same issue from recurring. Use complete sentences. Reflect on the current state of the task and propose the next steps. You should conclude your reasoning using this template: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your reflection using this template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* Your reflection:"}
{"action": "question","prompt": "Your task is to propose the next sub-question along with its answer. You should ensure that the sub-question logically follows from the previous reasoning and addresses any gaps. You should provide a well-reasoned answer to the sub-question, supported by evidence or logical arguments. You should conclude your reasoning using this template: REASONING RESULT: [YOUR REASONING RESULT]. Conclude your answer using this template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* Now, ask a sub-question and try to answer it:"}
{"action": "summarize","prompt": "You need to summarize those previous results and provide some intermediate conclusions. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* You need to summarize the reasoning paths and provide a final conclusion:"}
{"action": "conclude","prompt": "You need to conclude the task and provide a final answer. You should finish your reasoning with the following template: REASONING RESULT: [YOUR REASONING RESULT]. Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. *Your previous reasoning was: {}.* You need to conclude the task and provide a final answer: "}
{"action": "modify", "prompt": "You need to identify and correct errors in the previous reasoning. You should finish your reasoning with the following template: REASONING RESULT: [Clearly state: 1. Which part of the previous reasoning was incorrect 2. Why it was incorrect 3. What is the correct understanding]. Finish your answer with the following template: FINAL ANSWER: [Provide the complete corrected answer]. *Your previous reasoning was: {}.* Please explicitly point out and correct any errors, misconceptions, or inaccuracies in the previous reasoning."}

View File

@ -0,0 +1,76 @@
{
"answer": [
"When you find an answer, use as few words as possible, like a single number or a single word. And finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. ",
"YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
"If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. Do not include any units or punctuation.",
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
"Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]"
],
"MMLU_answer": [
"Choose the most common one and then format it.",
"Format it using the fewest words possible, it should be a single letter, but do not alter the original answer.",
"Do not include any other units or punctuation. ",
"And finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]",
"Return your answer with only the single letter."
],
"gsm_answer": [
"Calculate carefully and provide only the final numerical answer.",
"The answer must be a pure number - either an integer or decimal number.",
"Do not include any units, symbols, or text in your answer.",
"Do not use commas in numbers.",
"Do not use scientific notation (e.g. 1e6).",
"Do not round the number unless specifically requested.",
"Use plain decimal format with decimal point if needed.",
"For large numbers write all digits without abbreviation.",
"Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]"
],
"code_answer": [
"Provide the code in its most complete and executable form.",
"Ensure the code includes necessary libraries, imports, and setup.",
"Ensure the code is modular and follows good software design practices.",
"Avoid hardcoding values unless explicitly required.",
"Include proper error handling and edge case considerations.",
"If applicable, make sure the code is optimized for performance.",
"Do not add unnecessary comments unless required for clarification.",
"Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]"
],
"text_answer": [
"Provide a clear and concise answer.",
"Ensure your answer is well-structured and logically organized.",
"Include all relevant information and avoid unnecessary details.",
"Use proper grammar, spelling, and punctuation.",
"Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]"
],
"answer_aggregation": [
"You have several answer candidates. They are:\n{}",
"Critically evaluate these responses, considering potential biases or inaccuracies.",
"Choose the most reliable answer based on logical reasoning and factual accuracy.",
"If responses conflict, prioritize answers that provide clear reasoning or align with verifiable facts.",
"Format the chosen answer using the fewest words possible, but do not alter its core meaning.",
"YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
"If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.",
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
],
"MMLU_aggregation": [
"You have several answer candidates. They are:\n{}",
"Choose the most common answer among the candidates.",
"If responses conflict, prioritize answers that provide clear reasoning or appear later",
"Format the answer as a single letter.",
"Do not include any other units or punctuation.",
"Return your answer with only the single letter."
],
"gsm_aggregation": [
"You have several answer candidates. They are:\n{}",
"Choose the most reliable answer.",
"If responses conflict, prioritize answers that provide clear reasoning or appear later",
"The answer must be a pure number - either an integer or decimal number.",
"Do not include any units, symbols, or text in your answer.",
"Do not use commas in numbers.",
"Do not use scientific notation.",
"Do not round the number unless specifically requested.",
"Use plain decimal format with decimal point if needed.",
"For large numbers write all digits without abbreviation."
]
}

View File

@ -0,0 +1,8 @@
{
"system_prompt": [
"{}, and You work as a helpful AI assistant. ",
"I will ask you a question. Answer this question using your coding and language skills.",
"Now your question is: {}",
"Previously, you collected the some information about this question from some actions: {}"
]
}

View File

@ -0,0 +1,18 @@
import os
from utils.file_utils import write_jsonl
class BaseTask:
def __init__(self, runner, evaluator):
self.runner = runner
self.evaluator = evaluator
def write_result(self, fd, task_id, final_ans, true_ans=None, flag=None):
record = {
"task_id": task_id,
"final_ans": final_ans,
}
if true_ans is not None:
record["true_ans"] = true_ans
if flag is not None:
record["flag"] = flag
write_jsonl(fd, record)

View File

@ -0,0 +1,34 @@
import os
import json
from tqdm import tqdm
def load_dataset(data_limit=None):
path = "./data/CW/creative_writing.jsonl"
with open(path, "r", encoding="utf-8") as f:
data = [json.loads(line) for line in f]
return data[:data_limit] if data_limit else data
def format_question(q, idx):
question = "Concepts: " + ", ".join(q["concepts"]) + \
"\nGenerate a sentence including all key concepts, grammatically correct and coherent."
return {
"type": "CW",
"Question": question,
"id": idx,
"concepts": q["concepts"]
}
def run(runner, evaluator, results_dir, mode, data_limit=None):
dataset = load_dataset(data_limit)
result_path = os.path.join(results_dir, "cw.jsonl")
with open(result_path, "w", encoding="utf-8") as fd:
for idx, q in enumerate(tqdm(dataset)):
task = format_question(q, idx)
final_ans = runner.run_reasoning(task)
record = {
"id": task["id"],
"pred": final_ans
}
fd.write(json.dumps(record, ensure_ascii=False) + "\n")

View File

@ -0,0 +1,349 @@
import subprocess
import time
import torch
import numpy as np
import re
import os
import signal
import math
from model import query_gpt
from model.embedding import OpenAIEmbedding
from utils.file_utils import read_code, read_text
FLOAT_TOLERANCE = 1e-3
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class BenchmarkEvaluator:
@staticmethod
def commongen_coverage(concepts, text_path):
generated_text = read_text(text_path)
generated_text = generated_text.lower()
concepts = [concept.lower() for concept in concepts]
missing_concepts = [concept for concept in concepts if not re.search(rf'\b{re.escape(concept)}\b', generated_text, re.IGNORECASE)]
if missing_concepts:
return 1 - len(missing_concepts)/len(concepts)
return 1
@staticmethod
def commongen_gpt_score(concepts, text_path):
generated_text = read_text(text_path)
prompt = '''
As a strict StoryMaster, your task is to meticulously evaluate the quality of stories across three primary dimensions: Grammar and Fluency, Context Relevance, and Logic Consistency. Each dimension will be rated on a refined scale from 1 (average) to 4 (perfect), ensuring that only stories of superior quality achieve the highest scores.
Implement Your Evaluation Mechanism with Enhanced Rigor:
Grammar and Fluency (Assess the story's linguistic precision and narrative flow):
Score 1 (solid): The story is free of grammatical errors, but the narrative lacks the stylistic variety and eloquence that elevate writing to a higher tier.
Score 2 (proficient): The narrative demonstrates a strong command of grammar and a coherent flow, yet it does not showcase the level of linguistic artistry found in superior works.
Score 3 (excellent): The story exhibits a refined sense of grammar and a compelling narrative flow, with sentence structures that are engaging and demonstrate a high level of craft.
Score 4 (masterful): The story is a testament to linguistic excellence, with sentence structures that are not only clear and elegant but also exhibit a creative and sophisticated use of language that captivates and inspires.
Context Relevance (Examine the coherence, interconnectedness, and depth of content within the story):
Score 1 (solid): The story establishes a basic framework of context relevance, but it does not delve into the intricacies of character and thematic development that enrich the narrative.
Score 2 (proficient): The narrative demonstrates a clear connection between elements, yet it lacks the depth and multi-layered content that would distinguish it as truly exceptional.
Score 3 (excellent): The story interweaves elements with a high degree of relevance, creating a narrative that is coherent and features content that is well-developed and insightful.
Score 4 (masterful): The story achieves an extraordinary level of context relevance, with every element artfully woven into a narrative that is not only coherent but also profound in its exploration of themes and characters, offering a rich and immersive experience.
Logic Consistency (Scrutinize the narrative for logical integrity and internal consistency):
Score 1 (solid): The story maintains a logical structure, but there may be occasional lapses in plausibility or minor inconsistencies that slightly undermine its credibility.
Score 2 (proficient): The narrative is generally logical, with a clear progression of events and character actions, yet it does not reach the level of seamless consistency expected of a superior story.
Score 3 (excellent): The story exhibits a strong logical consistency, with events and character actions that are well-aligned and plausible, contributing to a coherent and believable plot.
Score 4 (masterful): The story is characterized by impeccable logical consistency, with every event and character action meticulously aligned to create a plot that is not only coherent but also demonstrates a deep understanding of causality and human behavior.'''
prompt += '\nStory:\n' + generated_text
response_text, _ = query_gpt(prompt)
pattern = r'\d+'
remedy_prompt = 'Extract the score in each dimension in format: (Grammar and Fluency Score: X. Context Relevance Score: X. Logic Consistency Score: X. Overall Score Score: X.) of the following content.'
remedy_prompt += response_text
remedy_respond,_ = query_gpt(remedy_prompt)
score_list = re.findall(pattern, remedy_respond)
my_float_list = [float(item) for item in score_list]
score_list = [item/4 for item in my_float_list]
score_list = score_list[:3]
while len(score_list) != 3:
score_list.append(0)
return score_list
@staticmethod
def check_commongen(concepts, text_path):
# Metric implementation inspired by self-refine project:
# https://github.com/madaan/self-refine/tree/main/src/commongen
coverage = BenchmarkEvaluator.commongen_coverage(concepts, text_path)
coverage = torch.tensor(coverage, dtype=torch.float32, device=DEVICE)
scores = BenchmarkEvaluator.commongen_gpt_score(concepts, text_path)
grammar = torch.tensor(scores[0], dtype=torch.float32, device=DEVICE)
relevance = torch.tensor(scores[1], dtype=torch.float32, device=DEVICE)
consistency = torch.tensor(scores[2], dtype=torch.float32, device=DEVICE)
metrics = {"grammar": grammar, "relevance": relevance, "consistency": consistency, "coverage": coverage}
mean_score = torch.tensor(sum(scores) / 3, dtype=torch.float32, device=DEVICE)
if coverage == 0:
return -1.0, metrics
else:
return coverage*mean_score, metrics
@staticmethod
def check_srdd(code_path, text):
# Metric implementation inspired by ChatDev project:
# https://github.com/OpenBMB/ChatDev
path = code_path
code = read_code(path)
consistency = BenchmarkEvaluator.srdd_consistency(text, code)
completeness = BenchmarkEvaluator.srdd_completeness(code)
executability, _ = BenchmarkEvaluator.srdd_executability(path)
executability = 1 if executability else 0
executability = torch.tensor(executability, dtype=torch.float32, device=DEVICE)
consistency = torch.tensor(consistency, dtype=torch.float32, device=DEVICE)
completeness = torch.tensor(completeness, dtype=torch.float32, device=DEVICE)
metrics = {"consistency": consistency, "completeness": completeness, "executability": executability}
if executability:
alignment = consistency * completeness
return alignment, metrics
else:
return -1.0, metrics
@staticmethod
def srdd_consistency(text, code):
code = BenchmarkEvaluator.remove_comments(code)
text = re.sub(r'^[^\n]*\n', '', text)
text_embedding = OpenAIEmbedding.get_embedding(text)
code_embedding = OpenAIEmbedding.get_embedding(code)
similarity = BenchmarkEvaluator.get_cosine_similarity(text_embedding, code_embedding)
return similarity
@staticmethod
def srdd_completeness(code):
lines = code.split("\n")
lines = [line for line in lines if
"password" not in line.lower() and "passenger" not in line.lower() and "passed" not in line.lower() and "passes" not in line.lower()]
lines = [line for line in lines if "pass" in line.lower() or "todo" in line.lower()]
if len(lines) > 0:
return 0.0
return 1.0
@staticmethod
def srdd_executability(work_path):
def robust_kill(process):
"""Robustly kill the process based on the OS."""
if process.poll() is None: # Check if the process is still running
if os.name == 'nt': # For Windows
os.kill(process.pid, signal.SIGTERM)
time.sleep(1)
if process.poll() is None:
os.kill(process.pid, signal.CTRL_BREAK_EVENT)
else: # For Linux/macOS
os.killpg(os.getpgid(process.pid), signal.SIGTERM)
time.sleep(1)
if process.poll() is None:
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
try:
if not os.path.exists(work_path):
return False, "The file path does not exist."
if os.name == 'nt':
command = f" python {work_path}"
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
else:
command = f"python3 {work_path}"
process = subprocess.Popen(command, shell=True, preexec_fn=os.setsid, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
try:
out, err = process.communicate(timeout=10)
except subprocess.TimeoutExpired:
robust_kill(process)
return True, "The process completes without encountering any errors."
return_code = process.returncode
output = out.decode('utf-8', errors='ignore')
error_output = err.decode('utf-8', errors='ignore')
# If the process is still running after the timeout
if process.poll() is None:
robust_kill(process)
return_code = process.returncode
# Handle return code and output
if return_code == 0:
# Clean up file paths in the output for readability
work_path = os.getcwd()
output = output.replace(work_path, "")
return True, output
else:
# Handle errors in the output
if error_output:
work_path = os.getcwd()
if "Traceback".lower() in error_output.lower():
errs = error_output.replace(work_path + "/", "").replace(work_path, "")
return False, errs
return False, error_output
except subprocess.CalledProcessError as e:
return False, f"CalledProcessError: {str(e)}"
except Exception as ex:
return False, f"An unexpected error occurred: {str(ex)}"
@staticmethod
def get_cosine_similarity(embeddingi, embeddingj):
embeddingi = np.array(embeddingi)
embeddingj = np.array(embeddingj).T
cos_sim = embeddingi.dot(embeddingj) / (np.linalg.norm(embeddingi) * np.linalg.norm(embeddingj))
return cos_sim
@staticmethod
def remove_comments(string):
def remove_comments_by_regex(string, regex):
lines = string.split("\n")
lines = [line for line in lines if not line.strip().startswith("#")]
string = "\n".join(lines)
comments = []
matches = re.finditer(regex, string, re.DOTALL)
for match in matches:
group1 = match.group(1)
comments.append(group1)
for comment in comments + ["''''''\n"]:
string = string.replace(comment, "")
return string
string = remove_comments_by_regex(string, r"'''(.*?)'''")
string = remove_comments_by_regex(string, r"\"\"\"(.*?)\"\"\"")
return string
@staticmethod
def check_mmlu(final_ans, true_ans):
if final_ans is None or true_ans is None:
return False
if len(final_ans) == 0:
return False
pattern = r'answer is\s+([A-Z])'
match = re.search(pattern, final_ans)
if match:
final = match.group(1).strip()
if final==true_ans:
return True
pattern = r'is\s+([A-Z])'
match = re.search(pattern, final_ans)
if match:
final = match.group(1).strip()
if final==true_ans:
return True
final_ans = final_ans.strip().lower()
true_ans = true_ans.strip().lower()
if final_ans == true_ans:
return True
final_ans_lines = final_ans.split('\n')
ans = []
residual = []
for li, al in enumerate(final_ans_lines):
ans.append(al)
if 'answer is' in al:
break
residual = list(final_ans_lines[li + 1:])
ans = '\n'.join(ans)
residual = '\n'.join(residual)
pattern = 'the answer is ('
pred = ans.split(pattern)
if len(pred) > 1:
pred = BenchmarkEvaluator.extract_letter(pred[1])
gold = BenchmarkEvaluator.extract_letter(true_ans)
return pred[0] == gold
else:
pred = BenchmarkEvaluator.extract_letter(ans)
gold = BenchmarkEvaluator.extract_letter(true_ans)
return pred == gold
@staticmethod
def check_gsm8k(final_ans, true_ans):
if final_ans is None or true_ans is None:
return False
if isinstance(final_ans, str):
final_num = BenchmarkEvaluator.extract_number(final_ans)
if final_num is None:
return False
else:
final_num = float(final_ans)
true_num = float(true_ans)
if not (math.isfinite(final_num) and math.isfinite(true_num)):
return False
# Accuracy computation adapted from: https://github.com/reasoning-machines/pal/blob/main/scripts/gsm_eval.py
is_correct = abs(float(final_num) - float(true_num)) < FLOAT_TOLERANCE
if not is_correct:
is_correct = (round(float(final_num)) == round(float(true_num)))
if is_correct:
return is_correct
if abs(int(float(final_num))) > 100 and abs(int(float(true_num))) > 100:
is_correct = (int(float(final_num)) == int(float(true_num)))
return is_correct
@staticmethod
def extract_math_answer(text):
if text is None:
return text
if isinstance(text, str):
final_num = BenchmarkEvaluator.extract_number(text)
else:
final_num = float(text)
return final_num
@staticmethod
def extract_choice_answer(text):
if text is None:
return text
# First pattern: 'answer is X'
pattern = r'answer is\s+([A-Z])'
match = re.search(pattern, text)
if match:
return match.group(1).strip()
# Second pattern: 'is X'
pattern = r'is\s+([A-Z])'
match = re.search(pattern, text)
if match:
return match.group(1).strip()
# Third pattern: '(X)' or 'The answer is (X)'
pattern = r'\(([A-Z])\)'
match = re.search(pattern, text)
if match:
return match.group(1).strip()
# Fourth pattern: 'X'
if len(text) == 1:
pattern = r'[A-Z]'
match = re.search(pattern, text)
if match:
return match.group(0).strip()
return text.strip()
@staticmethod
def normalize_string(s):
return ''.join(s.split()).lower()
@staticmethod
def extract_number(text):
matches = re.findall(r'-?\d+\.\d+|-?\d+', text)
return float(matches[0]) if matches else None
@staticmethod
def extract_ground_truth(text):
return text.split('####')[-1].strip()
@staticmethod
def extract_letter(text):
pattern = r'\((\w)\)'
match = re.search(pattern, text)
if match:
return match.group(1).strip()
return text.strip()

View File

@ -0,0 +1,37 @@
import os
import json
import pandas as pd
from tqdm import tqdm
from tasks.base.base_task import BaseTask
def load_dataset(mode, data_limit=None):
path = os.path.join("data", "GSM-Hard", "test.parquet")
data = pd.read_parquet(path)
data = data.sample(frac=1).reset_index(drop=True)
return data[:data_limit] if data_limit else data
def format_question(row, idx):
return {
"type": "GSM-Hard",
"Question": "You need to write python program to solve math problems:\n" + row["input"],
"Answer": row["target"],
"id": idx
}
def run(runner, evaluator, results_dir, mode, data_limit=None):
dataset = load_dataset(mode, data_limit)
result_path = os.path.join(results_dir, "gsm-hard.jsonl")
acc = 0
with open(result_path, "w", encoding="utf-8") as fd:
for idx, row in enumerate(tqdm(dataset.iterrows(), total=len(dataset))):
task = format_question(row[1], idx)
final_ans = runner.run_reasoning(task)
flag = evaluator.check_gsm8k(final_ans, task["Answer"])
if flag: acc += 1
record = {
"id": task["id"],
"pred": final_ans,
"correct": flag
}
fd.write(json.dumps(record, ensure_ascii=False) + "\n")

View File

@ -0,0 +1,41 @@
import os
import string
import json
import pandas as pd
from tqdm import tqdm
from tasks.base.base_task import BaseTask
def load_dataset(mode, data_limit=None):
path = os.path.join("data", "MMLU-Pro", f"{mode}.parquet")
data = pd.read_parquet(path)
return data[:data_limit] if data_limit else data
def format_question(task):
options = [f"{letter}: {op}" for letter, op in zip(string.ascii_uppercase, task["options"])]
prompt = f"The following are multiple choice questions (with answers) about {task['category']}."
question = prompt + "\n" + task["question"] + "\n" + " ".join(options)
return {
"type": "MMLU-Pro",
"Question": question,
"Answer": task["answer"],
"id": task["question_id"]
}
def run(runner, evaluator, results_dir, mode, data_limit=None):
dataset = load_dataset(mode, data_limit)
result_path = os.path.join(results_dir, f"MMLU-Pro_{mode}.jsonl")
acc = 0
with open(result_path, "w", encoding="utf-8") as fd:
for _, row in tqdm(dataset.iterrows(), total=len(dataset)):
task = format_question(row)
final_ans = runner.run_reasoning(task)
flag = evaluator.check_mmlu(final_ans, task["Answer"])
if flag:
acc += 1
record = {
"id": task["id"],
"pred": final_ans,
"correct": flag
}
fd.write(json.dumps(record, ensure_ascii=False) + "\n")

28
puppeteer/tasks/runner.py Normal file
View File

@ -0,0 +1,28 @@
from agent.register.register import agent_global_registry
from inference.reasoning.reasoning import GraphReasoning
from inference.graph.agent_graph import AgentGraph
class BenchmarkRunner:
def __init__(self, personas_path, global_config):
self.personas_path = personas_path
self.global_config = global_config
self.max_step_num = self.global_config.get('graph').get('max_step_num')
self.save_state = False
def setup_reasoning(self, data_item):
agent_global_registry.register_all_agents(self.personas_path)
agent_global_registry.reset_all_agents()
graph = AgentGraph()
return GraphReasoning(data_item, graph), graph
def run_reasoning(self, data_item):
reasoning, _ = self.setup_reasoning(data_item)
reasoning.start(self.save_state if self.save_state else None)
self.save_state = False
final_ans, _ = reasoning.n_step(self.max_step_num)
reasoning.visualize_path()
reasoning.visualize_graph()
return final_ans

31
puppeteer/tasks/srdd.py Normal file
View File

@ -0,0 +1,31 @@
import os
import pandas as pd
from tqdm import tqdm
import json
def load_dataset(data_limit=None):
data = pd.read_csv("./data/SRDD/SRDD.csv")
data = data.sample(frac=1).reset_index(drop=True)
return data[:data_limit] if data_limit else data
def format_question(row, idx):
return {
"type": "SRDD",
"Question": "Develop a pythonic software following description:\n" + row["Description"],
"id": idx
}
def run(runner, evaluator, results_dir, mode, data_limit=None):
dataset = load_dataset(data_limit)
result_path = os.path.join(results_dir, "srdd.jsonl")
with open(result_path, "w", encoding="utf-8") as fd:
for idx, row in tqdm(dataset.iterrows(), total=len(dataset)):
task = format_question(row, idx)
final_ans = runner.run_reasoning(task)
record = {
"id": task["id"],
"pred": final_ans
}
fd.write(json.dumps(record, ensure_ascii=False) + "\n")

View File

View File

@ -0,0 +1,41 @@
from abc import ABC, abstractmethod
from functools import wraps
import signal
import logging
class Tool(ABC):
def __init__(self, name, description, execute_function, timeout_duration=1, **kwargs):
super().__init__()
self.name = name
self.description = description
self.execute_function = execute_function
self.timeout_duration = timeout_duration
signal.alarm(0)
def timeout_handler(self, signum, frame):
raise TimeoutError(f"Tool execution timed out after {self.timeout_duration} seconds")
def with_timeout(self, func):
@wraps(func)
def wrapper(*args, **kwargs):
original_handler = signal.signal(signal.SIGALRM, self.timeout_handler)
signal.alarm(self.timeout_duration)
try:
result = func(*args, **kwargs)
return result
finally:
signal.alarm(0)
signal.signal(signal.SIGALRM, original_handler)
return wrapper
@abstractmethod
def execute(self, *args, **kwargs):
# Wrap the execute_function with timeout handling
safe_execute = self.with_timeout(self.execute_function)
try:
return safe_execute(*args, **kwargs)
except TimeoutError as e:
logging.error(f"Timeout in {self.name}: {str(e)}")
return False, str(e)
except Exception as e:
return False, f"Tool execution failed: {str(e)}"

View File

@ -0,0 +1,33 @@
from typing import Any
from tools.base.base_tool import Tool
class ToolRegistry:
def __init__(self):
self.tools = {}
def register_tool(self, name, tool):
if name in self.tools:
raise ValueError(f"Tool with name {name} is already registered.")
self.tools[name] = tool
def __call__(self, *args: Any, **kwds: Any):
def decorator(cls):
tool_instance = cls(*args, **kwds)
name = tool_instance.name
self.register_tool(name, tool_instance)
return cls
return decorator
def get_all_tools(self):
print(self.tools.keys())
def get_tool(self, name):
if name not in self.tools:
return None
else:
return self.tools[name]
def execute_tool(self, name, *args, **kwargs):
tool = self.get_tool(name)
return tool.execute(*args, **kwargs)
global_tool_registry = ToolRegistry()

View File

@ -0,0 +1,135 @@
from tools.base.register import global_tool_registry
from tools.base.base_tool import Tool
from tenacity import retry, stop_after_attempt, wait_fixed
import base64
import os
import shutil
from abc import ABC, abstractmethod
import subprocess
from subprocess import check_output
import time
import signal
FILE_REGEX = r"(^//.|^/|^ [a-zA-Z])?:?/.+ (/$)"
class CodeInterpreter(Tool):
def __init__(self):
super().__init__("run_code", "run code", self.execute)
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def execute(self, *args, **kwargs):
try:
work_path = kwargs.get("work_path", "")
code = kwargs.get("code", "")
file_path = kwargs.get("file_path", "")
self.timeout_detected = kwargs.get("timeout_detected", True)
code_path = self.write(work_path, code)
flag, ans = self.run(work_path, code_path, file_path)
except AttributeError:
# raise ValueError(f"Running Error")
return False, "Running Error"
return flag, ans
@abstractmethod
def write(self, work_path, code):
pass
@abstractmethod
def run(self, work_path, code_path, file_path):
pass
@global_tool_registry("run_python")
class PythonInterpreter(CodeInterpreter):
def __init__(self, name):
super().__init__()
self.name = name
def write(self, work_path, code):
code_path = os.path.join(work_path, "agent-main.py")
with open(code_path, 'w') as file:
file.write(code)
return code_path
def move_file(self, src_path, dest_path):
if not os.path.exists(src_path):
return
if dest_path == "":
dest_path = os.getcwd()
dest_dir = os.path.dirname(dest_path)
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
try:
shutil.copy2(src_path, dest_path)
except Exception as e:
return False
def robust_kill(self, process):
"""Robustly kill the process based on the OS."""
if process.poll() is None: # Check if the process is still running
if os.name == 'nt': # For Windows
os.kill(process.pid, signal.SIGTERM)
time.sleep(1) # Allow some time for graceful termination
if process.poll() is None: # Force kill if still running
os.kill(process.pid, signal.CTRL_BREAK_EVENT)
else: # For Linux/macOS
os.killpg(os.getpgid(process.pid), signal.SIGTERM) # Terminate the process group
time.sleep(1) # Allow some time for graceful termination
if process.poll() is None: # Force kill the group if still running
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
def run(self, work_path, code_path, file_path):
"""Executes a process and handles file movement, command execution, and timeouts."""
try:
if len(file_path) > 0:
self.move_file(src_path=file_path, dest_path=work_path)
# Determine the command to run based on the operating system
if os.name == 'nt': # Windows
command = f"cd {work_path} && python agent-main.py"
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
else: # Linux/macOS
command = f"cd {work_path} && python3 agent-main.py"
process = subprocess.Popen(command, shell=True, preexec_fn=os.setsid, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
try:
# Wait for process completion with a timeout of 10 seconds
out, err = process.communicate(timeout=10)
except subprocess.TimeoutExpired:
self.robust_kill(process)
if self.timeout_detected:
return False, "The process timed out after 10 seconds."
else:
return True, "The process completes without encountering any errors."
return_code = process.returncode
output = out.decode('utf-8', errors='ignore')
error_output = err.decode('utf-8', errors='ignore')
# If the process is still running after the timeout
if process.poll() is None:
self.robust_kill(process) # Ensure the process is terminated
return_code = process.returncode
# Handle return code and output
if return_code == 0:
# Clean up file paths in the output for readability
work_path = os.getcwd()
output = output.replace(work_path, "")
return True, output
else:
# Handle errors in the output
if error_output:
work_path = os.getcwd()
if "Traceback".lower() in error_output.lower():
errs = error_output.replace(work_path + "/", "").replace(work_path, "")
return False, errs
return False, error_output
except subprocess.CalledProcessError as e:
return False, f"CalledProcessError: {str(e)}"
except Exception as ex:
return False, f"An unexpected error occurred: {str(ex)}"

View File

@ -0,0 +1,44 @@
from tools.base.register import global_tool_registry
from tools.base.base_tool import Tool
from tenacity import retry, stop_after_attempt, wait_fixed
import os
from tools.utils.converter import MarkdownConverter
from model import global_openai_client as client
@global_tool_registry("read_file")
class FileRead(Tool):
"""
support file type:
.csv .xlsx
.docx
.txt
.pdf
(LLM generate description).png .jpg .jpeg
(LLM generate transcript).mp3
.json .jsonld
.pptx
.wav
.html .htm
"""
def __init__(self, name):
super().__init__(name=name,
description="read file from local path",
execute_function=self.execute)
self.converter = MarkdownConverter(mlm_client=client)
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def execute(self, *args,**kwargs):
try:
file_path = kwargs.get("file_path","")
file_extension = kwargs.get("file_extension", "")
if not os.path.exists(file_path):
return False, "File Not Exists"
try:
ans = self.converter.convert_local(path=file_path,
file_extension=file_extension)
return True, ans.text_content
except Exception as e:
return False, f"Error processing file: {str(e)}"
except Exception as e:
return False, f"Error processing file: {str(e)}"

View File

@ -0,0 +1,505 @@
# This file incorporates code from the AutoGen.
# The original code can be found at:
# https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/browser_utils.py
import json
import os
import requests
import re
import io
import uuid
import mimetypes
import time
import pathlib
import pathvalidate
from urllib.parse import urljoin, urlparse, unquote, parse_qs
from urllib.request import url2pathname
from typing import Any, Dict, List, Optional, Union, Tuple
from .converter import MarkdownConverter, UnsupportedFormatException, FileConversionException
import diskcache as dc
import yaml
global_config = yaml.safe_load(open("./config/global.yaml", "r"))
class SimpleTextBrowser:
"""(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""
def __init__(
self,
start_page: Optional[str] = None,
viewport_size: Optional[int] = 1024 * 8,
downloads_folder: Optional[Union[str, None]] = None,
bing_api_key: Optional[Union[str, None]] = None,
request_kwargs: Optional[Union[Dict[str, Any], None]] = None,
):
self.start_page: str = start_page if start_page else "about:blank"
self.viewport_size = viewport_size # Applies only to the standard uri types
# self.downloads_folder = log_path
self.history: List[Tuple[str, float]] = list()
self.page_title: Optional[str] = None
self.viewport_current_page = 0
self.viewport_pages: List[Tuple[int, int]] = list()
self.set_address(self.start_page)
self.bing_api_key = bing_api_key
if bing_api_key is None:
self.bing_api_key = global_config.get("api_keys").get("bing_api_key")
self.request_kwargs = request_kwargs
self._mdconvert = MarkdownConverter()
self._page_content: str = ""
self._find_on_page_query: Union[str, None] = None
self._find_on_page_last_result: Union[int, None] = None # Location of the last result
self.bing_cache = None
@property
def address(self) -> str:
"""Return the address of the current page."""
return self.history[-1][0]
def set_address(self, uri_or_path: str) -> None:
# TODO: Handle anchors
self.history.append((uri_or_path, time.time()))
# Handle special URIs
if uri_or_path == "about:blank":
self._set_page_content("")
elif uri_or_path.startswith("bing:"):
self._bing_search(uri_or_path[len("bing:") :].strip())
else:
if (
not uri_or_path.startswith("http:")
and not uri_or_path.startswith("https:")
and not uri_or_path.startswith("file:")
):
if len(self.history) > 1:
prior_address = self.history[-2][0]
uri_or_path = urljoin(prior_address, uri_or_path)
# Update the address with the fully-qualified path
self.history[-1] = (uri_or_path, self.history[-1][1])
self._fetch_page(uri_or_path)
self.viewport_current_page = 0
self.find_on_page_query = None
self.find_on_page_viewport = None
@property
def viewport(self) -> str:
"""Return the content of the current viewport."""
bounds = self.viewport_pages[self.viewport_current_page]
return self.page_content[bounds[0] : bounds[1]]
@property
def page_content(self) -> str:
"""Return the full contents of the current page."""
return self._page_content
def _set_page_content(self, content: str) -> None:
"""Sets the text content of the current page."""
self._page_content = content
self._split_pages()
if self.viewport_current_page >= len(self.viewport_pages):
self.viewport_current_page = len(self.viewport_pages) - 1
def page_down(self) -> None:
self.viewport_current_page = min(self.viewport_current_page + 1, len(self.viewport_pages) - 1)
def page_up(self) -> None:
self.viewport_current_page = max(self.viewport_current_page - 1, 0)
def find_on_page(self, query: str) -> Union[str, None]:
"""Searches for the query from the current viewport forward, looping back to the start if necessary."""
# Did we get here via a previous find_on_page search with the same query?
# If so, map to find_next
if query == self._find_on_page_query and self.viewport_current_page == self._find_on_page_last_result:
return self.find_next()
# Ok it's a new search start from the current viewport
self._find_on_page_query = query
viewport_match = self._find_next_viewport(query, self.viewport_current_page)
if viewport_match is None:
self._find_on_page_last_result = None
return None
else:
self.viewport_current_page = viewport_match
self._find_on_page_last_result = viewport_match
return self.viewport
def find_next(self) -> None:
"""Scroll to the next viewport that matches the query"""
if self._find_on_page_query is None:
return None
starting_viewport = self._find_on_page_last_result
if starting_viewport is None:
starting_viewport = 0
else:
starting_viewport += 1
if starting_viewport >= len(self.viewport_pages):
starting_viewport = 0
viewport_match = self._find_next_viewport(self._find_on_page_query, starting_viewport)
if viewport_match is None:
self._find_on_page_last_result = None
return None
else:
self.viewport_current_page = viewport_match
self._find_on_page_last_result = viewport_match
return self.viewport
def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int, None]:
"""Search for matches between the starting viewport looping when reaching the end."""
if query is None:
return None
# Normalize the query, and convert to a regular expression
nquery = re.sub(r"\*", "__STAR__", query)
nquery = " " + (" ".join(re.split(r"\W+", nquery))).strip() + " "
nquery = nquery.replace(" __STAR__ ", "__STAR__ ") # Merge isolated stars with prior word
nquery = nquery.replace("__STAR__", ".*").lower()
if nquery.strip() == "":
return None
idxs = list()
idxs.extend(range(starting_viewport, len(self.viewport_pages)))
idxs.extend(range(0, starting_viewport))
for i in idxs:
bounds = self.viewport_pages[i]
content = self.page_content[bounds[0] : bounds[1]]
# TODO: Remove markdown links and images
ncontent = " " + (" ".join(re.split(r"\W+", content))).strip().lower() + " "
if re.search(nquery, ncontent):
return i
return None
def visit_page(self, path_or_uri: str) -> str:
"""Update the address, visit the page, and return the content of the viewport."""
self.set_address(path_or_uri)
return self.viewport
def _split_pages(self) -> None:
# Do not split search results
if self.address.startswith("bing:"):
self.viewport_pages = [(0, len(self._page_content))]
return
# Handle empty pages
if len(self._page_content) == 0:
self.viewport_pages = [(0, 0)]
return
# Break the viewport into pages
self.viewport_pages = []
start_idx = 0
while start_idx < len(self._page_content):
end_idx = min(start_idx + self.viewport_size, len(self._page_content)) # type: ignore[operator]
# Adjust to end on a space
while end_idx < len(self._page_content) and self._page_content[end_idx - 1] not in [" ", "\t", "\r", "\n"]:
end_idx += 1
self.viewport_pages.append((start_idx, end_idx))
start_idx = end_idx
def _bing_api_call(self, query: str) -> Dict[str, Dict[str, List[Dict[str, Union[str, Dict[str, str]]]]]]:
# Check the cache
if self.bing_cache is not None:
cached = self.bing_cache.get(query)
if cached is not None:
return cached
# Make sure the key was set
if self.bing_api_key is None:
raise ValueError("Missing Bing API key.")
# Prepare the request parameters
request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
if "headers" not in request_kwargs:
request_kwargs["headers"] = {}
request_kwargs["headers"]["Ocp-Apim-Subscription-Key"] = self.bing_api_key
if "params" not in request_kwargs:
request_kwargs["params"] = {}
request_kwargs["params"]["q"] = query
request_kwargs["params"]["textDecorations"] = False
request_kwargs["params"]["textFormat"] = "raw"
request_kwargs["stream"] = False
request_kwargs["timeout"] = (5,10)
# Make the request
response = None
for _ in range(2):
try:
response = requests.get("https://api.bing.microsoft.com/v7.0/search", **request_kwargs)
response.raise_for_status()
break
except Exception:
pass
time.sleep(1)
if response is None:
raise requests.exceptions.RequestException("Failed to fetch Bing search results.")
results = response.json()
# Cache the results
if self.bing_cache is not None:
self.bing_cache.set(query, results)
return results # type: ignore[no-any-return]
def _bing_search(self, query: str) -> None:
results = self._bing_api_call(query)
def _prev_visit(url):
for i in range(len(self.history) - 1, -1, -1):
if self.history[i][0] == url:
# Todo make this more human-friendly
return f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n"
return ""
web_snippets: List[str] = list()
idx = 0
if "webPages" in results:
for page in results["webPages"]["value"]:
idx += 1
web_snippets.append(
f"{idx}. [{page['name']}]({page['url']})\n{_prev_visit(page['url'])}{page['snippet']}"
)
if "deepLinks" in page:
for dl in page["deepLinks"]:
idx += 1
web_snippets.append(
f"{idx}. [{dl['name']}]({dl['url']})\n{_prev_visit(dl['url'])}{dl['snippet'] if 'snippet' in dl else ''}"
)
news_snippets = list()
if "news" in results:
for page in results["news"]["value"]:
idx += 1
datePublished = ""
if "datePublished" in page:
datePublished = "\nDate published: " + page["datePublished"].split("T")[0]
news_snippets.append(
f"{idx}. [{page['name']}]({page['url']})\n{_prev_visit(page['url'])}{page['description']}{datePublished}"
)
video_snippets = list()
if "videos" in results:
for page in results["videos"]["value"]:
if not page["contentUrl"].startswith("https://www.youtube.com/watch?v="):
continue
idx += 1
datePublished = ""
if "datePublished" in page:
datePublished = "\nDate published: " + page["datePublished"].split("T")[0]
video_snippets.append(
f"{idx}. [{page['name']}]({page['contentUrl']})\n{_prev_visit(page['contentUrl'])}{page.get('description', '')}{datePublished}"
)
self.page_title = f"{query} - Search"
content = (
f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets) + len(video_snippets)} results:\n\n## Web Results\n"
+ "\n\n".join(web_snippets)
)
if len(news_snippets) > 0:
content += "\n\n## News Results:\n" + "\n\n".join(news_snippets)
if len(video_snippets) > 0:
content += "\n\n## Video Results:\n" + "\n\n".join(video_snippets)
self._set_page_content(content)
def _fetch_page(self, url: str) -> None:
download_path = ""
response = None
print(f'Fetching page: {url}')
try:
if url.startswith("file://"):
download_path = os.path.normcase(os.path.normpath(unquote(url[8:])))
res = self._mdconvert.convert_local(download_path)
self.page_title = res.title
self._set_page_content(res.text_content)
else:
# Prepare the request parameters
request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
request_kwargs["stream"] = True
request_kwargs["timeout"] = (5,10)
# Send a HTTP request to the URL
response = requests.get(url, **request_kwargs)
response.raise_for_status()
# If the HTTP request was successful
content_type = response.headers.get("content-type", "")
# Text or HTML
if "text/" in content_type.lower():
res = self._mdconvert.convert_response(response)
self.page_title = res.title
self._set_page_content(res.text_content)
# A download
else:
# Try producing a safe filename
fname = None
download_path = None
try:
fname = pathvalidate.sanitize_filename(os.path.basename(urlparse(url).path)).strip()
download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
suffix = 0
while os.path.exists(download_path) and suffix < 1000:
suffix += 1
base, ext = os.path.splitext(fname)
new_fname = f"{base}__{suffix}{ext}"
download_path = os.path.abspath(os.path.join(self.downloads_folder, new_fname))
except NameError:
pass
# No suitable name, so make one
if fname is None:
extension = mimetypes.guess_extension(content_type)
if extension is None:
extension = ".download"
fname = str(uuid.uuid4()) + extension
download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
# Open a file for writing
with open(download_path, "wb") as fh:
for chunk in response.iter_content(chunk_size=512):
fh.write(chunk)
# Render it
local_uri = pathlib.Path(download_path).as_uri()
self.set_address(local_uri)
except UnsupportedFormatException as e:
print(f'Unsupported format: {e}')
self.page_title = ("Download complete.",)
self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
except FileConversionException as e:
print(f'File conversion error: {e}')
self.page_title = ("Download complete.",)
self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
except FileNotFoundError:
self.page_title = "Error 404"
self._set_page_content(f"## Error 404\n\nFile not found: {download_path}")
except requests.exceptions.RequestException:
if response is None:
self.page_title = "Error"
self._set_page_content(f"## Error\n\nFailed to fetch '{url}'")
else:
self.page_title = f"Error {response.status_code}"
# If the error was rendered in HTML we might as well render it
content_type = response.headers.get("content-type", "")
if content_type is not None and "text/html" in content_type.lower():
res = self._mdconvert.convert(response)
self.page_title = f"Error {response.status_code}"
text_content = getattr(res, "text_content", None)
self._set_page_content(f"## Error {response.status_code}\n\n{text_content}")
else:
text = ""
for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
if type(chunk) == str:
text += chunk
self.page_title = f"Error {response.status_code}"
self._set_page_content(f"## Error {response.status_code}\n\n{text}")
# #https://stackoverflow.com/questions/10123929/fetch-a-file-from-a-local-url-with-python-requests
# class LocalFileAdapter(requests.adapters.BaseAdapter):
# """Protocol Adapter to allow Requests to GET file:// URLs"""
#
# @staticmethod
# def _chkpath(method, path):
# """Return an HTTP status for the given filesystem path."""
# if method.lower() in ("put", "delete"):
# return 501, "Not Implemented"
# elif method.lower() not in ("get", "head"):
# return 405, "Method Not Allowed"
# elif not os.path.exists(path):
# return 404, "File Not Found"
# elif not os.access(path, os.R_OK):
# return 403, "Access Denied"
# else:
# return 200, "OK"
#
# def send(self, req, **kwargs):
# """Return the file specified by the given request"""
# path = os.path.normcase(os.path.normpath(url2pathname(req.path_url)))
# response = requests.Response()
#
# response.status_code, response.reason = self._chkpath(req.method, path)
# if response.status_code == 200 and req.method.lower() != "head":
# try:
# if os.path.isfile(path):
# response.raw = open(path, "rb")
# else: # List the directory
# response.headers["content-type"] = "text/html"
# pardir = os.path.normpath(os.path.join(path, os.pardir))
# pardir_uri = pathlib.Path(pardir).as_uri()
# listing = f"""
# <!DOCTYPE html>
# <html>
# <head>
# <title>Index of {html.escape(path)}</title>
# </head>
# <body>
# <h1>Index of {html.escape(path)}</h1>
#
# <a href="{html.escape(pardir_uri, quote=True)}">.. (parent directory)</a>
#
# <table>
# <tr>
# <th>Name</th><th>Size</th><th>Date modified</th>
# </tr>
# """
#
# for entry in os.listdir(path):
# full_path = os.path.normpath(os.path.join(path, entry))
# full_path_uri = pathlib.Path(full_path).as_uri()
# size = ""
#
# if os.path.isdir(full_path):
# entry = entry + os.path.sep
# else:
# size = str(os.path.getsize(full_path))
#
# listing += (
# "<tr>\n"
# + f'<td><a href="{html.escape(full_path_uri, quote=True)}">{html.escape(entry)}</a></td>'
# + f"<td>{html.escape(size)}</td>"
# + f"<td>{html.escape(entry)}</td>"
# + "</tr>"
# )
#
# listing += """
# </table>
# </body>
# </html>
# """
#
# response.raw = io.StringIO(listing)
# except (OSError, IOError) as err:
# response.status_code = 500
# response.reason = str(err)
#
# if isinstance(req.url, bytes):
# response.url = req.url.decode("utf-8")
# else:
# response.url = req.url
#
# response.request = req
# response.connection = self
#
# return response
#
# def close(self):
# pass

View File

@ -0,0 +1,927 @@
# This file incorporates code from the AutoGen.
# The original code can be found at:
# https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/mdconvert.py
import json
import os
import requests
import re
import markdownify
import io
import uuid
import mimetypes
import html
import pathlib
import puremagic
import tempfile
import copy
import mammoth
import pptx
import pydub
import pandas as pd
import speech_recognition as sr
import sys
import traceback
import xml.etree.ElementTree as ET
import cv2
import PIL
import shutil
import subprocess
import easyocr
import numpy as np
from pytube import YouTube
import base64
from urllib.parse import urljoin, urlparse, parse_qs
from urllib.request import url2pathname
from bs4 import BeautifulSoup
from typing import Any, Dict, List, Optional, Union, Tuple
from tenacity import retry, stop_after_attempt, wait_fixed
# Optional PDF support
IS_PDF_CAPABLE = False
try:
import pdfminer
import pdfminer.high_level
IS_PDF_CAPABLE = True
except ModuleNotFoundError:
pass
# Optional YouTube transcription support
IS_YOUTUBE_TRANSCRIPT_CAPABLE = False
try:
from youtube_transcript_api import YouTubeTranscriptApi
IS_YOUTUBE_TRANSCRIPT_CAPABLE = True
except ModuleNotFoundError:
pass
class DocumentConverterResult:
"""The result of converting a document to text."""
def __init__(self, title: Union[str, None] = None, text_content: str = "", base64 = None):
self.title = title
self.text_content = text_content
self.base64 = base64
class DocumentConverter:
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
raise NotImplementedError()
class PlainTextConverter(DocumentConverter):
"""Anything with content type text/plain"""
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
extension = kwargs.get("file_extension", "")
if extension == "":
return None
content_type, encoding = mimetypes.guess_type("__placeholder" + extension)
if content_type is None:
return None
if "text/" not in content_type.lower():
return None
text_content = ""
with open(local_path, "rt", encoding="utf-8") as fh:
text_content = fh.read()
return DocumentConverterResult(
title=None,
text_content=text_content,
)
class HtmlConverter(DocumentConverter):
"""Anything with content type text/html"""
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not html
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".html", ".htm"]:
return None
result = None
with open(local_path, "rt", encoding='utf-8') as fh:
result = self._convert(fh.read())
return result
def _convert(self, html_content) -> Union[None, DocumentConverterResult]:
"""Helper function that converts and HTML string."""
# Parse the string
soup = BeautifulSoup(html_content, "html.parser")
# Remove javascript and style blocks
for script in soup(["script", "style"]):
script.extract()
# Print only the main content
body_elm = soup.find("body")
webpage_text = ""
if body_elm:
webpage_text = markdownify.MarkdownConverter(newline_style='backslash').convert_soup(body_elm)
else:
webpage_text = markdownify.MarkdownConverter().convert_soup(soup)
return DocumentConverterResult(
title=None if soup.title is None else soup.title.string,
text_content=webpage_text,
)
class WikipediaConverter(DocumentConverter):
"""Handle Wikipedia pages separately, focusing only on the main document content."""
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not Wikipedia
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".html", ".htm"]:
return None
url = kwargs.get("url", "")
if not re.search(r"^https?:\/\/[a-zA-Z]{2,3}\.wikipedia.org\/", url):
return None
# Parse the file
soup = None
with open(local_path, "rt", encoding='utf-8') as fh:
soup = BeautifulSoup(fh.read(), "html.parser")
# Remove javascript and style blocks
for script in soup(["script", "style"]):
script.extract()
# Print only the main content
body_elm = soup.find("div", {"id": "mw-content-text"})
title_elm = soup.find("span", {"class": "mw-page-title-main"})
webpage_text = ""
if body_elm:
# What's the title
main_title = soup.title.string
if title_elm and len(title_elm) > 0:
main_title = title_elm.string
# Convert the page
webpage_text = "# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
else:
webpage_text = markdownify.MarkdownConverter().convert_soup(soup)
return DocumentConverterResult(
title=soup.title.string,
text_content=webpage_text,
)
class YouTubeConverter(DocumentConverter):
"""Handle YouTube specially, focusing on the video title, description, and transcript."""
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not YouTube
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".html", ".htm"]:
return None
url = kwargs.get("url", "")
if not url.startswith("https://www.youtube.com/watch?"):
return None
# Parse the file
soup = None
with open(local_path, "rt", encoding='utf-8') as fh:
soup = BeautifulSoup(fh.read(), "html.parser")
# Read the meta tags
metadata = {"title": soup.title.string}
for meta in soup(["meta"]):
for a in meta.attrs:
if a in ["itemprop", "property", "name"]:
metadata[meta[a]] = meta.get("content", "")
break
# We can also try to read the full description. This is more prone to breaking, since it reaches into the page implementation
try:
for script in soup(["script"]):
content = script.text
if "ytInitialData" in content:
lines = re.split(r"\r?\n", content)
obj_start = lines[0].find("{")
obj_end = lines[0].rfind("}")
if obj_start >= 0 and obj_end >= 0:
data = json.loads(lines[0][obj_start : obj_end + 1])
attrdesc = self._findKey(data, "attributedDescriptionBodyText")
if attrdesc:
metadata["description"] = attrdesc["content"]
break
except:
pass
# Start preparing the page
webpage_text = "# YouTube\n"
title = self._get(metadata, ["title", "og:title", "name"])
if title:
webpage_text += f"\n## {title}\n"
stats = ""
views = self._get(metadata, ["interactionCount"])
if views:
stats += f"- **Views:** {views}\n"
keywords = self._get(metadata, ["keywords"])
if keywords:
stats += f"- **Keywords:** {keywords}\n"
runtime = self._get(metadata, ["duration"])
if runtime:
stats += f"- **Runtime:** {runtime}\n"
if len(stats) > 0:
webpage_text += f"\n### Video Metadata\n{stats}\n"
description = self._get(metadata, ["description", "og:description"])
if description:
webpage_text += f"\n### Description\n{description}\n"
if IS_YOUTUBE_TRANSCRIPT_CAPABLE:
transcript_text = ""
parsed_url = urlparse(url)
params = parse_qs(parsed_url.query)
if "v" in params:
video_id = params["v"][0]
try:
# Must be a single transcript.
transcript = YouTubeTranscriptApi.get_transcript(video_id)
transcript_text = " ".join([part["text"] for part in transcript])
# Alternative formatting:
# formatter = TextFormatter()
# formatter.format_transcript(transcript)
except:
pass
if transcript_text:
webpage_text += f"\n### Transcript\n{transcript_text}\n"
output_path = self._download_video(url=url, local_path=local_path)
if output_path!="" and os.path.exists(output_path):
base64_video = self._base64_video(output_path)
else:
base64_video = ""
return DocumentConverterResult(
title=title if title else soup.title.string,
text_content=webpage_text,
base64=base64_video
)
def _download_video(self, url, local_path) -> str:
# TODO
return ""
try:
video_id = url.split('v=')[-1].split('&')[0]
video_id = video_id.strip()
youtube = YouTube(url)
video_stream = youtube.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
if not video_stream:
raise ValueError("No suitable video stream found.")
output_path = f"{local_path}/{video_id}.mp4"
video_stream.download(output_path=local_path, filename=f"{video_id}.mp4")
return output_path
except Exception as e:
return ""
def _base64_video(self, output_path, frame_interval: int = 10) -> list:
video = cv2.VideoCapture(str(output_path))
base64_frames = []
frame_count = 0
while video.isOpened():
success, frame = video.read()
if not success:
break
if frame_count % frame_interval == 0:
_, buffer = cv2.imencode(".jpg", frame)
base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
frame_count += 1
video.release()
return base64_frames
def _get(self, json, keys, default=None):
for k in keys:
if k in json:
return json[k]
return default
def _findKey(self, json, key):
if isinstance(json, list):
for elm in json:
ret = self._findKey(elm, key)
if ret is not None:
return ret
elif isinstance(json, dict):
for k in json:
if k == key:
return json[k]
else:
ret = self._findKey(json[k], key)
if ret is not None:
return ret
return None
class PdfConverter(DocumentConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a PDF
extension = kwargs.get("file_extension", "")
if extension.lower() != ".pdf":
return None
return DocumentConverterResult(
title=None,
text_content=pdfminer.high_level.extract_text(local_path),
)
class DocxConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a DOCX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".docx":
return None
result = None
with open(local_path, "rb") as docx_file:
result = mammoth.convert_to_html(docx_file)
html_content = result.value
result = self._convert(html_content)
return result
class JsonConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".json", ".jsonld", "jsonl"]:
return None
with open(local_path, "r", encoding="utf-8") as json_file:
data = json.load(json_file)
md_content = self.dict_to_md(data)
return DocumentConverterResult(
title=None,
text_content=md_content.strip(),
)
def dict_to_md(self, data, level=1):
md_str = ""
if isinstance(data, dict):
for key, value in data.items():
md_str += f"{'#' * level} {key}\n"
md_str += self.dict_to_md(value, level + 1)
elif isinstance(data, list):
for item in data:
md_str += self.dict_to_md(item, level)
else:
md_str += f"{data}\n"
return md_str
class XmlConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a XML
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".xml"]:
return None
with open(local_path, "rt", encoding='utf-8') as fh:
xml_content = fh.read()
root = ET.fromstring(xml_content)
md_content = self.xml_to_md(root)
return DocumentConverterResult(
title=None,
text_content=md_content.strip(),
)
def xml_to_md(self, element, level=1):
md_str = ""
md_str += f"{'#' * level} {element.tag}\n"
if element.text and element.text.strip():
md_str += f"{element.text.strip()}\n"
for child in element:
md_str += self.xml_to_md(child, level + 1)
return md_str
class XlsxConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a XLSX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".xlsx":
return None
sheets = pd.read_excel(local_path, sheet_name=None)
md_content = ""
for s in sheets:
md_content += f"## {s}\n"
html_content = sheets[s].to_html(index=False)
md_content += self._convert(html_content).text_content.strip() + "\n\n"
return DocumentConverterResult(
title=None,
text_content=md_content.strip(),
)
class CsvConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a XLSX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".csv":
return None
md_content = ""
sheets = pd.read_csv(local_path)
html_content = sheets.to_html(index=False)
md_content += self._convert(html_content).text_content.strip() + "\n\n"
return DocumentConverterResult(
title=None,
text_content=md_content.strip(),
)
class PptxConverter(HtmlConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a PPTX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".pptx":
return None
md_content = ""
presentation = pptx.Presentation(local_path)
slide_num = 0
for slide in presentation.slides:
slide_num += 1
md_content += f"\n\n<!-- Slide number: {slide_num} -->\n"
title = slide.shapes.title
for shape in slide.shapes:
# Pictures
if self._is_picture(shape):
# https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
alt_text = ""
try:
alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
except:
pass
# A placeholder name
filename = re.sub(r"\W", "", shape.name) + ".jpg"
# try:
# filename = shape.image.filename
# except:
# pass
md_content += "\n![" + (alt_text if alt_text else shape.name) + "](" + filename + ")\n"
# Tables
if self._is_table(shape):
html_table = "<html><body><table>"
first_row = True
for row in shape.table.rows:
html_table += "<tr>"
for cell in row.cells:
if first_row:
html_table += "<th>" + html.escape(cell.text) + "</th>"
else:
html_table += "<td>" + html.escape(cell.text) + "</td>"
html_table += "</tr>"
first_row = False
html_table += "</table></body></html>"
md_content += "\n" + self._convert(html_table).text_content.strip() + "\n"
# Text areas
elif shape.has_text_frame:
if shape == title:
md_content += "# " + shape.text.lstrip() + " "
else:
md_content += shape.text + " "
md_content = md_content.strip()
if slide.has_notes_slide:
md_content += "\n\n### Notes:\n"
notes_frame = slide.notes_slide.notes_text_frame
if notes_frame is not None:
md_content += notes_frame.text
md_content = md_content.strip()
return DocumentConverterResult(
title=None,
text_content=md_content.strip(),
)
def _is_picture(self, shape):
if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PICTURE:
return True
if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PLACEHOLDER:
if hasattr(shape, "image"):
return True
return False
def _is_table(self, shape):
if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.TABLE:
return True
return False
import whisper
import joblib
# cache asr function
asr_cache = joblib.Memory(location=".cache/asr", verbose=0)
@asr_cache.cache
def asr(local_path):
whisper_model = whisper.load_model("large")
return whisper_model.transcribe(local_path)['text']
class WavConverter(DocumentConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a XLSX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".wav":
return None
# recognizer = sr.Recognizer()
# with sr.AudioFile(local_path) as source:
# audio = recognizer.record(source)
# text_content = recognizer.recognize_google(audio).strip()
text_content = asr(local_path)
return DocumentConverterResult(
title=None,
text_content="### Audio Transcript:\n" + ("[No speech detected]" if text_content == "" else text_content),
)
class Mp3Converter(WavConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a MP3
extension = kwargs.get("file_extension", "")
if extension.lower() != ".mp3":
return None
# handle, temp_path = tempfile.mkstemp(suffix=".wav")
# os.close(handle)
# try:
# sound = pydub.AudioSegment.from_mp3(local_path)
# sound.export(temp_path, format="wav")
# _args = dict()
# _args.update(kwargs)
# _args["file_extension"] = ".wav"
# result = super().convert(temp_path, **_args)
# finally:
# os.unlink(temp_path)
# return result
# ASR, large time cost
# if "5b89b147-cdab-40e1-be5b-819bc076c270" in local_path:
# text_content = ""
# else:
# text_content = asr(local_path)
mlm_client = kwargs.get("mlm_client")
if mlm_client is not None:
text_content = self._get_audio_transcript(local_path, extension, mlm_client).strip()
return DocumentConverterResult(
title=None,
text_content="### Audio Transcript:\n" + ("[No speech detected]" if text_content == "" else text_content),
)
def _get_audio_transcript(self, local_path, extension, client):
audio_file = open(local_path, "rb")
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
return transcript.text
class ImageConverter(DocumentConverter):
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
# Bail if not a XLSX
extension = kwargs.get("file_extension", "")
if extension.lower() not in [".jpg", ".jpeg", ".png"]:
return None
ocr_min_confidence = kwargs.get("ocr_min_confidence", 0.25)
md_content = ""
# Add metadata
metadata = self._get_metadata(local_path)
if metadata:
for f in [
"Title",
"Caption",
"Description",
"Keywords",
"Artist",
"DateTimeOriginal",
"CreateDate",
"GPSPosition",
]:
if f in metadata:
md_content += f"{f}: {metadata[f]}\n"
# Try describing the image with GPTV
mlm_client = kwargs.get("mlm_client")
if mlm_client is not None:
md_content += (
"\n# Description:\n"
+ self._get_mlm_description(local_path, extension, mlm_client, prompt=kwargs.get("mlm_prompt")).strip()
+ "\n"
)
# OCR, large time cost
# image = PIL.Image.open(local_path)
# # Remove transparency
# if image.mode in ("RGBA", "P"):
# image = image.convert("RGB")
# reader = easyocr.Reader(["en"]) # specify the language(s)
# output = reader.readtext(np.array(image)) # local_path)
# # The output is a list of tuples, each containing the coordinates of the text and the text itself.
# # We join all the text pieces together to get the final text.
# ocr_text = " "
# for item in output:
# if item[2] >= ocr_min_confidence:
# ocr_text += item[1] + " "
# ocr_text = ocr_text.strip()
# if len(ocr_text) > 0:
# md_content += "\n# Text detected by OCR:\n" + ocr_text
base64_img = self._base64_img(local_path=local_path)
return DocumentConverterResult(
title=None,
text_content=md_content,
base64=base64_img
)
def _base64_img(self, local_path) -> str:
with open(local_path, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
return encoded_image
def _get_metadata(self, local_path):
exiftool = shutil.which("exiftool")
if not exiftool:
return None
else:
try:
result = subprocess.run([exiftool, "-json", local_path], capture_output=True, text=True).stdout
return json.loads(result)[0]
except:
return None
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def _get_mlm_description(self, local_path, extension, client, prompt=None):
if prompt is None or prompt.strip() == "":
prompt = """Write a detailed description for an image by describing the overall setting, main subjects, their actions and interactions,
secondary elements in the background, colors and lighting, the emotions and atmosphere conveyed, and any implied meaning or symbolism.
"""
sys.stderr.write(f"MLM Prompt:\n{prompt}\n")
data_uri = ""
with open(local_path, "rb") as image_file:
content_type, encoding = mimetypes.guess_type("_dummy" + extension)
if content_type is None:
content_type = "image/jpeg"
image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
data_uri = f"data:{content_type};base64,{image_base64}"
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": data_uri,
},
},
],
}
]
response = client.chat.completions.create(model="gpt-4o",messages=messages)
return response.choices[0].message.content
class FileConversionException(BaseException):
pass
class UnsupportedFormatException(BaseException):
pass
class MarkdownConverter:
"""(In preview) An extremely simple text-based document reader, suitable for LLM use.
This reader will convert common file-types or webpages to Markdown."""
def __init__(
self,
requests_session: Optional[requests.Session] = None,
mlm_client: Optional[Any] = None,
):
if requests_session is None:
self._requests_session = requests.Session()
else:
self._requests_session = requests_session
self._mlm_client = mlm_client
self._page_converters: List[DocumentConverter] = []
# Register converters for successful browsing operations
# Later registrations are tried first / take higher priority than earlier registrations
# To this end, the most specific converters should appear below the most generic converters
self.register_page_converter(PlainTextConverter())
self.register_page_converter(HtmlConverter())
self.register_page_converter(WikipediaConverter())
self.register_page_converter(YouTubeConverter())
self.register_page_converter(DocxConverter())
self.register_page_converter(XlsxConverter())
self.register_page_converter(PptxConverter())
self.register_page_converter(WavConverter())
self.register_page_converter(Mp3Converter())
self.register_page_converter(ImageConverter())
self.register_page_converter(CsvConverter())
self.register_page_converter(JsonConverter())
self.register_page_converter(XmlConverter())
if IS_PDF_CAPABLE:
self.register_page_converter(PdfConverter())
def convert(self, source, **kwargs):
"""
Args:
- source: can be a string representing a path or url, or a requests.response object
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
"""
try:
# Local path or url
if isinstance(source, str):
if source.startswith("http://") or source.startswith("https://") or source.startswith("file://"):
return self.convert_url(source, **kwargs)
else:
return self.convert_local(source, **kwargs)
# Request response
elif isinstance(source, requests.Response):
return self.convert_response(source, **kwargs)
except Exception as e:
return f"Error: {e}"
def convert_local(self, path, **kwargs):
# Prepare a list of extensions to try (in order of priority)
ext = kwargs.get("file_extension")
extensions = [ext] if ext is not None else []
# Get extension alternatives from the path and puremagic
base, ext = os.path.splitext(path)
self._append_ext(extensions, ext)
self._append_ext(extensions, self._guess_ext_magic(path))
# Convert
return self._convert(path, extensions, **kwargs)
def convert_url(self, url, **kwargs):
# Send a HTTP request to the URL
response = self._requests_session.get(url, stream=True)
response.raise_for_status()
return self.convert_response(response, **kwargs)
def convert_response(self, response, **kwargs):
# Prepare a list of extensions to try (in order of priority)
ext = kwargs.get("file_extension")
extensions = [ext] if ext is not None else []
# Guess from the mimetype
content_type = response.headers.get("content-type", "").split(";")[0]
self._append_ext(extensions, mimetypes.guess_extension(content_type))
# Read the content disposition if there is one
content_disposition = response.headers.get("content-disposition", "")
m = re.search(r"filename=([^;]+)", content_disposition)
if m:
base, ext = os.path.splitext(m.group(1).strip("\"'"))
self._append_ext(extensions, ext)
# Read from the extension from the path
base, ext = os.path.splitext(urlparse(response.url).path)
self._append_ext(extensions, ext)
# Save the file locally to a temporary file. It will be deleted before this method exits
handle, temp_path = tempfile.mkstemp()
fh = os.fdopen(handle, "wb")
result = None
try:
# Download the file
for chunk in response.iter_content(chunk_size=512):
fh.write(chunk)
fh.close()
# Use puremagic to check for more extension options
self._append_ext(extensions, self._guess_ext_magic(temp_path))
# Convert
result = self._convert(temp_path, extensions, url=response.url)
# Clean up
finally:
try:
fh.close()
except:
pass
os.unlink(temp_path)
return result
def _convert(self, local_path, extensions, **kwargs):
error_trace = ""
for ext in extensions:
for converter in self._page_converters:
_kwargs = copy.deepcopy(kwargs)
_kwargs.update({"file_extension": ext})
# Copy any additional global options
if "mlm_client" not in _kwargs and self._mlm_client is not None:
_kwargs["mlm_client"] = self._mlm_client
# If we hit an error log it and keep trying
res = None
try:
res = converter.convert(local_path, **_kwargs)
except Exception as e:
error_trace = ("\n\n" + traceback.format_exc()).strip()
if res is not None:
# Normalize the content
res.text_content = "\n".join([line.rstrip() for line in re.split(r"\r?\n", res.text_content)])
res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)
# Todo
return res
# If we got this far without success, report any exceptions
if len(error_trace) > 0:
# raise FileConversionException(
# f"Could not convert '{local_path}' to Markdown. File type was recognized as {extensions}. While converting the file, the following error was encountered:\n\n{error_trace}"
# )
return f"Could not convert '{local_path}' to Markdown. File type was recognized as {extensions}. While converting the file, the following error was encountered:\n\n{error_trace}"
# Nothing can handle it!
# raise UnsupportedFormatException(
# f"Could not convert '{local_path}' to Markdown. The formats {extensions} are not supported."
# )
return f"Could not convert '{local_path}' to Markdown. The formats {extensions} are not supported."
def _append_ext(self, extensions, ext):
"""Append a unique non-None, non-empty extension to a list of extensions."""
if ext is None:
return
ext = ext.strip()
if ext == "":
return
# if ext not in extensions:
if True:
extensions.append(ext)
def _guess_ext_magic(self, path):
"""Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""
# Use puremagic to guess
try:
guesses = puremagic.magic_file(path)
if len(guesses) > 0:
ext = guesses[0].extension.strip()
if len(ext) > 0:
return ext
except FileNotFoundError:
pass
except IsADirectoryError:
pass
except PermissionError:
pass
except Exception:
pass
return None
def register_page_converter(self, converter: DocumentConverter) -> None:
"""Register a page text converter."""
self._page_converters.insert(0, converter)

View File

@ -0,0 +1,134 @@
from tools.base.register import global_tool_registry
from tools.base.base_tool import Tool
import arxiv
from abc import abstractmethod
from tools.utils.broswer import SimpleTextBrowser
import signal
from functools import wraps
def timeout_handler(signum, frame):
raise TimeoutError("Request timed out")
def timeout(seconds=1):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
# Set the signal handler
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(seconds)
try:
result = func(*args, **kwargs)
finally:
# Disable the alarm
signal.alarm(0)
return result
return wrapper
return decorator
class Web_Search(Tool):
def __init__(self):
super().__init__("web_search", "Search the web for a given query", self.execute)
self.broswer = SimpleTextBrowser()
def execute(self,*args,**kwargs):
try:
query = kwargs.get("query", "")
self.broswer.downloads_folder = kwargs.get("work_path", "")
flag, ans = self.search(query)
except AttributeError:
return False, "No results found for query {}".format(query)
except TimeoutError:
return False, "Timeout"
except Exception as e:
return False, "No results found for query {}".format(query)
if (ans is None) or (len(ans) == 0):
# raise ValueError(f"No results found for query {query}.")
return False, "No results found for query {}".format(query)
return flag, ans
@abstractmethod
def search(self, query):
pass
import arxiv
import requests
@global_tool_registry("search_arxiv")
class arXiv_SearchEngine(Web_Search):
def __init__(self, name):
super().__init__()
self.name = name
def search(self, query):
# Custom timeout
timeout = 10 # Timeout in seconds
# Create a custom session with a timeout
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(timeout=timeout)
session.mount("http://", adapter)
session.mount("https://", adapter)
try:
# Perform the search with custom session
search = arxiv.Search(
query=query,
max_results=5,
sort_by=arxiv.SortCriterion.Relevance,
session=session # Use the session with timeout
)
results = []
for result in search.results():
result_info = {
"title": result.title,
"authors": ", ".join(author.name for author in result.authors),
"summary": result.summary,
"pdf_url": result.pdf_url
}
results.append(result_info)
if len(results) == 0:
results = "Page not exists in arXiv, try different search tools like Bing search."
return False, results
return True, str(results)
except requests.exceptions.Timeout:
return False, "Request timed out. Please try again later."
except Exception as e:
return False, f"An error occurred: {e}"
@global_tool_registry("search_bing")
class Bing_SearchEngine(Web_Search):
def __init__(self, name):
super().__init__()
self.name = name
def search(self, query):
self.broswer.set_address("bing:"+query)
if self.broswer.page_content != None and len(self.broswer.page_content) != 0:
return True, self.broswer.page_content
else:
return False, "page not exists in bing, try different search tools"
@global_tool_registry("access_website")
class Website_SearchEngine(Web_Search):
def __init__(self, name):
super().__init__()
self.name = name
def search(self, url):
self.broswer.set_address(url)
if self.broswer.page_content != None and len(self.broswer.page_content) != 0:
if "Failed to fetch" in self.broswer.page_content:
return False, self.broswer.page_content
else:
return True, self.broswer.page_content
else:
return False, "Can not Access this website: {}".format(url)

View File

@ -0,0 +1,3 @@
from .logging import LogManager
__all__ = ['LogManager']

View File

@ -0,0 +1,142 @@
import subprocess
import json
import yaml
import time
import logging
from easydict import EasyDict
import re
import os
from typing import Optional, List
import ast
from model import query_gpt
# =============================
# File / JSON / Code Utilities
# =============================
def write_jsonl(fd, record: dict):
fd.write(json.dumps(record, ensure_ascii=False) + "\n")
def read_code(file_path: str) -> str:
if file_path and os.path.isfile(file_path):
with open(file_path, 'r', encoding='utf-8') as f:
return f.read()
return ""
def read_text(file_path: str) -> str:
return read_code(file_path)
def write_file(work_path: str, content: str, ext: str = "py", file_path: Optional[str] = None) -> str:
"""Write code/text to a file, auto-increment file name if needed."""
if file_path and os.path.isfile(file_path):
with open(file_path, 'w', encoding='utf-8') as f:
if len(content) > 0:
f.write(content)
return file_path
index = 0
while os.path.exists(os.path.join(work_path, f"agent-main_{index}.{ext}")):
index += 1
file_path = os.path.join(work_path, f"agent-main_{index}.{ext}")
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
return file_path
def write_code(work_path: str, code: str, code_path: Optional[str] = None) -> str:
return write_file(work_path, code, ext="py", file_path=code_path)
def write_text(work_path: str, text: str, text_path: Optional[str] = None) -> str:
return write_file(work_path, text, ext="txt", file_path=text_path)
def format_code(code: str) -> str:
"""Remove empty lines."""
return "\n".join([line for line in code.splitlines() if line.strip()])
def iter_jsonl(data_path: str) -> List[dict]:
with open(data_path, 'r', encoding='utf-8') as f:
return [json.loads(line) for line in f]
def get_files_from_type(source_dir: str, filetype: str) -> List[str]:
files = []
for root, _, filenames in os.walk(source_dir):
for filename in filenames:
if filename.endswith(filetype):
files.append(os.path.join(root, filename))
return files
def cmd(command: str) -> str:
logging.info(f">> {command}")
return subprocess.run(command, shell=True, text=True, stdout=subprocess.PIPE).stdout
def get_easyDict_from_filepath(path: str) -> Optional[EasyDict]:
if path.endswith('.json'):
with open(path, 'r', encoding="utf-8") as f:
return EasyDict(json.load(f, strict=False))
elif path.endswith(('.yaml', '.yml')):
with open(path, 'r', encoding="utf-8") as f:
return EasyDict(yaml.load(f, Loader=yaml.FullLoader))
return None
def now() -> str:
return time.strftime("%Y%m%d%H%M%S", time.localtime())
def code_is_valid(code: str) -> bool:
try:
ast.parse(code)
return True
except Exception:
return False
def extract_code_from_text(text: str) -> str:
"""Extract valid Python code blocks from text."""
code_blocks = re.findall(r"```.*?```", text, re.DOTALL)
code_blocks = [
"\n".join([line for line in block.splitlines() if "```" not in line])
for block in code_blocks
]
code = "\n\n".join(code_blocks) if code_blocks else text
if len(code.strip().splitlines()) == 1:
return ""
if code_is_valid(code):
return code
# Search for longest valid code segment
lines = text.splitlines()
candidates = []
for start in range(len(lines)):
for end in range(start, len(lines)):
segment = "\n".join(lines[start:end + 1])
if code_is_valid(segment):
candidates.append((end - start, segment))
if not candidates:
return ""
candidates.sort(reverse=True)
return candidates[0][1]
def format_code_with_prints(code: Optional[str]) -> str:
"""Ensure code has print statements for important info."""
if code is None:
return ""
if not isinstance(code, str):
code = str(code)
if re.search(r'print$begin:math:text$.+$end:math:text$', code):
return extract_code_from_text(code)
prompt = f"Add 'print' statements to print all necessary information.\nCode:\n{code}"
code_out, _ = query_gpt(prompt)
return extract_code_from_text(code_out) or code_out

101
puppeteer/utils/logging.py Normal file
View File

@ -0,0 +1,101 @@
import logging
import yaml
import os
import datetime
class LogManager:
_instance = None
def __new__(cls, config_path=None, task_name=None):
if cls._instance is not None:
cls._instance._cleanup()
cls._instance = super(LogManager, cls).__new__(cls)
cls._instance._initialize(config_path, task_name)
return cls._instance
def _initialize(self, config_path, task_name):
self.loggers = {}
self.global_config = yaml.safe_load(open(config_path, "r"))
self.task_name = task_name
self.folder_path = self._create_log_folder()
self._setup_main_logger()
self._setup_model_logger()
self._setup_training_logger()
def _create_log_folder(self):
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
folder_path = os.path.join(self.global_config.get('logging').get('logpath'), self.task_name, timestamp)
if not os.path.exists(folder_path):
os.makedirs(folder_path)
return folder_path
def _setup_main_logger(self):
main_logger = logging.getLogger('global')
main_logger.setLevel(self.global_config.get('logging').get('level'))
fh = logging.FileHandler(os.path.join(self.folder_path, "meta.log"), encoding="utf-8")
fh.setLevel(self.global_config.get('logging').get('level'))
formatter = logging.Formatter('[%(asctime)s %(levelname)s]\n%(message)s', datefmt='%Y-%d-%m %H:%M:%S')
fh.setFormatter(formatter)
main_logger.addHandler(fh)
def _setup_model_logger(self):
model_logger = logging.getLogger('model')
model_logger.setLevel(self.global_config.get('logging').get('level'))
fh = logging.FileHandler(os.path.join(self.folder_path, "model_query.log"), encoding="utf-8")
fh.setLevel(self.global_config.get('logging').get('level'))
formatter = logging.Formatter('[%(asctime)s %(levelname)s]\n%(message)s', datefmt='%Y-%d-%m %H:%M:%S')
fh.setFormatter(formatter)
model_logger.addHandler(fh)
def _setup_training_logger(self):
training_logger = logging.getLogger('train')
training_logger.setLevel(self.global_config.get('logging').get('level'))
fh = logging.FileHandler(os.path.join(self.folder_path,"train.log"), encoding="utf-8")
fh.setLevel(self.global_config.get('logging').get('level'))
formatter = logging.Formatter('[%(asctime)s %(levelname)s]\n%(message)s', datefmt='%Y-%d-%m %H:%M:%S')
fh.setFormatter(formatter)
training_logger.addHandler(fh)
def create_logger(self, name, log_file, level=logging.INFO):
logger = logging.getLogger(name)
logger.setLevel(level)
if not logger.handlers:
handler = logging.FileHandler(log_file, encoding="utf-8")
handler.setFormatter(logging.Formatter('[%(asctime)s %(levelname)s]\n%(message)s', datefmt='%Y-%d-%m %H:%M:%S'))
logger.addHandler(handler)
logger.propagate = False
self.loggers[name] = logger
def get_logger(self, index):
return self.loggers.get(index, logging.getLogger())
def _cleanup(self):
for logger in self.loggers.values():
handlers = logger.handlers[:]
for handler in handlers:
handler.close()
logger.removeHandler(handler)
main_logger = logging.getLogger('global')
handlers = main_logger.handlers[:]
for handler in handlers:
try:
handler.close()
main_logger.removeHandler(handler)
except Exception as e:
print(f"Error closing handler: {e}")
model_logger = logging.getLogger('model')
handlers = model_logger.handlers[:]
for handler in handlers:
try:
handler.close()
model_logger.removeHandler(handler)
except Exception as e:
print(f"Error closing handler: {e}")

View File

@ -0,0 +1,69 @@
import json
import logging
import re
from time import sleep
from typing import Union
# =============================
# Singleton Decorator
# =============================
def Singleton(cls):
instances = {}
def get_instance(*args, **kwargs):
if cls not in instances:
instances[cls] = cls(*args, **kwargs)
return instances[cls]
return get_instance
# =============================
# JSON Formatter Class
# =============================
class JsonFormat:
def __init__(self, query_func):
self.query_func = query_func
def load_json_with_invalid_escape(self, json_str: str) -> dict:
"""Handle invalid JSON escape sequences."""
json_str = json_str.strip()
json_str = re.sub(r'(?<!\\)\n', ' ', json_str)
try:
return json.loads(json_str)
except json.JSONDecodeError:
try:
json_str = re.sub(r'\$begin:math:text$?!["\\\\/bfnrtu])', r'\\\\\\\\', json_str)
json_str = re.sub(r'(?<!\\$end:math:text$"', '\\"', json_str)
json_str = f'"{json_str}"'
return json.loads(json_str)
except json.JSONDecodeError:
return {'action': 'Error', 'parameter': 'Invalid JSON format'}
def json_check(self, text: str) -> tuple[bool, Union[dict, str]]:
try:
d = self.load_json_with_invalid_escape(text)
if isinstance(d, dict) and d.get("action") != "Error":
return True, d
except Exception:
pass
return False, ""
def json_reformat(self, text: str, max_try_times: int = 3) -> dict:
"""Reformat GPT text to strict JSON object."""
prompt_template = """
Please reformat the given text strictly according to the specified JSON format.
The given text is: {}.
The specified JSON format is: {{"action": "", "parameter": ""}}, presented in plain text.
Only return one JSON object.
"""
for _ in range(max_try_times):
text = text.replace("null", '"Error"').replace("None", '"Error"').replace("```json", "").replace("```", "")
valid, json_obj = self.json_check(text)
if valid:
return json_obj
logging.info(f"Error format:\n{text}")
text, _ = self.query_func(prompt_template.format(text))
sleep(1)
return {'action': 'Error', 'parameter': 'Error'}

35
requirements.txt Normal file
View File

@ -0,0 +1,35 @@
arxiv==2.1.3
beautifulsoup4==4.13.5
chromadb==0.5.7
diskcache==5.6.3
easydict==1.10
easyocr==1.7.1
joblib
pyarrow
mammoth==1.8.0
markdownify==0.13.1
networkx==3.2.1
numpy==2.3.2
openai==1.107.0
openai_whisper
opencv_python_headless==4.10.0.84
pandas==2.3.2
pathvalidate==3.2.0
pdfminer.six==20240706
Pillow==11.3.0
puremagic==1.27
pydub==0.25.1
python_pptx==1.0.2
pytube==15.0.0
pyvis==0.3.2
PyYAML==6.0
Requests==2.32.5
seaborn==0.13.2
SpeechRecognition==3.10.4
sympy==1.12
tenacity==9.0.0
torch==2.4.0
tqdm==4.66.5
transformers==4.43.4
whisper==1.1.10
youtube_transcript_api==0.6.2