mirror of
https://github.com/msitarzewski/agency-agents
synced 2026-04-25 11:18:05 +00:00
Adds a promptfoo eval harness for agent quality scoring: an LLM-as-judge system that scores task completion, instruction adherence, identity consistency, deliverable quality, and safety. Includes tests.
25 lines
610 B
JSON
25 lines
610 B
JSON
{
  "name": "agency-agents-evals",
  "version": "0.1.0",
  "private": true,
  "description": "Evaluation harness for agency-agents specialist prompts",
  "scripts": {
    "eval": "promptfoo eval",
    "eval:view": "promptfoo view",
    "eval:cache-clear": "promptfoo cache clear",
    "extract": "ts-node scripts/extract-metrics.ts",
    "test": "vitest run",
    "test:watch": "vitest"
  },
  "dependencies": {
    "gray-matter": "^4.0.3",
    "promptfoo": "^0.121.3"
  },
  "devDependencies": {
    "@types/node": "^22.0.0",
    "ts-node": "^10.9.0",
    "typescript": "^5.7.0",
    "vitest": "^3.0.0"
  }
}