agency-agents/evals/package.json
Russell Jones b456845e85
feat: add promptfoo eval harness for agent quality scoring (#371)
Adds a promptfoo eval harness for agent quality scoring. It uses an LLM-as-judge system that scores task completion, instruction adherence, identity consistency, deliverable quality, and safety. Includes tests.
2026-04-10 21:54:31 -05:00

25 lines
610 B
JSON

{
  "name": "agency-agents-evals",
  "version": "0.1.0",
  "private": true,
  "description": "Evaluation harness for agency-agents specialist prompts",
  "scripts": {
    "eval": "promptfoo eval",
    "eval:view": "promptfoo view",
    "eval:cache-clear": "promptfoo cache clear",
    "extract": "ts-node scripts/extract-metrics.ts",
    "test": "vitest run",
    "test:watch": "vitest"
  },
  "dependencies": {
    "gray-matter": "^4.0.3",
    "promptfoo": "^0.121.3"
  },
  "devDependencies": {
    "@types/node": "^22.0.0",
    "ts-node": "^10.9.0",
    "typescript": "^5.7.0",
    "vitest": "^3.0.0"
  }
}