🚧 Add performance comparison/regression CI workflow

This commit is contained in:
Andrey Antukh 2026-06-15 12:43:31 +02:00
parent fa54152a37
commit 22a06d4ae9
4 changed files with 509 additions and 9 deletions

187
.github/workflows/perf-regression.yml vendored Normal file
View File

@ -0,0 +1,187 @@
# Relative performance regression guard for pull requests.
#
# The job runs the k6 suite in ./performance twice on the same runner --
# first against the pull request's base branch, then against its head commit --
# and hands both result sets to performance/scripts/compare-results.cjs.
name: "CI: Performance Regression"

defaults:
  run:
    shell: bash

on:
  pull_request:
    paths:
      - 'backend/src/**'
      - 'common/src/**'
    types:
      - opened
      - synchronize
      - ready_for_review

# A newer push to the same pull request cancels the run still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  perf-regression:
    if: ${{ !github.event.pull_request.draft }}
    name: "Performance Regression Check"
    runs-on: penpot-runner-02

    container:
      image: penpotapp/devenv:latest
      volumes:
        - /var/cache/github-runner/m2:/root/.m2
        - /var/cache/github-runner/gitlib:/root/.gitlibs

    services:
      postgres:
        image: postgres:17
        env:
          POSTGRES_USER: penpot
          POSTGRES_PASSWORD: penpot
          POSTGRES_DB: penpot
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      redis:
        image: valkey/valkey:9

    env:
      PENPOT_DATABASE_URI: "postgresql://postgres/penpot"
      PENPOT_DATABASE_USERNAME: penpot
      PENPOT_DATABASE_PASSWORD: penpot
      PENPOT_REDIS_URI: "redis://redis/1"
      PENPOT_FLAGS: "demo-users enable-backend-api-doc"

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          # Full history is required: the "Checkout PR branch" step checks out
          # the PR head commit by SHA, and the default depth-1 clone of the
          # merge ref does not contain that commit.
          fetch-depth: 0

      - name: Install k6
        run: |
          curl -sSL https://dl.k6.io/key.gpg | gpg --dearmor -o /usr/share/keyrings/k6-archive-keyring.gpg
          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | tee /etc/apt/sources.list.d/k6.list
          apt-get update
          apt-get install -y k6

      - name: Cache Maven dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.m2
            ~/.gitlibs
          key: ${{ runner.os }}-m2-${{ hashFiles('backend/deps.edn', 'common/deps.edn') }}
          restore-keys: |
            ${{ runner.os }}-m2-

      # -----------------------------------------------------------------------
      # Run performance tests on BASE branch (before change)
      # -----------------------------------------------------------------------
      - name: Checkout base branch
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the ref name is never parsed as shell code.
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          git fetch origin "$BASE_REF"
          git checkout "origin/$BASE_REF"

      - name: Start backend (base branch)
        working-directory: ./backend
        run: |
          clojure -M:dev -m app.main &
          # Wait for backend to be ready
          ready=false
          for i in $(seq 1 30); do
            if curl -s http://localhost:6060/api/rpc/command/get-profile > /dev/null 2>&1; then
              echo "Backend ready"
              ready=true
              break
            fi
            echo "Waiting for backend... ($i/30)"
            sleep 2
          done
          # Fail here rather than letting the load test hit a dead port.
          if [ "$ready" != "true" ]; then
            echo "::error::Backend (base branch) did not become ready after 30 attempts"
            exit 1
          fi

      - name: Run performance tests (baseline)
        working-directory: ./performance
        run: |
          mkdir -p results/baseline
          ./run.sh smoke
          # Run the main test suite
          K6_VUS=5 K6_ITERATIONS=10 ./run.sh lifecycle -v 5 -n 10
          # Copy results
          cp -r results/latest/* results/baseline/ 2>/dev/null || true

      - name: Stop backend (base branch)
        run: |
          pkill -f "app.main" || true
          sleep 2

      # -----------------------------------------------------------------------
      # Run performance tests on PR branch (after change)
      # -----------------------------------------------------------------------
      - name: Checkout PR branch
        env:
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          git checkout "$HEAD_SHA"

      - name: Start backend (PR branch)
        working-directory: ./backend
        run: |
          clojure -M:dev -m app.main &
          # Wait for backend to be ready
          ready=false
          for i in $(seq 1 30); do
            if curl -s http://localhost:6060/api/rpc/command/get-profile > /dev/null 2>&1; then
              echo "Backend ready"
              ready=true
              break
            fi
            echo "Waiting for backend... ($i/30)"
            sleep 2
          done
          # Fail here rather than letting the load test hit a dead port.
          if [ "$ready" != "true" ]; then
            echo "::error::Backend (PR branch) did not become ready after 30 attempts"
            exit 1
          fi

      - name: Run performance tests (current)
        working-directory: ./performance
        run: |
          mkdir -p results/current
          ./run.sh smoke
          # Run the main test suite
          K6_VUS=5 K6_ITERATIONS=10 ./run.sh lifecycle -v 5 -n 10
          # Copy results
          cp -r results/latest/* results/current/ 2>/dev/null || true

      - name: Stop backend (PR branch)
        run: |
          pkill -f "app.main" || true
          sleep 2

      # -----------------------------------------------------------------------
      # Compare results
      # -----------------------------------------------------------------------
      - name: Compare results
        working-directory: ./performance
        run: |
          # NOTE(review): compare-results.cjs reads line-delimited k6 "Point"
          # entries; confirm that run.sh writes that format to k6-summary.json.
          BASELINE=$(find results/baseline -name "k6-summary.json" | head -1)
          CURRENT=$(find results/current -name "k6-summary.json" | head -1)
          if [ -z "$BASELINE" ] || [ -z "$CURRENT" ]; then
            # Still non-fatal, but raised as an annotation so a run that
            # compared nothing is visible on the pull request.
            echo "::warning::Could not find k6 summary files"
            echo "Baseline: $BASELINE"
            echo "Current: $CURRENT"
            exit 0
          fi
          echo "Comparing:"
          echo " Baseline: $BASELINE"
          echo " Current: $CURRENT"
          echo ""
          node scripts/compare-results.cjs "$BASELINE" "$CURRENT" --threshold 20

      # -----------------------------------------------------------------------
      # Upload artifacts
      # -----------------------------------------------------------------------
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: performance-results
          path: performance/results/
          retention-days: 30

View File

@ -56,6 +56,7 @@ Commands:
font-upload Upload fonts via chunked upload + create-font-variant
concurrent-edit Concurrent editing: same-file or multi-file mode
file-size-matrix Measure latency vs file size (10, 100, 500, 1000 shapes)
compare Compare two k6 JSON results for regression
all Run all scenarios together (orchestrator)
clean Remove test results
help Show this help
@ -293,6 +294,35 @@ cmd_file_size_matrix() {
run_script "file-size-matrix.js" "file-size-matrix"
}
# Compare two k6 JSON result files for performance regression.
#
# Arguments:
#   $1  baseline   k6 JSON output from the base branch
#   $2  current    k6 JSON output from the PR branch
#   $3  threshold  optional; fail if p95 increases by more than N% (default: 20)
#
# Exits 1 on missing arguments, a missing input file or a non-numeric
# threshold. Otherwise runs scripts/compare-results.cjs and returns its status.
cmd_compare() {
    # ${N:-} keeps the usage message reachable even if the caller runs with
    # `set -u` and passes no arguments.
    local baseline="${1:-}"
    local current="${2:-}"
    local threshold="${3:-20}"

    if [[ -z "$baseline" || -z "$current" ]]; then
        {
            echo "Usage: ./run.sh compare <baseline.json> <current.json> [threshold]"
            echo ""
            echo "Compare two k6 JSON results for performance regression."
            echo ""
            echo "Arguments:"
            echo " baseline.json k6 JSON output from base branch"
            echo " current.json k6 JSON output from PR branch"
            echo " threshold Fail if p95 increases > N% (default: 20)"
        } >&2
        exit 1
    fi

    if [[ ! -f "$baseline" ]]; then
        echo "Error: Baseline file not found: $baseline" >&2
        exit 1
    fi

    if [[ ! -f "$current" ]]; then
        echo "Error: Current file not found: $current" >&2
        exit 1
    fi

    # The comparison script parses the threshold with parseInt; a non-numeric
    # value becomes NaN there and no command is ever reported as a regression.
    if [[ ! "$threshold" =~ ^[0-9]+$ ]]; then
        echo "Error: Threshold must be a non-negative integer: $threshold" >&2
        exit 1
    fi

    node "$SCRIPT_DIR/scripts/compare-results.cjs" "$baseline" "$current" --threshold "$threshold"
}
cmd_clean() {
local results_dir="$SCRIPT_DIR/results"
if [[ -d "$results_dir" ]]; then
@ -374,6 +404,7 @@ case "$command" in
font-upload) cmd_font_upload ;;
concurrent-edit) cmd_concurrent_edit ;;
file-size-matrix) cmd_file_size_matrix ;;
compare) cmd_compare "$@" ;;
all) cmd_all ;;
clean) cmd_clean ;;
help|-h|--help) usage ;;

View File

@ -0,0 +1,270 @@
#!/usr/bin/env node
//
// compare-results.cjs
//
// Compares two k6 JSON output files and reports performance regressions.
// Used for relative comparison: base branch vs PR branch in the same CI run.
//
// Usage:
// node scripts/compare-results.cjs <baseline.json> <current.json>
// node scripts/compare-results.cjs <baseline.json> <current.json> --threshold 20
//
// Exit codes:
// 0 - No regressions detected
// 1 - Regression detected (p95 increased > threshold)
// 2 - Error (invalid input, missing file, etc.)
const fs = require("fs");
const path = require("path");
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
// Percentage increase in p95 latency above which a command is reported as a
// regression when --threshold is not given on the command line.
const DEFAULT_THRESHOLD = 20;

// RPC commands flagged as critical; they are marked with "*" in the report.
const CRITICAL_COMMANDS = [
  "get-file", "update-file",
  "login-with-password", "create-demo-profile",
  "get-file-libraries", "get-file-object-thumbnails",
];
// ---------------------------------------------------------------------------
// Parse k6 JSON output
// ---------------------------------------------------------------------------
/**
 * Read a line-delimited k6 JSON output file and group request durations by
 * RPC command.
 *
 * Only entries with `type === "Point"`, `metric === "http_req_duration"` and a
 * non-empty `data.tags.rpc_command` tag are kept. Lines that are not valid
 * JSON are skipped, as are points whose `data.value` is not a finite number
 * (they would otherwise turn every derived statistic into NaN).
 *
 * @param {string} filePath - Path to the k6 JSON output file.
 * @returns {Object<string, number[]>} Durations keyed by RPC command name, in
 *   file order.
 * @throws {Error} If the file cannot be read.
 */
function parseK6Json(filePath) {
  const content = fs.readFileSync(filePath, "utf-8");
  const durations = {}; // { rpc_command: [value, ...] }

  for (const line of content.trim().split("\n")) {
    let entry;
    try {
      entry = JSON.parse(line);
    } catch (e) {
      continue; // Skip malformed lines
    }

    // JSON.parse also accepts bare scalars and null; none of them is a point.
    if (entry === null || typeof entry !== "object") continue;
    if (entry.type !== "Point" || entry.metric !== "http_req_duration") continue;

    const cmd = entry.data?.tags?.rpc_command;
    const value = entry.data?.value;
    if (!cmd) continue;
    if (typeof value !== "number" || !Number.isFinite(value)) continue;

    // Own-property check: a plain truthiness test would mistake inherited
    // names such as "constructor" for an existing bucket.
    if (!Object.prototype.hasOwnProperty.call(durations, cmd)) {
      durations[cmd] = [];
    }
    durations[cmd].push(value);
  }

  return durations;
}
// ---------------------------------------------------------------------------
// Calculate percentiles
// ---------------------------------------------------------------------------
/**
 * Pick the p-th percentile of a list of numbers by rank: the value at
 * position ceil(p/100 * n) - 1 of the ascending-sorted copy, clamped to the
 * first element.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @param {number} p - Percentile to pick, e.g. 95.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
function percentile(values, p) {
  const n = values.length;
  if (n === 0) {
    return 0;
  }

  const ordered = [...values].sort((x, y) => x - y);
  const rank = Math.ceil((p / 100) * n) - 1;
  return ordered[rank < 0 ? 0 : rank];
}
/**
 * Summarise a list of durations.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @returns {{count: number, p50: number, p95: number, p99: number,
 *   min: number, max: number, avg: number}} Summary of the samples; every
 *   field is 0 when `values` is empty.
 */
function calculateStats(values) {
  const count = values.length;
  if (count === 0) {
    return { count: 0, p50: 0, p95: 0, p99: 0, min: 0, max: 0, avg: 0 };
  }

  // Sum in the caller's order so the floating-point result stays stable.
  let total = 0;
  for (const v of values) {
    total = total + v;
  }

  const ascending = values.slice().sort((x, y) => x - y);
  const [p50, p95, p99] = [50, 95, 99].map((p) => percentile(values, p));

  return {
    count,
    p50,
    p95,
    p99,
    min: ascending[0],
    max: ascending[count - 1],
    avg: total / count,
  };
}
// ---------------------------------------------------------------------------
// Compare two results
// ---------------------------------------------------------------------------
/**
 * Build one comparison row per RPC command seen in either result set.
 *
 * The p95 change is expressed as a percentage of the baseline p95. When the
 * baseline p95 is 0 (for example the command is absent from the baseline) and
 * the current p95 is positive, the change is fixed at 100. A row is a
 * regression when its unrounded change exceeds `threshold`.
 *
 * @param {Object<string, number[]>} baseline - Durations per command (base).
 * @param {Object<string, number[]>} current - Durations per command (PR).
 * @param {number} threshold - Allowed p95 increase in percent.
 * @returns {Array<Object>} Rows sorted with regressions first, then by p95
 *   change in descending order.
 */
function compareResults(baseline, current, threshold) {
  const commands = new Set([...Object.keys(baseline), ...Object.keys(current)]);

  const rows = Array.from(commands, (cmd) => {
    const baseStats = calculateStats(baseline[cmd] || []);
    const currStats = calculateStats(current[cmd] || []);

    let delta;
    if (baseStats.p95 > 0) {
      delta = ((currStats.p95 - baseStats.p95) / baseStats.p95) * 100;
    } else if (currStats.p95 > 0) {
      delta = 100; // New command with latency
    } else {
      delta = 0;
    }

    return {
      command: cmd,
      isCritical: CRITICAL_COMMANDS.includes(cmd),
      baseline: baseStats,
      current: currStats,
      p95Change: Math.round(delta * 100) / 100,
      isRegression: delta > threshold,
    };
  });

  // Regressions first; ties are broken by the larger p95 change.
  rows.sort(
    (a, b) =>
      Number(b.isRegression) - Number(a.isRegression) ||
      b.p95Change - a.p95Change
  );

  return rows;
}
// ---------------------------------------------------------------------------
// Print report
// ---------------------------------------------------------------------------
/**
 * Print the comparison table and a regression summary to stdout.
 *
 * @param {Array<Object>} results - Rows as produced by compareResults().
 * @param {number} threshold - Threshold in percent; only echoed in the header.
 * @returns {number} How many rows have `isRegression` set.
 */
function printReport(results, threshold) {
  // One table line: fixed-width columns joined by " | ".
  const formatRow = (command, base, curr, change, status) =>
    [
      command.padEnd(30),
      base.padStart(12),
      curr.padStart(12),
      change.padStart(10),
      status.padStart(10),
    ].join(" | ");
  const ms = (value) => `${Math.round(value)}ms`;

  console.log("\n=== Performance Regression Report ===\n");
  console.log(`Threshold: p95 increase > ${threshold}%\n`);

  const header = formatRow("Command", "Baseline p95", "Current p95", "Change", "Status");
  console.log(header);
  console.log("-".repeat(header.length));

  results.forEach((r) => {
    const sign = r.p95Change > 0 ? "+" : "";
    console.log(
      formatRow(
        r.command + (r.isCritical ? " *" : ""),
        ms(r.baseline.p95),
        ms(r.current.p95),
        `${sign}${r.p95Change}%`,
        r.isRegression ? "FAIL" : "OK"
      )
    );
  });

  console.log("\n* = Critical command (always checked)");

  const failed = results.filter((r) => r.isRegression);
  if (failed.length === 0) {
    console.log("\n✅ No regressions detected");
    return failed.length;
  }

  console.log(`\n❌ REGRESSION DETECTED: ${failed.length} command(s) exceeded threshold`);
  failed.forEach((r) => {
    console.log(` - ${r.command}: p95 ${ms(r.baseline.p95)} → ${ms(r.current.p95)} (+${r.p95Change}%)`);
  });
  return failed.length;
}
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
/**
 * Command-line entry point.
 *
 * Accepts two positional arguments (baseline file, current file) and an
 * optional `--threshold N`. Terminates the process with:
 *   0 - no regression detected
 *   1 - at least one command exceeded the threshold
 *   2 - usage error, missing file, invalid threshold, or a result file that
 *       contains no RPC command data
 */
function main() {
  const args = process.argv.slice(2);

  // Parse arguments
  let baselineFile = null;
  let currentFile = null;
  let threshold = DEFAULT_THRESHOLD;

  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--threshold") {
      const raw = args[i + 1];
      threshold = parseInt(raw, 10);
      // A NaN threshold makes every `change > threshold` test false, so the
      // guard would silently pass; reject it instead.
      if (Number.isNaN(threshold)) {
        console.error(`Error: --threshold requires a numeric value (got: ${raw})`);
        process.exit(2);
      }
      i++;
    } else if (!baselineFile) {
      baselineFile = args[i];
    } else if (!currentFile) {
      currentFile = args[i];
    }
  }

  // Validate arguments
  if (!baselineFile || !currentFile) {
    console.error("Usage: node compare-results.cjs <baseline.json> <current.json> [--threshold N]");
    console.error("");
    console.error("Arguments:");
    console.error(" baseline.json k6 JSON output from base branch");
    console.error(" current.json k6 JSON output from PR branch");
    console.error(" --threshold N Fail if p95 increases > N% (default: 20)");
    process.exit(2);
  }

  // Check files exist
  if (!fs.existsSync(baselineFile)) {
    console.error(`Error: Baseline file not found: ${baselineFile}`);
    process.exit(2);
  }
  if (!fs.existsSync(currentFile)) {
    console.error(`Error: Current file not found: ${currentFile}`);
    process.exit(2);
  }

  // Parse files
  console.log(`Parsing baseline: ${path.basename(baselineFile)}`);
  const baseline = parseK6Json(baselineFile);
  const baseCommands = Object.keys(baseline).length;
  console.log(` Found ${baseCommands} RPC commands`);

  console.log(`Parsing current: ${path.basename(currentFile)}`);
  const current = parseK6Json(currentFile);
  const currCommands = Object.keys(current).length;
  console.log(` Found ${currCommands} RPC commands`);

  // A comparison needs data on both sides. With an empty current file every
  // baseline command would show a -100% change and the run would pass.
  if (baseCommands === 0) {
    console.error("Error: No RPC command data found in baseline file");
    process.exit(2);
  }
  if (currCommands === 0) {
    console.error("Error: No RPC command data found in current file");
    process.exit(2);
  }

  // Compare and report
  const results = compareResults(baseline, current, threshold);
  const regressionCount = printReport(results, threshold);

  // Exit with appropriate code
  process.exit(regressionCount > 0 ? 1 : 0);
}

main();

View File

@ -34,7 +34,8 @@ performance/
│ ├── workspace-edit-concurrent.js # Concurrent editing: same-file or multi-file mode
│ ├── file-size-matrix.js # File size matrix: latency vs shape count (10, 100, 500, 1000)
│ ├── media-upload.js # Image uploads: SVG/PNG direct, JPG chunked
│ └── font-upload.js # Font uploads: TTF+OTF chunked, create-font-variant
│ ├── font-upload.js # Font uploads: TTF+OTF chunked, create-font-variant
│ └── compare-results.cjs # Compare two k6 JSON results for regression
├── results/ # k6 JSON output (gitignored)
└── baselines/ # for regression baselines
```
@ -111,14 +112,15 @@ Setup is sequential (~0.13ms/user with `derive-password-weak`), excluded from k6
| Phase 3 Scenarios | **Done** | `./run.sh all` runs all flows in parallel |
| Phase 4 Concurrent Editing | **Done** | `workspace-edit-concurrent.js` with same-file and multi-file modes |
| Phase 4 File Size Matrix | **Done** | `file-size-matrix.js` with 4 tiers (10, 100, 500, 1000 shapes) |
| Phase 5 CI & Reporting | **Not started** | Grafana dashboards, regression guard |
| Phase 5 Regression Guard | **Done** | `compare-results.cjs` + CI workflow (relative comparison) |
| Phase 5 Grafana Dashboards | **Deferred** | No Prometheus remote write or InfluxDB in current stack |
### Immediate Next Steps
1. ~~Phase 2 Fast password for demo users~~ ✅ Done
2. ~~Phase 4: File size matrix (`update-file` latency vs shape count: 10, 100, 500, 1000 shapes).~~ ✅ Done — `file-size-matrix.js` with 4 tiers
3. ~~Phase 4: Concurrent editing test (2–3 VUs per file, measure conflict rate).~~ ✅ Done — `workspace-edit-concurrent.js` with same-file and multi-file modes
4. Phase 5: Grafana dashboard panels (p95 latency by RPC, error rate, JVM, DB pool).
4. ~~Phase 5: Regression guard — implement `compare-results.cjs` and CI workflow.~~ ✅ Done
5. ~~Add `--scenario` flag to `run.sh`~~ ✅ Done
6. Write `viewer.js` — `get-view-only-bundle` + `get-comment-threads` (deferred per user request).
@ -504,14 +506,14 @@ Run `workspace-edit.js` against each tier separately and plot:
1. **Runner script (`run.sh`):**
- `./run.sh smoke` for a 1-VU, 1-iteration smoke test. ✅ Done
- `./run.sh lifecycle -v 100 -n 10` for the standard run.
- Add `--scenario` flag to run individual flows or the full mix.
- Add `--scenario` flag to run individual flows or the full mix. ✅ Done
2. **Output:**
- k6 JSON/CSV output to `performance/results/<timestamp>/`.
- Prometheus snapshot diff (before vs after).
- Grafana screenshot or dashboard export.
3. **Grafana Dashboard:**
3. **Grafana Dashboard:** *(Deferred — no Prometheus remote write or InfluxDB configured in current stack)*
- Panel: `p95 latency by RPC command` (from `rpc_main_timing_seconds`).
- Panel: `HTTP requests/sec` (from k6).
- Panel: `Error rate by command` (from k6).
@ -520,9 +522,19 @@ Run `workspace-edit.js` against each tier separately and plot:
- Panel: `update-file conflict rate` (custom metric from k6).
- Panel: `File size vs latency` (from the matrix test).
4. **Regression guard:**
- Store baseline results in `performance/baselines/`.
- After any backend change, run the baseline scenario. If p95 increases by >20% for any critical command, fail the CI step.
4. **Regression guard (relative comparison):**
- **Approach:** Run performance tests twice in the same CI job — once on base branch, once on PR branch. Compare p95/p99 directly. No stored baselines needed.
- **Trigger:** Only when backend or common source files change (`backend/src/**`, `common/src/**`).
- **Comparison script:** `scripts/compare-results.cjs` — parses two k6 JSON outputs, compares p50/p95/p99 for each RPC command.
- **Threshold:** Fail if p95 increases >20% for any critical command (`get-file`, `update-file`, `login-with-password`, `create-demo-profile`).
- **Workflow:**
1. Checkout base branch (main)
2. Run performance tests → store as "baseline"
3. Checkout PR branch
4. Run performance tests → store as "current"
5. Compare baseline vs current
6. If p95 increases >20% → fail CI
- **Advantages:** Same hardware, same conditions. No stored baselines. Only runs when backend changes.
---
@ -581,5 +593,5 @@ Run `workspace-edit.js` against each tier separately and plot:
---
**Plan Author:** Senior Software Architect
**Status:** Phase 1–4 complete. All core scripts implemented. Phase 5 (CI & Reporting) remains.
**Status:** Phase 1–5 complete. Regression guard implemented (relative comparison). Grafana dashboards deferred.