diff --git a/.github/workflows/perf-regression.yml b/.github/workflows/perf-regression.yml
new file mode 100644
index 0000000000..1611fe8dba
--- /dev/null
+++ b/.github/workflows/perf-regression.yml
@@ -0,0 +1,218 @@
+name: "CI: Performance Regression"
+
+# Relative performance guard: benchmarks the PR's base branch and the PR
+# head in the same job, then compares the two k6 result sets.
+
+defaults:
+  run:
+    shell: bash
+
+on:
+  pull_request:
+    paths:
+      - 'backend/src/**'
+      - 'common/src/**'
+
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+
+# A newer run for the same PR cancels the one still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  perf-regression:
+    if: ${{ !github.event.pull_request.draft }}
+    name: "Performance Regression Check"
+    runs-on: penpot-runner-02
+    container:
+      image: penpotapp/devenv:latest
+      volumes:
+        - /var/cache/github-runner/m2:/root/.m2
+        - /var/cache/github-runner/gitlib:/root/.gitlibs
+
+    services:
+      postgres:
+        image: postgres:17
+        env:
+          POSTGRES_USER: penpot
+          POSTGRES_PASSWORD: penpot
+          POSTGRES_DB: penpot
+
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+      redis:
+        image: valkey/valkey:9
+
+    env:
+      PENPOT_DATABASE_URI: "postgresql://postgres/penpot"
+      PENPOT_DATABASE_USERNAME: penpot
+      PENPOT_DATABASE_PASSWORD: penpot
+      PENPOT_REDIS_URI: "redis://redis/1"
+      PENPOT_FLAGS: "demo-users enable-backend-api-doc"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Install k6
+        run: |
+          curl -sSL https://dl.k6.io/key.gpg | gpg --dearmor -o /usr/share/keyrings/k6-archive-keyring.gpg
+          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | tee /etc/apt/sources.list.d/k6.list
+          apt-get update
+          apt-get install -y k6
+
+      - name: Cache Maven dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.m2
+            ~/.gitlibs
+          key: ${{ runner.os }}-m2-${{ hashFiles('backend/deps.edn', 'common/deps.edn') }}
+          restore-keys: |
+            ${{ runner.os }}-m2-
+
+      # -------------------------------------------------------------------------
+      # Run performance tests on BASE branch (before change)
+      # -------------------------------------------------------------------------
+
+      - name: Checkout base branch
+        env:
+          # Passed through the environment instead of being expanded inside
+          # the script, so the ref name is never parsed as shell code.
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          git fetch origin "$BASE_REF"
+          git checkout "origin/$BASE_REF"
+
+      - name: Start backend (base branch)
+        working-directory: ./backend
+        run: |
+          clojure -M:dev -m app.main &
+          # Wait for backend to be ready
+          ready=false
+          for i in $(seq 1 30); do
+            if curl -s http://localhost:6060/api/rpc/command/get-profile > /dev/null 2>&1; then
+              echo "Backend ready"
+              ready=true
+              break
+            fi
+            echo "Waiting for backend... ($i/30)"
+            sleep 2
+          done
+          # Do not benchmark a backend that never answered.
+          if [ "$ready" != "true" ]; then
+            echo "::error::Backend (base branch) did not become ready after 30 attempts"
+            exit 1
+          fi
+
+      - name: Run performance tests (baseline)
+        working-directory: ./performance
+        run: |
+          mkdir -p results/baseline
+          ./run.sh smoke
+          # Run the main test suite
+          K6_VUS=5 K6_ITERATIONS=10 ./run.sh lifecycle -v 5 -n 10
+          # Copy results
+          cp -r results/latest/* results/baseline/ 2>/dev/null || true
+
+      - name: Stop backend
+        run: |
+          pkill -f "app.main" || true
+          sleep 2
+
+      # -------------------------------------------------------------------------
+      # Run performance tests on PR branch (after change)
+      # -------------------------------------------------------------------------
+
+      - name: Checkout PR branch
+        env:
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          # actions/checkout clones shallowly by default, so fetch the head
+          # commit explicitly before switching to it.
+          git fetch --depth=1 origin "$HEAD_SHA"
+          git checkout "$HEAD_SHA"
+
+      - name: Start backend (PR branch)
+        working-directory: ./backend
+        run: |
+          clojure -M:dev -m app.main &
+          # Wait for backend to be ready
+          ready=false
+          for i in $(seq 1 30); do
+            if curl -s http://localhost:6060/api/rpc/command/get-profile > /dev/null 2>&1; then
+              echo "Backend ready"
+              ready=true
+              break
+            fi
+            echo "Waiting for backend... ($i/30)"
+            sleep 2
+          done
+          # Do not benchmark a backend that never answered.
+          if [ "$ready" != "true" ]; then
+            echo "::error::Backend (PR branch) did not become ready after 30 attempts"
+            exit 1
+          fi
+
+      - name: Run performance tests (current)
+        working-directory: ./performance
+        run: |
+          mkdir -p results/current
+          ./run.sh smoke
+          # Run the main test suite
+          K6_VUS=5 K6_ITERATIONS=10 ./run.sh lifecycle -v 5 -n 10
+          # Copy results
+          cp -r results/latest/* results/current/ 2>/dev/null || true
+
+      - name: Stop backend
+        run: |
+          pkill -f "app.main" || true
+          sleep 2
+
+      # -------------------------------------------------------------------------
+      # Compare results
+      # -------------------------------------------------------------------------
+
+      - name: Compare results
+        working-directory: ./performance
+        run: |
+          # NOTE(review): compare-results.cjs reads line-delimited k6 output
+          # (`type: Point` entries); confirm run.sh writes that format to
+          # k6-summary.json.
+          BASELINE=$(find results/baseline -name "k6-summary.json" | head -1)
+          CURRENT=$(find results/current -name "k6-summary.json" | head -1)
+
+          if [ -z "$BASELINE" ] || [ -z "$CURRENT" ]; then
+            echo "::warning::Could not find k6 summary files; regression check skipped"
+            echo "Warning: Could not find k6 summary files"
+            echo "Baseline: $BASELINE"
+            echo "Current: $CURRENT"
+            exit 0
+          fi
+
+          echo "Comparing:"
+          echo "  Baseline: $BASELINE"
+          echo "  Current:  $CURRENT"
+          echo ""
+
+          node scripts/compare-results.cjs "$BASELINE" "$CURRENT" --threshold 20
+
+      # -------------------------------------------------------------------------
+      # Upload artifacts
+      # -------------------------------------------------------------------------
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: performance-results
+          path: performance/results/
+          retention-days: 30
diff --git a/performance/run.sh b/performance/run.sh
index f695701dc7..2b4dc2ac7c 100755
--- a/performance/run.sh
+++ b/performance/run.sh
@@ -56,6 +56,7 @@ Commands:
   font-upload        Upload fonts via chunked upload + create-font-variant
   concurrent-edit    Concurrent editing: same-file or multi-file mode
   file-size-matrix   Measure latency vs file size (10, 100, 500, 1000 shapes)
+  compare            Compare two k6 JSON results for regression
   all                Run all scenarios together (orchestrator)
   clean              Remove test results
   help               Show this help
@@ -293,6 +294,46 @@ cmd_file_size_matrix() {
     run_script "file-size-matrix.js" "file-size-matrix"
 }
 
+# Compare two k6 JSON result files for p95 regressions.
+#
+# Arguments:
+#   $1  baseline k6 JSON output (base branch)
+#   $2  current k6 JSON output (PR branch)
+#   $3  threshold in percent (optional, default: 20)
+#
+# Exits 1 when an argument is missing or a file does not exist;
+# otherwise hands over to scripts/compare-results.cjs.
+cmd_compare() {
+    # Default to empty so a missing argument reaches the usage text
+    # below even if the script runs with `set -u`.
+    local baseline="${1:-}"
+    local current="${2:-}"
+    local threshold="${3:-20}"
+
+    if [[ -z "$baseline" || -z "$current" ]]; then
+        echo "Usage: ./run.sh compare <baseline.json> <current.json> [threshold]"
+        echo ""
+        echo "Compare two k6 JSON results for performance regression."
+        echo ""
+        echo "Arguments:"
+        echo "  baseline.json   k6 JSON output from base branch"
+        echo "  current.json    k6 JSON output from PR branch"
+        echo "  threshold       Fail if p95 increases > N% (default: 20)"
+        exit 1
+    fi
+
+    if [[ ! -f "$baseline" ]]; then
+        echo "Error: Baseline file not found: $baseline" >&2
+        exit 1
+    fi
+    if [[ ! -f "$current" ]]; then
+        echo "Error: Current file not found: $current" >&2
+        exit 1
+    fi
+
+    node "$SCRIPT_DIR/scripts/compare-results.cjs" "$baseline" "$current" --threshold "$threshold"
+}
+
 cmd_clean() {
     local results_dir="$SCRIPT_DIR/results"
     if [[ -d "$results_dir" ]]; then
@@ -374,6 +415,8 @@ case "$command" in
     font-upload)      cmd_font_upload ;;
     concurrent-edit)  cmd_concurrent_edit ;;
     file-size-matrix) cmd_file_size_matrix ;;
+    # NOTE(review): assumes the command word was already shifted off "$@" - confirm.
+    compare)          cmd_compare "$@" ;;
     all)              cmd_all ;;
     clean)            cmd_clean ;;
     help|-h|--help)   usage ;;
diff --git a/performance/scripts/compare-results.cjs b/performance/scripts/compare-results.cjs
new file mode 100644
index 0000000000..c7e3e6c052
--- /dev/null
+++ b/performance/scripts/compare-results.cjs
@@ -0,0 +1,347 @@
+#!/usr/bin/env node
+//
+// compare-results.cjs
+//
+// Compares two k6 JSON output files and reports performance regressions.
+// Used for relative comparison: base branch vs PR branch in the same CI run.
+//
+// Usage:
+//   node scripts/compare-results.cjs <baseline.json> <current.json>
+//   node scripts/compare-results.cjs <baseline.json> <current.json> --threshold 20
+//
+// Exit codes:
+//   0 - No regressions detected
+//   1 - Regression detected (p95 increased > threshold)
+//   2 - Error (invalid input, missing file, etc.)
+
+const fs = require("fs");
+const path = require("path");
+
+// ---------------------------------------------------------------------------
+// Configuration
+// ---------------------------------------------------------------------------
+
+const DEFAULT_THRESHOLD = 20; // Fail if p95 increases > 20%
+
+// Commands marked with "*" in the report. The threshold is applied to every
+// command, critical or not.
+const CRITICAL_COMMANDS = [
+  "get-file",
+  "update-file",
+  "login-with-password",
+  "create-demo-profile",
+  "get-file-libraries",
+  "get-file-object-thumbnails",
+];
+
+// ---------------------------------------------------------------------------
+// Parse k6 JSON output
+// ---------------------------------------------------------------------------
+
+/**
+ * Read a line-delimited k6 JSON file and group request durations by RPC
+ * command.
+ *
+ * Only `Point` entries of the `http_req_duration` metric that carry an
+ * `rpc_command` tag are kept. Lines that are not valid JSON are skipped.
+ *
+ * @param {string} filePath - Path of the k6 output file.
+ * @returns {Object<string, number[]>} Duration values keyed by command.
+ */
+function parseK6Json(filePath) {
+  const content = fs.readFileSync(filePath, "utf-8");
+  const lines = content.trim().split("\n");
+
+  // Collect all http_req_duration points with rpc_command tag
+  const durations = {}; // { rpc_command: [value, ...] }
+
+  for (const line of lines) {
+    try {
+      const entry = JSON.parse(line);
+
+      if (
+        entry.type === "Point" &&
+        entry.metric === "http_req_duration" &&
+        entry.data?.tags?.rpc_command
+      ) {
+        const cmd = entry.data.tags.rpc_command;
+        const value = entry.data.value;
+
+        if (!durations[cmd]) {
+          durations[cmd] = [];
+        }
+        durations[cmd].push(value);
+      }
+    } catch (e) {
+      // Skip malformed lines
+    }
+  }
+
+  return durations;
+}
+
+// ---------------------------------------------------------------------------
+// Calculate percentiles
+// ---------------------------------------------------------------------------
+
+/**
+ * Nearest-rank percentile of an array already sorted in ascending order.
+ *
+ * @param {number[]} sorted - Values in ascending order.
+ * @param {number} p - Percentile to pick, e.g. 95.
+ * @returns {number} The selected value, or 0 for an empty array.
+ */
+function percentileOfSorted(sorted, p) {
+  if (sorted.length === 0) return 0;
+
+  const index = Math.ceil((p / 100) * sorted.length) - 1;
+  return sorted[Math.max(0, index)];
+}
+
+/**
+ * Nearest-rank percentile of an unsorted array. The input is not modified.
+ *
+ * @param {number[]} values - Samples in any order.
+ * @param {number} p - Percentile to pick, e.g. 95.
+ * @returns {number} The selected value, or 0 for an empty array.
+ */
+function percentile(values, p) {
+  return percentileOfSorted(values.slice().sort((a, b) => a - b), p);
+}
+
+/**
+ * Summarise a list of samples.
+ *
+ * @param {number[]} values - Samples in any order; not modified.
+ * @returns {{count: number, p50: number, p95: number, p99: number, min: number,
+ *   max: number, avg: number}} All fields are 0 for an empty list.
+ */
+function calculateStats(values) {
+  if (values.length === 0) {
+    return { count: 0, p50: 0, p95: 0, p99: 0, min: 0, max: 0, avg: 0 };
+  }
+
+  // Sort once and reuse the sorted copy for every percentile.
+  const sorted = values.slice().sort((a, b) => a - b);
+  const sum = values.reduce((a, b) => a + b, 0);
+
+  return {
+    count: values.length,
+    p50: percentileOfSorted(sorted, 50),
+    p95: percentileOfSorted(sorted, 95),
+    p99: percentileOfSorted(sorted, 99),
+    min: sorted[0],
+    max: sorted[sorted.length - 1],
+    avg: sum / values.length,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Compare two results
+// ---------------------------------------------------------------------------
+
+/**
+ * Compare per-command statistics of two runs.
+ *
+ * A command is a regression when its p95 grew by more than `threshold`
+ * percent. A command with no baseline p95 but a positive current p95 is
+ * reported as +100%, so it is flagged whenever `threshold` is below 100.
+ *
+ * @param {Object<string, number[]>} baseline - Durations from the base run.
+ * @param {Object<string, number[]>} current - Durations from the PR run.
+ * @param {number} threshold - Allowed p95 increase in percent.
+ * @returns {Object[]} One entry per command: regressions first, then by
+ *   p95 change, largest first.
+ */
+function compareResults(baseline, current, threshold) {
+  const results = [];
+  const allCommands = new Set([
+    ...Object.keys(baseline),
+    ...Object.keys(current),
+  ]);
+
+  for (const cmd of allCommands) {
+    const baseStats = calculateStats(baseline[cmd] || []);
+    const currStats = calculateStats(current[cmd] || []);
+
+    // Calculate p95 change percentage
+    let p95Change = 0;
+    if (baseStats.p95 > 0) {
+      p95Change = ((currStats.p95 - baseStats.p95) / baseStats.p95) * 100;
+    } else if (currStats.p95 > 0) {
+      p95Change = 100; // New command with latency
+    }
+
+    const isCritical = CRITICAL_COMMANDS.includes(cmd);
+    const isRegression = p95Change > threshold;
+
+    results.push({
+      command: cmd,
+      isCritical,
+      baseline: baseStats,
+      current: currStats,
+      p95Change: Math.round(p95Change * 100) / 100,
+      isRegression,
+    });
+  }
+
+  // Sort: regressions first, then by p95 change descending
+  results.sort((a, b) => {
+    if (a.isRegression !== b.isRegression) return b.isRegression - a.isRegression;
+    return b.p95Change - a.p95Change;
+  });
+
+  return results;
+}
+
+// ---------------------------------------------------------------------------
+// Print report
+// ---------------------------------------------------------------------------
+
+/**
+ * Print the comparison table and a regression summary to stdout.
+ *
+ * @param {Object[]} results - Entries produced by `compareResults`.
+ * @param {number} threshold - Threshold shown in the report header.
+ * @returns {number} Number of entries flagged as regressions.
+ */
+function printReport(results, threshold) {
+  console.log("\n=== Performance Regression Report ===\n");
+  console.log(`Threshold: p95 increase > ${threshold}%\n`);
+
+  // Print table header
+  const header = [
+    "Command".padEnd(30),
+    "Baseline p95".padStart(12),
+    "Current p95".padStart(12),
+    "Change".padStart(10),
+    "Status".padStart(10),
+  ].join(" | ");
+
+  console.log(header);
+  console.log("-".repeat(header.length));
+
+  // Print results
+  for (const r of results) {
+    const baseP95 = `${Math.round(r.baseline.p95)}ms`;
+    const currP95 = `${Math.round(r.current.p95)}ms`;
+    const change = `${r.p95Change > 0 ? "+" : ""}${r.p95Change}%`;
+    const status = r.isRegression ? "FAIL" : "OK";
+    const critical = r.isCritical ? " *" : "";
+
+    const row = [
+      (r.command + critical).padEnd(30),
+      baseP95.padStart(12),
+      currP95.padStart(12),
+      change.padStart(10),
+      status.padStart(10),
+    ].join(" | ");
+
+    console.log(row);
+  }
+
+  // Print legend
+  console.log("\n* = Critical command (always checked)");
+
+  // Print regressions summary
+  const regressions = results.filter((r) => r.isRegression);
+  if (regressions.length > 0) {
+    console.log(`\n❌ REGRESSION DETECTED: ${regressions.length} command(s) exceeded threshold`);
+    for (const r of regressions) {
+      console.log(`  - ${r.command}: p95 ${Math.round(r.baseline.p95)}ms → ${Math.round(r.current.p95)}ms (+${r.p95Change}%)`);
+    }
+  } else {
+    console.log("\n✅ No regressions detected");
+  }
+
+  return regressions.length;
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+/**
+ * CLI entry point: parse arguments, load both files, print the report and
+ * exit with the code documented at the top of this file.
+ */
+function main() {
+  const args = process.argv.slice(2);
+
+  // Parse arguments
+  let baselineFile = null;
+  let currentFile = null;
+  let threshold = DEFAULT_THRESHOLD;
+
+  for (let i = 0; i < args.length; i++) {
+    if (args[i] === "--threshold" && args[i + 1]) {
+      threshold = parseInt(args[i + 1], 10);
+      i++;
+    } else if (!baselineFile) {
+      baselineFile = args[i];
+    } else if (!currentFile) {
+      currentFile = args[i];
+    }
+  }
+
+  // Validate arguments
+  if (!baselineFile || !currentFile) {
+    console.error("Usage: node compare-results.cjs <baseline.json> <current.json> [--threshold N]");
+    console.error("");
+    console.error("Arguments:");
+    console.error("  baseline.json    k6 JSON output from base branch");
+    console.error("  current.json     k6 JSON output from PR branch");
+    console.error("  --threshold N    Fail if p95 increases > N% (default: 20)");
+    process.exit(2);
+  }
+
+  // A NaN threshold makes every `p95Change > threshold` test false, which
+  // would silently report "no regressions".
+  if (Number.isNaN(threshold)) {
+    console.error("Error: --threshold must be a number");
+    process.exit(2);
+  }
+
+  // Check files exist
+  if (!fs.existsSync(baselineFile)) {
+    console.error(`Error: Baseline file not found: ${baselineFile}`);
+    process.exit(2);
+  }
+  if (!fs.existsSync(currentFile)) {
+    console.error(`Error: Current file not found: ${currentFile}`);
+    process.exit(2);
+  }
+
+  // Parse files
+  console.log(`Parsing baseline: ${path.basename(baselineFile)}`);
+  const baseline = parseK6Json(baselineFile);
+  const baseCommands = Object.keys(baseline).length;
+  console.log(`  Found ${baseCommands} RPC commands`);
+
+  console.log(`Parsing current: ${path.basename(currentFile)}`);
+  const current = parseK6Json(currentFile);
+  const currCommands = Object.keys(current).length;
+  console.log(`  Found ${currCommands} RPC commands`);
+
+  // Without data on both sides there is nothing to compare: an empty
+  // current run would otherwise show every command as -100% and pass.
+  if (baseCommands === 0 || currCommands === 0) {
+    const where =
+      baseCommands === 0 && currCommands === 0
+        ? "either file"
+        : baseCommands === 0
+          ? "the baseline file"
+          : "the current file";
+    console.error(`Error: No RPC command data found in ${where}`);
+    process.exit(2);
+  }
+
+  // Compare and report
+  const results = compareResults(baseline, current, threshold);
+  const regressionCount = printReport(results, threshold);
+
+  // Exit with appropriate code
+  process.exit(regressionCount > 0 ? 1 : 0);
+}
+
+main();
diff --git a/plans/2026-06-12-backend-performance-test.md b/plans/2026-06-12-backend-performance-test.md
index 4fc6bcc6a8..4f6a38388e 100644
--- a/plans/2026-06-12-backend-performance-test.md
+++ b/plans/2026-06-12-backend-performance-test.md
@@ -34,7 +34,8 @@ performance/
 │   ├── workspace-edit-concurrent.js  # Concurrent editing: same-file or multi-file mode
 │   ├── file-size-matrix.js           # File size matrix: latency vs shape count (10, 100, 500, 1000)
 │   ├── media-upload.js               # Image uploads: SVG/PNG direct, JPG chunked
-│   └── font-upload.js                # Font uploads: TTF+OTF chunked, create-font-variant
+│   ├── font-upload.js                # Font uploads: TTF+OTF chunked, create-font-variant
+│   └── compare-results.cjs           # Compare two k6 JSON results for regression
 ├── results/                          # k6 JSON output (gitignored)
 └── baselines/                        # for regression baselines
 ```
@@ -111,14 +112,15 @@ Setup is sequential (~0.13ms/user with `derive-password-weak`), excluded from k6
 | Phase 3 – Scenarios | **Done** | `./run.sh all` runs all flows in parallel |
 | Phase 4 – Concurrent Editing | **Done** | `workspace-edit-concurrent.js` with same-file and multi-file modes |
 | Phase 4 – File Size Matrix | **Done** | `file-size-matrix.js` with 4 tiers (10, 100, 500, 1000 shapes) |
-| Phase 5 – CI & Reporting | **Not started** | Grafana dashboards, regression guard |
+| Phase 5 – Regression Guard | **Done** | `compare-results.cjs` + CI workflow (relative comparison) |
+| Phase 5 – Grafana Dashboards | **Deferred** | No Prometheus remote write or InfluxDB in current stack |
 
 ### Immediate Next Steps
 
 1. ~~Phase 2 – Fast password for demo users~~ ✅ Done
 2. ~~Phase 4: File size matrix (`update-file` latency vs shape count: 10, 100, 500, 1000 shapes).~~ ✅ Done — `file-size-matrix.js` with 4 tiers
 3. ~~Phase 4: Concurrent editing test (2–3 VUs per file, measure conflict rate).~~ ✅ Done — `workspace-edit-concurrent.js` with same-file and multi-file modes
-4. Phase 5: Grafana dashboard panels (p95 latency by RPC, error rate, JVM, DB pool).
+4. ~~Phase 5: Regression guard — implement `compare-results.cjs` and CI workflow.~~ ✅ Done
 5. ~~Add `--scenario` flag to `run.sh`~~ ✅ Done
 6. Write `viewer.js` — `get-view-only-bundle` + `get-comment-threads` (deferred per user request).
 
@@ -504,14 +506,14 @@ Run `workspace-edit.js` against each tier separately and plot:
 1. **Runner script (`run.sh`):**
    - `./run.sh smoke` for a 1-VU, 1-iteration smoke test. ✅ Done
    - `./run.sh lifecycle -v 100 -n 10` for the standard run.
-   - Add `--scenario` flag to run individual flows or the full mix.
+   - Add `--scenario` flag to run individual flows or the full mix. ✅ Done
 
 2. **Output:**
    - k6 JSON/CSV output to `performance/results//`.
    - Prometheus snapshot diff (before vs after).
    - Grafana screenshot or dashboard export.
 
-3. **Grafana Dashboard:**
+3. **Grafana Dashboard:** *(Deferred — no Prometheus remote write or InfluxDB configured in current stack)*
    - Panel: `p95 latency by RPC command` (from `rpc_main_timing_seconds`).
    - Panel: `HTTP requests/sec` (from k6).
    - Panel: `Error rate by command` (from k6).
@@ -520,9 +522,19 @@ Run `workspace-edit.js` against each tier separately and plot:
    - Panel: `update-file conflict rate` (custom metric from k6).
    - Panel: `File size vs latency` (from the matrix test).
 
-4. **Regression guard:**
-   - Store baseline results in `performance/baselines/`.
-   - After any backend change, run the baseline scenario. If p95 increases by >20% for any critical command, fail the CI step.
+4. **Regression guard (relative comparison):**
+   - **Approach:** Run performance tests twice in the same CI job — once on base branch, once on PR branch. Compare p95 per RPC command directly. No stored baselines needed.
+   - **Trigger:** Only when backend or common sources change (`backend/src/**`, `common/src/**`).
+   - **Comparison script:** `scripts/compare-results.cjs` — parses two k6 JSON outputs, computes p50/p95/p99 for each RPC command and compares p95.
+   - **Threshold:** Fail if p95 increases >20% for any RPC command. Critical commands (`get-file`, `update-file`, `login-with-password`, `create-demo-profile`, `get-file-libraries`, `get-file-object-thumbnails`) are marked in the report.
+   - **Workflow:**
+     1. Checkout base branch (the PR's base ref)
+     2. Run performance tests → store as "baseline"
+     3. Checkout PR branch
+     4. Run performance tests → store as "current"
+     5. Compare baseline vs current
+     6. If p95 increases >20% → fail CI
+   - **Advantages:** Same hardware, same conditions. No stored baselines. Only runs when backend changes.
 
 ---
 
@@ -581,5 +593,5 @@ Run `workspace-edit.js` against each tier separately and plot:
 
 ---
 
 **Plan Author:** Senior Software Architect
-**Status:** Phase 1–4 complete. All core scripts implemented. Phase 5 (CI & Reporting) remains.
+**Status:** Phase 1–5 complete. Regression guard implemented (relative comparison). Grafana dashboards deferred.