feat(ci): PR/issue auto-labeling + declarative label sync (#3360)

- .github/labels.yml: declarative source of truth (29 namespaced labels)
- scripts/sync_labels.py + label-sync.yml: idempotent label sync (self-bootstraps on merge)
- labeler.yml + pr-labeler.yml: area:* labels by changed path (actions/labeler)
- pr-triage.yml: size/*, risk:*, needs-validation, first-time-contributor, reviewing
- issue-triage.yml: needs-triage on new issues (self-healing)

All PR workflows use pull_request_target but never check out or run PR code
(read changed-file metadata via the API only).
This commit is contained in:
Xinmin Zeng 2026-06-03 16:40:24 +08:00 committed by GitHub
parent 3ae82dc663
commit aca7acc105
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 558 additions and 0 deletions

72
.github/labeler.yml vendored Normal file
View File

@ -0,0 +1,72 @@
# Path-based PR auto-labeling config for actions/labeler@v5.
# Each key is a label (must exist — see .github/labels.yml); the globs decide
# when it is applied. A PR can match several areas, which is expected.
"area:frontend":
- changed-files:
- any-glob-to-any-file:
- "frontend/**"
"area:backend":
- changed-files:
- any-glob-to-any-file:
- "backend/app/**"
- "backend/packages/harness/deerflow/runtime/**"
- "backend/packages/harness/deerflow/persistence/**"
- "backend/packages/harness/deerflow/config/**"
- "backend/packages/harness/deerflow/tools/**"
- "backend/packages/harness/deerflow/guardrails/**"
- "backend/packages/harness/deerflow/tracing/**"
- "backend/packages/harness/deerflow/models/**"
- "backend/packages/harness/deerflow/utils/**"
- "backend/packages/harness/deerflow/uploads/**"
"area:agents":
- changed-files:
- any-glob-to-any-file:
- "backend/packages/harness/deerflow/agents/**"
- "backend/packages/harness/deerflow/subagents/**"
- "backend/packages/harness/deerflow/reflection/**"
- "backend/langgraph.json"
- "backend/**/prompts/**"
"area:sandbox":
- changed-files:
- any-glob-to-any-file:
- "docker/**"
- "backend/packages/harness/deerflow/sandbox/**"
- "backend/Dockerfile"
- "frontend/Dockerfile"
"area:skills":
- changed-files:
- any-glob-to-any-file:
- "skills/**"
- "backend/packages/harness/deerflow/skills/**"
- "frontend/src/core/skills/**"
"area:mcp":
- changed-files:
- any-glob-to-any-file:
- "backend/packages/harness/deerflow/mcp/**"
- "frontend/src/core/mcp/**"
"area:ci":
- changed-files:
- any-glob-to-any-file:
- ".github/**"
- "scripts/**"
"area:docs":
- changed-files:
- any-glob-to-any-file:
- "docs/**"
- "**/*.md"
"area:deps":
- changed-files:
- any-glob-to-any-file:
- "backend/pyproject.toml"
- "backend/uv.lock"
- "frontend/package.json"
- "frontend/pnpm-lock.yaml"

119
.github/labels.yml vendored Normal file
View File

@ -0,0 +1,119 @@
# Declarative label source of truth for DeerFlow.
#
# This file is the single source of truth for repository labels used by the
# auto-labeling workflows (.github/workflows/pr-labeler.yml, pr-triage.yml,
# issue-triage.yml). Auto-labelers can only apply labels that already exist,
# so every label referenced by a workflow MUST be declared here.
#
# Apply with: uv run --with pyyaml python scripts/sync_labels.py [--repo OWNER/NAME]
# CI keeps it in sync via .github/workflows/label-sync.yml (runs on changes here).
#
# Sync is additive/update-only: it creates or updates the labels listed below
# and never deletes labels that are not listed.
#
# Color = 6-digit hex without the leading '#'.
labels:
# ── Type ─────────────────────────────────────────────────────────────────
# Mostly GitHub defaults; declared here so colors/descriptions stay stable
# and so issue templates can rely on them existing.
- name: bug
color: d73a4a
description: Something isn't working
- name: enhancement
color: a2eeef
description: New feature or request
- name: documentation
color: 0075ca
description: Improvements or additions to documentation
- name: question
color: d876e3
description: Further information is requested
# ── Area (auto, by changed paths — see .github/labeler.yml) ───────────────
# Mirrors the "Surface area" section of the pull request template.
- name: "area:frontend"
color: c5def5
description: Next.js frontend under frontend/
- name: "area:backend"
color: c5def5
description: Gateway / runtime / core backend under backend/
- name: "area:agents"
color: c5def5
description: Agents, subagents, graph wiring, prompts, langgraph.json
- name: "area:sandbox"
color: c5def5
description: Sandboxed execution and docker/
- name: "area:skills"
color: c5def5
description: Skills under skills/ or the skills harness
- name: "area:mcp"
color: c5def5
description: Model Context Protocol integration
- name: "area:ci"
color: c5def5
description: GitHub Actions, CI config, repo tooling
- name: "area:docs"
color: c5def5
description: Documentation and Markdown only
- name: "area:deps"
color: c5def5
description: Dependency manifests / lockfiles
# ── Size (auto, by additions + deletions — see pr-triage.yml) ─────────────
- name: "size/XS"
color: "009900"
description: PR changes < 20 lines
- name: "size/S"
color: 77bb00
description: PR changes 20-100 lines
- name: "size/M"
color: eebb00
description: PR changes 100-300 lines
- name: "size/L"
color: ee9900
description: PR changes 300-700 lines
- name: "size/XL"
color: ee5500
description: PR changes 700+ lines
# ── Risk (auto, by changed paths — see pr-triage.yml) ─────────────────────
- name: "risk:low"
color: 0e8a16
description: "Low risk: docs / i18n / assets only"
- name: "risk:medium"
color: fbca04
description: "Medium risk: regular code changes"
- name: "risk:high"
color: b60205
description: "High risk: backend API, agents, sandbox, auth, deps, CI"
# ── Priority (manual) ─────────────────────────────────────────────────────
- name: P0
color: b60205
description: Critical priority
- name: P1
color: d93f0b
description: Major priority
- name: P2
color: e99695
description: Normal priority
# ── Status (auto + manual) ────────────────────────────────────────────────
- name: needs-triage
color: fef2c0
description: Awaiting maintainer triage
- name: needs-validation
color: d4c5f9
description: Touches front/back contract surface; needs real-path validation
- name: skip-validation
color: cccccc
description: "Maintainer override: do not auto-add needs-validation on this PR"
- name: reviewing
color: 5319e7
description: A maintainer is reviewing this PR
# ── Contributor ───────────────────────────────────────────────────────────
- name: first-time-contributor
color: c2e0c6
description: First contribution to this repository — be welcoming

44
.github/workflows/issue-triage.yml vendored Normal file
View File

@ -0,0 +1,44 @@
name: Issue Triage
# Ensures every newly opened issue carries `needs-triage`, even blank or
# API-created ones that bypass the issue templates. Creates the label if it is
# somehow missing, so the workflow is self-healing.
on:
issues:
types: [opened]
permissions:
issues: write
jobs:
needs-triage:
runs-on: ubuntu-latest
steps:
- name: Add needs-triage label
uses: actions/github-script@v7
with:
script: |
const { owner, repo } = context.repo;
const issue_number = context.payload.issue.number;
const current = (context.payload.issue.labels || []).map(l => l.name);
if (current.includes('needs-triage')) {
core.info('Issue already has needs-triage; nothing to do.');
return;
}
// Self-heal: create the label if it does not exist yet.
try {
await github.rest.issues.createLabel({
owner, repo, name: 'needs-triage', color: 'fef2c0',
description: 'Awaiting maintainer triage',
});
} catch (e) {
if (e.status !== 422) throw e; // 422 = already exists
}
await github.rest.issues.addLabels({
owner, repo, issue_number, labels: ['needs-triage'],
});
core.info(`Added needs-triage to #${issue_number}.`);

38
.github/workflows/label-sync.yml vendored Normal file
View File

@ -0,0 +1,38 @@
name: Label Sync
# Keeps repository labels in sync with the declarative source of truth
# (.github/labels.yml). Runs whenever that file changes on main, and can be
# triggered manually. Additive/update-only — never deletes labels.
on:
push:
branches: [main]
paths:
- ".github/labels.yml"
- "scripts/sync_labels.py"
- ".github/workflows/label-sync.yml"
workflow_dispatch:
permissions:
contents: read
issues: write
concurrency:
group: label-sync
cancel-in-progress: false
jobs:
sync:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Install uv
uses: astral-sh/setup-uv@v7
- name: Sync labels
run: uv run --with pyyaml python scripts/sync_labels.py
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_REPO: ${{ github.repository }}

28
.github/workflows/pr-labeler.yml vendored Normal file
View File

@ -0,0 +1,28 @@
name: PR Labeler
# Applies area:* labels based on which files a PR changes (see .github/labeler.yml).
# Uses pull_request_target so it also works on fork PRs. SAFE: actions/labeler
# only reads the changed-file list via the API — it never checks out or runs PR code.
on:
pull_request_target:
types: [opened, synchronize, reopened, ready_for_review]
permissions:
contents: read
pull-requests: write
concurrency:
group: pr-labeler-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
label:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Apply area labels
uses: actions/labeler@v5
with:
configuration-path: .github/labeler.yml
sync-labels: true

164
.github/workflows/pr-triage.yml vendored Normal file
View File

@ -0,0 +1,164 @@
name: PR Triage
# Two responsibilities, both pure-metadata (no PR code is checked out or run):
# 1. On open/sync: apply size/* + risk:* labels, and needs-validation when the
# PR touches the front/back contract surface (backend API, SSE, agents, or
# the frontend streaming client). A `skip-validation` label opts out.
# 2. On maintainer review: apply the `reviewing` label.
#
# All labels are managed within their own namespace — labels outside size/*,
# risk:*, needs-validation and reviewing are never touched here.
on:
pull_request_target:
types: [opened, synchronize, reopened, ready_for_review]
pull_request_review:
types: [submitted]
permissions:
contents: read
pull-requests: write
concurrency:
group: pr-triage-${{ github.event.pull_request.number }}
cancel-in-progress: false
jobs:
size-and-risk:
if: github.event_name == 'pull_request_target' && github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Label size, risk and validation need
uses: actions/github-script@v7
with:
script: |
const pr = context.payload.pull_request;
const { owner, repo } = context.repo;
const prNumber = pr.number;
// ---- size, from additions + deletions ----
const churn = (pr.additions || 0) + (pr.deletions || 0);
const sizeLabel =
churn < 20 ? 'size/XS' :
churn < 100 ? 'size/S' :
churn < 300 ? 'size/M' :
churn < 700 ? 'size/L' : 'size/XL';
// ---- changed paths ----
const files = await github.paginate(github.rest.pulls.listFiles, {
owner, repo, pull_number: prNumber, per_page: 100,
});
const paths = files.map(f => f.filename);
const matches = (re) => paths.some(p => re.test(p));
const docsOnly = paths.length > 0 && paths.every(p =>
/\.(md|mdx|txt)$/i.test(p) || p.startsWith('docs/') ||
/\.(png|jpe?g|gif|svg|webp|ico)$/i.test(p));
const highRisk = matches(
/^backend\/app\/gateway\//) || matches(
/^backend\/packages\/harness\/deerflow\/(agents|subagents|sandbox)\//) || matches(
/(^|\/)langgraph\.json$/) || matches(
/(^|\/)(auth|authz|security)/i) || matches(
/(pyproject\.toml|uv\.lock|package\.json|pnpm-lock\.yaml)$/) || matches(
/^docker\//) || matches(
/^\.github\/workflows\//);
const riskLabel = docsOnly ? 'risk:low' : (highRisk ? 'risk:high' : 'risk:medium');
// needs-validation: front/back contract surface
const contractSurface =
matches(/^backend\/app\/gateway\//) ||
matches(/^backend\/packages\/harness\/deerflow\/(agents|subagents)\//) ||
matches(/(^|\/)langgraph\.json$/) ||
matches(/^frontend\/src\/core\/(api|threads|messages)\//);
const current = (pr.labels || []).map(l => l.name);
const hasSkip = current.includes('skip-validation');
const desired = [sizeLabel, riskLabel];
if (contractSurface && !hasSkip) desired.push('needs-validation');
const managed = (name) =>
name.startsWith('size/') || name.startsWith('risk:') || name === 'needs-validation';
const toRemove = current.filter(l => managed(l) && !desired.includes(l));
const toAdd = desired.filter(l => !current.includes(l));
for (const name of toRemove) {
try {
await github.rest.issues.removeLabel({ owner, repo, issue_number: prNumber, name });
} catch (e) {
if (e.status !== 404) throw e;
}
}
if (toAdd.length) {
await github.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels: toAdd });
}
core.info(`size=${sizeLabel} risk=${riskLabel} churn=${churn} ` +
`validation=${desired.includes('needs-validation')} ` +
`(+${toAdd.join(',') || '-'} / -${toRemove.join(',') || '-'})`);
first-time:
if: github.event_name == 'pull_request_target' && github.event.action == 'opened'
runs-on: ubuntu-latest
steps:
- name: Label first-time contributors
uses: actions/github-script@v7
with:
script: |
const pr = context.payload.pull_request;
const { owner, repo } = context.repo;
const assoc = pr.author_association;
const isBot = pr.user.type === 'Bot';
core.info(`author=${pr.user.login} association=${assoc} bot=${isBot}`);
// FIRST_TIME_CONTRIBUTOR = no prior merged commit to this repo;
// FIRST_TIMER = no prior commit anywhere on GitHub. Either counts.
if (isBot || !['FIRST_TIME_CONTRIBUTOR', 'FIRST_TIMER'].includes(assoc)) {
core.info('Not a first-time contributor; skipping.');
return;
}
await github.rest.issues.addLabels({
owner, repo, issue_number: pr.number, labels: ['first-time-contributor'],
});
core.info(`Added first-time-contributor to #${pr.number}.`);
reviewing:
if: github.event_name == 'pull_request_review'
runs-on: ubuntu-latest
steps:
- name: Add reviewing label for maintainer reviews
uses: actions/github-script@v7
with:
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
const reviewer = context.payload.review.user.login;
const { data: perm } = await github.rest.repos.getCollaboratorPermissionLevel({
owner, repo, username: reviewer,
});
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
core.info(`Reviewer ${reviewer} (${perm.permission}) is not a maintainer; skipping.`);
return;
}
const { data: labels } = await github.rest.issues.listLabelsOnIssue({
owner, repo, issue_number: prNumber,
});
if (labels.some(l => l.name === 'reviewing')) {
core.info('Already labeled reviewing; skipping.');
return;
}
try {
await github.rest.issues.addLabels({
owner, repo, issue_number: prNumber, labels: ['reviewing'],
});
core.info(`Added "reviewing" (reviewer ${reviewer}).`);
} catch (e) {
// 403 is expected for review events on some fork PR contexts.
if (e.status === 403) core.info('No permission to label (expected on some fork PRs).');
else throw e;
}

93
scripts/sync_labels.py Normal file
View File

@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""Sync GitHub labels from the declarative source of truth.
Reads ``.github/labels.yml`` and creates/updates each label via the GitHub CLI
(``gh label create --force``). Sync is additive/update-only: labels not listed
in the file are left untouched (never deleted).
Usage:
uv run --with pyyaml python scripts/sync_labels.py [--repo OWNER/NAME] [--dry-run]
Requires the ``gh`` CLI to be installed and authenticated (or ``GH_TOKEN`` set,
as in CI). When ``--repo`` is omitted, ``gh`` uses the current repository.
"""
from __future__ import annotations
import argparse
import subprocess
import sys
from pathlib import Path
try:
import yaml
except ModuleNotFoundError: # pragma: no cover - guidance for local runs
sys.exit(
"PyYAML is required. Run via:\n"
" uv run --with pyyaml python scripts/sync_labels.py"
)
LABELS_FILE = Path(__file__).resolve().parent.parent / ".github" / "labels.yml"
def load_labels(path: Path) -> list[dict[str, str]]:
data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
labels = data.get("labels")
if not isinstance(labels, list) or not labels:
sys.exit(f"No labels found in {path}")
for label in labels:
if not isinstance(label, dict) or "name" not in label:
sys.exit(f"Invalid label entry (missing 'name'): {label!r}")
return labels
def sync_label(label: dict[str, str], repo: str | None, dry_run: bool) -> bool:
name = str(label["name"])
color = str(label.get("color", "ededed")).lstrip("#")
description = str(label.get("description", ""))
cmd = ["gh", "label", "create", name, "--color", color, "--force"]
if description:
cmd += ["--description", description]
if repo:
cmd += ["--repo", repo]
if dry_run:
print(f"[dry-run] {' '.join(cmd)}")
return True
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
print(f"{name}: {result.stderr.strip()}", file=sys.stderr)
return False
print(f"{name}")
return True
def main() -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repo", help="Target repository as OWNER/NAME")
parser.add_argument(
"--dry-run",
action="store_true",
help="Print the gh commands without executing them",
)
args = parser.parse_args()
labels = load_labels(LABELS_FILE)
target = args.repo or "(current repository)"
print(f"Syncing {len(labels)} labels to {target}")
failures = sum(
0 if sync_label(label, args.repo, args.dry_run) else 1 for label in labels
)
if failures:
print(f"\n{failures} label(s) failed to sync", file=sys.stderr)
return 1
print(f"\nDone — {len(labels)} labels in sync.")
return 0
if __name__ == "__main__":
raise SystemExit(main())