name: Reachability Benchmark

# Sprint: SPRINT_3500_0003_0001
# Task: CORPUS-009 - Create Gitea workflow for reachability benchmark
# Task: CORPUS-010 - Configure nightly + per-PR benchmark runs
#
# Overview:
#   benchmark        builds the benchmark project, runs it against the
#                    reachability corpus, extracts precision/recall/F1/PR-AUC,
#                    compares PR-AUC to a stored baseline and uploads the
#                    JSON + markdown results as an artifact.
#   update-baseline  (nightly runs only) copies the fresh result into
#                    bench/baselines/.
#   notify-pr        (pull requests only) posts the metrics as a PR comment.

on:
  workflow_dispatch:
    inputs:
      baseline_version:
        description: 'Baseline version to compare against'
        required: false
        default: 'latest'
      verbose:
        description: 'Enable verbose output'
        required: false
        type: boolean
        default: false
  push:
    branches: [main]
    paths:
      - 'datasets/reachability/**'
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmarks/**'
      - 'bench/reachability-benchmark/**'
      - '.gitea/workflows/reachability-bench.yaml'
  pull_request:
    paths:
      - 'datasets/reachability/**'
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmarks/**'
      - 'bench/reachability-benchmark/**'
  schedule:
    # Nightly at 02:00 UTC
    - cron: '0 2 * * *'

jobs:
  benchmark:
    runs-on: ubuntu-22.04
    env:
      DOTNET_NOLOGO: '1'
      DOTNET_CLI_TELEMETRY_OPTOUT: '1'
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: '1'
      TZ: UTC
      STELLAOPS_OFFLINE: 'true'
      STELLAOPS_DETERMINISTIC: 'true'
    outputs:
      precision: ${{ steps.metrics.outputs.precision }}
      recall: ${{ steps.metrics.outputs.recall }}
      f1: ${{ steps.metrics.outputs.f1 }}
      pr_auc: ${{ steps.metrics.outputs.pr_auc }}
      # Empty when no baseline was found and the comparison step was skipped.
      regression: ${{ steps.compare.outputs.regression }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # An exact SDK version is pinned, so no prerelease/quality selector is
      # passed (`include-prerelease` is not an input of setup-dotnet@v4).
      - name: Setup .NET 10
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: 10.0.100

      - name: Cache NuGet packages
        uses: actions/cache@v4
        with:
          path: ~/.nuget/packages
          key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj') }}
          restore-keys: |
            ${{ runner.os }}-nuget-

      - name: Restore benchmark project
        run: |
          dotnet restore src/Scanner/__Libraries/StellaOps.Scanner.Benchmarks/StellaOps.Scanner.Benchmarks.csproj \
            --configfile nuget.config

      - name: Build benchmark project
        run: |
          dotnet build src/Scanner/__Libraries/StellaOps.Scanner.Benchmarks/StellaOps.Scanner.Benchmarks.csproj \
            -c Release \
            --no-restore

      - name: Validate corpus integrity
        run: |
          echo "::group::Validating corpus index"
          if [ ! -f datasets/reachability/corpus.json ]; then
            echo "::error::corpus.json not found"
            exit 1
          fi
          python3 -c "import json; data = json.load(open('datasets/reachability/corpus.json')); print(f'Corpus contains {len(data.get(\"samples\", []))} samples')"
          echo "::endgroup::"

      - name: Run benchmark
        id: benchmark
        env:
          # `inputs.verbose` is a boolean on workflow_dispatch and empty for
          # every other trigger; rendered into the environment it is either
          # "true", "false" or "", so a plain string test is reliable.
          VERBOSE: ${{ inputs.verbose }}
          RESULT_FILE: bench/results/benchmark-${{ github.sha }}.json
        run: |
          echo "::group::Running reachability benchmark"
          mkdir -p bench/results

          extra_args=()
          if [ "$VERBOSE" = "true" ]; then
            extra_args+=(--verbose)
          fi

          # Run the corpus benchmark
          dotnet run \
            --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmarks/StellaOps.Scanner.Benchmarks.csproj \
            -c Release \
            --no-build \
            -- corpus run \
            --corpus datasets/reachability/corpus.json \
            --output "$RESULT_FILE" \
            --format json \
            "${extra_args[@]}"
          echo "::endgroup::"

      - name: Extract metrics
        id: metrics
        env:
          RESULT_FILE: bench/results/benchmark-${{ github.sha }}.json
        run: |
          echo "::group::Extracting metrics"
          if [ ! -f "$RESULT_FILE" ]; then
            echo "::error::Benchmark result file not found"
            exit 1
          fi

          # Missing metrics default to 0.
          PRECISION=$(jq -r '.metrics.precision // 0' "$RESULT_FILE")
          RECALL=$(jq -r '.metrics.recall // 0' "$RESULT_FILE")
          F1=$(jq -r '.metrics.f1 // 0' "$RESULT_FILE")
          PR_AUC=$(jq -r '.metrics.pr_auc // 0' "$RESULT_FILE")

          {
            echo "precision=$PRECISION"
            echo "recall=$RECALL"
            echo "f1=$F1"
            echo "pr_auc=$PR_AUC"
          } >> "$GITHUB_OUTPUT"

          echo "Precision: $PRECISION"
          echo "Recall: $RECALL"
          echo "F1: $F1"
          echo "PR-AUC: $PR_AUC"
          echo "::endgroup::"

      - name: Get baseline
        id: baseline
        env:
          # Passed through the environment so the user-supplied value is never
          # spliced into the script text.
          BASELINE_VERSION: ${{ inputs.baseline_version || 'latest' }}
        run: |
          echo "::group::Loading baseline"
          if [ "$BASELINE_VERSION" = "latest" ]; then
            # File mtimes all equal the checkout time, so pick the newest
            # baseline by (version-sorted) file name instead of `ls -t`.
            BASELINE_FILE=$(ls -1 bench/baselines/*.json 2>/dev/null | sort -V | tail -n 1 || true)
          else
            BASELINE_FILE="bench/baselines/$BASELINE_VERSION.json"
          fi

          if [ -f "$BASELINE_FILE" ]; then
            echo "baseline_file=$BASELINE_FILE" >> "$GITHUB_OUTPUT"
            echo "Using baseline: $BASELINE_FILE"
          else
            echo "::warning::No baseline found, skipping comparison"
            echo "baseline_file=" >> "$GITHUB_OUTPUT"
          fi
          echo "::endgroup::"

      - name: Compare to baseline
        id: compare
        if: steps.baseline.outputs.baseline_file != ''
        env:
          BASELINE_FILE: ${{ steps.baseline.outputs.baseline_file }}
          RESULT_FILE: bench/results/benchmark-${{ github.sha }}.json
        run: |
          echo "::group::Comparing to baseline"

          # Prints (current - baseline) rounded to 6 decimals. awk is used
          # rather than bc: it is always present and accepts exponent
          # notation (e.g. 1e-05) that jq may emit.
          delta() {
            awk -v current="$1" -v baseline="$2" \
              'BEGIN { printf "%.6f", current - baseline }'
          }

          # Extract baseline metrics
          BASELINE_PRECISION=$(jq -r '.metrics.precision // 0' "$BASELINE_FILE")
          BASELINE_RECALL=$(jq -r '.metrics.recall // 0' "$BASELINE_FILE")
          BASELINE_PR_AUC=$(jq -r '.metrics.pr_auc // 0' "$BASELINE_FILE")

          # Extract current metrics
          CURRENT_PRECISION=$(jq -r '.metrics.precision // 0' "$RESULT_FILE")
          CURRENT_RECALL=$(jq -r '.metrics.recall // 0' "$RESULT_FILE")
          CURRENT_PR_AUC=$(jq -r '.metrics.pr_auc // 0' "$RESULT_FILE")

          # Calculate deltas
          PRECISION_DELTA=$(delta "$CURRENT_PRECISION" "$BASELINE_PRECISION")
          RECALL_DELTA=$(delta "$CURRENT_RECALL" "$BASELINE_RECALL")
          PR_AUC_DELTA=$(delta "$CURRENT_PR_AUC" "$BASELINE_PR_AUC")

          echo "Precision delta: $PRECISION_DELTA"
          echo "Recall delta: $RECALL_DELTA"
          echo "PR-AUC delta: $PR_AUC_DELTA"

          # Check for regression (PR-AUC drop > 2%). The rounded delta is
          # compared so a drop of exactly 0.02 is not flagged because of
          # floating-point noise.
          REGRESSION_THRESHOLD=-0.02
          if awk -v delta="$PR_AUC_DELTA" -v threshold="$REGRESSION_THRESHOLD" \
            'BEGIN { exit !(delta < threshold) }'; then
            echo "::error::PR-AUC regression detected: $PR_AUC_DELTA (threshold: $REGRESSION_THRESHOLD)"
            echo "regression=true" >> "$GITHUB_OUTPUT"
          else
            echo "regression=false" >> "$GITHUB_OUTPUT"
          fi
          echo "::endgroup::"

      - name: Generate markdown report
        env:
          COMMIT_SHA: ${{ github.sha }}
          RUN_NUMBER: ${{ github.run_number }}
          PRECISION: ${{ steps.metrics.outputs.precision }}
          RECALL: ${{ steps.metrics.outputs.recall }}
          F1: ${{ steps.metrics.outputs.f1 }}
          PR_AUC: ${{ steps.metrics.outputs.pr_auc }}
          REGRESSION_STATUS: ${{ steps.compare.outputs.regression == 'true' && '⚠️ **REGRESSION DETECTED**' || '✅ No regression' }}
        run: |
          echo "::group::Generating report"
          REPORT_FILE="bench/results/benchmark-${COMMIT_SHA}.md"
          REPORT_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

          # Unquoted heredoc delimiter: the shell must expand the variables
          # below (a quoted 'EOF' would write them out literally).
          cat > "$REPORT_FILE" << EOF
          # Reachability Benchmark Report

          **Commit:** ${COMMIT_SHA}
          **Run:** ${RUN_NUMBER}
          **Date:** ${REPORT_DATE}

          ## Metrics

          | Metric | Value |
          |--------|-------|
          | Precision | ${PRECISION} |
          | Recall | ${RECALL} |
          | F1 Score | ${F1} |
          | PR-AUC | ${PR_AUC} |

          ## Comparison

          ${REGRESSION_STATUS}
          EOF

          echo "Report generated: $REPORT_FILE"
          echo "::endgroup::"

      - name: Upload results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.sha }}
          path: |
            bench/results/benchmark-${{ github.sha }}.json
            bench/results/benchmark-${{ github.sha }}.md
          retention-days: 90

      # Runs after the upload so the results of a regressing PR are still
      # available as an artifact.
      - name: Fail on regression
        if: steps.compare.outputs.regression == 'true' && github.event_name == 'pull_request'
        run: |
          echo "::error::Benchmark regression detected. PR-AUC dropped below threshold."
          exit 1

  update-baseline:
    needs: benchmark
    # Nightly only. The job-level condition previously required a `push`
    # event while the update step required `schedule`, so the baseline could
    # never be refreshed.
    if: >-
      github.event_name == 'schedule' &&
      github.ref == 'refs/heads/main' &&
      needs.benchmark.outputs.regression != 'true'
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ github.sha }}
          path: bench/results/

      # NOTE(review): the new baseline is only written into this job's
      # workspace; nothing in this workflow commits, pushes or uploads it.
      # Confirm how baselines are meant to be persisted.
      - name: Update baseline (nightly only)
        env:
          COMMIT_SHA: ${{ github.sha }}
        run: |
          DATE=$(date -u +%Y%m%d)
          mkdir -p bench/baselines
          cp "bench/results/benchmark-${COMMIT_SHA}.json" "bench/baselines/baseline-$DATE.json"
          echo "Updated baseline to baseline-$DATE.json"

  notify-pr:
    needs: benchmark
    # The benchmark job fails on a PR regression, which would skip this job
    # under the default success() gate; also run when a regression was
    # reported so the warning comment is actually posted.
    if: >-
      ${{
        !cancelled() &&
        github.event_name == 'pull_request' &&
        (needs.benchmark.result == 'success' ||
         needs.benchmark.outputs.regression == 'true')
      }}
    runs-on: ubuntu-22.04
    permissions:
      pull-requests: write
    steps:
      - name: Comment on PR
        uses: actions/github-script@v7
        with:
          script: |
            const precision = '${{ needs.benchmark.outputs.precision }}';
            const recall = '${{ needs.benchmark.outputs.recall }}';
            const f1 = '${{ needs.benchmark.outputs.f1 }}';
            const prAuc = '${{ needs.benchmark.outputs.pr_auc }}';
            const regression = '${{ needs.benchmark.outputs.regression }}' === 'true';

            const status = regression ? '⚠️ REGRESSION' : '✅ PASS';

            const body = `## Reachability Benchmark Results

            ${status}

            | Metric | Value |
            |--------|-------|
            | Precision | ${precision} |
            | Recall | ${recall} |
            | F1 Score | ${f1} |
            | PR-AUC | ${prAuc} |

            ${regression ? '### ⚠️ Regression Detected\nPR-AUC dropped below threshold. Please review changes.' : ''}

            Details

            - Commit: \`${{ github.sha }}\`
            - Run: [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
            `;

            // Awaited so an API failure fails the step instead of being lost.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });