# -----------------------------------------------------------------------------
# golden-corpus-bench.yaml
# Sprint: SPRINT_20260121_036_BinaryIndex_golden_corpus_bundle_verification
# Task: GCB-005 - Implement CI regression gates for corpus KPIs
# Description: CI workflow for golden corpus benchmark and regression detection.
#
# Job graph:
#   validate-corpus  -> runs the validation CLI and publishes KPI outputs
#   check-regression -> compares the results against the committed baseline
#   update-baseline  -> (nightly / on request) commits a new baseline
#   summary          -> always writes the KPI table to the step summary
# -----------------------------------------------------------------------------

name: Golden Corpus Benchmark

on:
  push:
    branches: [main]
    paths:
      - 'src/BinaryIndex/**'
      - 'src/Scanner/**'
      - 'datasets/golden-corpus/**'
      - '.gitea/workflows/golden-corpus-bench.yaml'
  pull_request:
    branches: [main]
    paths:
      - 'src/BinaryIndex/**'
      - 'src/Scanner/**'
      - 'datasets/golden-corpus/**'
  schedule:
    # Nightly at 3 AM UTC
    - cron: '0 3 * * *'
  workflow_dispatch:
    inputs:
      corpus_subset:
        description: 'Corpus subset to validate (seed, extended, full)'
        required: false
        default: 'seed'
      update_baseline:
        description: 'Update baseline after successful run'
        required: false
        default: 'false'
        type: boolean

env:
  DOTNET_NOLOGO: true
  DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
  DOTNET_CLI_TELEMETRY_OPTOUT: true
  CORPUS_ROOT: datasets/golden-corpus
  BASELINE_PATH: bench/baselines/current.json
  RESULTS_DIR: bench/results

jobs:
  # ---------------------------------------------------------------------------
  # Runs the validation CLI and exposes the run id plus the KPIs read from the
  # results JSON as job outputs for the downstream jobs.
  # ---------------------------------------------------------------------------
  validate-corpus:
    name: Validate Golden Corpus
    runs-on: self-hosted
    timeout-minutes: 120
    outputs:
      run_id: ${{ steps.validate.outputs.run_id }}
      precision: ${{ steps.validate.outputs.precision }}
      recall: ${{ steps.validate.outputs.recall }}
      fn_rate: ${{ steps.validate.outputs.fn_rate }}
      determinism: ${{ steps.validate.outputs.determinism }}
      ttfrp_p95: ${{ steps.validate.outputs.ttfrp_p95 }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Restore CLI
        run: dotnet restore src/Cli/StellaOps.Cli/StellaOps.Cli.csproj

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release --no-restore

      - name: Determine corpus subset
        id: corpus
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the free-form dispatch input is never parsed as
          # shell source.
          REQUESTED_SUBSET: ${{ github.event.inputs.corpus_subset || 'seed' }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          SUBSET="$REQUESTED_SUBSET"
          if [ "$EVENT_NAME" = "schedule" ]; then
            # Use extended corpus for nightly, full corpus weekly
            # (date +%u prints 7 on Sunday).
            DAY_OF_WEEK=$(date +%u)
            if [ "$DAY_OF_WEEK" = "7" ]; then
              SUBSET="full"
            else
              SUBSET="extended"
            fi
          fi
          echo "subset=$SUBSET" >> "$GITHUB_OUTPUT"
          echo "path=${{ env.CORPUS_ROOT }}/${SUBSET}/" >> "$GITHUB_OUTPUT"

      - name: Run corpus validation
        id: validate
        run: |
          RUN_ID=$(date +%Y%m%d%H%M%S)
          RESULTS_FILE="${{ env.RESULTS_DIR }}/${RUN_ID}.json"
          mkdir -p "${{ env.RESULTS_DIR }}"

          echo "Starting validation run: $RUN_ID"
          echo "Corpus: ${{ steps.corpus.outputs.path }}"
          echo "Results: $RESULTS_FILE"

          # NOTE(review): the corpus path computed above is only echoed; it is
          # not passed to the CLI invocation below. Confirm whether the CLI
          # needs an explicit corpus argument for the subset to take effect.
          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth validate run \
            --matcher semantic-diffing \
            --output "$RESULTS_FILE" \
            --verbose

          # Without a results file there is no run_id output, and every
          # downstream job would fail on a misnamed artifact. Fail here with a
          # clear message instead.
          if [ ! -f "$RESULTS_FILE" ]; then
            echo "::error::Validation completed but no results file was written to $RESULTS_FILE"
            exit 1
          fi

          echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
          echo "results_file=$RESULTS_FILE" >> "$GITHUB_OUTPUT"

          # Extract KPIs from results for output. When jq is unavailable or the
          # file cannot be parsed, report "n/a" rather than made-up numbers:
          # these values end up in the summary and in the baseline commit
          # message.
          if ! command -v jq >/dev/null 2>&1; then
            echo "::warning::jq is not installed on this runner; KPI outputs will be reported as n/a"
          fi

          PRECISION=$(jq -r '.precision // 0' "$RESULTS_FILE" 2>/dev/null || echo "n/a")
          RECALL=$(jq -r '.recall // 0' "$RESULTS_FILE" 2>/dev/null || echo "n/a")
          FN_RATE=$(jq -r '.falseNegativeRate // 0' "$RESULTS_FILE" 2>/dev/null || echo "n/a")
          DETERMINISM=$(jq -r '.deterministicReplayRate // 0' "$RESULTS_FILE" 2>/dev/null || echo "n/a")
          TTFRP_P95=$(jq -r '.ttfrpP95Ms // 0' "$RESULTS_FILE" 2>/dev/null || echo "n/a")

          {
            echo "precision=$PRECISION"
            echo "recall=$RECALL"
            echo "fn_rate=$FN_RATE"
            echo "determinism=$DETERMINISM"
            echo "ttfrp_p95=$TTFRP_P95"
          } >> "$GITHUB_OUTPUT"

      - name: Upload validation results
        uses: actions/upload-artifact@v4
        with:
          name: validation-results-${{ steps.validate.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}/*.json
          retention-days: 90

  # ---------------------------------------------------------------------------
  # Compares the uploaded results with the baseline file, publishes the report
  # (artifact + PR comment) and fails the job when the check does not pass.
  # ---------------------------------------------------------------------------
  check-regression:
    name: Check KPI Regression
    runs-on: self-hosted
    needs: validate-corpus
    outputs:
      passed: ${{ steps.check.outputs.passed }}
      exit_code: ${{ steps.check.outputs.exit_code }}
      summary: ${{ steps.check.outputs.summary }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Download validation results
        uses: actions/download-artifact@v4
        with:
          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release

      - name: Check regression gates
        id: check
        run: |
          RESULTS_FILE="${{ env.RESULTS_DIR }}/${{ needs.validate-corpus.outputs.run_id }}.json"
          REPORT_FILE="${{ env.RESULTS_DIR }}/regression-report-${{ needs.validate-corpus.outputs.run_id }}.md"

          echo "Checking regression against baseline: ${{ env.BASELINE_PATH }}"

          # Run regression check. errexit is suspended so the exit code can be
          # captured and reported; the job is failed later by a dedicated step,
          # after the report has been uploaded and posted.
          set +e
          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth validate check \
            --results "$RESULTS_FILE" \
            --baseline "${{ env.BASELINE_PATH }}" \
            --precision-threshold 0.01 \
            --recall-threshold 0.01 \
            --fn-rate-threshold 0.01 \
            --determinism-threshold 1.0 \
            --ttfrp-threshold 0.20 \
            --output "$REPORT_FILE" \
            --format markdown
          EXIT_CODE=$?
          set -e

          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"

          # Exit code 0 is a pass, 1 is reported as a regression, and anything
          # else is reported as an error of the check itself.
          if [ "$EXIT_CODE" -eq 0 ]; then
            echo "passed=true" >> "$GITHUB_OUTPUT"
            echo "summary=All regression gates passed" >> "$GITHUB_OUTPUT"
          elif [ "$EXIT_CODE" -eq 1 ]; then
            echo "passed=false" >> "$GITHUB_OUTPUT"
            echo "summary=Regression detected - one or more gates failed" >> "$GITHUB_OUTPUT"
          else
            echo "passed=false" >> "$GITHUB_OUTPUT"
            echo "summary=Error during regression check (exit code: $EXIT_CODE)" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload regression report
        uses: actions/upload-artifact@v4
        with:
          name: regression-report-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}/regression-report-*.md
          retention-days: 90

      - name: Post PR comment with regression report
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const reportPath = '${{ env.RESULTS_DIR }}/regression-report-${{ needs.validate-corpus.outputs.run_id }}.md';

            let report = '## Golden Corpus KPI Regression Check\n\n';

            if (fs.existsSync(reportPath)) {
              report += fs.readFileSync(reportPath, 'utf8');
            } else {
              report += '> Report file not found\n';
              report += '\n**Status:** ${{ steps.check.outputs.summary }}\n';
            }

            // Find existing comment. All pages are fetched: a single
            // listComments call returns only the first page, so on a busy PR
            // the earlier bot comment would be missed and a duplicate posted.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('Golden Corpus KPI Regression Check')
            );

            // Update the previous report in place when there is one, so the PR
            // carries a single, current regression comment.
            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: report
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: report
              });
            }

      # Kept last so the report is uploaded and posted before the job fails.
      - name: Fail on regression
        if: steps.check.outputs.passed != 'true'
        run: |
          echo "::error::${{ steps.check.outputs.summary }}"
          exit ${{ steps.check.outputs.exit_code }}

  # ---------------------------------------------------------------------------
  # Commits a new baseline built from the validated results. Runs only when the
  # regression check passed and either the dispatch input requested it or the
  # run is a scheduled run on main.
  # ---------------------------------------------------------------------------
  update-baseline:
    name: Update Baseline
    runs-on: self-hosted
    needs: [validate-corpus, check-regression]
    if: |
      always() &&
      needs.check-regression.outputs.passed == 'true' &&
      (github.event.inputs.update_baseline == 'true' ||
       (github.event_name == 'schedule' && github.ref == 'refs/heads/main'))
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Download validation results
        uses: actions/download-artifact@v4
        with:
          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release

      # Must run BEFORE the baseline is overwritten; otherwise the archive
      # would receive a copy of the new baseline instead of the previous one.
      - name: Archive previous baseline
        run: |
          ARCHIVE_DIR="bench/baselines/archive"
          mkdir -p "$ARCHIVE_DIR"

          if [ -f "${{ env.BASELINE_PATH }}" ]; then
            TIMESTAMP=$(date +%Y%m%d%H%M%S)
            cp "${{ env.BASELINE_PATH }}" "$ARCHIVE_DIR/baseline-${TIMESTAMP}.json"
          fi

      - name: Update baseline
        run: |
          RESULTS_FILE="${{ env.RESULTS_DIR }}/${{ needs.validate-corpus.outputs.run_id }}.json"

          echo "Updating baseline from: $RESULTS_FILE"

          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth baseline update \
            --from-results "$RESULTS_FILE" \
            --output "${{ env.BASELINE_PATH }}" \
            --description "Auto-updated from nightly run ${{ needs.validate-corpus.outputs.run_id }}" \
            --source "${{ github.sha }}"

      - name: Commit baseline update
        env:
          # Values are handed over as environment variables so their content is
          # treated as data by the shell, not as part of the script.
          RUN_ID: ${{ needs.validate-corpus.outputs.run_id }}
          KPI_PRECISION: ${{ needs.validate-corpus.outputs.precision }}
          KPI_RECALL: ${{ needs.validate-corpus.outputs.recall }}
          KPI_FN_RATE: ${{ needs.validate-corpus.outputs.fn_rate }}
          KPI_DETERMINISM: ${{ needs.validate-corpus.outputs.determinism }}
          KPI_TTFRP_P95: ${{ needs.validate-corpus.outputs.ttfrp_p95 }}
          SOURCE_SHA: ${{ github.sha }}
        run: |
          git config user.name "Stella Ops CI"
          git config user.email "ci@stella-ops.org"

          git add "${{ env.BASELINE_PATH }}"
          git add "bench/baselines/archive/"

          # git commit exits non-zero when nothing is staged; treat that case
          # as a successful no-op instead of failing the job.
          if git diff --cached --quiet; then
            echo "Baseline unchanged - nothing to commit"
            exit 0
          fi

          git commit -m "chore(bench): update golden corpus baseline from ${RUN_ID}

          Precision: ${KPI_PRECISION}
          Recall: ${KPI_RECALL}
          FN Rate: ${KPI_FN_RATE}
          Determinism: ${KPI_DETERMINISM}
          TTFRP p95: ${KPI_TTFRP_P95}ms

          Source: ${SOURCE_SHA}"

          git push

  # ---------------------------------------------------------------------------
  # Always runs, so the KPI table and the regression verdict are written to the
  # step summary even when an upstream job failed.
  # ---------------------------------------------------------------------------
  summary:
    name: Workflow Summary
    runs-on: self-hosted
    needs: [validate-corpus, check-regression]
    if: always()
    steps:
      - name: Generate summary
        run: |
          # One grouped redirect appends the whole report in a single write.
          {
            echo "## Golden Corpus Benchmark Summary"
            echo ""
            echo "| Metric | Value |"
            echo "|--------|-------|"
            echo "| Run ID | ${{ needs.validate-corpus.outputs.run_id }} |"
            echo "| Precision | ${{ needs.validate-corpus.outputs.precision }} |"
            echo "| Recall | ${{ needs.validate-corpus.outputs.recall }} |"
            echo "| False Negative Rate | ${{ needs.validate-corpus.outputs.fn_rate }} |"
            echo "| Deterministic Replay | ${{ needs.validate-corpus.outputs.determinism }} |"
            echo "| TTFRP p95 | ${{ needs.validate-corpus.outputs.ttfrp_p95 }}ms |"
            echo ""
            echo "### Regression Check"
            echo ""
            if [ "${{ needs.check-regression.outputs.passed }}" == "true" ]; then
              echo ":white_check_mark: **${{ needs.check-regression.outputs.summary }}**"
            else
              echo ":x: **${{ needs.check-regression.outputs.summary }}**"
            fi
          } >> "$GITHUB_STEP_SUMMARY"