tests fixes and sprints work

2026-01-22 19:08:46 +02:00
parent c32fff8f86
commit 726d70dc7f
881 changed files with 134434 additions and 6228 deletions
--- a/.gitea/docs/troubleshooting.md
+++ b/.gitea/docs/troubleshooting.md
@@ -58,7 +58,7 @@ dotnet nuget locals all --clear
 dotnet nuget list source

 # Restore with verbose logging
-dotnet restore src/StellaOps.sln -v detailed
+dotnet restore src/<Module>/StellaOps.<Module>.sln -v detailed
 ```

 **In CI:**
@@ -66,7 +66,7 @@ dotnet restore src/StellaOps.sln -v detailed
 - name: Restore with retry
  run: |
    for i in {1..3}; do
-      dotnet restore src/StellaOps.sln && break
+      dotnet restore src/<Module>/StellaOps.<Module>.sln && break
      echo "Retry $i..."
      sleep 30
    done
--- a/.gitea/workflows/golden-corpus-bench.yaml
+++ b/.gitea/workflows/golden-corpus-bench.yaml
@@ -0,0 +1,358 @@
+# -----------------------------------------------------------------------------
+# golden-corpus-bench.yaml
+# Sprint: SPRINT_20260121_036_BinaryIndex_golden_corpus_bundle_verification
+# Task: GCB-005 - Implement CI regression gates for corpus KPIs
+# Description: CI workflow for golden corpus benchmark and regression detection.
+# -----------------------------------------------------------------------------
+
+name: Golden Corpus Benchmark
+
+# Triggers:
+#   - push / pull_request targeting main, limited to the BinaryIndex and
+#     Scanner sources, the corpus data set and this workflow file
+#   - a nightly schedule
+#   - manual dispatch with an optional corpus subset and baseline refresh
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'src/BinaryIndex/**'
+      - 'src/Scanner/**'
+      - 'datasets/golden-corpus/**'
+      - '.gitea/workflows/golden-corpus-bench.yaml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'src/BinaryIndex/**'
+      - 'src/Scanner/**'
+      - 'datasets/golden-corpus/**'
+      # Mirrors the push filter so edits to the gate itself run before merge.
+      - '.gitea/workflows/golden-corpus-bench.yaml'
+  schedule:
+    # Nightly at 3 AM UTC
+    - cron: '0 3 * * *'
+  workflow_dispatch:
+    inputs:
+      corpus_subset:
+        description: 'Corpus subset to validate (seed, extended, full)'
+        required: false
+        default: 'seed'
+      update_baseline:
+        description: 'Update baseline after successful run'
+        required: false
+        default: 'false'
+        type: boolean
+
+# Workflow-wide environment, visible to every job and step below.
+env:
+  # .NET CLI switches; quoted so they are unambiguously strings.
+  DOTNET_NOLOGO: 'true'
+  DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
+  DOTNET_CLI_TELEMETRY_OPTOUT: 'true'
+  # Repository-relative locations referenced by the jobs as ${{ env.* }}.
+  CORPUS_ROOT: 'datasets/golden-corpus'
+  BASELINE_PATH: 'bench/baselines/current.json'
+  RESULTS_DIR: 'bench/results'
+
+jobs:
+  # Builds the CLI, runs the groundtruth validation and publishes the run id
+  # and KPI values as job outputs for the jobs that list this one in `needs`.
+  validate-corpus:
+    name: Validate Golden Corpus
+    runs-on: self-hosted
+    timeout-minutes: 120
+    # Every output is forwarded from the step with id `validate`.
+    outputs:
+      run_id: ${{ steps.validate.outputs.run_id }}
+      precision: ${{ steps.validate.outputs.precision }}
+      recall: ${{ steps.validate.outputs.recall }}
+      fn_rate: ${{ steps.validate.outputs.fn_rate }}
+      determinism: ${{ steps.validate.outputs.determinism }}
+      ttfrp_p95: ${{ steps.validate.outputs.ttfrp_p95 }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Also download Git LFS objects during checkout.
+          lfs: true
+
+      - name: Setup .NET
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '10.0.x'
+
+      # Restore is a separate step; the build below therefore passes
+      # --no-restore.
+      - name: Restore CLI
+        run: dotnet restore src/Cli/StellaOps.Cli/StellaOps.Cli.csproj
+
+      - name: Build CLI
+        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release --no-restore
+
+      # Chooses which corpus subset this run targets and exposes it as the
+      # step outputs `subset` and `path`.
+      - name: Determine corpus subset
+        id: corpus
+        env:
+          # Handed over through the environment rather than interpolated into
+          # the script text, so a crafted dispatch input cannot inject shell
+          # syntax.
+          REQUESTED_SUBSET: ${{ github.event.inputs.corpus_subset || 'seed' }}
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          SUBSET="$REQUESTED_SUBSET"
+          if [ "$EVENT_NAME" = "schedule" ]; then
+            # Use extended corpus for nightly, full corpus weekly
+            # (date +%u prints 7 on Sunday).
+            DAY_OF_WEEK=$(date +%u)
+            if [ "$DAY_OF_WEEK" = "7" ]; then
+              SUBSET="full"
+            else
+              SUBSET="extended"
+            fi
+          fi
+
+          # Only the subsets named in the input description are accepted; this
+          # also keeps unexpected characters out of the path and out of
+          # $GITHUB_OUTPUT.
+          case "$SUBSET" in
+            seed|extended|full) ;;
+            *)
+              echo "::error::Unsupported corpus subset '$SUBSET' (expected seed, extended or full)"
+              exit 1
+              ;;
+          esac
+
+          echo "subset=$SUBSET" >> "$GITHUB_OUTPUT"
+          echo "path=${{ env.CORPUS_ROOT }}/${SUBSET}/" >> "$GITHUB_OUTPUT"
+
+      # Runs the validator, then publishes the run id, the results file path
+      # and the KPI values as step outputs.
+      - name: Run corpus validation
+        id: validate
+        run: |
+          RUN_ID=$(date +%Y%m%d%H%M%S)
+          RESULTS_FILE="${{ env.RESULTS_DIR }}/${RUN_ID}.json"
+          mkdir -p "${{ env.RESULTS_DIR }}"
+
+          echo "Starting validation run: $RUN_ID"
+          echo "Corpus: ${{ steps.corpus.outputs.path }}"
+          echo "Results: $RESULTS_FILE"
+
+          # NOTE(review): the corpus path from the previous step is only
+          # logged above and is not passed to the CLI - confirm how the
+          # validator selects the corpus subset.
+          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
+            groundtruth validate run \
+            --matcher semantic-diffing \
+            --output "$RESULTS_FILE" \
+            --verbose
+
+          # The artifact name and the downstream jobs are keyed on run_id, so
+          # a run that produced no results file has to stop here instead of
+          # continuing with empty outputs.
+          if [ ! -f "$RESULTS_FILE" ]; then
+            echo "::error::Validation finished without writing $RESULTS_FILE"
+            exit 1
+          fi
+
+          echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
+          echo "results_file=$RESULTS_FILE" >> "$GITHUB_OUTPUT"
+
+          # The KPI outputs are used for display (job summary, baseline commit
+          # message). When they cannot be read, report "unknown" rather than
+          # an invented number that would look like a measurement.
+          if ! command -v jq >/dev/null 2>&1; then
+            echo "::warning::jq not found; KPI outputs will be reported as unknown"
+          fi
+
+          # Prints the named top-level field of the results JSON, 0 when the
+          # field is absent, or "unknown" when jq is missing or fails.
+          read_kpi() {
+            jq -r --arg key "$1" '.[$key] // 0' "$RESULTS_FILE" 2>/dev/null || echo "unknown"
+          }
+
+          PRECISION=$(read_kpi precision)
+          RECALL=$(read_kpi recall)
+          FN_RATE=$(read_kpi falseNegativeRate)
+          DETERMINISM=$(read_kpi deterministicReplayRate)
+          TTFRP_P95=$(read_kpi ttfrpP95Ms)
+
+          echo "precision=$PRECISION" >> "$GITHUB_OUTPUT"
+          echo "recall=$RECALL" >> "$GITHUB_OUTPUT"
+          echo "fn_rate=$FN_RATE" >> "$GITHUB_OUTPUT"
+          echo "determinism=$DETERMINISM" >> "$GITHUB_OUTPUT"
+          echo "ttfrp_p95=$TTFRP_P95" >> "$GITHUB_OUTPUT"
+
+      # Publishes the results JSON. The artifact name embeds the run id; the
+      # check-regression and update-baseline jobs download it by that name.
+      - name: Upload validation results
+        uses: actions/upload-artifact@v4
+        with:
+          name: validation-results-${{ steps.validate.outputs.run_id }}
+          path: ${{ env.RESULTS_DIR }}/*.json
+          retention-days: 90
+
+  # Compares the validation results with the stored baseline and reports
+  # whether the regression gates passed.
+  check-regression:
+    name: Check KPI Regression
+    runs-on: self-hosted
+    needs: validate-corpus
+    # Forwarded from the step with id `check`; read by the update-baseline
+    # and summary jobs.
+    outputs:
+      passed: ${{ steps.check.outputs.passed }}
+      exit_code: ${{ steps.check.outputs.exit_code }}
+      summary: ${{ steps.check.outputs.summary }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup .NET
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '10.0.x'
+
+      # Fetches the artifact uploaded by validate-corpus into RESULTS_DIR.
+      - name: Download validation results
+        uses: actions/download-artifact@v4
+        with:
+          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
+          path: ${{ env.RESULTS_DIR }}
+
+      # This job has no separate restore step; the build is invoked without
+      # --no-restore.
+      - name: Build CLI
+        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release
+
+      # Runs the CLI regression check and translates its exit status into the
+      # step outputs exit_code, passed and summary. The step itself does not
+      # fail on a non-zero CLI status; the "Fail on regression" step does.
+      - name: Check regression gates
+        id: check
+        run: |
+          run_id="${{ needs.validate-corpus.outputs.run_id }}"
+          results_json="${{ env.RESULTS_DIR }}/${run_id}.json"
+          report_md="${{ env.RESULTS_DIR }}/regression-report-${run_id}.md"
+
+          echo "Checking regression against baseline: ${{ env.BASELINE_PATH }}"
+
+          # Record the CLI status without letting errexit abort the script,
+          # so the outputs below are always written.
+          status=0
+          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
+            groundtruth validate check \
+            --results "$results_json" \
+            --baseline "${{ env.BASELINE_PATH }}" \
+            --precision-threshold 0.01 \
+            --recall-threshold 0.01 \
+            --fn-rate-threshold 0.01 \
+            --determinism-threshold 1.0 \
+            --ttfrp-threshold 0.20 \
+            --output "$report_md" \
+            --format markdown || status=$?
+
+          # 0 -> gates passed, 1 -> regression, anything else -> tool error.
+          case "$status" in
+            0)
+              passed=true
+              summary="All regression gates passed"
+              ;;
+            1)
+              passed=false
+              summary="Regression detected - one or more gates failed"
+              ;;
+            *)
+              passed=false
+              summary="Error during regression check (exit code: $status)"
+              ;;
+          esac
+
+          {
+            echo "exit_code=$status"
+            echo "passed=$passed"
+            echo "summary=$summary"
+          } >> $GITHUB_OUTPUT
+
+      # Publishes the markdown report written by the check step; the artifact
+      # name carries the same run id as the validation results.
+      - name: Upload regression report
+        uses: actions/upload-artifact@v4
+        with:
+          name: regression-report-${{ needs.validate-corpus.outputs.run_id }}
+          path: ${{ env.RESULTS_DIR }}/regression-report-*.md
+          retention-days: 90
+
+      # On pull requests, posts the regression report as a comment, updating
+      # an earlier comment from a previous run when one is found. Placed
+      # before "Fail on regression" so the comment is posted for failing
+      # runs too.
+      - name: Post PR comment with regression report
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const reportPath = '${{ env.RESULTS_DIR }}/regression-report-${{ needs.validate-corpus.outputs.run_id }}.md';
+
+            // The heading doubles as the marker used below to recognise a
+            // comment written by an earlier run.
+            let report = '## Golden Corpus KPI Regression Check\n\n';
+
+            // Use the generated report when present; otherwise fall back to
+            // the one-line summary from the check step.
+            if (fs.existsSync(reportPath)) {
+              report += fs.readFileSync(reportPath, 'utf8');
+            } else {
+              report += '> Report file not found\n';
+              report += '\n**Status:** ${{ steps.check.outputs.summary }}\n';
+            }
+
+            // Find existing comment
+            // (listComments is called once, without pagination parameters.)
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            // NOTE(review): this only matches comments whose author has type
+            // 'Bot' - confirm the CI account is reported that way on this
+            // forge, otherwise a new comment is created on every run.
+            const botComment = comments.find(comment =>
+              comment.user.type === 'Bot' &&
+              comment.body.includes('Golden Corpus KPI Regression Check')
+            );
+
+            // Update in place when a previous comment exists, else create one.
+            if (botComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: botComment.id,
+                body: report
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: report
+              });
+            }
+
+      # Turns a failed gate into a failed job: emits the summary as an error
+      # annotation and exits with the status recorded by the check step.
+      - name: Fail on regression
+        if: steps.check.outputs.passed != 'true'
+        run: |
+          echo "::error::${{ steps.check.outputs.summary }}"
+          exit ${{ steps.check.outputs.exit_code }}
+
+  # Regenerates the stored baseline from the current results and commits it.
+  # Runs only when the regression check reported passed == 'true' AND either
+  # the manual `update_baseline` input is 'true' or this is a scheduled run
+  # on refs/heads/main.
+  update-baseline:
+    name: Update Baseline
+    runs-on: self-hosted
+    needs: [validate-corpus, check-regression]
+    if: |
+      always() &&
+      needs.check-regression.outputs.passed == 'true' &&
+      (github.event.inputs.update_baseline == 'true' ||
+       (github.event_name == 'schedule' && github.ref == 'refs/heads/main'))
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Presumably so the `git push` in the commit step is authenticated
+          # - confirm the token has write access on this forge.
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Setup .NET
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '10.0.x'
+
+      # Fetches the artifact uploaded by validate-corpus into RESULTS_DIR.
+      - name: Download validation results
+        uses: actions/download-artifact@v4
+        with:
+          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
+          path: ${{ env.RESULTS_DIR }}
+
+      - name: Build CLI
+        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release
+
+      # Must run BEFORE the update step: it copies the baseline that is about
+      # to be replaced. Running it afterwards would archive the new baseline
+      # and lose the previous one.
+      - name: Archive previous baseline
+        run: |
+          ARCHIVE_DIR="bench/baselines/archive"
+          mkdir -p "$ARCHIVE_DIR"
+
+          # Nothing to archive on the first run, when no baseline exists yet.
+          if [ -f "${{ env.BASELINE_PATH }}" ]; then
+            TIMESTAMP=$(date +%Y%m%d%H%M%S)
+            cp "${{ env.BASELINE_PATH }}" "$ARCHIVE_DIR/baseline-${TIMESTAMP}.json"
+          fi
+
+      # Writes the new baseline to BASELINE_PATH from the downloaded results,
+      # tagging it with the run id and the commit SHA.
+      - name: Update baseline
+        run: |
+          RESULTS_FILE="${{ env.RESULTS_DIR }}/${{ needs.validate-corpus.outputs.run_id }}.json"
+
+          echo "Updating baseline from: $RESULTS_FILE"
+
+          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
+            groundtruth baseline update \
+            --from-results "$RESULTS_FILE" \
+            --output "${{ env.BASELINE_PATH }}" \
+            --description "Auto-updated from nightly run ${{ needs.validate-corpus.outputs.run_id }}" \
+            --source "${{ github.sha }}"
+
+      # Commits the baseline file and the archive directory under a CI
+      # identity and pushes the commit.
+      - name: Commit baseline update
+        run: |
+          # Identity used for the commit created below.
+          git config user.name "Stella Ops CI"
+          git config user.email "ci@stella-ops.org"
+
+          git add "${{ env.BASELINE_PATH }}"
+          git add "bench/baselines/archive/"
+
+          # The message is one multi-line string: a subject with the run id,
+          # then the KPI values from validate-corpus and the source commit.
+          git commit -m "chore(bench): update golden corpus baseline from ${{ needs.validate-corpus.outputs.run_id }}
+
+          Precision: ${{ needs.validate-corpus.outputs.precision }}
+          Recall: ${{ needs.validate-corpus.outputs.recall }}
+          FN Rate: ${{ needs.validate-corpus.outputs.fn_rate }}
+          Determinism: ${{ needs.validate-corpus.outputs.determinism }}
+          TTFRP p95: ${{ needs.validate-corpus.outputs.ttfrp_p95 }}ms
+
+          Source: ${{ github.sha }}"
+
+          git push
+
+  # Writes a markdown overview of the run (KPI table plus regression verdict)
+  # to the job summary. `if: always()` keeps it running when the jobs it
+  # depends on did not succeed.
+  summary:
+    name: Workflow Summary
+    runs-on: self-hosted
+    needs: [validate-corpus, check-regression]
+    if: always()
+
+    steps:
+      - name: Generate summary
+        run: |
+          # Pick the verdict marker first, then emit the whole document
+          # through a single redirect.
+          if [ "${{ needs.check-regression.outputs.passed }}" == "true" ]; then
+            verdict_icon=":white_check_mark:"
+          else
+            verdict_icon=":x:"
+          fi
+
+          {
+            echo "## Golden Corpus Benchmark Summary"
+            echo ""
+            echo "| Metric | Value |"
+            echo "|--------|-------|"
+            echo "| Run ID | ${{ needs.validate-corpus.outputs.run_id }} |"
+            echo "| Precision | ${{ needs.validate-corpus.outputs.precision }} |"
+            echo "| Recall | ${{ needs.validate-corpus.outputs.recall }} |"
+            echo "| False Negative Rate | ${{ needs.validate-corpus.outputs.fn_rate }} |"
+            echo "| Deterministic Replay | ${{ needs.validate-corpus.outputs.determinism }} |"
+            echo "| TTFRP p95 | ${{ needs.validate-corpus.outputs.ttfrp_p95 }}ms |"
+            echo ""
+            echo "### Regression Check"
+            echo ""
+            echo "${verdict_icon} **${{ needs.check-regression.outputs.summary }}**"
+          } >> $GITHUB_STEP_SUMMARY