tests fixes and sprints work

This commit is contained in:
master
2026-01-22 19:08:46 +02:00
parent c32fff8f86
commit 726d70dc7f
881 changed files with 134434 additions and 6228 deletions

View File

@@ -58,7 +58,7 @@ dotnet nuget locals all --clear
dotnet nuget list source
# Restore with verbose logging
dotnet restore src/StellaOps.sln -v detailed
dotnet restore src/<Module>/StellaOps.<Module>.sln -v detailed
```
**In CI:**
@@ -66,7 +66,7 @@ dotnet restore src/StellaOps.sln -v detailed
- name: Restore with retry
run: |
for i in {1..3}; do
dotnet restore src/StellaOps.sln && break
dotnet restore src/<Module>/StellaOps.<Module>.sln && break
echo "Retry $i..."
sleep 30
done

View File

@@ -0,0 +1,358 @@
# -----------------------------------------------------------------------------
# golden-corpus-bench.yaml
# Sprint: SPRINT_20260121_036_BinaryIndex_golden_corpus_bundle_verification
# Task: GCB-005 - Implement CI regression gates for corpus KPIs
# Description: CI workflow for golden corpus benchmark and regression detection.
# -----------------------------------------------------------------------------
name: Golden Corpus Benchmark

# Triggers:
#   - push / pull_request against main that touch the matcher sources or the
#     corpus itself (push additionally watches this workflow file),
#   - a nightly schedule,
#   - manual dispatch with an optional corpus subset and baseline refresh.
on:
  push:
    branches:
      - main
    paths:
      - 'src/BinaryIndex/**'
      - 'src/Scanner/**'
      - 'datasets/golden-corpus/**'
      - '.gitea/workflows/golden-corpus-bench.yaml'
  pull_request:
    branches:
      - main
    paths:
      - 'src/BinaryIndex/**'
      - 'src/Scanner/**'
      - 'datasets/golden-corpus/**'
  schedule:
    # Nightly at 3 AM UTC
    - cron: '0 3 * * *'
  workflow_dispatch:
    inputs:
      corpus_subset:
        description: 'Corpus subset to validate (seed, extended, full)'
        required: false
        default: 'seed'
      update_baseline:
        description: 'Update baseline after successful run'
        required: false
        default: 'false'
        type: boolean

# Shared by every job. Environment values reach the steps as strings, so the
# .NET switches are quoted to make that explicit.
env:
  DOTNET_NOLOGO: 'true'
  DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
  DOTNET_CLI_TELEMETRY_OPTOUT: 'true'
  # Corpus location, baseline file and results directory (repo-relative).
  CORPUS_ROOT: datasets/golden-corpus
  BASELINE_PATH: bench/baselines/current.json
  RESULTS_DIR: bench/results
jobs:
# --- Job: validate-corpus ----------------------------------------------------
  # Builds the StellaOps CLI, runs `groundtruth validate run` and publishes the
  # results JSON as an artifact. The KPI values read from that JSON are exposed
  # as job outputs for the downstream jobs.
  validate-corpus:
    name: Validate Golden Corpus
    runs-on: self-hosted
    timeout-minutes: 120
    outputs:
      run_id: ${{ steps.validate.outputs.run_id }}
      precision: ${{ steps.validate.outputs.precision }}
      recall: ${{ steps.validate.outputs.recall }}
      fn_rate: ${{ steps.validate.outputs.fn_rate }}
      determinism: ${{ steps.validate.outputs.determinism }}
      ttfrp_p95: ${{ steps.validate.outputs.ttfrp_p95 }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Restore CLI
        run: dotnet restore src/Cli/StellaOps.Cli/StellaOps.Cli.csproj

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release --no-restore

      - name: Determine corpus subset
        id: corpus
        # Event data is handed over through environment variables instead of
        # being expanded into the script text, so a crafted dispatch input
        # cannot inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_SUBSET: ${{ github.event.inputs.corpus_subset || 'seed' }}
        run: |
          SUBSET="$REQUESTED_SUBSET"
          if [ "$EVENT_NAME" = "schedule" ]; then
            # Use extended corpus for nightly, full corpus weekly.
            # The cron expression is evaluated in UTC, so the weekday is taken
            # in UTC as well rather than in the runner's local time zone.
            DAY_OF_WEEK=$(date -u +%u)
            if [ "$DAY_OF_WEEK" = "7" ]; then
              SUBSET="full"
            else
              SUBSET="extended"
            fi
          fi

          # Only the subsets named in the input description are accepted.
          case "$SUBSET" in
            seed|extended|full) ;;
            *)
              echo "::error::Unknown corpus subset '$SUBSET' (expected seed, extended or full)"
              exit 1
              ;;
          esac

          echo "subset=$SUBSET" >> "$GITHUB_OUTPUT"
          echo "path=$CORPUS_ROOT/$SUBSET/" >> "$GITHUB_OUTPUT"

      - name: Run corpus validation
        id: validate
        env:
          CORPUS_PATH: ${{ steps.corpus.outputs.path }}
        run: |
          RUN_ID=$(date +%Y%m%d%H%M%S)
          RESULTS_FILE="$RESULTS_DIR/$RUN_ID.json"
          mkdir -p "$RESULTS_DIR"

          # Published first so the summary job can show the run id even when
          # the validation itself fails.
          echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

          echo "Starting validation run: $RUN_ID"
          echo "Corpus: $CORPUS_PATH"
          echo "Results: $RESULTS_FILE"

          # NOTE(review): the corpus path is only logged; it is not passed to
          # the CLI below. If `groundtruth validate run` takes a corpus
          # argument, add it here - confirm against the CLI's options.
          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth validate run \
            --matcher semantic-diffing \
            --output "$RESULTS_FILE" \
            --verbose

          # Downstream jobs download the results by run id, so a missing
          # results file is a hard failure rather than a silent skip.
          if [ ! -f "$RESULTS_FILE" ]; then
            echo "::error::Validation produced no results file at $RESULTS_FILE"
            exit 1
          fi
          echo "results_file=$RESULTS_FILE" >> "$GITHUB_OUTPUT"

          # Read one KPI from the results JSON. A field that is absent reads
          # as 0; if jq is unavailable or the file cannot be parsed the value
          # is reported as "n/a" instead of an invented number.
          kpi() {
            local value
            value=$(jq -r "$1 // 0" "$RESULTS_FILE" 2>/dev/null) || value="n/a"
            echo "$value"
          }

          {
            echo "precision=$(kpi '.precision')"
            echo "recall=$(kpi '.recall')"
            echo "fn_rate=$(kpi '.falseNegativeRate')"
            echo "determinism=$(kpi '.deterministicReplayRate')"
            echo "ttfrp_p95=$(kpi '.ttfrpP95Ms')"
          } >> "$GITHUB_OUTPUT"

      - name: Upload validation results
        uses: actions/upload-artifact@v4
        with:
          name: validation-results-${{ steps.validate.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}/*.json
          # The regression job cannot run without this artifact.
          if-no-files-found: error
          retention-days: 90
# --- Job: check-regression ---------------------------------------------------
  # Downloads the validation results, runs `groundtruth validate check` against
  # the committed baseline, publishes the markdown report (artifact and PR
  # comment) and finally fails the job when a gate did not pass.
  check-regression:
    name: Check KPI Regression
    runs-on: self-hosted
    needs: validate-corpus
    outputs:
      passed: ${{ steps.check.outputs.passed }}
      exit_code: ${{ steps.check.outputs.exit_code }}
      summary: ${{ steps.check.outputs.summary }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Download validation results
        uses: actions/download-artifact@v4
        with:
          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release

      - name: Check regression gates
        id: check
        env:
          RUN_ID: ${{ needs.validate-corpus.outputs.run_id }}
        run: |
          RESULTS_FILE="$RESULTS_DIR/$RUN_ID.json"
          REPORT_FILE="$RESULTS_DIR/regression-report-$RUN_ID.md"

          echo "Checking regression against baseline: $BASELINE_PATH"

          # Run regression check. Errexit is suspended so the exit code can be
          # captured and turned into outputs; the job is failed by the last
          # step, after the report has been published.
          set +e
          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth validate check \
            --results "$RESULTS_FILE" \
            --baseline "$BASELINE_PATH" \
            --precision-threshold 0.01 \
            --recall-threshold 0.01 \
            --fn-rate-threshold 0.01 \
            --determinism-threshold 1.0 \
            --ttfrp-threshold 0.20 \
            --output "$REPORT_FILE" \
            --format markdown
          EXIT_CODE=$?
          set -e

          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"

          # 0 = all gates passed, 1 = regression, anything else = tool error.
          if [ "$EXIT_CODE" -eq 0 ]; then
            echo "passed=true" >> "$GITHUB_OUTPUT"
            echo "summary=All regression gates passed" >> "$GITHUB_OUTPUT"
          elif [ "$EXIT_CODE" -eq 1 ]; then
            echo "passed=false" >> "$GITHUB_OUTPUT"
            echo "summary=Regression detected - one or more gates failed" >> "$GITHUB_OUTPUT"
          else
            echo "passed=false" >> "$GITHUB_OUTPUT"
            echo "summary=Error during regression check (exit code: $EXIT_CODE)" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload regression report
        uses: actions/upload-artifact@v4
        with:
          name: regression-report-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}/regression-report-*.md
          retention-days: 90

      - name: Post PR comment with regression report
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        # Values are read from process.env inside the script rather than being
        # expanded into the JavaScript source.
        env:
          RUN_ID: ${{ needs.validate-corpus.outputs.run_id }}
          SUMMARY: ${{ steps.check.outputs.summary }}
        with:
          script: |
            const fs = require('fs');

            const { RESULTS_DIR, RUN_ID, SUMMARY } = process.env;
            const marker = 'Golden Corpus KPI Regression Check';
            const reportPath = `${RESULTS_DIR}/regression-report-${RUN_ID}.md`;

            let report = `## ${marker}\n\n`;
            if (fs.existsSync(reportPath)) {
              report += fs.readFileSync(reportPath, 'utf8');
            } else {
              report += '> Report file not found\n';
              report += `\n**Status:** ${SUMMARY}\n`;
            }

            // Find existing comment. All pages are fetched: a single
            // listComments call returns only the first page, so on a busy PR
            // the earlier bot comment would be missed and a duplicate posted.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });

            // user/body are guarded because either may be absent on a comment.
            const botComment = comments.find(comment =>
              comment.user?.type === 'Bot' &&
              comment.body?.includes(marker)
            );

            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: report
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: report
              });
            }

      - name: Fail on regression
        if: steps.check.outputs.passed != 'true'
        env:
          SUMMARY: ${{ steps.check.outputs.summary }}
          EXIT_CODE: ${{ steps.check.outputs.exit_code }}
        run: |
          echo "::error::$SUMMARY"
          # Propagate the checker's exit code; fall back to 1 if it is unset.
          exit "${EXIT_CODE:-1}"
# --- Job: update-baseline ----------------------------------------------------
  # Replaces the committed baseline with the KPIs of this run and pushes the
  # change. Runs only when the regression gates passed and either the manual
  # `update_baseline` input is set or the run is the scheduled one on main.
  update-baseline:
    name: Update Baseline
    runs-on: self-hosted
    needs: [validate-corpus, check-regression]
    if: |
      always() &&
      needs.check-regression.outputs.passed == 'true' &&
      (github.event.inputs.update_baseline == 'true' ||
       (github.event_name == 'schedule' && github.ref == 'refs/heads/main'))
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'

      - name: Download validation results
        uses: actions/download-artifact@v4
        with:
          name: validation-results-${{ needs.validate-corpus.outputs.run_id }}
          path: ${{ env.RESULTS_DIR }}

      - name: Build CLI
        run: dotnet build src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release

      # Must run before "Update baseline": once the baseline file has been
      # overwritten, copying it would archive the new baseline and the
      # previous one would be lost.
      - name: Archive previous baseline
        run: |
          ARCHIVE_DIR="bench/baselines/archive"
          mkdir -p "$ARCHIVE_DIR"

          if [ -f "$BASELINE_PATH" ]; then
            TIMESTAMP=$(date +%Y%m%d%H%M%S)
            cp "$BASELINE_PATH" "$ARCHIVE_DIR/baseline-${TIMESTAMP}.json"
          fi

      - name: Update baseline
        env:
          RUN_ID: ${{ needs.validate-corpus.outputs.run_id }}
          SOURCE_SHA: ${{ github.sha }}
        run: |
          RESULTS_FILE="$RESULTS_DIR/$RUN_ID.json"

          echo "Updating baseline from: $RESULTS_FILE"

          dotnet run --project src/Cli/StellaOps.Cli/StellaOps.Cli.csproj -c Release -- \
            groundtruth baseline update \
            --from-results "$RESULTS_FILE" \
            --output "$BASELINE_PATH" \
            --description "Auto-updated from nightly run $RUN_ID" \
            --source "$SOURCE_SHA"

      - name: Commit baseline update
        # KPI values reach the script as environment variables so their
        # content is never parsed as shell syntax.
        env:
          RUN_ID: ${{ needs.validate-corpus.outputs.run_id }}
          PRECISION: ${{ needs.validate-corpus.outputs.precision }}
          RECALL: ${{ needs.validate-corpus.outputs.recall }}
          FN_RATE: ${{ needs.validate-corpus.outputs.fn_rate }}
          DETERMINISM: ${{ needs.validate-corpus.outputs.determinism }}
          TTFRP_P95: ${{ needs.validate-corpus.outputs.ttfrp_p95 }}
          SOURCE_SHA: ${{ github.sha }}
        run: |
          git config user.name "Stella Ops CI"
          git config user.email "ci@stella-ops.org"

          git add "$BASELINE_PATH"
          git add "bench/baselines/archive/"

          # `git commit` exits non-zero when nothing is staged; treat an
          # unchanged baseline as success instead of failing the job.
          if git diff --cached --quiet; then
            echo "Baseline unchanged - nothing to commit"
            exit 0
          fi

          # First -m is the subject, second -m the body paragraph.
          git commit \
            -m "chore(bench): update golden corpus baseline from $RUN_ID" \
            -m "Precision: $PRECISION
          Recall: $RECALL
          FN Rate: $FN_RATE
          Determinism: $DETERMINISM
          TTFRP p95: ${TTFRP_P95}ms
          Source: $SOURCE_SHA"

          git push
# --- Job: summary ------------------------------------------------------------
  # Always runs, even when an upstream job failed, and appends a KPI table plus
  # the regression verdict to the run's step summary.
  summary:
    name: Workflow Summary
    runs-on: self-hosted
    needs: [validate-corpus, check-regression]
    if: always()
    steps:
      - name: Generate summary
        run: |
          # Pick the status icon from the regression verdict.
          if [ "${{ needs.check-regression.outputs.passed }}" = "true" ]; then
            STATUS_ICON=":white_check_mark:"
          else
            STATUS_ICON=":x:"
          fi

          # One grouped redirect instead of appending line by line.
          {
            echo "## Golden Corpus Benchmark Summary"
            echo ""
            echo "| Metric | Value |"
            echo "|--------|-------|"
            echo "| Run ID | ${{ needs.validate-corpus.outputs.run_id }} |"
            echo "| Precision | ${{ needs.validate-corpus.outputs.precision }} |"
            echo "| Recall | ${{ needs.validate-corpus.outputs.recall }} |"
            echo "| False Negative Rate | ${{ needs.validate-corpus.outputs.fn_rate }} |"
            echo "| Deterministic Replay | ${{ needs.validate-corpus.outputs.determinism }} |"
            echo "| TTFRP p95 | ${{ needs.validate-corpus.outputs.ttfrp_p95 }}ms |"
            echo ""
            echo "### Regression Check"
            echo ""
            echo "$STATUS_ICON **${{ needs.check-regression.outputs.summary }}**"
          } >> "$GITHUB_STEP_SUMMARY"