# Competitive benchmark workflow.
#
# Installs pinned versions of Trivy, Grype and Syft, builds the
# StellaOps.Scanner.Benchmark library, and uploads the contents of
# bench/results/ as a build artifact. Most scan/metric commands are still
# commented-out placeholders; the only file generated today is
# bench/results/summary.json.
name: Benchmark vs Competitors

on:
  schedule:
    # Run weekly on Sunday at 00:00 UTC
    - cron: '0 0 * * 0'
  workflow_dispatch:
    # NOTE(review): neither input is referenced by any step below yet.
    inputs:
      competitors:
        description: 'Comma-separated list of competitors to benchmark against'
        required: false
        default: 'trivy,grype'
      corpus_size:
        description: 'Number of images from corpus to test'
        required: false
        default: '50'
  push:
    paths:
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/**'
      - 'bench/competitors/**'

# Tool versions are quoted so they are always treated as strings.
env:
  DOTNET_VERSION: '10.0.x'
  TRIVY_VERSION: '0.50.1'
  GRYPE_VERSION: '0.74.0'
  SYFT_VERSION: '0.100.0'

jobs:
  benchmark:
    name: Run Competitive Benchmark
    runs-on: ubuntu-latest
    timeout-minutes: 60

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: ${{ env.DOTNET_VERSION }}

      # Each installer is invoked with the version pinned in `env` above and
      # followed by a version command so the installed build shows in the log.
      - name: Install Trivy
        run: |
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin v${{ env.TRIVY_VERSION }}
          trivy --version

      - name: Install Grype
        run: |
          curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh \
            | sh -s -- -b /usr/local/bin v${{ env.GRYPE_VERSION }}
          grype version

      # NOTE(review): Syft is installed but no later step runs it.
      - name: Install Syft
        run: |
          curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh \
            | sh -s -- -b /usr/local/bin v${{ env.SYFT_VERSION }}
          syft version

      - name: Build benchmark library
        run: |
          dotnet build src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/StellaOps.Scanner.Benchmark.csproj -c Release

      # Publishes the manifest path as the step output `corpus_path`.
      - name: Load corpus manifest
        id: corpus
        run: |
          echo "corpus_path=bench/competitors/corpus/corpus-manifest.json" >> "$GITHUB_OUTPUT"

      - name: Run Stella Ops scanner
        run: |
          echo "Running Stella Ops scanner on corpus..."
          # TODO: Implement actual scan command
          # stella scan --corpus ${{ steps.corpus.outputs.corpus_path }} --output bench/results/stellaops.json

      # Currently only creates the output directory; no scan is executed.
      - name: Run Trivy on corpus
        run: |
          echo "Running Trivy on corpus images..."
          # Process each image in corpus
          mkdir -p bench/results/trivy

      # Currently only creates the output directory; no scan is executed.
      - name: Run Grype on corpus
        run: |
          echo "Running Grype on corpus images..."
          mkdir -p bench/results/grype

      - name: Calculate metrics
        run: |
          echo "Calculating precision/recall/F1 metrics..."
          # dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
          #   --calculate-metrics \
          #   --ground-truth ${{ steps.corpus.outputs.corpus_path }} \
          #   --results bench/results/ \
          #   --output bench/results/metrics.json

      # Writes a placeholder summary. The heredoc delimiter is deliberately
      # unquoted so the shell expands $(date ...) into a real UTC timestamp;
      # a quoted delimiter ('EOF') would write the literal text instead.
      - name: Generate comparison report
        run: |
          echo "Generating comparison report..."
          mkdir -p bench/results
          cat > bench/results/summary.json << EOF
          {
            "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
            "competitors": ["trivy", "grype", "syft"],
            "status": "pending_implementation"
          }
          EOF

      # The artifact name includes the run id; the verify-claims job downloads
      # it by the same name.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: bench/results/
          retention-days: 90

      - name: Update claims index
        if: github.ref == 'refs/heads/main'
        run: |
          echo "Updating claims index with new evidence..."
          # dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
          #   --update-claims \
          #   --metrics bench/results/metrics.json \
          #   --output docs/claims-index.md

      # NOTE(review): this workflow declares no `pull_request` trigger, so this
      # condition is never true as written. Add the trigger (and the token
      # permissions needed to comment) if PR comments are wanted.
      - name: Comment on PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            // Fall back to a stub object when no metrics file was produced.
            const metrics = fs.existsSync('bench/results/metrics.json')
              ? JSON.parse(fs.readFileSync('bench/results/metrics.json', 'utf8'))
              : { status: 'pending' };

            // `??` (not `||`) so a genuine metric value of 0 is shown as 0
            // rather than being replaced by 'N/A'.
            const body = `## Benchmark Results

            | Tool | Precision | Recall | F1 Score |
            |------|-----------|--------|----------|
            | Stella Ops | ${metrics.stellaops?.precision ?? 'N/A'} | ${metrics.stellaops?.recall ?? 'N/A'} | ${metrics.stellaops?.f1 ?? 'N/A'} |
            | Trivy | ${metrics.trivy?.precision ?? 'N/A'} | ${metrics.trivy?.recall ?? 'N/A'} | ${metrics.trivy?.f1 ?? 'N/A'} |
            | Grype | ${metrics.grype?.precision ?? 'N/A'} | ${metrics.grype?.recall ?? 'N/A'} | ${metrics.grype?.f1 ?? 'N/A'} |

            [Full report](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID})
            `;

            // Await the call so a failed API request fails the step instead
            // of surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  # Runs after `benchmark`, only for the main branch, against the artifact
  # uploaded by that job. Both verification steps are placeholders.
  verify-claims:
    name: Verify Claims
    runs-on: ubuntu-latest
    needs: benchmark
    if: github.ref == 'refs/heads/main'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: bench/results/

      - name: Verify all claims
        run: |
          echo "Verifying all claims against new evidence..."
          # stella benchmark verify --all

      - name: Report claim status
        run: |
          echo "Generating claim verification report..."
          # Output claim status summary