# git.stella-ops.org/.gitea/workflows/benchmark-vs-competitors.yml

name: Benchmark vs Competitors

# Triggers:
#   - schedule:          weekly baseline run.
#   - workflow_dispatch: manual run with optional inputs.
#   - push/pull_request: changes to the benchmark library or to the
#                        competitor bench assets.
on:
  schedule:
    # Run weekly on Sunday at 00:00 UTC
    - cron: '0 0 * * 0'
  workflow_dispatch:
    # NOTE(review): no step in this file reads `inputs.competitors` or
    # `inputs.corpus_size` yet; they are accepted but have no effect.
    inputs:
      competitors:
        description: 'Comma-separated list of competitors to benchmark against'
        required: false
        default: 'trivy,grype'
      corpus_size:
        description: 'Number of images from corpus to test'
        required: false
        default: '50'
  push:
    paths:
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/**'
      - 'bench/competitors/**'
  # The "Comment on PR" step is gated on
  # `github.event_name == 'pull_request'`; without this trigger that step
  # could never run. Same path filter as `push`.
  pull_request:
    paths:
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/**'
      - 'bench/competitors/**'

env:
  # SDK version requested by the "Setup .NET" step.
  DOTNET_VERSION: '10.0.x'

  # Pinned scanner releases. Each install step prefixes the value with "v"
  # when passing it to the upstream install script.
  GRYPE_VERSION: '0.74.0'
  SYFT_VERSION: '0.100.0'
  TRIVY_VERSION: '0.50.1'

jobs:
  # Installs the competitor scanners, builds the benchmark library and
  # publishes bench/results/ as a workflow artifact.
  benchmark:
    name: Run Competitive Benchmark
    runs-on: ubuntu-latest
    timeout-minutes: 60

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The SDK version comes from the workflow-level DOTNET_VERSION variable.
      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: ${{ env.DOTNET_VERSION }}

      # Each install step pipes an upstream install script into `sh`.
      # `shell: bash` is set explicitly so the script runs with `-o pipefail`:
      # a failed or truncated download then fails the step at the pipeline
      # instead of being masked by the exit status of `sh`.
      # NOTE(review): the install scripts are fetched from the mutable `main`
      # branch even though the tool versions are pinned; consider pinning the
      # script to a tag or commit as well.
      - name: Install Trivy
        shell: bash
        run: |
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v${{ env.TRIVY_VERSION }}
          trivy --version

      - name: Install Grype
        shell: bash
        run: |
          curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin v${{ env.GRYPE_VERSION }}
          grype version

      - name: Install Syft
        shell: bash
        run: |
          curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin v${{ env.SYFT_VERSION }}
          syft version

      # Compiles the benchmark project in Release configuration. The folded
      # scalar joins the lines below into a single command line.
      - name: Build benchmark library
        run: >-
          dotnet build
          src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/StellaOps.Scanner.Benchmark.csproj
          -c Release

      # Publishes the corpus manifest location as the step output
      # `corpus_path` (read elsewhere as steps.corpus.outputs.corpus_path).
      # The redirect target is quoted so a runner path containing spaces or
      # glob characters cannot break the write.
      - name: Load corpus manifest
        id: corpus
        run: |
          echo "corpus_path=bench/competitors/corpus/corpus-manifest.json" >> "$GITHUB_OUTPUT"

      # Placeholder: only logs a message until the real scan command exists.
      - name: Run Stella Ops scanner
        run: |
          echo "Running Stella Ops scanner on corpus..."
          # TODO: wire in the actual scan invocation, for example:
          #   stella scan --corpus ${{ steps.corpus.outputs.corpus_path }} --output bench/results/stellaops.json

      # Placeholder: logs a message and creates the Trivy results directory;
      # no image is scanned yet.
      - name: Run Trivy on corpus
        run: |
          echo "Running Trivy on corpus images..."
          # Per-image processing of the corpus belongs here.
          mkdir -p bench/results/trivy

      # Placeholder: logs a message and creates the Grype results directory;
      # no image is scanned yet.
      - name: Run Grype on corpus
        run: |
          echo "Running Grype on corpus images..."
          mkdir -p bench/results/grype

      # Placeholder: only logs a message. The intended metrics command is kept
      # below as a shell comment until it is implemented.
      - name: Calculate metrics
        run: |
          echo "Calculating precision/recall/F1 metrics..."
          # Intended invocation (not yet enabled):
          #   dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
          #     --calculate-metrics \
          #     --ground-truth ${{ steps.corpus.outputs.corpus_path }} \
          #     --results bench/results/ \
          #     --output bench/results/metrics.json

      # Writes a stub bench/results/summary.json containing the UTC time of
      # the run, a fixed competitor list and a "pending_implementation" status.
      - name: Generate comparison report
        run: |
          echo "Generating comparison report..."
          mkdir -p bench/results
          timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
          # The heredoc delimiter is intentionally unquoted so ${timestamp} is
          # expanded. With a quoted delimiter ('EOF') the shell performs no
          # expansion and the literal text would be written to the JSON file.
          cat > bench/results/summary.json << EOF
          {
            "timestamp": "${timestamp}",
            "competitors": ["trivy", "grype", "syft"],
            "status": "pending_implementation"
          }
          EOF

      # Publishes everything under bench/results/ as an artifact. The artifact
      # name embeds the run id; the verify-claims job downloads it by the same
      # name.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: bench/results/
          retention-days: 90

      # Runs only for the main branch. Placeholder: logs a message; the
      # intended command is kept below as a shell comment.
      - name: Update claims index
        if: github.ref == 'refs/heads/main'
        run: |
          echo "Updating claims index with new evidence..."
          # Intended invocation (not yet enabled):
          #   dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
          #     --update-claims \
          #     --metrics bench/results/metrics.json \
          #     --output docs/claims-index.md

      # Posts a Markdown table of precision/recall/F1 per tool on the pull
      # request. Values come from bench/results/metrics.json when that file
      # exists; otherwise every cell is rendered as N/A.
      - name: Comment on PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            const metricsPath = 'bench/results/metrics.json';
            const metrics = fs.existsSync(metricsPath)
              ? JSON.parse(fs.readFileSync(metricsPath, 'utf8'))
              : { status: 'pending' };

            // `??` (not `||`) so that a legitimate score of 0 is shown
            // instead of being replaced by N/A.
            const cell = (value) => value ?? 'N/A';

            // One table row for the tool stored under `key` in the metrics.
            const row = (label, key) => {
              const m = metrics?.[key];
              return `| ${label} | ${cell(m?.precision)} | ${cell(m?.recall)} | ${cell(m?.f1)} |`;
            };

            const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;

            const body = [
              '## Benchmark Results',
              '',
              '| Tool | Precision | Recall | F1 Score |',
              '|------|-----------|--------|----------|',
              row('Stella Ops', 'stellaops'),
              row('Trivy', 'trivy'),
              row('Grype', 'grype'),
              '',
              `[Full report](${runUrl})`,
              '',
            ].join('\n');

            // Awaited so that an API failure fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  # Runs after the benchmark job, and only for the main branch. Downloads the
  # benchmark artifact; the verification and reporting steps are placeholders
  # that only log a message.
  verify-claims:
    name: Verify Claims
    runs-on: ubuntu-latest
    needs: benchmark
    if: github.ref == 'refs/heads/main'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Artifact name must match the one used by the benchmark job's upload.
      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: bench/results/

      - name: Verify all claims
        run: |
          echo "Verifying all claims against new evidence..."
          # Intended invocation (not yet enabled):
          #   stella benchmark verify --all

      - name: Report claim status
        run: |
          echo "Generating claim verification report..."
          # TODO: print the claim status summary here.