Files
git.stella-ops.org/.gitea/workflows/benchmark-vs-competitors.yml
StellaOps Bot 5146204f1b feat: add security sink detection patterns for JavaScript/TypeScript
- Introduced `sink-detect.js` with various security sink detection patterns categorized by type (e.g., command injection, SQL injection, file operations).
- Implemented functions to build a lookup map for fast sink detection and to match sink calls against known patterns.
- Added `package-lock.json` for dependency management.
2025-12-22 23:21:21 +02:00

174 lines
5.8 KiB
YAML

name: Benchmark vs Competitors
on:
schedule:
# Run weekly on Sunday at 00:00 UTC
- cron: '0 0 * * 0'
workflow_dispatch:
inputs:
competitors:
description: 'Comma-separated list of competitors to benchmark against'
required: false
default: 'trivy,grype'
corpus_size:
description: 'Number of images from corpus to test'
required: false
default: '50'
push:
paths:
- 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/**'
- 'bench/competitors/**'
env:
DOTNET_VERSION: '10.0.x'
TRIVY_VERSION: '0.50.1'
GRYPE_VERSION: '0.74.0'
SYFT_VERSION: '0.100.0'
jobs:
benchmark:
name: Run Competitive Benchmark
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
dotnet-version: ${{ env.DOTNET_VERSION }}
- name: Install Trivy
run: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v${{ env.TRIVY_VERSION }}
trivy --version
- name: Install Grype
run: |
curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin v${{ env.GRYPE_VERSION }}
grype version
- name: Install Syft
run: |
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin v${{ env.SYFT_VERSION }}
syft version
- name: Build benchmark library
run: |
dotnet build src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/StellaOps.Scanner.Benchmark.csproj -c Release
- name: Load corpus manifest
id: corpus
run: |
echo "corpus_path=bench/competitors/corpus/corpus-manifest.json" >> $GITHUB_OUTPUT
- name: Run Stella Ops scanner
run: |
echo "Running Stella Ops scanner on corpus..."
# TODO: Implement actual scan command
# stella scan --corpus ${{ steps.corpus.outputs.corpus_path }} --output bench/results/stellaops.json
- name: Run Trivy on corpus
run: |
echo "Running Trivy on corpus images..."
# Process each image in corpus
mkdir -p bench/results/trivy
- name: Run Grype on corpus
run: |
echo "Running Grype on corpus images..."
mkdir -p bench/results/grype
- name: Calculate metrics
run: |
echo "Calculating precision/recall/F1 metrics..."
# dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
# --calculate-metrics \
# --ground-truth ${{ steps.corpus.outputs.corpus_path }} \
# --results bench/results/ \
# --output bench/results/metrics.json
- name: Generate comparison report
run: |
echo "Generating comparison report..."
mkdir -p bench/results
cat > bench/results/summary.json << 'EOF'
{
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"competitors": ["trivy", "grype", "syft"],
"status": "pending_implementation"
}
EOF
- name: Upload benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ github.run_id }}
path: bench/results/
retention-days: 90
- name: Update claims index
if: github.ref == 'refs/heads/main'
run: |
echo "Updating claims index with new evidence..."
# dotnet run --project src/Scanner/__Libraries/StellaOps.Scanner.Benchmark \
# --update-claims \
# --metrics bench/results/metrics.json \
# --output docs/claims-index.md
- name: Comment on PR
if: github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const metrics = fs.existsSync('bench/results/metrics.json')
? JSON.parse(fs.readFileSync('bench/results/metrics.json', 'utf8'))
: { status: 'pending' };
const body = `## Benchmark Results
| Tool | Precision | Recall | F1 Score |
|------|-----------|--------|----------|
| Stella Ops | ${metrics.stellaops?.precision || 'N/A'} | ${metrics.stellaops?.recall || 'N/A'} | ${metrics.stellaops?.f1 || 'N/A'} |
| Trivy | ${metrics.trivy?.precision || 'N/A'} | ${metrics.trivy?.recall || 'N/A'} | ${metrics.trivy?.f1 || 'N/A'} |
| Grype | ${metrics.grype?.precision || 'N/A'} | ${metrics.grype?.recall || 'N/A'} | ${metrics.grype?.f1 || 'N/A'} |
[Full report](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID})
`;
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: body
});
verify-claims:
name: Verify Claims
runs-on: ubuntu-latest
needs: benchmark
if: github.ref == 'refs/heads/main'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download benchmark results
uses: actions/download-artifact@v4
with:
name: benchmark-results-${{ github.run_id }}
path: bench/results/
- name: Verify all claims
run: |
echo "Verifying all claims against new evidence..."
# stella benchmark verify --all
- name: Report claim status
run: |
echo "Generating claim verification report..."
# Output claim status summary