- Add RpmVersionComparer for RPM version comparison with epoch, version, and release handling. - Introduce DebianVersion for parsing Debian EVR (Epoch:Version-Release) strings. - Create ApkVersion for parsing Alpine APK version strings with suffix support. - Define IVersionComparator interface for version comparison with proof-line generation. - Implement VersionComparisonResult struct to encapsulate comparison results and proof lines. - Add tests for Debian and RPM version comparers to ensure correct functionality and edge case handling. - Create project files for the version comparison library and its tests.
445 lines
13 KiB
Markdown
445 lines
13 KiB
Markdown
# Benchmark Module Architecture
|
|
|
|
## Overview
|
|
|
|
The Benchmark module provides infrastructure for validating and demonstrating Stella Ops' competitive advantages through automated comparison against other container security scanners (Trivy, Grype, Syft, etc.).
|
|
|
|
**Module Path**: `src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/`
|
|
**Status**: PLANNED (Sprint 7000.0001.0001)
|
|
|
|
---
|
|
|
|
## Mission
|
|
|
|
Establish verifiable, reproducible benchmarks that:

1. Validate competitive claims with evidence
2. Detect regressions in accuracy or performance
3. Generate marketing-ready comparison materials
4. Provide a ground-truth corpus for testing
|
|
|
|
---
|
|
|
|
## Architecture
|
|
|
|
```
┌─────────────────────────────────────────────────────────────────┐
│                        Benchmark Module                         │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐          │
│  │   Corpus    │    │   Harness   │    │   Metrics   │          │
│  │   Manager   │───▶│   Runner    │───▶│ Calculator  │          │
│  └─────────────┘    └─────────────┘    └─────────────┘          │
│         │                  │                  │                 │
│         │                  │                  │                 │
│         ▼                  ▼                  ▼                 │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐          │
│  │Ground Truth │    │ Competitor  │    │   Claims    │          │
│  │  Manifest   │    │  Adapters   │    │    Index    │          │
│  └─────────────┘    └─────────────┘    └─────────────┘          │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
```
|
|
|
|
---
|
|
|
|
## Components
|
|
|
|
### 1. Corpus Manager
|
|
|
|
**Namespace**: `StellaOps.Scanner.Benchmark.Corpus`
|
|
|
|
Manages the ground-truth corpus of container images with known vulnerabilities.
|
|
|
|
```csharp
|
|
public interface ICorpusManager
|
|
{
|
|
Task<Corpus> LoadCorpusAsync(string corpusPath, CancellationToken ct);
|
|
Task<CorpusImage> GetImageAsync(string digest, CancellationToken ct);
|
|
Task<GroundTruth> GetGroundTruthAsync(string digest, CancellationToken ct);
|
|
}
|
|
|
|
public record Corpus(
|
|
string Version,
|
|
DateTimeOffset CreatedAt,
|
|
ImmutableArray<CorpusImage> Images
|
|
);
|
|
|
|
public record CorpusImage(
|
|
string Digest,
|
|
string Name,
|
|
string Tag,
|
|
CorpusCategory Category,
|
|
GroundTruth GroundTruth
|
|
);
|
|
|
|
public record GroundTruth(
|
|
ImmutableArray<string> TruePositives,
|
|
ImmutableArray<string> KnownFalsePositives,
|
|
ImmutableArray<string> Notes
|
|
);
|
|
|
|
public enum CorpusCategory
|
|
{
|
|
BaseOS, // Alpine, Debian, Ubuntu, RHEL
|
|
ApplicationNode, // Node.js applications
|
|
ApplicationPython,// Python applications
|
|
ApplicationJava, // Java applications
|
|
ApplicationDotNet,// .NET applications
|
|
BackportScenario, // Known backported fixes
|
|
Unreachable // Known unreachable vulns
|
|
}
|
|
```
|
|
|
|
### 2. Harness Runner
|
|
|
|
**Namespace**: `StellaOps.Scanner.Benchmark.Harness`
|
|
|
|
Executes scans using Stella Ops and competitor tools.
|
|
|
|
```csharp
|
|
public interface IHarnessRunner
|
|
{
|
|
Task<BenchmarkRun> RunAsync(
|
|
Corpus corpus,
|
|
ImmutableArray<ITool> tools,
|
|
BenchmarkOptions options,
|
|
CancellationToken ct
|
|
);
|
|
}
|
|
|
|
public interface ITool
|
|
{
|
|
string Name { get; }
|
|
string Version { get; }
|
|
Task<ToolResult> ScanAsync(string imageRef, CancellationToken ct);
|
|
}
|
|
|
|
public record BenchmarkRun(
|
|
string RunId,
|
|
DateTimeOffset StartedAt,
|
|
DateTimeOffset CompletedAt,
|
|
ImmutableArray<ToolResult> Results
|
|
);
|
|
|
|
public record ToolResult(
|
|
string ToolName,
|
|
string ToolVersion,
|
|
string ImageDigest,
|
|
ImmutableArray<NormalizedFinding> Findings,
|
|
TimeSpan Duration
|
|
);
|
|
```
|
|
|
|
### 3. Competitor Adapters
|
|
|
|
**Namespace**: `StellaOps.Scanner.Benchmark.Adapters`
|
|
|
|
Normalizes output from competitor tools.
|
|
|
|
```csharp
|
|
public interface ICompetitorAdapter : ITool
|
|
{
|
|
Task<ImmutableArray<NormalizedFinding>> ParseOutputAsync(
|
|
string output,
|
|
CancellationToken ct
|
|
);
|
|
}
|
|
|
|
// Implementations
|
|
public class TrivyAdapter : ICompetitorAdapter { }
|
|
public class GrypeAdapter : ICompetitorAdapter { }
|
|
public class SyftAdapter : ICompetitorAdapter { }
|
|
public class StellaOpsAdapter : ICompetitorAdapter { }
|
|
```
|
|
|
|
### 4. Metrics Calculator
|
|
|
|
**Namespace**: `StellaOps.Scanner.Benchmark.Metrics`
|
|
|
|
Calculates precision, recall, F1, and other metrics.
|
|
|
|
```csharp
|
|
public interface IMetricsCalculator
|
|
{
|
|
BenchmarkMetrics Calculate(
|
|
ToolResult result,
|
|
GroundTruth groundTruth
|
|
);
|
|
|
|
ComparativeMetrics Compare(
|
|
BenchmarkMetrics baseline,
|
|
BenchmarkMetrics comparison
|
|
);
|
|
}
|
|
|
|
public record BenchmarkMetrics(
|
|
int TruePositives,
|
|
int FalsePositives,
|
|
int TrueNegatives,
|
|
int FalseNegatives,
|
|
double Precision,
|
|
double Recall,
|
|
double F1Score,
|
|
ImmutableDictionary<string, BenchmarkMetrics> ByCategory
|
|
);
|
|
|
|
public record ComparativeMetrics(
|
|
string BaselineTool,
|
|
string ComparisonTool,
|
|
double PrecisionDelta,
|
|
double RecallDelta,
|
|
double F1Delta,
|
|
ImmutableArray<string> UniqueFindings,
|
|
ImmutableArray<string> MissedFindings
|
|
);
|
|
```
|
|
|
|
### 5. Claims Index
|
|
|
|
**Namespace**: `StellaOps.Scanner.Benchmark.Claims`
|
|
|
|
Manages verifiable claims with evidence links.
|
|
|
|
```csharp
|
|
public interface IClaimsIndex
|
|
{
|
|
Task<ImmutableArray<Claim>> GetAllClaimsAsync(CancellationToken ct);
|
|
Task<ClaimVerification> VerifyClaimAsync(string claimId, CancellationToken ct);
|
|
Task UpdateClaimsAsync(BenchmarkRun run, CancellationToken ct);
|
|
}
|
|
|
|
public record Claim(
|
|
string Id,
|
|
ClaimCategory Category,
|
|
string Statement,
|
|
string EvidencePath,
|
|
ClaimStatus Status,
|
|
DateTimeOffset LastVerified
|
|
);
|
|
|
|
public enum ClaimStatus { Pending, Verified, Published, Disputed, Resolved }
|
|
|
|
public record ClaimVerification(
|
|
string ClaimId,
|
|
bool IsValid,
|
|
string? Evidence,
|
|
string? FailureReason
|
|
);
|
|
```
|
|
|
|
---
|
|
|
|
## Data Flow
|
|
|
|
```
┌────────────────┐
│ Corpus Images  │
│  (50+ images)  │
└───────┬────────┘
        │
        ▼
┌────────────────┐     ┌────────────────┐
│ Stella Ops Scan│     │  Trivy/Grype   │
│                │     │     Scan       │
└───────┬────────┘     └───────┬────────┘
        │                      │
        ▼                      ▼
┌────────────────┐     ┌────────────────┐
│  Normalized    │     │  Normalized    │
│   Findings     │     │   Findings     │
└───────┬────────┘     └───────┬────────┘
        │                      │
        └──────────┬───────────┘
                   │
                   ▼
            ┌──────────────┐
            │ Ground Truth │
            │  Comparison  │
            └──────┬───────┘
                   │
                   ▼
            ┌──────────────┐
            │   Metrics    │
            │   (P/R/F1)   │
            └──────┬───────┘
                   │
                   ▼
            ┌──────────────┐
            │ Claims Index │
            │    Update    │
            └──────────────┘
```
|
|
|
|
---
|
|
|
|
## Corpus Structure
|
|
|
|
```
bench/competitors/
├── corpus/
│   ├── manifest.json           # Corpus metadata
│   ├── ground-truth/
│   │   ├── alpine-3.18.json    # Per-image ground truth
│   │   ├── debian-bookworm.json
│   │   └── ...
│   └── images/
│       ├── base-os/
│       ├── applications/
│       └── edge-cases/
├── results/
│   ├── 2025-12-22/
│   │   ├── stellaops.json
│   │   ├── trivy.json
│   │   ├── grype.json
│   │   └── comparison.json
│   └── latest -> 2025-12-22/
└── fixtures/
    └── adapters/               # Test fixtures for adapters
```
|
|
|
|
---
|
|
|
|
## Ground Truth Format
|
|
|
|
```json
|
|
{
|
|
"imageDigest": "sha256:abc123...",
|
|
"imageName": "alpine:3.18",
|
|
"category": "BaseOS",
|
|
"groundTruth": {
|
|
"truePositives": [
|
|
{
|
|
"cveId": "CVE-2024-1234",
|
|
"package": "openssl",
|
|
"version": "3.0.8",
|
|
"notes": "Fixed in 3.0.9"
|
|
}
|
|
],
|
|
"knownFalsePositives": [
|
|
{
|
|
"cveId": "CVE-2024-9999",
|
|
"package": "zlib",
|
|
"version": "1.2.13",
|
|
"reason": "Backported in alpine:3.18"
|
|
}
|
|
],
|
|
"expectedUnreachable": [
|
|
{
|
|
"cveId": "CVE-2024-5678",
|
|
"package": "curl",
|
|
"reason": "Vulnerable function not linked"
|
|
}
|
|
]
|
|
},
|
|
"lastVerified": "2025-12-01T00:00:00Z",
|
|
"verifiedBy": "security-team"
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## CI Integration
|
|
|
|
### Workflow: `benchmark-vs-competitors.yml`
|
|
|
|
```yaml
name: Competitive Benchmark

on:
  schedule:
    - cron: '0 2 * * 0'  # Weekly Sunday 2 AM
  workflow_dispatch:
  push:
    paths:
      - 'src/Scanner/__Libraries/StellaOps.Scanner.Benchmark/**'
      - 'bench/competitors/**'

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install competitor tools
        run: |
          # Install Trivy
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh
          # Install Grype
          curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh

      - name: Run benchmark
        run: stella benchmark run --corpus bench/competitors/corpus --output bench/competitors/results/$(date +%Y-%m-%d)

      - name: Update claims index
        run: stella benchmark claims --output docs/claims-index.md

      - name: Upload results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: bench/competitors/results/
```
|
|
|
|
---
|
|
|
|
## CLI Commands
|
|
|
|
```bash
|
|
# Run full benchmark
|
|
stella benchmark run --corpus <path> --competitors trivy,grype,syft
|
|
|
|
# Verify a specific claim
|
|
stella benchmark verify <CLAIM_ID>
|
|
|
|
# Generate claims index
|
|
stella benchmark claims --output docs/claims-index.md
|
|
|
|
# Generate marketing battlecard
|
|
stella benchmark battlecard --output docs/marketing/battlecard.md
|
|
|
|
# Show comparison summary
|
|
stella benchmark summary --format <table|json|markdown>
|
|
```
|
|
|
|
---
|
|
|
|
## Testing
|
|
|
|
| Test Type | Location | Purpose |
|-----------|----------|---------|
| Unit | `StellaOps.Scanner.Benchmark.Tests/` | Adapter parsing, metrics calculation |
| Integration | `StellaOps.Scanner.Benchmark.Integration.Tests/` | Full benchmark flow |
| Golden | `bench/competitors/fixtures/` | Deterministic output verification |
|
|
|
|
---
|
|
|
|
## Security Considerations
|
|
|
|
1. **Competitor binaries**: Run in isolated containers, with no network access during scans
2. **Corpus images**: Verified digests, no external pulls during benchmark
3. **Results**: Signed with DSSE before publishing
4. **Claims**: Require PR review before status change
|
|
|
|
---
|
|
|
|
## Dependencies
|
|
|
|
- `StellaOps.Scanner.Core` - Normalized finding models
- `StellaOps.Attestor.Dsse` - Result signing
- Docker - Competitor tool execution
- Ground-truth corpus (maintained separately)
|
|
|
|
---
|
|
|
|
## Related Documentation
|
|
|
|
- [Claims Index](../../claims-index.md)
- [Sprint 7000.0001.0001](../../implplan/SPRINT_7000_0001_0001_competitive_benchmarking.md)
- [Testing Strategy](../../implplan/SPRINT_5100_SUMMARY.md)
|
|
|
|
---
|
|
|
|
*Document Version*: 1.0.0
|
|
*Created*: 2025-12-22
|