Files
git.stella-ops.org/bench/tools/compare.py
StellaOps Bot 233873f620
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
up
2025-12-14 15:50:38 +02:00

339 lines
11 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-or-later
# BENCH-AUTO-401-019: Baseline scanner comparison script
"""
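Compare StellaOps findings against baseline scanner results.

Generates comparison metrics:
- True positives (reachability-confirmed)
- False positives (unreachable code paths)
- MTTD (mean time to detect)
- Reproducibility score

NOTE(review): the metrics computed in this file are the agreement count and
rate, FP reductions, and the findings unique to each scanner; no MTTD or
reproducibility score is calculated here. Confirm whether the list above is
aspirational or produced by another tool.

Usage:
    python bench/tools/compare.py --stellaops PATH --baseline PATH --output PATH [--json]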
"""
import argparse
import csv
import json
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
@dataclass
class Finding:
    """One vulnerability finding reported by a single scanner.

    Attributes:
        cve_id: Identifier of the vulnerability.
        purl: Package URL of the component the finding refers to.
        status: Normalized status, e.g. "affected" or "not_affected"
            (the loaders in this file also store "unknown").
        reachability: "reachable", "unreachable" or "unknown".
        source: Which scanner produced the finding: "stellaops" or
            "baseline".
        detected_at: Detection timestamp as a string; empty when absent.
        evidence_hash: Hash of the supporting evidence; empty when absent.
    """

    cve_id: str
    purl: str
    status: str
    reachability: str
    source: str
    detected_at: str = ""
    evidence_hash: str = ""
@dataclass
class ComparisonResult:
    """Outcome of matching one (CVE, purl) pair across the two scanners.

    Attributes:
        cve_id: Identifier of the vulnerability being compared.
        purl: Package URL of the component being compared.
        stellaops_status: Status reported by StellaOps, or "not_found" when
            only the baseline reported the pair.
        baseline_status: Status reported by the baseline scanner, or
            "not_found" when only StellaOps reported the pair.
        agreement: True when both scanners reported the same status.
        stellaops_reachability: Reachability verdict from StellaOps
            ("unknown" for baseline-only rows).
        notes: Human-readable explanation of the outcome; may be empty.
    """

    cve_id: str
    purl: str
    stellaops_status: str
    baseline_status: str
    agreement: bool
    stellaops_reachability: str
    notes: str = ""
def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
    """Load StellaOps findings from bench/findings directory.

    Each immediate sub-directory of ``findings_dir`` is treated as one
    finding when it contains both ``metadata.json`` and
    ``decision.openvex.json``. Only the first OpenVEX statement and its
    first product are used.

    Args:
        findings_dir: Directory holding one sub-directory per finding.

    Returns:
        Findings in sorted sub-directory order. The list is empty when
        ``findings_dir`` is missing or is not a directory.

    Raises:
        json.JSONDecodeError: If one of the JSON files is malformed.
    """
    findings = []
    # is_dir() rather than exists(): a path pointing at a regular file would
    # pass exists() and then make iterdir() raise NotADirectoryError.
    if not findings_dir.is_dir():
        return findings

    for finding_dir in sorted(findings_dir.iterdir()):
        if not finding_dir.is_dir():
            continue

        metadata_path = finding_dir / "metadata.json"
        openvex_path = finding_dir / "decision.openvex.json"
        if not (metadata_path.exists() and openvex_path.exists()):
            continue

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        with open(openvex_path, 'r', encoding='utf-8') as f:
            openvex = json.load(f)

        statements = openvex.get("statements", [])
        if not statements:
            continue
        stmt = statements[0]

        # Product entries are normally objects carrying an "@id"; tolerate a
        # bare identifier string as well instead of failing on str.get().
        products = stmt.get("products", [])
        first_product = products[0] if products else None
        if isinstance(first_product, dict):
            purl = first_product.get("@id", "")
        elif isinstance(first_product, str):
            purl = first_product
        else:
            purl = ""

        findings.append(Finding(
            cve_id=metadata.get("cve_id", ""),
            purl=purl,
            status=stmt.get("status", "unknown"),
            # The metadata "variant" field carries the reachability verdict.
            reachability=metadata.get("variant", "unknown"),
            source="stellaops",
            detected_at=openvex.get("timestamp", ""),
            evidence_hash=metadata.get("evidence_hash", ""),
        ))

    return findings
def load_baseline_findings(baseline_path: Path) -> list[Finding]:
    """Load baseline scanner findings from JSON file.

    The document may be an object holding the entries under
    ``"vulnerabilities"``, ``"findings"`` or ``"results"`` (checked in that
    order), or a top-level array of entries. Entries that are not JSON
    objects are skipped.

    Args:
        baseline_path: Path to the baseline scanner's JSON output.

    Returns:
        One ``Finding`` per entry, with ``source="baseline"`` and
        ``reachability="unknown"``. The list is empty when the path is not
        an existing regular file.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
    """
    findings = []
    # is_file() rather than exists(): open() on a directory would raise.
    if not baseline_path.is_file():
        return findings

    with open(baseline_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Support multiple baseline formats
    if isinstance(data, dict):
        vulns = data.get("vulnerabilities", data.get("findings", data.get("results", [])))
    elif isinstance(data, list):
        vulns = data
    else:
        vulns = []

    # NOTE(review): "low" severity is not mapped to "affected"; confirm that
    # this is intentional.
    affected_values = ("affected", "vulnerable", "high", "critical", "medium")
    not_affected_values = ("not_affected", "fixed", "not_vulnerable")

    # `or []` guards against an explicit null under the list key.
    for vuln in vulns or []:
        if not isinstance(vuln, dict):
            continue

        cve_id = vuln.get("cve_id", vuln.get("id", vuln.get("vulnerability_id", "")))
        purl = vuln.get("purl", vuln.get("package_url", ""))

        # Map baseline status to our normalized form. A non-string value
        # (null, number) cannot be lower-cased and is treated as unknown.
        raw_status = vuln.get("status", vuln.get("severity", ""))
        normalized = raw_status.lower() if isinstance(raw_status, str) else ""
        if normalized in affected_values:
            status = "affected"
        elif normalized in not_affected_values:
            status = "not_affected"
        else:
            status = "unknown"

        findings.append(Finding(
            cve_id=cve_id,
            purl=purl,
            status=status,
            reachability="unknown",  # Baseline scanners typically don't have reachability
            source="baseline",
        ))

    return findings
def compare_findings(
    stellaops: list[Finding],
    baseline: list[Finding]
) -> list[ComparisonResult]:
    """Match StellaOps findings against baseline findings by (CVE, purl).

    The result lists one row per StellaOps finding, in input order, followed
    by one row per baseline finding whose (CVE, purl) pair StellaOps did not
    report. Rows present on only one side carry "not_found" as the other
    side's status and ``agreement=False``.
    """
    # Keyed lookup of baseline findings; a later duplicate key replaces an
    # earlier one.
    baseline_by_key = {(b.cve_id, b.purl): b for b in baseline}

    results = []
    for sf in stellaops:
        bf = baseline_by_key.get((sf.cve_id, sf.purl))
        if not bf:
            # StellaOps found something baseline didn't
            results.append(ComparisonResult(
                cve_id=sf.cve_id,
                purl=sf.purl,
                stellaops_status=sf.status,
                baseline_status="not_found",
                agreement=False,
                stellaops_reachability=sf.reachability,
                notes="Only found by StellaOps",
            ))
            continue

        results.append(ComparisonResult(
            cve_id=sf.cve_id,
            purl=sf.purl,
            stellaops_status=sf.status,
            baseline_status=bf.status,
            agreement=sf.status == bf.status,
            stellaops_reachability=sf.reachability,
            notes=_matched_pair_note(sf, bf),
        ))

    # Baseline-only findings are appended after all StellaOps rows.
    seen_by_stellaops = {(s.cve_id, s.purl) for s in stellaops}
    results.extend(
        ComparisonResult(
            cve_id=b.cve_id,
            purl=b.purl,
            stellaops_status="not_found",
            baseline_status=b.status,
            agreement=False,
            stellaops_reachability="unknown",
            notes="Only found by baseline",
        )
        for b in baseline
        if (b.cve_id, b.purl) not in seen_by_stellaops
    )
    return results


def _matched_pair_note(sf: Finding, bf: Finding) -> str:
    """Return the note text for a pair reported by both scanners."""
    pair = (sf.status, bf.status)
    if pair == ("not_affected", "affected"):
        # Only an explicit "unreachable" verdict counts as an FP reduction.
        if sf.reachability == "unreachable":
            return "FP reduction: StellaOps correctly identified unreachable code"
        return "Disagreement: investigate"

    fixed_notes = {
        ("not_affected", "not_affected"): "Both agree: not affected",
        ("affected", "affected"): "Both agree: affected",
        ("affected", "not_affected"): "StellaOps detected, baseline missed",
    }
    # Any other combination (e.g. involving "unknown") gets no note.
    return fixed_notes.get(pair, "")
def compute_comparison_metrics(results: list[ComparisonResult]) -> dict:
    """Summarize comparison rows into aggregate counts.

    Returns a dict with the number of rows, the number and rate of
    agreements, and counts of rows whose notes mark an FP reduction, a
    StellaOps-only finding or a baseline-only finding, plus a UTC
    ``generated_at`` timestamp.
    """
    total = len(results)
    agreements = 0
    fp_reductions = 0
    stellaops_only = 0
    baseline_only = 0

    # The category counters are derived from the text of each row's notes.
    for row in results:
        if row.agreement:
            agreements += 1
        if row.notes and "FP reduction" in row.notes:
            fp_reductions += 1
        if "Only found by StellaOps" in row.notes:
            stellaops_only += 1
        if "Only found by baseline" in row.notes:
            baseline_only += 1

    # Avoid dividing by zero when there is nothing to compare.
    if total > 0:
        agreement_rate = agreements / total
    else:
        agreement_rate = 0

    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return {
        "total_comparisons": total,
        "agreements": agreements,
        "agreement_rate": agreement_rate,
        "fp_reductions": fp_reductions,
        "stellaops_unique": stellaops_only,
        "baseline_unique": baseline_only,
        "generated_at": generated_at,
    }
def write_comparison_csv(results: list[ComparisonResult], output_path: Path):
    """Write the comparison rows to ``output_path`` as UTF-8 CSV.

    Missing parent directories are created. The file starts with a header
    row; the ``agreement`` column is rendered as "yes" or "no".
    """
    header = [
        "cve_id",
        "purl",
        "stellaops_status",
        "baseline_status",
        "agreement",
        "reachability",
        "notes",
    ]
    # Lazy row generator: rows are produced only while the file is written.
    rows = (
        [
            row.cve_id,
            row.purl,
            row.stellaops_status,
            row.baseline_status,
            "yes" if row.agreement else "no",
            row.stellaops_reachability,
            row.notes,
        ]
        for row in results
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)
def main():
    """Command-line entry point.

    Parses the arguments, loads both sets of findings, prints a summary of
    the comparison, writes the CSV report and, with ``--json``, a JSON file
    next to it (same path with a ``.json`` suffix). Returns 0.
    """
    parser = argparse.ArgumentParser(
        description="Compare StellaOps findings against baseline scanner"
    )
    parser.add_argument(
        "--stellaops",
        type=Path,
        default=Path("bench/findings"),
        help="Path to StellaOps findings directory",
    )
    parser.add_argument(
        "--baseline",
        type=Path,
        required=True,
        help="Path to baseline scanner results JSON",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("bench/results/comparison.csv"),
        help="Output CSV path",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Also output JSON summary",
    )
    args = parser.parse_args()

    # Relative paths are anchored three directory levels above this file,
    # not at the current working directory.
    repo_root = Path(__file__).parent.parent.parent

    def anchored(path):
        if path.is_absolute():
            return path
        return repo_root / path

    stellaops_path = anchored(args.stellaops)
    baseline_path = anchored(args.baseline)
    output_path = anchored(args.output)

    print(f"StellaOps findings: {stellaops_path}")
    print(f"Baseline results: {baseline_path}")

    # Load findings
    stellaops_findings = load_stellaops_findings(stellaops_path)
    print(f"Loaded {len(stellaops_findings)} StellaOps findings")
    baseline_findings = load_baseline_findings(baseline_path)
    print(f"Loaded {len(baseline_findings)} baseline findings")

    # Compare
    results = compare_findings(stellaops_findings, baseline_findings)
    metrics = compute_comparison_metrics(results)

    print("\nComparison Results:")
    print(f" Total comparisons: {metrics['total_comparisons']}")
    print(f" Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
    print(f" FP reductions: {metrics['fp_reductions']}")
    print(f" StellaOps unique: {metrics['stellaops_unique']}")
    print(f" Baseline unique: {metrics['baseline_unique']}")

    # Write outputs
    write_comparison_csv(results, output_path)
    print(f"\nWrote comparison to: {output_path}")

    if not args.json:
        return 0

    json_path = output_path.with_suffix('.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        summary = {
            "metrics": metrics,
            "results": [
                {
                    "cve_id": row.cve_id,
                    "purl": row.purl,
                    "stellaops_status": row.stellaops_status,
                    "baseline_status": row.baseline_status,
                    "agreement": row.agreement,
                    "reachability": row.stellaops_reachability,
                    "notes": row.notes,
                }
                for row in results
            ],
        }
        json.dump(summary, f, indent=2, sort_keys=True)
    print(f"Wrote JSON to: {json_path}")
    return 0
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())