up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
This commit is contained in:
338
bench/tools/compare.py
Normal file
338
bench/tools/compare.py
Normal file
@@ -0,0 +1,338 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# BENCH-AUTO-401-019: Baseline scanner comparison script
|
||||
|
||||
"""
|
||||
Compare StellaOps findings against baseline scanner results.
|
||||
|
||||
Generates comparison metrics:
|
||||
- True positives (reachability-confirmed)
|
||||
- False positives (unreachable code paths)
|
||||
- MTTD (mean time to detect)
|
||||
- Reproducibility score
|
||||
|
||||
Usage:
|
||||
python bench/tools/compare.py --baseline PATH [--stellaops PATH] [--output PATH] [--json]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class Finding:
    """A single scanner verdict about one CVE on one package."""

    # Vulnerability identifier and the package URL it applies to; together
    # they form the key used to match findings across scanners.
    cve_id: str
    purl: str

    # Verdict, e.g. "affected" or "not_affected".
    status: str

    # Reachability classification: "reachable", "unreachable" or "unknown".
    reachability: str

    # Which scanner produced the finding: "stellaops" or "baseline".
    source: str

    # Optional provenance; both default to the empty string when absent.
    detected_at: str = ""
    evidence_hash: str = ""
|
||||
|
||||
|
||||
@dataclass
class ComparisonResult:
    """One row of the StellaOps-versus-baseline comparison."""

    # Key of the compared finding.
    cve_id: str
    purl: str

    # Verdict reported by each side for that key.
    stellaops_status: str
    baseline_status: str

    # Whether the two sides are considered to agree for this row.
    agreement: bool

    # Reachability classification taken from the StellaOps side.
    stellaops_reachability: str

    # Free-text explanation of the outcome; empty when nothing was recorded.
    notes: str = ""
|
||||
|
||||
|
||||
def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
    """Load StellaOps findings from a directory of per-finding folders.

    Every immediate subdirectory of ``findings_dir`` is inspected for a
    ``metadata.json`` and a ``decision.openvex.json``. A subdirectory is
    skipped when either file is missing or when the OpenVEX document carries
    no statements. Only the first statement, and the first product of that
    statement, are read.

    Args:
        findings_dir: Directory holding one subdirectory per finding.

    Returns:
        Findings in sorted subdirectory order. The list is empty when
        ``findings_dir`` does not exist or is not a directory.

    Raises:
        json.JSONDecodeError: If either JSON file of a finding is malformed.
    """
    findings = []

    # is_dir() rather than exists(): a path pointing at a regular file would
    # otherwise reach iterdir() and raise NotADirectoryError.
    if not findings_dir.is_dir():
        return findings

    for finding_dir in sorted(findings_dir.iterdir()):
        if not finding_dir.is_dir():
            continue

        metadata_path = finding_dir / "metadata.json"
        openvex_path = finding_dir / "decision.openvex.json"

        if not (metadata_path.is_file() and openvex_path.is_file()):
            continue

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        with open(openvex_path, 'r', encoding='utf-8') as f:
            openvex = json.load(f)

        statements = openvex.get("statements", [])
        if not statements:
            continue

        stmt = statements[0]
        if not isinstance(stmt, dict):
            # Nothing usable can be read from a non-object statement.
            continue

        # "or []" also covers an explicit JSON null for "products".
        products = stmt.get("products") or []
        first_product = products[0] if products else None
        if isinstance(first_product, dict):
            purl = first_product.get("@id", "")
        elif isinstance(first_product, str):
            # NOTE(review): assumes a bare-string product entry is itself the
            # package identifier (as in early OpenVEX drafts) -- confirm.
            purl = first_product
        else:
            purl = ""

        findings.append(Finding(
            cve_id=metadata.get("cve_id", ""),
            purl=purl,
            status=stmt.get("status", "unknown"),
            # The "variant" metadata field is what feeds the reachability label.
            reachability=metadata.get("variant", "unknown"),
            source="stellaops",
            detected_at=openvex.get("timestamp", ""),
            evidence_hash=metadata.get("evidence_hash", ""),
        ))

    return findings
|
||||
|
||||
|
||||
def load_baseline_findings(baseline_path: Path) -> list[Finding]:
    """Load baseline scanner findings from a JSON file.

    The document may be an object that stores its entries under
    ``vulnerabilities``, ``findings`` or ``results`` (the first key present
    wins), or a bare top-level list of entries. Each entry's ``status`` (or,
    failing that, ``severity``) is normalised to ``affected``,
    ``not_affected`` or ``unknown``.

    Args:
        baseline_path: Path to the baseline scanner's JSON output.

    Returns:
        One finding per object entry, in document order, all with
        ``reachability="unknown"`` and ``source="baseline"``. The list is
        empty when ``baseline_path`` is not an existing regular file.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
    """
    findings = []

    # is_file() rather than exists(): a directory path would otherwise fail
    # inside open().
    if not baseline_path.is_file():
        return findings

    with open(baseline_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Support multiple baseline formats.
    if isinstance(data, list):
        vulns = data
    elif isinstance(data, dict):
        vulns = []
        for container_key in ("vulnerabilities", "findings", "results"):
            if container_key in data:
                # "or []" tolerates an explicit JSON null under the key.
                vulns = data[container_key] or []
                break
    else:
        vulns = []

    affected_labels = {"affected", "vulnerable", "high", "critical", "medium"}
    not_affected_labels = {"not_affected", "fixed", "not_vulnerable"}
    # NOTE(review): a "low" severity currently normalises to "unknown" --
    # confirm whether that threshold is intentional.

    for vuln in vulns:
        if not isinstance(vuln, dict):
            # Entries that are not JSON objects carry no readable fields.
            continue

        cve_id = vuln.get("cve_id", vuln.get("id", vuln.get("vulnerability_id", "")))
        purl = vuln.get("purl", vuln.get("package_url", ""))

        # Map baseline status to our normalized form. Non-string values
        # (null, numbers) cannot be lower-cased and fall through to "unknown".
        raw_status = vuln.get("status", vuln.get("severity", ""))
        label = raw_status.lower() if isinstance(raw_status, str) else ""
        if label in affected_labels:
            status = "affected"
        elif label in not_affected_labels:
            status = "not_affected"
        else:
            status = "unknown"

        findings.append(Finding(
            cve_id=cve_id,
            purl=purl,
            status=status,
            reachability="unknown",  # Baseline scanners typically don't have reachability
            source="baseline",
        ))

    return findings
|
||||
|
||||
|
||||
def compare_findings(
    stellaops: list[Finding],
    baseline: list[Finding]
) -> list[ComparisonResult]:
    """Compare StellaOps findings with baseline findings.

    Findings are matched on the ``(cve_id, purl)`` pair. When the baseline
    contains several entries for the same pair, the last one is the one
    compared against.

    Args:
        stellaops: Findings produced by StellaOps.
        baseline: Findings produced by the baseline scanner.

    Returns:
        One row per StellaOps finding (in input order), followed by one row
        per baseline finding whose key no StellaOps finding carries. Rows for
        findings present on only one side use ``"not_found"`` as the other
        side's status and are never counted as agreement.
    """
    results = []

    # Index baseline by CVE+purl.
    baseline_index = {(f.cve_id, f.purl): f for f in baseline}

    # Compare each StellaOps finding.
    for sf in stellaops:
        bf = baseline_index.get((sf.cve_id, sf.purl))

        if bf is not None:
            results.append(ComparisonResult(
                cve_id=sf.cve_id,
                purl=sf.purl,
                stellaops_status=sf.status,
                baseline_status=bf.status,
                agreement=sf.status == bf.status,
                stellaops_reachability=sf.reachability,
                notes=_describe_matched_pair(sf, bf),
            ))
        else:
            # StellaOps found something baseline didn't.
            results.append(ComparisonResult(
                cve_id=sf.cve_id,
                purl=sf.purl,
                stellaops_status=sf.status,
                baseline_status="not_found",
                agreement=False,
                stellaops_reachability=sf.reachability,
                notes="Only found by StellaOps",
            ))

    # Find baseline-only findings.
    stellaops_keys = {(f.cve_id, f.purl) for f in stellaops}
    for bf in baseline:
        if (bf.cve_id, bf.purl) in stellaops_keys:
            continue
        results.append(ComparisonResult(
            cve_id=bf.cve_id,
            purl=bf.purl,
            stellaops_status="not_found",
            baseline_status=bf.status,
            agreement=False,
            stellaops_reachability="unknown",
            notes="Only found by baseline",
        ))

    return results


def _describe_matched_pair(sf: Finding, bf: Finding) -> str:
    """Return the notes text for a finding reported by both scanners."""
    if sf.status == bf.status:
        if sf.status == "not_affected":
            return "Both agree: not affected"
        if sf.status == "affected":
            return "Both agree: affected"
        # Any other shared status (e.g. "unknown") is still labelled rather
        # than left blank.
        return f"Both agree: {sf.status}"

    if sf.status == "not_affected" and bf.status == "affected":
        if sf.reachability == "unreachable":
            return "FP reduction: StellaOps correctly identified unreachable code"
        return "Disagreement: investigate"

    if sf.status == "affected" and bf.status == "not_affected":
        return "StellaOps detected, baseline missed"

    # Remaining mismatches (for instance one side "unknown") used to produce
    # an empty note; flag them for follow-up like other disagreements.
    return "Disagreement: investigate"
|
||||
|
||||
|
||||
def compute_comparison_metrics(results: list[ComparisonResult]) -> dict:
    """Summarise comparison rows into aggregate metrics.

    Rows are categorised by substring matches on their ``notes`` text
    ("FP reduction", "Only found by StellaOps", "Only found by baseline"),
    so those phrases must stay in sync with whatever produces the rows.

    Args:
        results: Comparison rows; each needs ``agreement`` and ``notes``.

    Returns:
        A dict with ``total_comparisons``, ``agreements``, ``agreement_rate``
        (always a float; ``0.0`` for empty input), ``fp_reductions``,
        ``stellaops_unique``, ``baseline_unique`` and ``generated_at`` (the
        current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``).
    """
    total = len(results)
    agreements = sum(1 for r in results if r.agreement)

    def count_notes_containing(marker):
        # "or ''" applies the same missing-notes guard to every category.
        return sum(1 for r in results if marker in (r.notes or ""))

    return {
        "total_comparisons": total,
        "agreements": agreements,
        # 0.0 rather than 0 keeps the value's type stable for consumers.
        "agreement_rate": agreements / total if total > 0 else 0.0,
        "fp_reductions": count_notes_containing("FP reduction"),
        "stellaops_unique": count_notes_containing("Only found by StellaOps"),
        "baseline_unique": count_notes_containing("Only found by baseline"),
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
|
||||
|
||||
|
||||
def write_comparison_csv(results: list[ComparisonResult], output_path: Path):
    """Write the comparison rows to ``output_path`` as CSV.

    Missing parent directories are created first. The file starts with a
    header row; each following row mirrors one result, with ``agreement``
    rendered as ``yes``/``no``.

    Args:
        results: Comparison rows to serialise, written in the given order.
        output_path: Destination file; overwritten if it already exists.
    """
    header = [
        "cve_id",
        "purl",
        "stellaops_status",
        "baseline_status",
        "agreement",
        "reachability",
        "notes",
    ]

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # newline='' leaves line-ending handling to the csv module.
    with open(output_path, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [
                row.cve_id,
                row.purl,
                row.stellaops_status,
                row.baseline_status,
                "yes" if row.agreement else "no",
                row.stellaops_reachability,
                row.notes,
            ]
            for row in results
        )
|
||||
|
||||
|
||||
def main(argv=None):
    """Run the comparison from the command line.

    Loads both result sets, compares them, prints a summary, writes the CSV
    report and, with ``--json``, a JSON file next to it (same name, ``.json``
    suffix). Relative paths are resolved against the repository root, taken
    to be three levels above this file, not against the working directory.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the previous behaviour.

    Returns:
        The process exit code; ``0`` once the report has been written.
    """
    parser = argparse.ArgumentParser(
        description="Compare StellaOps findings against baseline scanner"
    )
    parser.add_argument(
        "--stellaops",
        type=Path,
        default=Path("bench/findings"),
        help="Path to StellaOps findings directory"
    )
    parser.add_argument(
        "--baseline",
        type=Path,
        required=True,
        help="Path to baseline scanner results JSON"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("bench/results/comparison.csv"),
        help="Output CSV path"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Also output JSON summary"
    )

    args = parser.parse_args(argv)

    # Resolve paths
    repo_root = Path(__file__).parent.parent.parent

    def resolve(path):
        return path if path.is_absolute() else repo_root / path

    stellaops_path = resolve(args.stellaops)
    baseline_path = resolve(args.baseline)
    output_path = resolve(args.output)

    print(f"StellaOps findings: {stellaops_path}")
    print(f"Baseline results: {baseline_path}")

    # A missing input yields an empty result set rather than an error, so say
    # so explicitly; otherwise the report looks valid but compares nothing.
    if not stellaops_path.exists():
        print(f"warning: StellaOps findings path not found: {stellaops_path}", file=sys.stderr)
    if not baseline_path.exists():
        print(f"warning: baseline results file not found: {baseline_path}", file=sys.stderr)

    # Load findings
    stellaops_findings = load_stellaops_findings(stellaops_path)
    print(f"Loaded {len(stellaops_findings)} StellaOps findings")

    baseline_findings = load_baseline_findings(baseline_path)
    print(f"Loaded {len(baseline_findings)} baseline findings")

    # Compare
    results = compare_findings(stellaops_findings, baseline_findings)
    metrics = compute_comparison_metrics(results)

    print("\nComparison Results:")
    print(f" Total comparisons: {metrics['total_comparisons']}")
    print(f" Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
    print(f" FP reductions: {metrics['fp_reductions']}")
    print(f" StellaOps unique: {metrics['stellaops_unique']}")
    print(f" Baseline unique: {metrics['baseline_unique']}")

    # Write outputs
    write_comparison_csv(results, output_path)
    print(f"\nWrote comparison to: {output_path}")

    if args.json:
        json_path = output_path.with_suffix('.json')
        payload = {
            "metrics": metrics,
            "results": [
                {
                    "cve_id": r.cve_id,
                    "purl": r.purl,
                    "stellaops_status": r.stellaops_status,
                    "baseline_status": r.baseline_status,
                    "agreement": r.agreement,
                    "reachability": r.stellaops_reachability,
                    "notes": r.notes,
                }
                for r in results
            ],
        }
        with open(json_path, 'w', encoding='utf-8') as f:
            # sort_keys keeps the output stable across runs.
            json.dump(payload, f, indent=2, sort_keys=True)
        print(f"Wrote JSON to: {json_path}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user