Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
339 lines
11 KiB
Python
339 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# BENCH-AUTO-401-019: Baseline scanner comparison script
|
|
|
|
"""
|
|
Compare StellaOps findings against baseline scanner results.
|
|
|
|
Generates comparison metrics:
|
|
- True positives (reachability-confirmed)
|
|
- False positives (unreachable code paths)
|
|
- MTTD (mean time to detect)
|
|
- Reproducibility score
|
|
|
|
Usage:
|
|
python bench/tools/compare.py --stellaops PATH --baseline PATH --output PATH
|
|
"""
|
|
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
class Finding:
    """One vulnerability verdict reported by a single scanner.

    The (cve_id, purl) pair identifies the finding; the remaining fields
    describe the verdict and where it came from.
    """

    # Identity of the finding: vulnerability id plus package URL.
    cve_id: str
    purl: str

    # Verdict for the package: "affected" or "not_affected".
    status: str
    # Reachability classification: "reachable", "unreachable" or "unknown".
    reachability: str
    # Producer of the finding: "stellaops" or "baseline".
    source: str

    # Optional provenance details; both default to the empty string.
    detected_at: str = ""
    evidence_hash: str = ""
|
|
|
|
|
|
@dataclass
class ComparisonResult:
    """Outcome of lining up one (cve_id, purl) pair across both scanners."""

    # Identity of the compared finding.
    cve_id: str
    purl: str

    # Status reported by each side for this pair.
    stellaops_status: str
    baseline_status: str
    # Whether the two sides are considered to agree.
    agreement: bool

    # Reachability classification taken from the StellaOps side.
    stellaops_reachability: str

    # Free-text explanation of the outcome; empty when nothing was recorded.
    notes: str = ""
|
|
|
|
|
|
def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
    """Load StellaOps findings from a directory of per-finding sub-directories.

    Every immediate sub-directory that contains both ``metadata.json`` and
    ``decision.openvex.json`` yields one finding, built from the FIRST
    statement of the OpenVEX document and the first product of that
    statement. Sub-directories missing either file, or whose OpenVEX document
    has no statements, are skipped.

    Args:
        findings_dir: Directory to scan.

    Returns:
        Findings in sorted sub-directory order. The list is empty when
        ``findings_dir`` does not exist or is not a directory.

    Raises:
        json.JSONDecodeError: If one of the JSON files cannot be parsed.
    """
    findings: list[Finding] = []

    # is_dir() instead of exists(): a path that points at a regular file
    # would otherwise make iterdir() raise NotADirectoryError.
    if not findings_dir.is_dir():
        return findings

    for finding_dir in sorted(findings_dir.iterdir()):
        if not finding_dir.is_dir():
            continue

        metadata_path = finding_dir / "metadata.json"
        openvex_path = finding_dir / "decision.openvex.json"
        if not (metadata_path.exists() and openvex_path.exists()):
            continue

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        with open(openvex_path, 'r', encoding='utf-8') as f:
            openvex = json.load(f)

        statements = openvex.get("statements", [])
        if not statements:
            continue

        stmt = statements[0]
        products = stmt.get("products", [])
        first_product = products[0] if products else None

        # Product entries are normally objects carrying an "@id", but a bare
        # string identifier is accepted too instead of raising AttributeError.
        if isinstance(first_product, dict):
            purl = first_product.get("@id", "")
        elif isinstance(first_product, str):
            purl = first_product
        else:
            purl = ""

        findings.append(Finding(
            cve_id=metadata.get("cve_id", ""),
            purl=purl,
            status=stmt.get("status", "unknown"),
            # The metadata "variant" field is what this tool treats as the
            # reachability classification.
            reachability=metadata.get("variant", "unknown"),
            source="stellaops",
            detected_at=openvex.get("timestamp", ""),
            evidence_hash=metadata.get("evidence_hash", ""),
        ))

    return findings
|
|
|
|
|
|
def load_baseline_findings(baseline_path: Path) -> list[Finding]:
    """Load baseline scanner findings from a JSON report.

    Accepted report shapes:
      * an object holding the entries under ``vulnerabilities``, ``findings``
        or ``results`` (checked in that order), or
      * a bare top-level array of entries.

    Per entry, the CVE id is read from ``cve_id`` / ``id`` /
    ``vulnerability_id`` and the package URL from ``purl`` / ``package_url``.
    The entry's ``status`` (falling back to ``severity``) is normalized to
    ``affected``, ``not_affected`` or ``unknown``.

    Args:
        baseline_path: Path to the baseline JSON report.

    Returns:
        One finding per object entry, in file order, each with
        ``reachability="unknown"`` and ``source="baseline"``. The list is
        empty when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file cannot be parsed as JSON.
    """
    findings: list[Finding] = []

    if not baseline_path.exists():
        return findings

    with open(baseline_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Support multiple baseline formats. A top-level array previously crashed
    # on data.get(); treat it as the list of entries directly.
    if isinstance(data, list):
        vulns = data
    elif isinstance(data, dict):
        vulns = data.get("vulnerabilities", data.get("findings", data.get("results", [])))
    else:
        vulns = []

    # Hoisted out of the loop; membership semantics match the original lists.
    # NOTE(review): "low" severity currently normalizes to "unknown" while
    # "medium"/"high"/"critical" normalize to "affected" - confirm intended.
    affected_values = frozenset({"affected", "vulnerable", "high", "critical", "medium"})
    not_affected_values = frozenset({"not_affected", "fixed", "not_vulnerable"})

    # `or []` guards against an explicit JSON null under the entries key.
    for vuln in vulns or []:
        if not isinstance(vuln, dict):
            # Skip entries that are not objects instead of failing on .get().
            continue

        cve_id = vuln.get("cve_id", vuln.get("id", vuln.get("vulnerability_id", "")))
        purl = vuln.get("purl", vuln.get("package_url", ""))

        # Map baseline status to our normalized form. A null or non-string
        # value is treated as unknown instead of raising on .lower().
        raw_status = vuln.get("status", vuln.get("severity", ""))
        normalized = raw_status.lower() if isinstance(raw_status, str) else ""

        if normalized in affected_values:
            status = "affected"
        elif normalized in not_affected_values:
            status = "not_affected"
        else:
            status = "unknown"

        findings.append(Finding(
            cve_id=cve_id,
            purl=purl,
            status=status,
            reachability="unknown",  # Baseline scanners typically don't have reachability
            source="baseline",
        ))

    return findings
|
|
|
|
|
|
def compare_findings(
    stellaops: list[Finding],
    baseline: list[Finding]
) -> list[ComparisonResult]:
    """Match StellaOps findings against baseline findings by (cve_id, purl).

    Args:
        stellaops: Findings produced by StellaOps.
        baseline: Findings produced by the baseline scanner.

    Returns:
        One result per StellaOps finding (in input order), followed by one
        result per baseline finding whose key has no StellaOps counterpart
        (also in input order).
    """

    def describe(ours, theirs):
        """Return the note for a matched pair, or "" when no rule applies."""
        pair = (ours.status, theirs.status)
        if pair == ("not_affected", "not_affected"):
            return "Both agree: not affected"
        if pair == ("affected", "affected"):
            return "Both agree: affected"
        if pair == ("not_affected", "affected"):
            if ours.reachability == "unreachable":
                return "FP reduction: StellaOps correctly identified unreachable code"
            return "Disagreement: investigate"
        if pair == ("affected", "not_affected"):
            return "StellaOps detected, baseline missed"
        return ""

    # When the baseline repeats a key, the last entry wins.
    baseline_by_key = {(item.cve_id, item.purl): item for item in baseline}

    comparisons = []
    for ours in stellaops:
        theirs = baseline_by_key.get((ours.cve_id, ours.purl))

        if theirs is None:
            # StellaOps found something baseline didn't
            comparisons.append(ComparisonResult(
                cve_id=ours.cve_id,
                purl=ours.purl,
                stellaops_status=ours.status,
                baseline_status="not_found",
                agreement=False,
                stellaops_reachability=ours.reachability,
                notes="Only found by StellaOps",
            ))
            continue

        comparisons.append(ComparisonResult(
            cve_id=ours.cve_id,
            purl=ours.purl,
            stellaops_status=ours.status,
            baseline_status=theirs.status,
            agreement=(ours.status == theirs.status),
            stellaops_reachability=ours.reachability,
            notes=describe(ours, theirs),
        ))

    # Append the findings that only the baseline reported.
    matched_keys = {(item.cve_id, item.purl) for item in stellaops}
    comparisons.extend(
        ComparisonResult(
            cve_id=theirs.cve_id,
            purl=theirs.purl,
            stellaops_status="not_found",
            baseline_status=theirs.status,
            agreement=False,
            stellaops_reachability="unknown",
            notes="Only found by baseline",
        )
        for theirs in baseline
        if (theirs.cve_id, theirs.purl) not in matched_keys
    )

    return comparisons
|
|
|
|
|
|
def compute_comparison_metrics(results: list[ComparisonResult]) -> dict:
    """Summarize comparison results as a dictionary of counters.

    Args:
        results: Comparison results to aggregate.

    Returns:
        A dict with the total number of results, the number and rate of
        agreements, counts derived from marker substrings in each result's
        ``notes``, and a UTC ``generated_at`` timestamp.
    """

    def count_noted(marker):
        """Count the results whose notes contain *marker*."""
        return sum(1 for item in results if marker in item.notes)

    total = len(results)
    agreements = len([item for item in results if item.agreement])

    # An empty result set reports a rate of 0 rather than dividing by zero.
    if total > 0:
        agreement_rate = agreements / total
    else:
        agreement_rate = 0

    summary = {
        "total_comparisons": total,
        "agreements": agreements,
        "agreement_rate": agreement_rate,
        "fp_reductions": count_noted("FP reduction"),
        "stellaops_unique": count_noted("Only found by StellaOps"),
        "baseline_unique": count_noted("Only found by baseline"),
    }
    summary["generated_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return summary
|
|
|
|
|
|
def write_comparison_csv(results: list[ComparisonResult], output_path: Path):
    """Write the comparison results to a CSV file with a header row.

    Missing parent directories of ``output_path`` are created first. The
    ``agreement`` flag is written as "yes" or "no".

    Args:
        results: Comparison results, written one per row in the given order.
        output_path: Destination file; overwritten if it already exists.
    """
    header = [
        "cve_id",
        "purl",
        "stellaops_status",
        "baseline_status",
        "agreement",
        "reachability",
        "notes",
    ]

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with output_path.open('w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [
                row.cve_id,
                row.purl,
                row.stellaops_status,
                row.baseline_status,
                "yes" if row.agreement else "no",
                row.stellaops_reachability,
                row.notes,
            ]
            for row in results
        )
|
|
|
|
|
|
def main():
    """Command-line entry point: load both finding sets, compare, write output.

    Relative ``--stellaops``, ``--baseline`` and ``--output`` paths are
    resolved against the repository root (three levels above this file), not
    against the current working directory.

    A missing or invalid input path is reported on stderr. The run still
    completes, with that side contributing zero findings, so the exit code
    and stdout are the same as before.

    Returns:
        Process exit code; always 0.
    """
    parser = argparse.ArgumentParser(
        description="Compare StellaOps findings against baseline scanner"
    )
    parser.add_argument(
        "--stellaops",
        type=Path,
        default=Path("bench/findings"),
        help="Path to StellaOps findings directory"
    )
    parser.add_argument(
        "--baseline",
        type=Path,
        required=True,
        help="Path to baseline scanner results JSON"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("bench/results/comparison.csv"),
        help="Output CSV path"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Also output JSON summary"
    )

    args = parser.parse_args()

    # Resolve paths
    repo_root = Path(__file__).parent.parent.parent

    def resolve(path):
        """Anchor a relative path at the repository root."""
        return path if path.is_absolute() else repo_root / path

    stellaops_path = resolve(args.stellaops)
    baseline_path = resolve(args.baseline)
    output_path = resolve(args.output)

    print(f"StellaOps findings: {stellaops_path}")
    print(f"Baseline results: {baseline_path}")

    # A mistyped path would otherwise silently produce an empty side and a
    # misleading report, so say so on stderr.
    if not stellaops_path.is_dir():
        print(
            f"warning: StellaOps findings directory not found: {stellaops_path}",
            file=sys.stderr,
        )
    if not baseline_path.exists():
        print(
            f"warning: baseline results file not found: {baseline_path}",
            file=sys.stderr,
        )

    # Load findings
    stellaops_findings = load_stellaops_findings(stellaops_path)
    print(f"Loaded {len(stellaops_findings)} StellaOps findings")

    baseline_findings = load_baseline_findings(baseline_path)
    print(f"Loaded {len(baseline_findings)} baseline findings")

    # Compare
    results = compare_findings(stellaops_findings, baseline_findings)
    metrics = compute_comparison_metrics(results)

    print("\nComparison Results:")
    print(f" Total comparisons: {metrics['total_comparisons']}")
    print(f" Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
    print(f" FP reductions: {metrics['fp_reductions']}")
    print(f" StellaOps unique: {metrics['stellaops_unique']}")
    print(f" Baseline unique: {metrics['baseline_unique']}")

    # Write outputs
    write_comparison_csv(results, output_path)
    print(f"\nWrote comparison to: {output_path}")

    if args.json:
        json_path = output_path.with_suffix('.json')
        if json_path == output_path:
            # --output already ends in .json; pick a distinct name so the
            # summary does not overwrite the CSV that was just written.
            json_path = output_path.with_name(output_path.name + '.json')

        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "metrics": metrics,
                "results": [
                    {
                        "cve_id": r.cve_id,
                        "purl": r.purl,
                        "stellaops_status": r.stellaops_status,
                        "baseline_status": r.baseline_status,
                        "agreement": r.agreement,
                        "reachability": r.stellaops_reachability,
                        "notes": r.notes
                    }
                    for r in results
                ]
            }, f, indent=2, sort_keys=True)
        print(f"Wrote JSON to: {json_path}")

    return 0
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|