#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-or-later
# BENCH-AUTO-401-019: Baseline scanner comparison script

"""
Compare StellaOps findings against baseline scanner results.

Findings from both sides are matched on the pair (CVE id, package URL).
The script reports:
- Agreements between StellaOps and the baseline (count and rate)
- False-positive reductions (baseline says "affected", StellaOps says
  "not_affected" with an unreachable code path)
- Findings reported only by StellaOps / only by the baseline

NOTE: MTTD and reproducibility scoring are not computed by this script.

Usage:
    python bench/tools/compare.py --stellaops PATH --baseline PATH --output PATH
"""

import argparse
import csv
import json
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional

# Raw baseline status/severity values that normalize to "affected".
# NOTE(review): "low" severity is not listed and therefore maps to "unknown";
# confirm whether that is intentional.
_AFFECTED_STATUSES = frozenset(
    {"affected", "vulnerable", "high", "critical", "medium"}
)
# Raw baseline status values that normalize to "not_affected".
_NOT_AFFECTED_STATUSES = frozenset({"not_affected", "fixed", "not_vulnerable"})
# Keys under which a baseline report may store its list of findings,
# in lookup priority order.
_BASELINE_LIST_KEYS = ("vulnerabilities", "findings", "results")


@dataclass
class Finding:
    """A vulnerability finding."""

    cve_id: str
    purl: str
    status: str  # affected, not_affected, unknown (StellaOps: raw VEX status)
    reachability: str  # reachable, unreachable, unknown
    source: str  # stellaops, baseline
    detected_at: str = ""
    evidence_hash: str = ""


@dataclass
class ComparisonResult:
    """Result of comparing two findings."""

    cve_id: str
    purl: str
    stellaops_status: str  # "not_found" when only the baseline reported it
    baseline_status: str  # "not_found" when only StellaOps reported it
    agreement: bool
    stellaops_reachability: str
    notes: str = ""


def _product_id(product: Any) -> str:
    """Return the identifier of an OpenVEX product entry.

    Accepts both the object form (``{"@id": ...}``) and a bare string
    identifier; anything else yields an empty string.
    """
    if isinstance(product, dict):
        return product.get("@id") or ""
    if isinstance(product, str):
        return product
    return ""


def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
    """Load StellaOps findings from bench/findings directory.

    Each sub-directory that contains both ``metadata.json`` and
    ``decision.openvex.json`` contributes one finding, built from the first
    OpenVEX statement. Sub-directories are visited in sorted order so the
    output is deterministic.

    Args:
        findings_dir: Directory holding one sub-directory per finding.

    Returns:
        The loaded findings; empty if ``findings_dir`` is not a directory.

    Raises:
        json.JSONDecodeError: If one of the JSON files is malformed.
    """
    findings: list[Finding] = []

    # is_dir() (rather than exists()) so a stray file path does not blow up
    # in iterdir().
    if not findings_dir.is_dir():
        return findings

    for finding_dir in sorted(findings_dir.iterdir()):
        if not finding_dir.is_dir():
            continue

        metadata_path = finding_dir / "metadata.json"
        openvex_path = finding_dir / "decision.openvex.json"
        if not metadata_path.exists() or not openvex_path.exists():
            continue

        with open(metadata_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        with open(openvex_path, "r", encoding="utf-8") as f:
            openvex = json.load(f)

        statements = openvex.get("statements") or []
        if not statements:
            continue

        # Only the first statement (and its first product) is considered.
        stmt = statements[0]
        products = stmt.get("products") or []
        purl = _product_id(products[0]) if products else ""

        findings.append(Finding(
            cve_id=metadata.get("cve_id", ""),
            purl=purl,
            status=stmt.get("status", "unknown"),
            # The finding's "variant" metadata doubles as its reachability.
            reachability=metadata.get("variant", "unknown"),
            source="stellaops",
            detected_at=openvex.get("timestamp", ""),
            evidence_hash=metadata.get("evidence_hash", ""),
        ))

    return findings


def _first_present(record: dict, keys: tuple, default: Any = "") -> Any:
    """Return the value of the first key in ``keys`` that is set and not None."""
    for key in keys:
        value = record.get(key)
        if value is not None:
            return value
    return default


def _extract_baseline_entries(data: Any) -> list:
    """Return the list of raw finding records from a parsed baseline report.

    Supports a top-level JSON array as well as an object that stores the
    array under one of ``_BASELINE_LIST_KEYS``.
    """
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        for key in _BASELINE_LIST_KEYS:
            entries = data.get(key)
            if isinstance(entries, list):
                return entries
    return []


def _normalize_baseline_status(raw: Any) -> str:
    """Map a raw baseline status/severity value to our normalized form."""
    # str() guards against numeric values; None never reaches here as a
    # usable status because _first_present skips it.
    text = str(raw).strip().lower()
    if text in _AFFECTED_STATUSES:
        return "affected"
    if text in _NOT_AFFECTED_STATUSES:
        return "not_affected"
    return "unknown"


def load_baseline_findings(baseline_path: Path) -> list[Finding]:
    """Load baseline scanner findings from JSON file.

    Supports multiple baseline formats: the findings may be a top-level
    array or live under ``vulnerabilities``, ``findings`` or ``results``;
    the CVE id may be ``cve_id``, ``id`` or ``vulnerability_id``; the package
    URL may be ``purl`` or ``package_url``; the status is taken from
    ``status`` and falls back to ``severity``.

    Args:
        baseline_path: Path to the baseline scanner's JSON report.

    Returns:
        The normalized findings; empty if ``baseline_path`` is not a file.

    Raises:
        json.JSONDecodeError: If the report is not valid JSON.
    """
    findings: list[Finding] = []

    if not baseline_path.is_file():
        return findings

    with open(baseline_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    for vuln in _extract_baseline_entries(data):
        if not isinstance(vuln, dict):
            # Skip malformed entries instead of aborting the whole load.
            continue

        raw_status = _first_present(vuln, ("status", "severity"))
        findings.append(Finding(
            cve_id=str(_first_present(vuln, ("cve_id", "id", "vulnerability_id"))),
            purl=str(_first_present(vuln, ("purl", "package_url"))),
            status=_normalize_baseline_status(raw_status),
            # Baseline scanners typically don't have reachability
            reachability="unknown",
            source="baseline",
        ))

    return findings


def _describe_match(sf: Finding, bf: Finding) -> str:
    """Build the human-readable note for a finding present on both sides."""
    if sf.status == bf.status:
        # Yields "Both agree: affected" / "Both agree: not affected".
        return f"Both agree: {(sf.status or 'unknown').replace('_', ' ')}"
    if sf.status == "not_affected" and bf.status == "affected":
        if sf.reachability == "unreachable":
            return "FP reduction: StellaOps correctly identified unreachable code"
        return "Disagreement: investigate"
    if sf.status == "affected" and bf.status == "not_affected":
        return "StellaOps detected, baseline missed"
    # Any other mismatch (e.g. one side "unknown") still needs a look.
    return "Disagreement: investigate"


def compare_findings(
    stellaops: list[Finding],
    baseline: list[Finding]
) -> list[ComparisonResult]:
    """Compare StellaOps findings with baseline.

    Findings are matched on ``(cve_id, purl)``. When the baseline contains
    the same key more than once, the last entry wins.

    Args:
        stellaops: Findings produced by StellaOps.
        baseline: Findings produced by the baseline scanner.

    Returns:
        One result per StellaOps finding (in input order), followed by one
        result per baseline key that StellaOps did not report.
    """
    # Index baseline by CVE+purl
    baseline_index: dict[tuple[str, str], Finding] = {}
    for bf in baseline:
        baseline_index[(bf.cve_id, bf.purl)] = bf

    results: list[ComparisonResult] = []
    stellaops_keys: set[tuple[str, str]] = set()

    # Compare each StellaOps finding
    for sf in stellaops:
        key = (sf.cve_id, sf.purl)
        stellaops_keys.add(key)
        bf = baseline_index.get(key)

        if bf is None:
            # StellaOps found something baseline didn't
            results.append(ComparisonResult(
                cve_id=sf.cve_id,
                purl=sf.purl,
                stellaops_status=sf.status,
                baseline_status="not_found",
                agreement=False,
                stellaops_reachability=sf.reachability,
                notes="Only found by StellaOps",
            ))
            continue

        results.append(ComparisonResult(
            cve_id=sf.cve_id,
            purl=sf.purl,
            stellaops_status=sf.status,
            baseline_status=bf.status,
            agreement=sf.status == bf.status,
            stellaops_reachability=sf.reachability,
            notes=_describe_match(sf, bf),
        ))

    # Find baseline-only findings. Iterating the index (not the raw list)
    # emits each key once, so duplicate baseline rows don't inflate counts.
    for key, bf in baseline_index.items():
        if key in stellaops_keys:
            continue
        results.append(ComparisonResult(
            cve_id=bf.cve_id,
            purl=bf.purl,
            stellaops_status="not_found",
            baseline_status=bf.status,
            agreement=False,
            stellaops_reachability="unknown",
            notes="Only found by baseline",
        ))

    return results


def compute_comparison_metrics(results: list[ComparisonResult]) -> dict[str, Any]:
    """Compute comparison metrics.

    The FP-reduction and unique-finding counters are derived from the
    ``notes`` text that ``compare_findings`` attaches to each result.

    Args:
        results: Comparison results to summarize.

    Returns:
        A dict with ``total_comparisons``, ``agreements``, ``agreement_rate``
        (always a float; 0.0 for empty input), ``fp_reductions``,
        ``stellaops_unique``, ``baseline_unique`` and a UTC ``generated_at``
        timestamp.
    """
    total = len(results)
    agreements = sum(1 for r in results if r.agreement)
    fp_reductions = sum(1 for r in results if "FP reduction" in r.notes)
    stellaops_only = sum(1 for r in results if "Only found by StellaOps" in r.notes)
    baseline_only = sum(1 for r in results if "Only found by baseline" in r.notes)

    return {
        "total_comparisons": total,
        "agreements": agreements,
        "agreement_rate": agreements / total if total > 0 else 0.0,
        "fp_reductions": fp_reductions,
        "stellaops_unique": stellaops_only,
        "baseline_unique": baseline_only,
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }


def write_comparison_csv(results: list[ComparisonResult], output_path: Path) -> None:
    """Write comparison results to CSV.

    Creates the parent directory of ``output_path`` if needed and writes a
    header row followed by one row per result.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # newline='' lets the csv module control line endings.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "cve_id",
            "purl",
            "stellaops_status",
            "baseline_status",
            "agreement",
            "reachability",
            "notes",
        ])
        writer.writerows(
            [
                r.cve_id,
                r.purl,
                r.stellaops_status,
                r.baseline_status,
                "yes" if r.agreement else "no",
                r.stellaops_reachability,
                r.notes,
            ]
            for r in results
        )


def main(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code (0 on success).
    """
    parser = argparse.ArgumentParser(
        description="Compare StellaOps findings against baseline scanner"
    )
    parser.add_argument(
        "--stellaops",
        type=Path,
        default=Path("bench/findings"),
        help="Path to StellaOps findings directory"
    )
    parser.add_argument(
        "--baseline",
        type=Path,
        required=True,
        help="Path to baseline scanner results JSON"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("bench/results/comparison.csv"),
        help="Output CSV path"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Also output JSON summary"
    )

    args = parser.parse_args(argv)

    # Resolve paths: relative paths are anchored at the repository root
    # (three levels above this file), not at the current working directory.
    repo_root = Path(__file__).parent.parent.parent
    stellaops_path = args.stellaops if args.stellaops.is_absolute() else repo_root / args.stellaops
    baseline_path = args.baseline if args.baseline.is_absolute() else repo_root / args.baseline
    output_path = args.output if args.output.is_absolute() else repo_root / args.output

    print(f"StellaOps findings: {stellaops_path}")
    print(f"Baseline results: {baseline_path}")

    # A missing baseline is treated as empty, but say so: otherwise every
    # finding is silently reported as "Only found by StellaOps".
    if not baseline_path.is_file():
        print(f"Warning: baseline file not found: {baseline_path}", file=sys.stderr)

    # Load findings
    stellaops_findings = load_stellaops_findings(stellaops_path)
    print(f"Loaded {len(stellaops_findings)} StellaOps findings")

    baseline_findings = load_baseline_findings(baseline_path)
    print(f"Loaded {len(baseline_findings)} baseline findings")

    # Compare
    results = compare_findings(stellaops_findings, baseline_findings)
    metrics = compute_comparison_metrics(results)

    print("\nComparison Results:")
    print(f" Total comparisons: {metrics['total_comparisons']}")
    print(f" Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
    print(f" FP reductions: {metrics['fp_reductions']}")
    print(f" StellaOps unique: {metrics['stellaops_unique']}")
    print(f" Baseline unique: {metrics['baseline_unique']}")

    # Write outputs
    write_comparison_csv(results, output_path)
    print(f"\nWrote comparison to: {output_path}")

    if args.json:
        json_path = output_path.with_suffix('.json')
        if json_path == output_path:
            # --output already ends in .json; don't overwrite the CSV data.
            json_path = output_path.with_name(output_path.stem + ".summary.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "metrics": metrics,
                "results": [
                    {
                        "cve_id": r.cve_id,
                        "purl": r.purl,
                        "stellaops_status": r.stellaops_status,
                        "baseline_status": r.baseline_status,
                        "agreement": r.agreement,
                        "reachability": r.stellaops_reachability,
                        "notes": r.notes
                    }
                    for r in results
                ]
            }, f, indent=2, sort_keys=True)
        print(f"Wrote JSON to: {json_path}")

    return 0


if __name__ == "__main__":
    sys.exit(main())