up

2025-12-14 15:50:38 +02:00
parent f1a39c4ce3
commit 233873f620
249 changed files with 29746 additions and 154 deletions
--- a/bench/tools/compare.py
+++ b/bench/tools/compare.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# BENCH-AUTO-401-019: Baseline scanner comparison script
+
+"""
+Compare StellaOps findings against baseline scanner results.
+
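+Generates comparison metrics:
+- Agreements and agreement rate (both scanners report the same status)
+- False-positive reductions (baseline says affected, StellaOps says
+  not_affected because the code path is unreachable)
+- Findings unique to StellaOps and findings unique to the baseline
+
+Note: MTTD (mean time to detect) and a reproducibility score are not
+computed by this script.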
+
+Usage:
+    python bench/tools/compare.py [--stellaops PATH] --baseline PATH [--output PATH] [--json]
+"""
+
+import argparse
+import csv
+import json
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+
+@dataclass
+class Finding:
+    """A single vulnerability finding reported by one scanner.
+
+    Both loaders in this file normalise their input into this shape so
+    StellaOps and baseline records can be compared field by field.
+    """
+    cve_id: str  # vulnerability identifier; "" when the record has none
+    purl: str  # package URL of the affected product; "" when absent
+    status: str  # affected, not_affected (loaders fall back to "unknown")
+    reachability: str  # reachable, unreachable, unknown
+    source: str  # stellaops, baseline
+    detected_at: str = ""  # timestamp string; left empty when not available
+    evidence_hash: str = ""  # left empty when not available
+
+
+@dataclass
+class ComparisonResult:
+    """Outcome of comparing one (CVE, purl) pair across the two scanners."""
+    cve_id: str
+    purl: str
+    stellaops_status: str  # "not_found" when only the baseline reported the pair
+    baseline_status: str  # "not_found" when only StellaOps reported the pair
+    agreement: bool  # True only when both scanners reported the same status
+    stellaops_reachability: str  # "unknown" for baseline-only rows
+    notes: str = ""  # human-readable classification; the metrics match on this text
+
+
+def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
+    """Load StellaOps findings from a bench/findings-style directory.
+
+    Every immediate sub-directory that contains both ``metadata.json`` and
+    ``decision.openvex.json`` yields one finding, built from the first
+    OpenVEX statement only. Sub-directories are visited in sorted order so
+    the result is deterministic.
+
+    Args:
+        findings_dir: Directory holding one sub-directory per finding.
+
+    Returns:
+        The loaded findings; an empty list when ``findings_dir`` is missing
+        or is not a directory.
+    """
+    findings: list[Finding] = []
+
+    # is_dir() is also False for a missing path; checking it (rather than
+    # exists()) keeps iterdir() from raising when a plain file is passed.
+    if not findings_dir.is_dir():
+        return findings
+
+    for finding_dir in sorted(findings_dir.iterdir()):
+        if not finding_dir.is_dir():
+            continue
+
+        metadata_path = finding_dir / "metadata.json"
+        openvex_path = finding_dir / "decision.openvex.json"
+
+        # Incomplete finding directories are skipped, not treated as errors.
+        if not metadata_path.is_file() or not openvex_path.is_file():
+            continue
+
+        with open(metadata_path, 'r', encoding='utf-8') as f:
+            metadata = json.load(f)
+
+        with open(openvex_path, 'r', encoding='utf-8') as f:
+            openvex = json.load(f)
+
+        # "or []" also covers an explicit JSON null for the key.
+        statements = openvex.get("statements") or []
+        if not statements:
+            continue
+
+        stmt = statements[0]
+        products = stmt.get("products") or []
+        first_product = products[0] if products else ""
+        # Product entries are normally objects carrying "@id"; a bare
+        # identifier string is accepted as well instead of raising.
+        # NOTE(review): confirm which OpenVEX versions the bench emits.
+        if isinstance(first_product, dict):
+            purl = first_product.get("@id", "")
+        else:
+            purl = str(first_product)
+
+        findings.append(Finding(
+            cve_id=metadata.get("cve_id", ""),
+            purl=purl,
+            status=stmt.get("status", "unknown"),
+            # The metadata "variant" field is used as the reachability label.
+            reachability=metadata.get("variant", "unknown"),
+            source="stellaops",
+            detected_at=openvex.get("timestamp", ""),
+            evidence_hash=metadata.get("evidence_hash", "")
+        ))
+
+    return findings
+
+
+def load_baseline_findings(baseline_path: Path) -> list[Finding]:
+    """Load baseline scanner findings from a JSON file.
+
+    The vulnerability list is read from the first of the top-level keys
+    ``vulnerabilities``, ``findings`` or ``results`` that is present; a
+    top-level JSON array is accepted as the list itself. Each entry's
+    ``status`` (or, failing that, ``severity``) is normalised to
+    ``affected``, ``not_affected`` or ``unknown``.
+
+    Args:
+        baseline_path: Path to the baseline scanner's JSON output.
+
+    Returns:
+        The normalised findings; an empty list when the file does not exist.
+    """
+    findings: list[Finding] = []
+
+    if not baseline_path.exists():
+        return findings
+
+    with open(baseline_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # Support multiple baseline formats, including a bare top-level array.
+    if isinstance(data, list):
+        vulns = data
+    else:
+        vulns = data.get("vulnerabilities", data.get("findings", data.get("results", [])))
+
+    # NOTE(review): "low" severity is not in the affected set and therefore
+    # maps to "unknown" — confirm that this is intentional.
+    affected_values = {"affected", "vulnerable", "high", "critical", "medium"}
+    not_affected_values = {"not_affected", "fixed", "not_vulnerable"}
+
+    # "or []" tolerates an explicit JSON null for the list.
+    for vuln in vulns or []:
+        cve_id = vuln.get("cve_id", vuln.get("id", vuln.get("vulnerability_id", "")))
+        purl = vuln.get("purl", vuln.get("package_url", ""))
+
+        # Map baseline status to our normalized form. The value is coerced
+        # to str first so that null or numeric values fall through to
+        # "unknown" instead of raising on .lower().
+        raw_status = vuln.get("status", vuln.get("severity", ""))
+        normalized = str(raw_status or "").lower()
+        if normalized in affected_values:
+            status = "affected"
+        elif normalized in not_affected_values:
+            status = "not_affected"
+        else:
+            status = "unknown"
+
+        findings.append(Finding(
+            cve_id=cve_id,
+            purl=purl,
+            status=status,
+            reachability="unknown",  # Baseline scanners typically don't have reachability
+            source="baseline"
+        ))
+
+    return findings
+
+
+def compare_findings(
+    stellaops: list[Finding],
+    baseline: list[Finding]
+) -> list[ComparisonResult]:
+    """Pair up StellaOps and baseline findings by (CVE, purl).
+
+    Output order: one row per StellaOps finding in input order, followed
+    by one row per baseline finding whose key no StellaOps finding has.
+    When several baseline findings share a key, the last one is the one
+    that StellaOps findings are compared against.
+    """
+
+    def describe(ours: Finding, theirs: Finding) -> str:
+        # Classify a matched pair; status combinations not listed get "".
+        pair = (ours.status, theirs.status)
+        if pair == ("not_affected", "not_affected"):
+            return "Both agree: not affected"
+        if pair == ("affected", "affected"):
+            return "Both agree: affected"
+        if pair == ("not_affected", "affected"):
+            if ours.reachability == "unreachable":
+                return "FP reduction: StellaOps correctly identified unreachable code"
+            return "Disagreement: investigate"
+        if pair == ("affected", "not_affected"):
+            return "StellaOps detected, baseline missed"
+        return ""
+
+    # Later duplicates overwrite earlier ones, as with a plain assignment loop.
+    by_key = {(item.cve_id, item.purl): item for item in baseline}
+
+    rows = []
+    seen_keys = set()
+    for ours in stellaops:
+        lookup = (ours.cve_id, ours.purl)
+        seen_keys.add(lookup)
+        theirs = by_key.get(lookup)
+
+        if theirs is None:
+            # Baseline has no record for this pair.
+            rows.append(ComparisonResult(
+                cve_id=ours.cve_id,
+                purl=ours.purl,
+                stellaops_status=ours.status,
+                baseline_status="not_found",
+                agreement=False,
+                stellaops_reachability=ours.reachability,
+                notes="Only found by StellaOps"
+            ))
+            continue
+
+        rows.append(ComparisonResult(
+            cve_id=ours.cve_id,
+            purl=ours.purl,
+            stellaops_status=ours.status,
+            baseline_status=theirs.status,
+            agreement=ours.status == theirs.status,
+            stellaops_reachability=ours.reachability,
+            notes=describe(ours, theirs)
+        ))
+
+    # Baseline findings that StellaOps never reported.
+    for theirs in baseline:
+        if (theirs.cve_id, theirs.purl) in seen_keys:
+            continue
+        rows.append(ComparisonResult(
+            cve_id=theirs.cve_id,
+            purl=theirs.purl,
+            stellaops_status="not_found",
+            baseline_status=theirs.status,
+            agreement=False,
+            stellaops_reachability="unknown",
+            notes="Only found by baseline"
+        ))
+
+    return rows
+
+
+def compute_comparison_metrics(results: list[ComparisonResult]) -> dict:
+    """Summarise comparison rows into counts and an agreement rate.
+
+    The FP-reduction and scanner-unique counts are derived from marker
+    text inside each row's ``notes``. ``generated_at`` is the current UTC
+    time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
+    """
+    row_count = len(results)
+    agreed = 0
+    fp_reduced = 0
+    only_stellaops = 0
+    only_baseline = 0
+
+    for row in results:
+        if row.agreement:
+            agreed += 1
+        if row.notes and "FP reduction" in row.notes:
+            fp_reduced += 1
+        if "Only found by StellaOps" in row.notes:
+            only_stellaops += 1
+        if "Only found by baseline" in row.notes:
+            only_baseline += 1
+
+    # An empty comparison reports a rate of 0 rather than dividing by zero.
+    rate = agreed / row_count if row_count > 0 else 0
+    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    return {
+        "total_comparisons": row_count,
+        "agreements": agreed,
+        "agreement_rate": rate,
+        "fp_reductions": fp_reduced,
+        "stellaops_unique": only_stellaops,
+        "baseline_unique": only_baseline,
+        "generated_at": stamp
+    }
+
+
+def write_comparison_csv(results: list[ComparisonResult], output_path: Path):
+    """Write the comparison rows to ``output_path`` as a UTF-8 CSV file.
+
+    Missing parent directories are created. The file starts with a header
+    row; ``agreement`` is written as "yes" or "no".
+    """
+    header = (
+        "cve_id",
+        "purl",
+        "stellaops_status",
+        "baseline_status",
+        "agreement",
+        "reachability",
+        "notes",
+    )
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # newline='' hands line-ending control to the csv module.
+    with open(output_path, 'w', newline='', encoding='utf-8') as handle:
+        out = csv.writer(handle)
+        out.writerow(header)
+        out.writerows(
+            (
+                row.cve_id,
+                row.purl,
+                row.stellaops_status,
+                row.baseline_status,
+                "yes" if row.agreement else "no",
+                row.stellaops_reachability,
+                row.notes,
+            )
+            for row in results
+        )
+
+
+def main(argv: "list[str] | None" = None) -> int:
+    """Run the comparison from the command line.
+
+    Loads both sets of findings, prints a summary to stdout, writes the
+    CSV report and, with ``--json``, a JSON file next to it (same path
+    with a ``.json`` suffix).
+
+    Args:
+        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
+            (the default) keeps the normal command-line behaviour.
+
+    Returns:
+        The process exit code; always 0.
+    """
+    parser = argparse.ArgumentParser(
+        description="Compare StellaOps findings against baseline scanner"
+    )
+    parser.add_argument(
+        "--stellaops",
+        type=Path,
+        default=Path("bench/findings"),
+        help="Path to StellaOps findings directory"
+    )
+    parser.add_argument(
+        "--baseline",
+        type=Path,
+        required=True,
+        help="Path to baseline scanner results JSON"
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=Path("bench/results/comparison.csv"),
+        help="Output CSV path"
+    )
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Also output JSON summary"
+    )
+
+    args = parser.parse_args(argv)
+
+    # Resolve paths: relative ones are anchored three directories above
+    # this file (the repository root), not at the current working directory.
+    repo_root = Path(__file__).parent.parent.parent
+    stellaops_path = args.stellaops if args.stellaops.is_absolute() else repo_root / args.stellaops
+    baseline_path = args.baseline if args.baseline.is_absolute() else repo_root / args.baseline
+    output_path = args.output if args.output.is_absolute() else repo_root / args.output
+
+    print(f"StellaOps findings: {stellaops_path}")
+    print(f"Baseline results: {baseline_path}")
+
+    # The loaders return an empty list for a missing input, so warn on
+    # stderr; otherwise a mistyped path looks like a valid empty comparison.
+    if not stellaops_path.exists():
+        print(f"warning: StellaOps findings path not found: {stellaops_path}", file=sys.stderr)
+    if not baseline_path.exists():
+        print(f"warning: baseline results file not found: {baseline_path}", file=sys.stderr)
+
+    # Load findings
+    stellaops_findings = load_stellaops_findings(stellaops_path)
+    print(f"Loaded {len(stellaops_findings)} StellaOps findings")
+
+    baseline_findings = load_baseline_findings(baseline_path)
+    print(f"Loaded {len(baseline_findings)} baseline findings")
+
+    # Compare
+    results = compare_findings(stellaops_findings, baseline_findings)
+    metrics = compute_comparison_metrics(results)
+
+    print("\nComparison Results:")
+    print(f"  Total comparisons: {metrics['total_comparisons']}")
+    print(f"  Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
+    print(f"  FP reductions: {metrics['fp_reductions']}")
+    print(f"  StellaOps unique: {metrics['stellaops_unique']}")
+    print(f"  Baseline unique: {metrics['baseline_unique']}")
+
+    # Write outputs (the CSV writer also creates the output directory,
+    # which the JSON file below relies on).
+    write_comparison_csv(results, output_path)
+    print(f"\nWrote comparison to: {output_path}")
+
+    if args.json:
+        json_path = output_path.with_suffix('.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump({
+                "metrics": metrics,
+                "results": [
+                    {
+                        "cve_id": r.cve_id,
+                        "purl": r.purl,
+                        "stellaops_status": r.stellaops_status,
+                        "baseline_status": r.baseline_status,
+                        "agreement": r.agreement,
+                        "reachability": r.stellaops_reachability,
+                        "notes": r.notes
+                    }
+                    for r in results
+                ]
+            }, f, indent=2, sort_keys=True)
+        print(f"Wrote JSON to: {json_path}")
+
+    return 0
+
+
+# Script entry point: propagate main()'s return value as the exit status.
+if __name__ == "__main__":
+    sys.exit(main())