# git.stella-ops.org/bench/tools/compare.py

#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-or-later
# BENCH-AUTO-401-019: Baseline scanner comparison script

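"""
Compare StellaOps findings against baseline scanner results.

Generates comparison metrics:
- Agreement count and agreement rate between StellaOps and the baseline
- False-positive reductions (baseline reports "affected", StellaOps reports
  "not_affected" on an unreachable code path)
- Findings reported by only one of the two scanners

Note: true-positive counts, MTTD (mean time to detect) and a reproducibility
score are not computed by this script.

Usage:
    python bench/tools/compare.py --stellaops PATH --baseline PATH --output PATH [--json]

Relative paths are resolved against the repository root, not the current
working directory.
"""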

import argparse
import csv
import json
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


@dataclass
class Finding:
    """One scanner verdict for a single CVE / package pair.

    Attributes:
        cve_id: Identifier of the vulnerability.
        purl: Package URL of the component the verdict applies to.
        status: Verdict, e.g. "affected" or "not_affected".
        reachability: "reachable", "unreachable" or "unknown".
        source: Which tool produced the finding: "stellaops" or "baseline".
        detected_at: Timestamp string; empty when none was recorded.
        evidence_hash: Evidence hash string; empty when none was recorded.
    """

    cve_id: str
    purl: str
    status: str
    reachability: str
    source: str
    detected_at: str = ""
    evidence_hash: str = ""


@dataclass
class ComparisonResult:
    """Outcome of lining up a StellaOps finding with a baseline finding.

    Attributes:
        cve_id: Identifier of the vulnerability being compared.
        purl: Package URL of the component being compared.
        stellaops_status: Status reported by StellaOps, or "not_found" when
            only the baseline reported this CVE/package pair.
        baseline_status: Status reported by the baseline, or "not_found" when
            only StellaOps reported this CVE/package pair.
        agreement: True when both tools reported the same status.
        stellaops_reachability: Reachability recorded by StellaOps.
        notes: Human-readable explanation of the outcome; may be empty.
    """

    cve_id: str
    purl: str
    stellaops_status: str
    baseline_status: str
    agreement: bool
    stellaops_reachability: str
    notes: str = ""


def load_stellaops_findings(findings_dir: Path) -> list[Finding]:
    """Load StellaOps findings from a bench/findings-style directory.

    Every immediate subdirectory that contains both ``metadata.json`` and
    ``decision.openvex.json`` contributes one finding, built from the first
    OpenVEX statement and that statement's first product. Subdirectories are
    visited in sorted order so the result order is deterministic.

    Args:
        findings_dir: Directory holding one subdirectory per finding.

    Returns:
        Findings tagged ``source="stellaops"``. The list is empty when
        ``findings_dir`` does not exist or is not a directory.

    Raises:
        json.JSONDecodeError: If one of the JSON files is malformed.
    """
    findings: list[Finding] = []

    # is_dir() instead of exists(): calling iterdir() on a regular file raises
    # NotADirectoryError, so a mistyped path must be treated like a missing one.
    if not findings_dir.is_dir():
        return findings

    for finding_dir in sorted(findings_dir.iterdir()):
        if not finding_dir.is_dir():
            continue

        metadata_path = finding_dir / "metadata.json"
        openvex_path = finding_dir / "decision.openvex.json"

        # Incomplete finding directories are skipped rather than reported.
        if not (metadata_path.is_file() and openvex_path.is_file()):
            continue

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        with open(openvex_path, 'r', encoding='utf-8') as f:
            openvex = json.load(f)

        # "or []" also covers an explicit JSON null for the key.
        statements = openvex.get("statements") or []
        if not statements:
            continue

        # Only the first statement and its first product are considered.
        stmt = statements[0]
        products = stmt.get("products") or []
        first_product = products[0] if products else None

        # A product entry is either an object carrying "@id" or a bare
        # identifier string; anything else yields an empty purl.
        if isinstance(first_product, dict):
            purl = first_product.get("@id", "")
        elif isinstance(first_product, str):
            purl = first_product
        else:
            purl = ""

        findings.append(Finding(
            cve_id=metadata.get("cve_id", ""),
            purl=purl,
            status=stmt.get("status", "unknown"),
            # The metadata "variant" field is what this script uses as reachability.
            reachability=metadata.get("variant", "unknown"),
            source="stellaops",
            detected_at=openvex.get("timestamp", ""),
            evidence_hash=metadata.get("evidence_hash", "")
        ))

    return findings


def load_baseline_findings(baseline_path: Path) -> list[Finding]:
    """Load baseline scanner findings from a JSON file.

    The file may be either a JSON object holding the entries under
    ``vulnerabilities``, ``findings`` or ``results`` (the first of those keys
    that is present wins), or a top-level JSON array of entries. Entries that
    are not JSON objects are skipped.

    Each entry's ``status`` (falling back to ``severity``) is normalized to
    "affected", "not_affected" or "unknown".

    Args:
        baseline_path: Path to the baseline scanner's JSON export.

    Returns:
        Findings tagged ``source="baseline"`` with reachability "unknown".
        The list is empty when ``baseline_path`` is not an existing file.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
    """
    findings: list[Finding] = []

    # is_file() instead of exists(): a directory path would otherwise fail in open().
    if not baseline_path.is_file():
        return findings

    with open(baseline_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Support multiple baseline formats
    if isinstance(data, list):
        vulns = data
    elif isinstance(data, dict):
        vulns = data.get("vulnerabilities", data.get("findings", data.get("results", [])))
    else:
        vulns = []

    # NOTE(review): "low" severity is not listed and therefore maps to
    # "unknown" — confirm this is intended.
    affected_values = ("affected", "vulnerable", "high", "critical", "medium")
    not_affected_values = ("not_affected", "fixed", "not_vulnerable")

    # "or []" covers an explicit JSON null under the chosen key.
    for vuln in vulns or []:
        if not isinstance(vuln, dict):
            continue

        cve_id = vuln.get("cve_id", vuln.get("id", vuln.get("vulnerability_id", "")))
        purl = vuln.get("purl", vuln.get("package_url", ""))

        # Map baseline status to our normalized form; a null or non-string
        # value is treated as an unrecognized status instead of crashing.
        raw_status = vuln.get("status", vuln.get("severity", ""))
        normalized = raw_status.lower() if isinstance(raw_status, str) else ""
        if normalized in affected_values:
            status = "affected"
        elif normalized in not_affected_values:
            status = "not_affected"
        else:
            status = "unknown"

        findings.append(Finding(
            cve_id=cve_id,
            purl=purl,
            status=status,
            reachability="unknown",  # Baseline scanners typically don't have reachability
            source="baseline"
        ))

    return findings


def compare_findings(
    stellaops: list[Finding],
    baseline: list[Finding]
) -> list[ComparisonResult]:
    """Line up StellaOps findings with baseline findings by (CVE, purl).

    The result lists one row per StellaOps finding, in input order, followed
    by one row per baseline finding whose (CVE, purl) key StellaOps did not
    report. When the baseline holds several findings with the same key, the
    last one is the one StellaOps findings are compared against.

    Args:
        stellaops: Findings produced by StellaOps.
        baseline: Findings produced by the baseline scanner.

    Returns:
        Comparison rows; rows for findings seen by only one tool carry
        "not_found" as the other tool's status and ``agreement=False``.
    """

    def describe(ours, theirs) -> str:
        """Return the explanatory note for a pair found by both tools."""
        pair = (ours.status, theirs.status)
        if pair == ("not_affected", "not_affected"):
            return "Both agree: not affected"
        if pair == ("affected", "affected"):
            return "Both agree: affected"
        if pair == ("not_affected", "affected"):
            if ours.reachability == "unreachable":
                return "FP reduction: StellaOps correctly identified unreachable code"
            return "Disagreement: investigate"
        if pair == ("affected", "not_affected"):
            return "StellaOps detected, baseline missed"
        # Any other status combination is left without a note.
        return ""

    # Later duplicates overwrite earlier ones, so the last baseline entry wins.
    by_key = {(entry.cve_id, entry.purl): entry for entry in baseline}

    rows = []
    for ours in stellaops:
        theirs = by_key.get((ours.cve_id, ours.purl))

        if not theirs:
            # Nothing in the baseline for this CVE/package pair.
            rows.append(ComparisonResult(
                cve_id=ours.cve_id,
                purl=ours.purl,
                stellaops_status=ours.status,
                baseline_status="not_found",
                agreement=False,
                stellaops_reachability=ours.reachability,
                notes="Only found by StellaOps"
            ))
            continue

        rows.append(ComparisonResult(
            cve_id=ours.cve_id,
            purl=ours.purl,
            stellaops_status=ours.status,
            baseline_status=theirs.status,
            agreement=ours.status == theirs.status,
            stellaops_reachability=ours.reachability,
            notes=describe(ours, theirs)
        ))

    # Append what only the baseline reported, preserving baseline order.
    seen = {(ours.cve_id, ours.purl) for ours in stellaops}
    rows.extend(
        ComparisonResult(
            cve_id=theirs.cve_id,
            purl=theirs.purl,
            stellaops_status="not_found",
            baseline_status=theirs.status,
            agreement=False,
            stellaops_reachability="unknown",
            notes="Only found by baseline"
        )
        for theirs in baseline
        if (theirs.cve_id, theirs.purl) not in seen
    )

    return rows


def compute_comparison_metrics(results: list[ComparisonResult]) -> dict[str, Any]:
    """Summarize a list of comparison rows into aggregate metrics.

    The FP-reduction and "unique" counters are derived from marker phrases in
    each row's ``notes``. A row whose notes are empty or ``None`` counts
    towards none of them.

    Args:
        results: Comparison rows; only ``agreement`` and ``notes`` are read.

    Returns:
        A dict with ``total_comparisons``, ``agreements``, ``agreement_rate``
        (a float, 0.0 for empty input), ``fp_reductions``,
        ``stellaops_unique``, ``baseline_unique`` and ``generated_at`` (the
        current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``).
    """
    total = len(results)
    agreements = sum(1 for r in results if r.agreement)

    # Normalize once so all three counters treat missing notes the same way;
    # a substring test against None would raise TypeError.
    all_notes = [r.notes or "" for r in results]
    fp_reductions = sum(1 for note in all_notes if "FP reduction" in note)
    stellaops_only = sum(1 for note in all_notes if "Only found by StellaOps" in note)
    baseline_only = sum(1 for note in all_notes if "Only found by baseline" in note)

    return {
        "total_comparisons": total,
        "agreements": agreements,
        # Keep the rate a float even when there is nothing to compare.
        "agreement_rate": agreements / total if total > 0 else 0.0,
        "fp_reductions": fp_reductions,
        "stellaops_unique": stellaops_only,
        "baseline_unique": baseline_only,
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    }


def write_comparison_csv(results: list[ComparisonResult], output_path: Path):
    """Serialize comparison rows to a CSV file with a header line.

    Missing parent directories of ``output_path`` are created, and an
    existing file at that path is overwritten. The ``agreement`` flag is
    written as "yes" or "no".

    Args:
        results: Comparison rows to write, in output order.
        output_path: Destination CSV file.
    """
    header = (
        "cve_id",
        "purl",
        "stellaops_status",
        "baseline_status",
        "agreement",
        "reachability",
        "notes",
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # newline='' leaves line-ending handling to the csv module.
    with output_path.open('w', newline='', encoding='utf-8') as handle:
        sink = csv.writer(handle)
        sink.writerow(header)
        sink.writerows(
            (
                row.cve_id,
                row.purl,
                row.stellaops_status,
                row.baseline_status,
                "yes" if row.agreement else "no",
                row.stellaops_reachability,
                row.notes,
            )
            for row in results
        )


def main():
    """Run the comparison from the command line.

    Parses the CLI options, loads both sets of findings, prints a summary to
    stdout, writes the CSV report and, with ``--json``, a JSON report next to
    it (same path with a ``.json`` suffix).

    Returns:
        0, intended to be used as the process exit status.
    """
    cli = argparse.ArgumentParser(
        description="Compare StellaOps findings against baseline scanner"
    )
    cli.add_argument(
        "--stellaops",
        type=Path,
        default=Path("bench/findings"),
        help="Path to StellaOps findings directory",
    )
    cli.add_argument(
        "--baseline",
        type=Path,
        required=True,
        help="Path to baseline scanner results JSON",
    )
    cli.add_argument(
        "--output",
        type=Path,
        default=Path("bench/results/comparison.csv"),
        help="Output CSV path",
    )
    cli.add_argument(
        "--json",
        action="store_true",
        help="Also output JSON summary",
    )
    options = cli.parse_args()

    # Relative paths are anchored at the third parent of this file (the
    # repository root for bench/tools/compare.py), not at the working directory.
    root = Path(__file__).parent.parent.parent

    def anchored(candidate: Path) -> Path:
        """Return *candidate* unchanged if absolute, else placed under root."""
        if candidate.is_absolute():
            return candidate
        return root / candidate

    stellaops_path = anchored(options.stellaops)
    baseline_path = anchored(options.baseline)
    output_path = anchored(options.output)

    print(f"StellaOps findings: {stellaops_path}")
    print(f"Baseline results: {baseline_path}")

    # Load both inputs, reporting how many findings each one produced.
    stellaops_findings = load_stellaops_findings(stellaops_path)
    print(f"Loaded {len(stellaops_findings)} StellaOps findings")

    baseline_findings = load_baseline_findings(baseline_path)
    print(f"Loaded {len(baseline_findings)} baseline findings")

    # Compare and summarize.
    results = compare_findings(stellaops_findings, baseline_findings)
    metrics = compute_comparison_metrics(results)

    print("\nComparison Results:")
    print(f"  Total comparisons: {metrics['total_comparisons']}")
    print(f"  Agreements: {metrics['agreements']} ({metrics['agreement_rate']:.1%})")
    print(f"  FP reductions: {metrics['fp_reductions']}")
    print(f"  StellaOps unique: {metrics['stellaops_unique']}")
    print(f"  Baseline unique: {metrics['baseline_unique']}")

    # Persist the reports.
    write_comparison_csv(results, output_path)
    print(f"\nWrote comparison to: {output_path}")

    if not args_json_requested(options):
        return 0

    json_path = output_path.with_suffix('.json')
    with open(json_path, 'w', encoding='utf-8') as handle:
        report = {
            "metrics": metrics,
            "results": [
                {
                    "cve_id": row.cve_id,
                    "purl": row.purl,
                    "stellaops_status": row.stellaops_status,
                    "baseline_status": row.baseline_status,
                    "agreement": row.agreement,
                    "reachability": row.stellaops_reachability,
                    "notes": row.notes,
                }
                for row in results
            ],
        }
        json.dump(report, handle, indent=2, sort_keys=True)
    print(f"Wrote JSON to: {json_path}")

    return 0


def args_json_requested(options) -> bool:
    """Return True when the parsed CLI options ask for the JSON report."""
    return bool(options.json)


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())