// -----------------------------------------------------------------------------
// KpiComputation.cs
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
// Task: GCF-004 - Define KPI tracking schema and infrastructure
// Description: Utility methods for computing KPIs from validation results
// -----------------------------------------------------------------------------

using System.Collections.Immutable;

namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;

/// <summary>
/// Utility methods for computing KPIs from validation results.
/// </summary>
public static class KpiComputation
{
    /// <summary>
    /// Computes KPIs from a validation run result.
    /// </summary>
    /// <remarks>
    /// Aggregate statistics are computed over successful pairs only. The per-pair
    /// KPI list contains every pair, including failed ones.
    /// </remarks>
    /// <param name="result">The validation run result.</param>
    /// <param name="tenantId">The tenant ID.</param>
    /// <param name="scannerVersion">The scanner version; recorded as "0.0.0" when null.</param>
    /// <returns>Computed KPIs.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
    public static ValidationKpis ComputeFromResult(
        ValidationRunResult result,
        string tenantId,
        string? scannerVersion = null)
    {
        ArgumentNullException.ThrowIfNull(result);

        var successfulPairs = result.PairResults.Where(p => p.Success).ToList();

        // Function match rate statistics (pairs without post-patch functions are skipped).
        var matchRates = successfulPairs
            .Where(p => p.TotalFunctionsPost > 0)
            .Select(p => p.FunctionMatchRate)
            .ToList();

        // False-negative rates, expressed as percentages (0-100).
        var fnRates = successfulPairs
            .Where(p => p.TotalPatchedFunctions > 0)
            .Select(p => (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions)
            .ToList();

        // Verify times, sorted ascending because Percentile expects sorted input.
        var verifyTimes = successfulPairs
            .Where(p => p.VerifyTimeMs.HasValue)
            .Select(p => p.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        // Stability counts.
        // Since we're using placeholder implementation, count all with hashes as 3/3.
        var stability3of3 = successfulPairs.Count(p => p.SbomHash is not null);

        // Totals for precision/recall.
        var totalFunctionsPost = successfulPairs.Sum(p => p.TotalFunctionsPost);
        var matchedFunctions = successfulPairs.Sum(p => p.MatchedFunctions);
        var totalPatched = successfulPairs.Sum(p => p.TotalPatchedFunctions);
        var patchedDetected = successfulPairs.Sum(p => p.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        // Precision = TP / (TP + FP) - in this context, how many of our matches are correct.
        // Recall = TP / (TP + FN) - in this context, how many true patches did we detect.
        // Both guards test the denominator: zero matches out of a non-empty function set
        // must yield 0.0 (a measurable regression), not null (which the regression check
        // treats as "nothing to compare" and passes).
        double? precision = totalFunctionsPost > 0
            ? (double)matchedFunctions / totalFunctionsPost
            : null;
        double? recall = totalPatched > 0
            ? (double)patchedDetected / totalPatched
            : null;

        // F1 is left undefined when precision + recall is zero to avoid 0/0.
        double? f1 = precision.HasValue && recall.HasValue && (precision.Value + recall.Value) > 0
            ? 2 * precision.Value * recall.Value / (precision.Value + recall.Value)
            : null;

        // Deterministic replay rate as a fraction (1.0 when every successful pair has a stable SBOM hash).
        double? deterministicRate = successfulPairs.Count > 0
            ? (double)stability3of3 / successfulPairs.Count
            : null;

        // Per-pair KPIs (all pairs, not just the successful ones).
        var pairKpis = result.PairResults.Select(p => new PairKpis
        {
            PairId = p.PairId,
            CveId = p.CveId,
            PackageName = p.PackageName,
            FunctionMatchRate = p.FunctionMatchRate,
            FalseNegativeRate = p.TotalPatchedFunctions > 0
                ? (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions
                : null,
            SbomHashStability = p.SbomHash is not null ? 3 : 0,
            ReconstructionEquivalent = p.ReconstructionEquivalent,
            TotalFunctionsPost = p.TotalFunctionsPost,
            MatchedFunctions = p.MatchedFunctions,
            TotalPatchedFunctions = p.TotalPatchedFunctions,
            PatchedFunctionsDetected = p.PatchedFunctionsDetected,
            VerifyTimeMs = p.VerifyTimeMs,
            Success = p.Success,
            ErrorMessage = p.Error,
            SbomHash = p.SbomHash
        }).ToImmutableArray();

        return new ValidationKpis
        {
            // A run ID that is not a valid GUID is replaced by a freshly generated one.
            RunId = Guid.TryParse(result.RunId, out var runGuid) ? runGuid : Guid.NewGuid(),
            TenantId = tenantId,
            CorpusVersion = result.CorpusVersion ?? "unknown",
            ScannerVersion = scannerVersion ?? "0.0.0",
            PairCount = result.PairResults.Length,
            FunctionMatchRateMean = matchRates.Count > 0 ? matchRates.Average() : null,
            FunctionMatchRateMin = matchRates.Count > 0 ? matchRates.Min() : null,
            FunctionMatchRateMax = matchRates.Count > 0 ? matchRates.Max() : null,
            FalseNegativeRateMean = fnRates.Count > 0 ? fnRates.Average() : null,
            FalseNegativeRateMax = fnRates.Count > 0 ? fnRates.Max() : null,
            SbomHashStability3of3Count = stability3of3,
            SbomHashStability2of3Count = 0,
            SbomHashStability1of3Count = 0,
            ReconstructionEquivCount = successfulPairs.Count(p => p.ReconstructionEquivalent == true),
            ReconstructionTotalCount = successfulPairs.Count(p => p.ReconstructionEquivalent.HasValue),
            VerifyTimeMedianMs = verifyTimes.Count > 0 ? Percentile(verifyTimes, 50) : null,
            VerifyTimeP95Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 95) : null,
            VerifyTimeP99Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 99) : null,
            Precision = precision,
            Recall = recall,
            F1Score = f1,
            DeterministicReplayRate = deterministicRate,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatched = totalPatched,
            MissedPatched = missedPatched,
            ComputedAt = DateTimeOffset.UtcNow,
            StartedAt = result.StartedAt,
            CompletedAt = result.CompletedAt,
            PairResults = pairKpis
        };
    }

    /// <summary>
    /// Performs a regression check against a baseline.
    /// </summary>
    /// <remarks>
    /// The overall status is Fail if any individual status is Fail, otherwise Warn if any
    /// is Warn, otherwise Improved if every delta-based metric improved, otherwise Pass.
    /// Metrics whose KPI value is missing are treated as Pass.
    /// </remarks>
    /// <param name="kpis">The current KPIs.</param>
    /// <param name="baseline">The baseline to compare against.</param>
    /// <returns>The regression check result.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="kpis"/> or <paramref name="baseline"/> is null.
    /// </exception>
    public static RegressionCheckResult CompareToBaseline(
        ValidationKpis kpis,
        KpiBaseline baseline)
    {
        ArgumentNullException.ThrowIfNull(kpis);
        ArgumentNullException.ThrowIfNull(baseline);

        // Compute deltas (current minus baseline); null when the current KPI is unavailable.
        double? precisionDelta = kpis.Precision.HasValue
            ? kpis.Precision.Value - baseline.PrecisionBaseline
            : null;
        double? recallDelta = kpis.Recall.HasValue
            ? kpis.Recall.Value - baseline.RecallBaseline
            : null;
        double? f1Delta = kpis.F1Score.HasValue
            ? kpis.F1Score.Value - baseline.F1Baseline
            : null;

        // False-negative rate is inverse - higher is worse. The KPI is a percentage,
        // so it is scaled by 1/100 before being compared with the baseline.
        double? fnRateDelta = kpis.FalseNegativeRateMean.HasValue
            ? kpis.FalseNegativeRateMean.Value / 100.0 - baseline.FnRateBaseline
            : null;

        // Verify-time change as a percentage of the baseline; skipped for a non-positive baseline.
        double? verifyDeltaPct = kpis.VerifyTimeP95Ms.HasValue && baseline.VerifyP95BaselineMs > 0
            ? (kpis.VerifyTimeP95Ms.Value - baseline.VerifyP95BaselineMs) * 100.0 / baseline.VerifyP95BaselineMs
            : null;

        // Evaluate statuses. Thresholds are negated because a drop is the bad direction.
        var precisionStatus = EvaluateMetricStatus(
            precisionDelta, -baseline.PrecisionWarnDelta, -baseline.PrecisionFailDelta);
        var recallStatus = EvaluateMetricStatus(
            recallDelta, -baseline.RecallWarnDelta, -baseline.RecallFailDelta);

        // For FN rate and verify time, higher is worse, so the delta is negated as well.
        var fnRateStatus = fnRateDelta.HasValue
            ? EvaluateMetricStatus(-fnRateDelta, -baseline.FnRateWarnDelta, -baseline.FnRateFailDelta)
            : RegressionStatus.Pass;
        var verifyStatus = verifyDeltaPct.HasValue
            ? EvaluateMetricStatus(-verifyDeltaPct, -baseline.VerifyWarnDeltaPct, -baseline.VerifyFailDeltaPct)
            : RegressionStatus.Pass;

        // Determinism must be 100%; it can only ever be Pass or Fail.
        var determinismStatus = kpis.DeterministicReplayRate.HasValue
            ? (kpis.DeterministicReplayRate.Value >= 1.0 ? RegressionStatus.Pass : RegressionStatus.Fail)
            : RegressionStatus.Pass;

        // Overall status is the worst of all statuses. Determinism is kept out of the
        // "all improved" test because it never reports Improved; including it would make
        // the Improved outcome unreachable.
        var metricStatuses = new[] { precisionStatus, recallStatus, fnRateStatus, verifyStatus };

        RegressionStatus overallStatus;
        if (determinismStatus == RegressionStatus.Fail || metricStatuses.Contains(RegressionStatus.Fail))
        {
            overallStatus = RegressionStatus.Fail;
        }
        else if (metricStatuses.Contains(RegressionStatus.Warn))
        {
            overallStatus = RegressionStatus.Warn;
        }
        else if (metricStatuses.All(s => s == RegressionStatus.Improved))
        {
            overallStatus = RegressionStatus.Improved;
        }
        else
        {
            overallStatus = RegressionStatus.Pass;
        }

        return new RegressionCheckResult
        {
            CheckId = Guid.NewGuid(),
            RunId = kpis.RunId,
            BaselineId = baseline.BaselineId,
            PrecisionDelta = precisionDelta,
            RecallDelta = recallDelta,
            F1Delta = f1Delta,
            FnRateDelta = fnRateDelta,
            VerifyP95DeltaPct = verifyDeltaPct,
            OverallStatus = overallStatus,
            PrecisionStatus = precisionStatus,
            RecallStatus = recallStatus,
            FnRateStatus = fnRateStatus,
            VerifyTimeStatus = verifyStatus,
            DeterminismStatus = determinismStatus,
            CheckedAt = DateTimeOffset.UtcNow
        };
    }

    /// <summary>
    /// Evaluates the status of a metric based on its delta.
    /// </summary>
    /// <param name="delta">Signed change where positive means better; null means no data.</param>
    /// <param name="warnThreshold">Deltas below this value yield Warn.</param>
    /// <param name="failThreshold">Deltas below this value yield Fail (checked before Warn).</param>
    /// <returns>
    /// Pass when <paramref name="delta"/> is null or within thresholds, Improved when it is
    /// positive, otherwise Fail or Warn according to the thresholds.
    /// </returns>
    private static RegressionStatus EvaluateMetricStatus(
        double? delta,
        double warnThreshold,
        double failThreshold)
    {
        if (!delta.HasValue)
        {
            return RegressionStatus.Pass;
        }

        if (delta.Value > 0)
        {
            return RegressionStatus.Improved;
        }

        if (delta.Value < failThreshold)
        {
            return RegressionStatus.Fail;
        }

        if (delta.Value < warnThreshold)
        {
            return RegressionStatus.Warn;
        }

        return RegressionStatus.Pass;
    }

    /// <summary>
    /// Computes a percentile value from a sorted list.
    /// </summary>
    /// <param name="sortedValues">Values sorted ascending.</param>
    /// <param name="percentile">The percentile to look up, e.g. 50, 95 or 99.</param>
    /// <returns>
    /// The element at index ceil(count * percentile / 100) - 1, clamped to the valid
    /// index range; 0 when the list is empty.
    /// </returns>
    private static int Percentile(List<int> sortedValues, int percentile)
    {
        if (sortedValues.Count == 0)
        {
            return 0;
        }

        // Widen to long so Count * percentile cannot overflow before the division.
        var index = (int)Math.Ceiling((long)sortedValues.Count * percentile / 100.0) - 1;
        return sortedValues[Math.Clamp(index, 0, sortedValues.Count - 1)];
    }
}