Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.GroundTruth.Abstractions/KpiComputation.cs
2026-01-22 19:08:46 +02:00

257 lines
10 KiB
C#

// -----------------------------------------------------------------------------
// KpiComputation.cs
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
// Task: GCF-004 - Define KPI tracking schema and infrastructure
// Description: Utility methods for computing KPIs from validation results
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Stateless helpers that turn validation run results into aggregate KPIs and
/// compare those KPIs against a stored baseline.
/// </summary>
public static class KpiComputation
{
    /// <summary>
    /// Computes aggregate and per-pair KPIs from a validation run result.
    /// </summary>
    /// <remarks>
    /// Aggregate statistics are computed only over pairs whose <c>Success</c> flag is set;
    /// <c>PairCount</c> and the per-pair list cover every pair in <paramref name="result"/>.
    /// False-negative rates are expressed as percentages (0-100), whereas precision, recall,
    /// F1 and the deterministic replay rate are fractions (0-1).
    /// </remarks>
    /// <param name="result">The validation run result.</param>
    /// <param name="tenantId">The tenant ID recorded on the returned KPIs.</param>
    /// <param name="scannerVersion">The scanner version; <c>"0.0.0"</c> is recorded when omitted.</param>
    /// <returns>Computed KPIs.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is <see langword="null"/>.</exception>
    public static ValidationKpis ComputeFromResult(
        ValidationRunResult result,
        string tenantId,
        string? scannerVersion = null)
    {
        ArgumentNullException.ThrowIfNull(result);

        var successfulPairs = result.PairResults.Where(p => p.Success).ToList();

        // Function match rates, skipping pairs where TotalFunctionsPost is zero.
        var matchRates = successfulPairs
            .Where(p => p.TotalFunctionsPost > 0)
            .Select(p => p.FunctionMatchRate)
            .ToList();

        // Per-pair false-negative rates (percent), skipping pairs without patched functions.
        var fnRates = successfulPairs
            .Where(p => p.TotalPatchedFunctions > 0)
            .Select(p => (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions)
            .ToList();

        // Verify times in ascending order; Percentile() relies on this ordering.
        var verifyTimes = successfulPairs
            .Where(p => p.VerifyTimeMs.HasValue)
            .Select(p => p.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        // Stability counts. The implementation is a placeholder: every successful pair
        // that carries an SBOM hash is counted as 3-of-3 stable.
        var stability3of3 = successfulPairs.Count(p => p.SbomHash is not null);

        // Totals feeding precision/recall.
        var totalFunctionsPost = successfulPairs.Sum(p => p.TotalFunctionsPost);
        var matchedFunctions = successfulPairs.Sum(p => p.MatchedFunctions);
        var totalPatched = successfulPairs.Sum(p => p.TotalPatchedFunctions);
        var patchedDetected = successfulPairs.Sum(p => p.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        // Precision = TP / (TP + FP) - in this context, how many of our matches are correct.
        // The guard is on the denominator (as for recall): zero matches over a non-empty
        // function set is a precision of 0 rather than "unknown", and an empty function set
        // yields null instead of a floating-point division by zero.
        double? precision = totalFunctionsPost > 0
            ? (double)matchedFunctions / totalFunctionsPost
            : null;

        // Recall = TP / (TP + FN) - in this context, how many true patches did we detect.
        double? recall = totalPatched > 0
            ? (double)patchedDetected / totalPatched
            : null;

        // F1 is only defined when both inputs exist and their sum is non-zero.
        double? f1 = precision.HasValue && recall.HasValue && (precision.Value + recall.Value) > 0
            ? 2 * precision.Value * recall.Value / (precision.Value + recall.Value)
            : null;

        // Deterministic replay rate as a fraction: 1.0 when every successful pair is stable.
        double? deterministicRate = successfulPairs.Count > 0
            ? (double)stability3of3 / successfulPairs.Count
            : null;

        // Per-pair KPIs are produced for all pairs, including unsuccessful ones.
        var pairKpis = result.PairResults.Select(p => new PairKpis
        {
            PairId = p.PairId,
            CveId = p.CveId,
            PackageName = p.PackageName,
            FunctionMatchRate = p.FunctionMatchRate,
            FalseNegativeRate = p.TotalPatchedFunctions > 0
                ? (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions
                : null,
            SbomHashStability = p.SbomHash is not null ? 3 : 0,
            ReconstructionEquivalent = p.ReconstructionEquivalent,
            TotalFunctionsPost = p.TotalFunctionsPost,
            MatchedFunctions = p.MatchedFunctions,
            TotalPatchedFunctions = p.TotalPatchedFunctions,
            PatchedFunctionsDetected = p.PatchedFunctionsDetected,
            VerifyTimeMs = p.VerifyTimeMs,
            Success = p.Success,
            ErrorMessage = p.Error,
            SbomHash = p.SbomHash
        }).ToImmutableArray();

        return new ValidationKpis
        {
            // A run ID that is not a valid GUID is replaced by a freshly generated one.
            RunId = Guid.TryParse(result.RunId, out var runGuid) ? runGuid : Guid.NewGuid(),
            TenantId = tenantId,
            CorpusVersion = result.CorpusVersion ?? "unknown",
            ScannerVersion = scannerVersion ?? "0.0.0",
            PairCount = result.PairResults.Length,
            FunctionMatchRateMean = matchRates.Count > 0 ? matchRates.Average() : null,
            FunctionMatchRateMin = matchRates.Count > 0 ? matchRates.Min() : null,
            FunctionMatchRateMax = matchRates.Count > 0 ? matchRates.Max() : null,
            FalseNegativeRateMean = fnRates.Count > 0 ? fnRates.Average() : null,
            FalseNegativeRateMax = fnRates.Count > 0 ? fnRates.Max() : null,
            SbomHashStability3of3Count = stability3of3,
            SbomHashStability2of3Count = 0,
            SbomHashStability1of3Count = 0,
            ReconstructionEquivCount = successfulPairs.Count(p => p.ReconstructionEquivalent == true),
            ReconstructionTotalCount = successfulPairs.Count(p => p.ReconstructionEquivalent.HasValue),
            VerifyTimeMedianMs = verifyTimes.Count > 0 ? Percentile(verifyTimes, 50) : null,
            VerifyTimeP95Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 95) : null,
            VerifyTimeP99Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 99) : null,
            Precision = precision,
            Recall = recall,
            F1Score = f1,
            DeterministicReplayRate = deterministicRate,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatched = totalPatched,
            MissedPatched = missedPatched,
            ComputedAt = DateTimeOffset.UtcNow,
            StartedAt = result.StartedAt,
            CompletedAt = result.CompletedAt,
            PairResults = pairKpis
        };
    }

    /// <summary>
    /// Performs a regression check against a baseline.
    /// </summary>
    /// <remarks>
    /// Precision, recall, false-negative rate and P95 verify time are each graded with
    /// <c>EvaluateMetricStatus</c>; a metric that is missing from <paramref name="kpis"/> grades
    /// as <c>Pass</c>. Determinism is a pass/fail gate that fails when the replay rate is present
    /// and not at least 1.0. The overall status is <c>Fail</c> if anything failed, otherwise
    /// <c>Warn</c> if anything warned, otherwise <c>Improved</c> when all four graded metrics
    /// improved, otherwise <c>Pass</c>. The F1 delta is reported but not graded.
    /// </remarks>
    /// <param name="kpis">The current KPIs.</param>
    /// <param name="baseline">The baseline to compare against.</param>
    /// <returns>The regression check result.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="kpis"/> or <paramref name="baseline"/> is <see langword="null"/>.
    /// </exception>
    public static RegressionCheckResult CompareToBaseline(
        ValidationKpis kpis,
        KpiBaseline baseline)
    {
        ArgumentNullException.ThrowIfNull(kpis);
        ArgumentNullException.ThrowIfNull(baseline);

        // Deltas use lifted nullable arithmetic: a missing KPI yields a null delta.
        double? precisionDelta = kpis.Precision - baseline.PrecisionBaseline;
        double? recallDelta = kpis.Recall - baseline.RecallBaseline;
        double? f1Delta = kpis.F1Score - baseline.F1Baseline;

        // The KPI mean is a percentage; dividing by 100 puts it on the baseline's scale.
        double? fnRateDelta = kpis.FalseNegativeRateMean / 100.0 - baseline.FnRateBaseline;

        // Relative P95 change in percent; undefined without a positive baseline.
        double? verifyDeltaPct = kpis.VerifyTimeP95Ms.HasValue && baseline.VerifyP95BaselineMs > 0
            ? (kpis.VerifyTimeP95Ms.Value - baseline.VerifyP95BaselineMs) * 100.0 / baseline.VerifyP95BaselineMs
            : null;

        // Higher-is-better metrics: thresholds are negated so a drop beyond them trips the check.
        var precisionStatus = EvaluateMetricStatus(
            precisionDelta,
            -baseline.PrecisionWarnDelta,
            -baseline.PrecisionFailDelta);
        var recallStatus = EvaluateMetricStatus(
            recallDelta,
            -baseline.RecallWarnDelta,
            -baseline.RecallFailDelta);

        // Lower-is-better metrics: the delta itself is negated so an increase reads as a
        // regression. A null delta stays null and is graded as Pass by EvaluateMetricStatus.
        var fnRateStatus = EvaluateMetricStatus(
            -fnRateDelta,
            -baseline.FnRateWarnDelta,
            -baseline.FnRateFailDelta);
        var verifyStatus = EvaluateMetricStatus(
            -verifyDeltaPct,
            -baseline.VerifyWarnDeltaPct,
            -baseline.VerifyFailDeltaPct);

        // Determinism must be 100%; an absent rate is not treated as a failure.
        var determinismStatus = kpis.DeterministicReplayRate is null or >= 1.0
            ? RegressionStatus.Pass
            : RegressionStatus.Fail;

        // Determinism can only be Pass or Fail, so it participates in the Fail/Warn roll-up
        // but is excluded from the "everything improved" test, which would otherwise never hold.
        var gradedStatuses = new[] { precisionStatus, recallStatus, fnRateStatus, verifyStatus };
        RegressionStatus overallStatus;
        if (determinismStatus == RegressionStatus.Fail || gradedStatuses.Contains(RegressionStatus.Fail))
        {
            overallStatus = RegressionStatus.Fail;
        }
        else if (gradedStatuses.Contains(RegressionStatus.Warn))
        {
            overallStatus = RegressionStatus.Warn;
        }
        else if (gradedStatuses.All(s => s == RegressionStatus.Improved))
        {
            overallStatus = RegressionStatus.Improved;
        }
        else
        {
            overallStatus = RegressionStatus.Pass;
        }

        return new RegressionCheckResult
        {
            CheckId = Guid.NewGuid(),
            RunId = kpis.RunId,
            BaselineId = baseline.BaselineId,
            PrecisionDelta = precisionDelta,
            RecallDelta = recallDelta,
            F1Delta = f1Delta,
            FnRateDelta = fnRateDelta,
            VerifyP95DeltaPct = verifyDeltaPct,
            OverallStatus = overallStatus,
            PrecisionStatus = precisionStatus,
            RecallStatus = recallStatus,
            FnRateStatus = fnRateStatus,
            VerifyTimeStatus = verifyStatus,
            DeterminismStatus = determinismStatus,
            CheckedAt = DateTimeOffset.UtcNow
        };
    }

    /// <summary>
    /// Evaluates the status of a metric based on its delta, where a positive delta is better.
    /// </summary>
    /// <param name="delta">Signed change of the metric; <see langword="null"/> grades as <c>Pass</c>.</param>
    /// <param name="warnThreshold">Deltas below this value (and not below the fail threshold) warn.</param>
    /// <param name="failThreshold">Deltas below this value fail.</param>
    /// <returns>The status for the metric.</returns>
    private static RegressionStatus EvaluateMetricStatus(
        double? delta,
        double warnThreshold,
        double failThreshold)
    {
        // Arms are evaluated top to bottom, so the fail threshold wins over the warn threshold.
        return delta switch
        {
            null => RegressionStatus.Pass,
            > 0.0 => RegressionStatus.Improved,
            double change when change < failThreshold => RegressionStatus.Fail,
            double change when change < warnThreshold => RegressionStatus.Warn,
            _ => RegressionStatus.Pass,
        };
    }

    /// <summary>
    /// Computes a percentile value from a list that is already sorted in ascending order,
    /// using the nearest-rank method.
    /// </summary>
    /// <param name="sortedValues">Values sorted ascending.</param>
    /// <param name="percentile">The percentile to look up, e.g. 50 or 95.</param>
    /// <returns>The selected element, or 0 when the list is empty.</returns>
    private static int Percentile(List<int> sortedValues, int percentile)
    {
        var count = sortedValues.Count;
        if (count == 0)
        {
            return 0;
        }

        // 1-based rank = ceil(count * percentile / 100). The product is formed in double
        // so that very large lists cannot overflow an int multiplication.
        var rank = (int)Math.Ceiling((double)count * percentile / 100.0);

        // Clamp keeps out-of-range percentiles (<= 0 or > 100) inside the list.
        return sortedValues[Math.Clamp(rank - 1, 0, count - 1)];
    }
}