// -----------------------------------------------------------------------------
// KpiComputation.cs
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
// Task: GCF-004 - Define KPI tracking schema and infrastructure
// Description: Utility methods for computing KPIs from validation results
// -----------------------------------------------------------------------------

using System.Collections.Immutable;
|
|
|
|
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
|
|
|
|
/// <summary>
/// Utility methods for computing KPIs from validation results and for
/// comparing those KPIs against a stored baseline.
/// </summary>
public static class KpiComputation
{
    /// <summary>
    /// Computes KPIs from a validation run result.
    /// </summary>
    /// <remarks>
    /// Aggregate statistics only consider pairs whose <c>Success</c> flag is set.
    /// The per-pair list on the returned object covers every pair in the run,
    /// successful or not.
    /// </remarks>
    /// <param name="result">The validation run result.</param>
    /// <param name="tenantId">The tenant ID.</param>
    /// <param name="scannerVersion">The scanner version; <c>"0.0.0"</c> is recorded when omitted.</param>
    /// <returns>Computed KPIs.</returns>
    public static ValidationKpis ComputeFromResult(
        ValidationRunResult result,
        string tenantId,
        string? scannerVersion = null)
    {
        var successfulPairs = result.PairResults.Where(p => p.Success).ToList();

        // Function match rates, restricted to pairs that reported at least one post-patch function.
        var matchRates = successfulPairs
            .Where(p => p.TotalFunctionsPost > 0)
            .Select(p => p.FunctionMatchRate)
            .ToList();

        // Per-pair false-negative rates, expressed as a percentage of the pair's patched functions.
        var fnRates = successfulPairs
            .Where(p => p.TotalPatchedFunctions > 0)
            .Select(p => (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions)
            .ToList();

        // Verify times, sorted ascending because Percentile expects sorted input.
        var verifyTimes = successfulPairs
            .Where(p => p.VerifyTimeMs.HasValue)
            .Select(p => p.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        // Stability counts.
        // Since we're using a placeholder implementation, every successful pair
        // that has an SBOM hash is counted as 3/3 stable.
        var stability3of3 = successfulPairs.Count(p => p.SbomHash is not null);

        // Totals for precision/recall.
        var totalFunctionsPost = successfulPairs.Sum(p => p.TotalFunctionsPost);
        var matchedFunctions = successfulPairs.Sum(p => p.MatchedFunctions);
        var totalPatched = successfulPairs.Sum(p => p.TotalPatchedFunctions);
        var patchedDetected = successfulPairs.Sum(p => p.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        // Precision = TP / (TP + FP) - in this context, matched functions over all post-patch functions.
        // The guard is on the denominator (as for recall below): a run that matched
        // nothing reports 0 instead of null, and an empty denominator can never
        // produce Infinity/NaN.
        double? precision = totalFunctionsPost > 0
            ? (double)matchedFunctions / totalFunctionsPost
            : null;

        // Recall = TP / (TP + FN) - in this context, how many true patches did we detect.
        double? recall = totalPatched > 0
            ? (double)patchedDetected / totalPatched
            : null;

        // F1 is only defined when both inputs exist and their sum is non-zero.
        double? f1 = precision.HasValue && recall.HasValue && (precision.Value + recall.Value) > 0
            ? 2 * precision.Value * recall.Value / (precision.Value + recall.Value)
            : null;

        // Deterministic replay rate as a fraction (1.0 when every successful pair is stable).
        double? deterministicRate = successfulPairs.Count > 0
            ? (double)stability3of3 / successfulPairs.Count
            : null;

        // Per-pair KPIs are produced for every pair, including failed ones.
        var pairKpis = result.PairResults.Select(p => new PairKpis
        {
            PairId = p.PairId,
            CveId = p.CveId,
            PackageName = p.PackageName,
            FunctionMatchRate = p.FunctionMatchRate,
            FalseNegativeRate = p.TotalPatchedFunctions > 0
                ? (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions
                : null,
            // Placeholder stability: a present hash is reported as 3 (of 3), otherwise 0.
            SbomHashStability = p.SbomHash is not null ? 3 : 0,
            ReconstructionEquivalent = p.ReconstructionEquivalent,
            TotalFunctionsPost = p.TotalFunctionsPost,
            MatchedFunctions = p.MatchedFunctions,
            TotalPatchedFunctions = p.TotalPatchedFunctions,
            PatchedFunctionsDetected = p.PatchedFunctionsDetected,
            VerifyTimeMs = p.VerifyTimeMs,
            Success = p.Success,
            ErrorMessage = p.Error,
            SbomHash = p.SbomHash
        }).ToImmutableArray();

        return new ValidationKpis
        {
            // A run ID that is not a valid GUID is replaced by a freshly generated one.
            RunId = Guid.TryParse(result.RunId, out var runGuid) ? runGuid : Guid.NewGuid(),
            TenantId = tenantId,
            CorpusVersion = result.CorpusVersion ?? "unknown",
            ScannerVersion = scannerVersion ?? "0.0.0",
            PairCount = result.PairResults.Length,
            FunctionMatchRateMean = matchRates.Count > 0 ? matchRates.Average() : null,
            FunctionMatchRateMin = matchRates.Count > 0 ? matchRates.Min() : null,
            FunctionMatchRateMax = matchRates.Count > 0 ? matchRates.Max() : null,
            FalseNegativeRateMean = fnRates.Count > 0 ? fnRates.Average() : null,
            FalseNegativeRateMax = fnRates.Count > 0 ? fnRates.Max() : null,
            SbomHashStability3of3Count = stability3of3,
            SbomHashStability2of3Count = 0,
            SbomHashStability1of3Count = 0,
            ReconstructionEquivCount = successfulPairs.Count(p => p.ReconstructionEquivalent == true),
            ReconstructionTotalCount = successfulPairs.Count(p => p.ReconstructionEquivalent.HasValue),
            VerifyTimeMedianMs = verifyTimes.Count > 0 ? Percentile(verifyTimes, 50) : null,
            VerifyTimeP95Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 95) : null,
            VerifyTimeP99Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 99) : null,
            Precision = precision,
            Recall = recall,
            F1Score = f1,
            DeterministicReplayRate = deterministicRate,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatched = totalPatched,
            MissedPatched = missedPatched,
            ComputedAt = DateTimeOffset.UtcNow,
            StartedAt = result.StartedAt,
            CompletedAt = result.CompletedAt,
            PairResults = pairKpis
        };
    }

    /// <summary>
    /// Performs a regression check against a baseline.
    /// </summary>
    /// <remarks>
    /// Each delta is "current minus baseline" and is <see langword="null"/> when the
    /// current KPI is missing; a missing delta is evaluated as a pass. The overall
    /// status is the worst individual status. It is reported as improved only when
    /// precision, recall, false-negative rate and verify time all improved and the
    /// determinism check passed.
    /// </remarks>
    /// <param name="kpis">The current KPIs.</param>
    /// <param name="baseline">The baseline to compare against.</param>
    /// <returns>The regression check result.</returns>
    public static RegressionCheckResult CompareToBaseline(
        ValidationKpis kpis,
        KpiBaseline baseline)
    {
        // Compute deltas.
        double? precisionDelta = kpis.Precision.HasValue
            ? kpis.Precision.Value - baseline.PrecisionBaseline
            : null;

        double? recallDelta = kpis.Recall.HasValue
            ? kpis.Recall.Value - baseline.RecallBaseline
            : null;

        double? f1Delta = kpis.F1Score.HasValue
            ? kpis.F1Score.Value - baseline.F1Baseline
            : null;

        // The mean false-negative rate is a percentage; it is divided by 100
        // before being compared with the baseline value.
        double? fnRateDelta = kpis.FalseNegativeRateMean.HasValue
            ? kpis.FalseNegativeRateMean.Value / 100.0 - baseline.FnRateBaseline
            : null;

        // Relative change of the P95 verify time, in percent of the baseline.
        // Skipped when the baseline is not positive to avoid dividing by zero.
        double? verifyDeltaPct = kpis.VerifyTimeP95Ms.HasValue && baseline.VerifyP95BaselineMs > 0
            ? (kpis.VerifyTimeP95Ms.Value - baseline.VerifyP95BaselineMs) * 100.0 / baseline.VerifyP95BaselineMs
            : null;

        // Evaluate statuses. Thresholds are negated so that a drop larger than
        // the configured delta trips the corresponding status.
        var precisionStatus = EvaluateMetricStatus(
            precisionDelta,
            -baseline.PrecisionWarnDelta,
            -baseline.PrecisionFailDelta);

        var recallStatus = EvaluateMetricStatus(
            recallDelta,
            -baseline.RecallWarnDelta,
            -baseline.RecallFailDelta);

        // For FN rate and verify time, higher is worse, so the delta is negated.
        // A null delta stays null and is evaluated as a pass by the helper.
        var fnRateStatus = EvaluateMetricStatus(
            -fnRateDelta,
            -baseline.FnRateWarnDelta,
            -baseline.FnRateFailDelta);

        var verifyStatus = EvaluateMetricStatus(
            -verifyDeltaPct,
            -baseline.VerifyWarnDeltaPct,
            -baseline.VerifyFailDeltaPct);

        // Determinism must be 100%; a missing rate is treated as a pass.
        var determinismStatus = kpis.DeterministicReplayRate.HasValue
            ? (kpis.DeterministicReplayRate.Value >= 1.0 ? RegressionStatus.Pass : RegressionStatus.Fail)
            : RegressionStatus.Pass;

        // Overall status is the worst of all statuses. Determinism can only be
        // Pass or Fail, so it acts purely as a gate: including it in the
        // "all improved" test would make Improved impossible to reach.
        var metricStatuses = new[] { precisionStatus, recallStatus, fnRateStatus, verifyStatus };

        RegressionStatus overallStatus;
        if (determinismStatus == RegressionStatus.Fail || metricStatuses.Contains(RegressionStatus.Fail))
        {
            overallStatus = RegressionStatus.Fail;
        }
        else if (metricStatuses.Contains(RegressionStatus.Warn))
        {
            overallStatus = RegressionStatus.Warn;
        }
        else if (metricStatuses.All(s => s == RegressionStatus.Improved))
        {
            overallStatus = RegressionStatus.Improved;
        }
        else
        {
            overallStatus = RegressionStatus.Pass;
        }

        return new RegressionCheckResult
        {
            CheckId = Guid.NewGuid(),
            RunId = kpis.RunId,
            BaselineId = baseline.BaselineId,
            PrecisionDelta = precisionDelta,
            RecallDelta = recallDelta,
            F1Delta = f1Delta,
            FnRateDelta = fnRateDelta,
            VerifyP95DeltaPct = verifyDeltaPct,
            OverallStatus = overallStatus,
            PrecisionStatus = precisionStatus,
            RecallStatus = recallStatus,
            FnRateStatus = fnRateStatus,
            VerifyTimeStatus = verifyStatus,
            DeterminismStatus = determinismStatus,
            CheckedAt = DateTimeOffset.UtcNow
        };
    }

    /// <summary>
    /// Evaluates the status of a metric based on its delta.
    /// </summary>
    /// <param name="delta">Signed change where a positive value means better; <see langword="null"/> when unknown.</param>
    /// <param name="warnThreshold">Deltas below this value (and not below <paramref name="failThreshold"/>) yield a warning.</param>
    /// <param name="failThreshold">Deltas below this value yield a failure.</param>
    /// <returns>
    /// Pass for a missing delta, Improved for a positive delta, otherwise
    /// Fail, Warn or Pass depending on the thresholds (checked in that order).
    /// </returns>
    private static RegressionStatus EvaluateMetricStatus(
        double? delta,
        double warnThreshold,
        double failThreshold)
    {
        if (!delta.HasValue)
        {
            return RegressionStatus.Pass;
        }

        if (delta.Value > 0)
        {
            return RegressionStatus.Improved;
        }

        // Fail is tested before warn so the more severe status wins.
        if (delta.Value < failThreshold)
        {
            return RegressionStatus.Fail;
        }

        if (delta.Value < warnThreshold)
        {
            return RegressionStatus.Warn;
        }

        return RegressionStatus.Pass;
    }

    /// <summary>
    /// Computes a percentile value from a sorted list.
    /// </summary>
    /// <param name="sortedValues">Values sorted in ascending order; the list is not sorted here.</param>
    /// <param name="percentile">The requested percentile, e.g. 50, 95 or 99.</param>
    /// <returns>
    /// The element at index <c>ceil(count * percentile / 100) - 1</c>, clamped to
    /// the bounds of the list, or 0 when the list is empty.
    /// </returns>
    private static int Percentile(List<int> sortedValues, int percentile)
    {
        if (sortedValues.Count == 0)
        {
            return 0;
        }

        var index = (int)Math.Ceiling(sortedValues.Count * percentile / 100.0) - 1;

        // Clamp so that out-of-range percentiles (e.g. 0) still address a valid element.
        return sortedValues[Math.Clamp(index, 0, sortedValues.Count - 1)];
    }
}
|