tests fixes and sprints work
This commit is contained in:
@@ -0,0 +1,256 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// KpiComputation.cs
|
||||
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
|
||||
// Task: GCF-004 - Define KPI tracking schema and infrastructure
|
||||
// Description: Utility methods for computing KPIs from validation results
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
|
||||
|
||||
/// <summary>
|
||||
/// Utility methods for computing KPIs from validation results.
|
||||
/// </summary>
|
||||
public static class KpiComputation
|
||||
{
|
||||
/// <summary>
/// Computes aggregate and per-pair KPIs from a validation run result.
/// </summary>
/// <remarks>
/// Aggregate statistics (rates, timings, precision/recall) are computed over successful
/// pairs only. <c>PairCount</c> and the per-pair KPI list cover every pair in
/// <paramref name="result"/>, including failed ones.
/// </remarks>
/// <param name="result">The validation run result.</param>
/// <param name="tenantId">The tenant ID.</param>
/// <param name="scannerVersion">The scanner version; recorded as "0.0.0" when null.</param>
/// <returns>Computed KPIs.</returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
public static ValidationKpis ComputeFromResult(
    ValidationRunResult result,
    string tenantId,
    string? scannerVersion = null)
{
    ArgumentNullException.ThrowIfNull(result);

    var successfulPairs = result.PairResults.Where(p => p.Success).ToList();

    // Function match rates; pairs whose TotalFunctionsPost is zero are left out.
    var matchRates = successfulPairs
        .Where(p => p.TotalFunctionsPost > 0)
        .Select(p => p.FunctionMatchRate)
        .ToList();

    // False-negative rates as percentages (0-100); pairs without patched functions are left out.
    var fnRates = successfulPairs
        .Where(p => p.TotalPatchedFunctions > 0)
        .Select(p => (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions)
        .ToList();

    // Verify times, sorted ascending because Percentile expects sorted input.
    var verifyTimes = successfulPairs
        .Where(p => p.VerifyTimeMs.HasValue)
        .Select(p => p.VerifyTimeMs!.Value)
        .OrderBy(t => t)
        .ToList();

    // Stability counts.
    // Placeholder: any successful pair that has an SBOM hash is counted as 3-of-3 stable.
    var stability3of3 = successfulPairs.Count(p => p.SbomHash is not null);

    // Totals for precision/recall
    var totalFunctionsPost = successfulPairs.Sum(p => p.TotalFunctionsPost);
    var matchedFunctions = successfulPairs.Sum(p => p.MatchedFunctions);
    var totalPatched = successfulPairs.Sum(p => p.TotalPatchedFunctions);
    var patchedDetected = successfulPairs.Sum(p => p.PatchedFunctionsDetected);
    var missedPatched = totalPatched - patchedDetected;

    // Precision = matched functions / total post functions.
    // Recall    = detected patched functions / total patched functions.
    // Precision stays null when nothing matched. The denominator is also checked so that
    // inconsistent input (matches reported but a zero total) cannot produce Infinity,
    // which would in turn make F1 NaN.
    double? precision = matchedFunctions > 0 && totalFunctionsPost > 0
        ? (double)matchedFunctions / totalFunctionsPost
        : null;

    double? recall = totalPatched > 0
        ? (double)patchedDetected / totalPatched
        : null;

    // F1 is the harmonic mean of precision and recall; undefined when either is missing
    // or both are zero.
    double? f1 = precision.HasValue && recall.HasValue && (precision.Value + recall.Value) > 0
        ? 2 * precision.Value * recall.Value / (precision.Value + recall.Value)
        : null;

    // Deterministic replay rate as a fraction (1.0 when every successful pair is stable).
    double? deterministicRate = successfulPairs.Count > 0
        ? (double)stability3of3 / successfulPairs.Count
        : null;

    // Per-pair KPIs are produced for every pair, successful or not.
    var pairKpis = result.PairResults.Select(p => new PairKpis
    {
        PairId = p.PairId,
        CveId = p.CveId,
        PackageName = p.PackageName,
        FunctionMatchRate = p.FunctionMatchRate,
        FalseNegativeRate = p.TotalPatchedFunctions > 0
            ? (p.TotalPatchedFunctions - p.PatchedFunctionsDetected) * 100.0 / p.TotalPatchedFunctions
            : null,
        SbomHashStability = p.SbomHash is not null ? 3 : 0,
        ReconstructionEquivalent = p.ReconstructionEquivalent,
        TotalFunctionsPost = p.TotalFunctionsPost,
        MatchedFunctions = p.MatchedFunctions,
        TotalPatchedFunctions = p.TotalPatchedFunctions,
        PatchedFunctionsDetected = p.PatchedFunctionsDetected,
        VerifyTimeMs = p.VerifyTimeMs,
        Success = p.Success,
        ErrorMessage = p.Error,
        SbomHash = p.SbomHash
    }).ToImmutableArray();

    return new ValidationKpis
    {
        // A fresh GUID is generated when the run ID is not a parseable GUID.
        RunId = Guid.TryParse(result.RunId, out var runGuid) ? runGuid : Guid.NewGuid(),
        TenantId = tenantId,
        CorpusVersion = result.CorpusVersion ?? "unknown",
        ScannerVersion = scannerVersion ?? "0.0.0",
        PairCount = result.PairResults.Length,
        FunctionMatchRateMean = matchRates.Count > 0 ? matchRates.Average() : null,
        FunctionMatchRateMin = matchRates.Count > 0 ? matchRates.Min() : null,
        FunctionMatchRateMax = matchRates.Count > 0 ? matchRates.Max() : null,
        FalseNegativeRateMean = fnRates.Count > 0 ? fnRates.Average() : null,
        FalseNegativeRateMax = fnRates.Count > 0 ? fnRates.Max() : null,
        SbomHashStability3of3Count = stability3of3,
        SbomHashStability2of3Count = 0,
        SbomHashStability1of3Count = 0,
        ReconstructionEquivCount = successfulPairs.Count(p => p.ReconstructionEquivalent == true),
        ReconstructionTotalCount = successfulPairs.Count(p => p.ReconstructionEquivalent.HasValue),
        VerifyTimeMedianMs = verifyTimes.Count > 0 ? Percentile(verifyTimes, 50) : null,
        VerifyTimeP95Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 95) : null,
        VerifyTimeP99Ms = verifyTimes.Count > 0 ? Percentile(verifyTimes, 99) : null,
        Precision = precision,
        Recall = recall,
        F1Score = f1,
        DeterministicReplayRate = deterministicRate,
        TotalFunctionsPost = totalFunctionsPost,
        MatchedFunctions = matchedFunctions,
        TotalTruePatched = totalPatched,
        MissedPatched = missedPatched,
        ComputedAt = DateTimeOffset.UtcNow,
        StartedAt = result.StartedAt,
        CompletedAt = result.CompletedAt,
        PairResults = pairKpis
    };
}
|
||||
|
||||
/// <summary>
/// Performs a regression check of the current KPIs against a baseline.
/// </summary>
/// <remarks>
/// Precision, recall, false-negative rate and P95 verify time are each classified from
/// their delta to the baseline. Determinism is a pass/fail gate: it fails whenever the
/// deterministic replay rate is present and below 1.0. The overall status is Fail if any
/// check fails, Warn if any delta-based check warns, Improved if all four delta-based
/// checks improved, and Pass otherwise. The F1 delta is reported but has no status.
/// </remarks>
/// <param name="kpis">The current KPIs.</param>
/// <param name="baseline">The baseline to compare against.</param>
/// <returns>The regression check result.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="kpis"/> or <paramref name="baseline"/> is null.
/// </exception>
public static RegressionCheckResult CompareToBaseline(
    ValidationKpis kpis,
    KpiBaseline baseline)
{
    ArgumentNullException.ThrowIfNull(kpis);
    ArgumentNullException.ThrowIfNull(baseline);

    // Compute deltas (current minus baseline); a missing KPI yields a null delta.
    double? precisionDelta = kpis.Precision.HasValue
        ? kpis.Precision.Value - baseline.PrecisionBaseline
        : null;

    double? recallDelta = kpis.Recall.HasValue
        ? kpis.Recall.Value - baseline.RecallBaseline
        : null;

    double? f1Delta = kpis.F1Score.HasValue
        ? kpis.F1Score.Value - baseline.F1Baseline
        : null;

    // The KPI mean is divided by 100 before subtracting the baseline.
    // For this metric a positive delta is a regression.
    double? fnRateDelta = kpis.FalseNegativeRateMean.HasValue
        ? kpis.FalseNegativeRateMean.Value / 100.0 - baseline.FnRateBaseline
        : null;

    // Relative change in P95 verify time, in percent; skipped when the baseline is not positive.
    double? verifyDeltaPct = kpis.VerifyTimeP95Ms.HasValue && baseline.VerifyP95BaselineMs > 0
        ? (kpis.VerifyTimeP95Ms.Value - baseline.VerifyP95BaselineMs) * 100.0 / baseline.VerifyP95BaselineMs
        : null;

    // Evaluate statuses. Thresholds are negated before being handed to the helper,
    // which treats a positive delta as an improvement.
    var precisionStatus = EvaluateMetricStatus(
        precisionDelta,
        -baseline.PrecisionWarnDelta,
        -baseline.PrecisionFailDelta);

    var recallStatus = EvaluateMetricStatus(
        recallDelta,
        -baseline.RecallWarnDelta,
        -baseline.RecallFailDelta);

    // For FN rate and verify time, higher is worse, so the delta is negated as well.
    // A null delta stays null under negation and the helper maps it to Pass.
    var fnRateStatus = EvaluateMetricStatus(
        -fnRateDelta,
        -baseline.FnRateWarnDelta,
        -baseline.FnRateFailDelta);

    var verifyStatus = EvaluateMetricStatus(
        -verifyDeltaPct,
        -baseline.VerifyWarnDeltaPct,
        -baseline.VerifyFailDeltaPct);

    // Determinism must be 100%; with no data the check passes.
    var determinismStatus = kpis.DeterministicReplayRate.HasValue && kpis.DeterministicReplayRate.Value < 1.0
        ? RegressionStatus.Fail
        : RegressionStatus.Pass;

    // Determinism can only be Pass or Fail, never Improved. Including it in the
    // "all improved" test would make an overall Improved result unreachable, so it only
    // takes part as a failure gate.
    var deltaStatuses = new[] { precisionStatus, recallStatus, fnRateStatus, verifyStatus };
    var anyFailure = determinismStatus == RegressionStatus.Fail
        || deltaStatuses.Contains(RegressionStatus.Fail);

    var overallStatus = anyFailure ? RegressionStatus.Fail
        : deltaStatuses.Contains(RegressionStatus.Warn) ? RegressionStatus.Warn
        : deltaStatuses.All(s => s == RegressionStatus.Improved) ? RegressionStatus.Improved
        : RegressionStatus.Pass;

    return new RegressionCheckResult
    {
        CheckId = Guid.NewGuid(),
        RunId = kpis.RunId,
        BaselineId = baseline.BaselineId,
        PrecisionDelta = precisionDelta,
        RecallDelta = recallDelta,
        F1Delta = f1Delta,
        FnRateDelta = fnRateDelta,
        VerifyP95DeltaPct = verifyDeltaPct,
        OverallStatus = overallStatus,
        PrecisionStatus = precisionStatus,
        RecallStatus = recallStatus,
        FnRateStatus = fnRateStatus,
        VerifyTimeStatus = verifyStatus,
        DeterminismStatus = determinismStatus,
        CheckedAt = DateTimeOffset.UtcNow
    };
}
|
||||
|
||||
/// <summary>
/// Classifies a metric delta against its warn and fail thresholds.
/// </summary>
/// <param name="delta">Signed change where a positive value counts as an improvement; null when unavailable.</param>
/// <param name="warnThreshold">A delta below this value is reported as Warn.</param>
/// <param name="failThreshold">A delta below this value is reported as Fail; checked before the warn threshold.</param>
/// <returns>
/// Pass for a null delta, Improved for a positive delta, otherwise Fail, Warn or Pass
/// depending on which threshold (if any) the delta falls below.
/// </returns>
private static RegressionStatus EvaluateMetricStatus(
    double? delta,
    double warnThreshold,
    double failThreshold)
{
    // No measurement means nothing to flag.
    if (delta is not double change)
    {
        return RegressionStatus.Pass;
    }

    // Arms are evaluated top to bottom, so the fail threshold wins over the warn threshold.
    return change switch
    {
        > 0.0 => RegressionStatus.Improved,
        _ when change < failThreshold => RegressionStatus.Fail,
        _ when change < warnThreshold => RegressionStatus.Warn,
        _ => RegressionStatus.Pass,
    };
}
|
||||
|
||||
/// <summary>
/// Picks the value at the requested percentile from a list that is already sorted.
/// </summary>
/// <param name="sortedValues">Values sorted by the caller; the list is not re-sorted here.</param>
/// <param name="percentile">Requested percentile, e.g. 50, 95 or 99.</param>
/// <returns>
/// The element at position ceil(count * percentile / 100) counted from one, with the
/// position clamped into the list; 0 when the list is empty.
/// </returns>
private static int Percentile(List<int> sortedValues, int percentile)
{
    var count = sortedValues.Count;
    if (count == 0)
    {
        return 0;
    }

    // One-based rank of the requested percentile.
    var rank = (int)Math.Ceiling(count * percentile / 100.0);

    // Convert to a zero-based index and keep it inside the list bounds.
    var position = Math.Clamp(rank - 1, 0, count - 1);
    return sortedValues[position];
}
|
||||
}
|
||||
Reference in New Issue
Block a user