// -----------------------------------------------------------------------------
// ValidationHarnessService.cs
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
// Task: GCF-003 - Implement validation harness skeleton
// Description: Orchestrates end-to-end validation of patch-paired artifacts
// -----------------------------------------------------------------------------

using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;

namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;

// NOTE(review): the generic type arguments in this file were reconstructed from usage.
// `FunctionMatchResult` and `MismatchCategory` are inferred names for the element type of
// PairValidationResult.FunctionMatches and the key type of ValidationMetrics.MismatchBuckets;
// confirm both against the Abstractions assembly.

/// <summary>
/// Implementation of <see cref="IValidationHarness"/> that orchestrates
/// end-to-end validation of patch-paired artifacts.
/// </summary>
public sealed class ValidationHarnessService : IValidationHarness
{
    private readonly ISecurityPairService _pairService;
    private readonly ILogger<ValidationHarnessService> _logger;
    private readonly ConcurrentDictionary<string, ValidationRunContext> _activeRuns = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="ValidationHarnessService"/> class.
    /// </summary>
    /// <param name="pairService">Service used to load security pairs by id.</param>
    /// <param name="logger">Logger for run and pair diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public ValidationHarnessService(
        ISecurityPairService pairService,
        ILogger<ValidationHarnessService> logger)
    {
        _pairService = pairService ?? throw new ArgumentNullException(nameof(pairService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<ValidationRunResult> RunAsync(
        ValidationRunRequest request,
        CancellationToken ct = default)
    {
        // Validate before the run is registered so a bad argument cannot leave a stale entry
        // in _activeRuns.
        ArgumentNullException.ThrowIfNull(request);

        var runId = GenerateRunId();
        var startedAt = DateTimeOffset.UtcNow;
        var stopwatch = Stopwatch.StartNew();

        using var context = new ValidationRunContext(runId, request, startedAt, ct);

        // Link to the context token (itself linked to ct) so that CancelAsync actually stops
        // the run; the timeout is layered on top of it inside the try block.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(context.Token);

        _activeRuns[runId] = context;

        try
        {
            _logger.LogInformation(
                "Starting validation run {RunId} with {PairCount} pairs",
                runId,
                request.Pairs.Length);

            cts.CancelAfter(request.Timeout);

            // Phase 1: Initialize
            context.UpdateState(ValidationState.Initializing, "Initializing validation environment");
            await InitializeAsync(context, cts.Token);

            // Phase 2: Validate pairs
            var pairResults = await ValidatePairsAsync(context, cts.Token);

            // Phase 3: Compute aggregate metrics
            context.UpdateState(ValidationState.ComputingMetrics, "Computing aggregate metrics");
            var metrics = ComputeMetrics(pairResults, request.Metrics);

            // Phase 4: Generate report
            context.UpdateState(ValidationState.GeneratingReport, "Generating report");
            var report = GenerateMarkdownReport(request, metrics, pairResults);

            stopwatch.Stop();
            context.UpdateState(ValidationState.Completed, "Validation completed");

            _logger.LogInformation(
                "Validation run {RunId} completed in {Duration}. Match rate: {MatchRate:F1}%",
                runId,
                stopwatch.Elapsed,
                metrics.FunctionMatchRate);

            return new ValidationRunResult
            {
                RunId = runId,
                StartedAt = startedAt,
                CompletedAt = DateTimeOffset.UtcNow,
                Status = context.GetStatus(),
                Metrics = metrics,
                PairResults = pairResults,
                CorpusVersion = request.CorpusVersion,
                TenantId = request.TenantId,
                MatcherConfig = request.Matcher,
                MarkdownReport = report
            };
        }
        catch (OperationCanceledException) when (context.IsCancelled)
        {
            // Caller token or CancelAsync triggered the cancellation.
            _logger.LogWarning("Validation run {RunId} was cancelled", runId);
            context.UpdateState(ValidationState.Cancelled, "Validation cancelled");
            return CreateFailedResult(runId, startedAt, context, "Validation was cancelled");
        }
        catch (OperationCanceledException) when (cts.IsCancellationRequested)
        {
            // The context was not cancelled, so only the CancelAfter timer can have fired.
            var message = $"Validation timed out after {request.Timeout}";
            _logger.LogWarning("Validation run {RunId} timed out after {Timeout}", runId, request.Timeout);
            context.UpdateState(ValidationState.Failed, message);
            return CreateFailedResult(runId, startedAt, context, message);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Validation run {RunId} failed", runId);
            context.UpdateState(ValidationState.Failed, ex.Message);
            return CreateFailedResult(runId, startedAt, context, ex.Message);
        }
        finally
        {
            _activeRuns.TryRemove(runId, out _);
        }
    }

    /// <inheritdoc />
    public Task<ValidationRunStatus?> GetStatusAsync(string runId, CancellationToken ct = default)
    {
        if (_activeRuns.TryGetValue(runId, out var context))
        {
            return Task.FromResult<ValidationRunStatus?>(context.GetStatus());
        }

        return Task.FromResult<ValidationRunStatus?>(null);
    }

    /// <inheritdoc />
    public Task<bool> CancelAsync(string runId, CancellationToken ct = default)
    {
        if (_activeRuns.TryGetValue(runId, out var context))
        {
            context.Cancel();
            return Task.FromResult(true);
        }

        return Task.FromResult(false);
    }

    /// <summary>
    /// Builds a 32-character run identifier: <c>vr-</c>, a UTC timestamp, and as many
    /// GUID hex digits as fit.
    /// </summary>
    private static string GenerateRunId()
    {
        // Invariant culture keeps the timestamp in Gregorian ASCII digits on every host.
        var stamp = DateTimeOffset.UtcNow.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);
        return $"vr-{stamp}-{Guid.NewGuid():N}"[..32];
    }

    /// <summary>
    /// Prepares the environment for a run. Currently a no-op placeholder.
    /// </summary>
    private Task InitializeAsync(ValidationRunContext context, CancellationToken ct)
    {
        // Placeholder: Initialize any required resources
        // - Verify corpus access
        // - Pre-warm caches
        // - Validate configuration
        return Task.CompletedTask;
    }

    /// <summary>
    /// Validates every pair in the request with bounded parallelism and reports progress
    /// on <paramref name="context"/>.
    /// </summary>
    /// <returns>One result per requested pair, in request order.</returns>
    private async Task<ImmutableArray<PairValidationResult>> ValidatePairsAsync(
        ValidationRunContext context,
        CancellationToken ct)
    {
        var request = context.Request;
        var pairs = request.Pairs;
        var completed = 0;

        context.UpdateState(ValidationState.Assembling, $"Validating {pairs.Length} pairs");

        // Process pairs with controlled parallelism. Clamp to at least one slot: zero would
        // block every pair until the timeout and a negative count throws.
        using var semaphore = new SemaphoreSlim(Math.Max(1, request.MaxParallelism));

        var tasks = pairs.Select(async pair =>
        {
            await semaphore.WaitAsync(ct);
            try
            {
                var result = await ValidateSinglePairAsync(pair, request, ct);

                // Use the value returned by Increment; re-reading the shared counter could
                // observe another worker's update and report progress out of order.
                var done = Interlocked.Increment(ref completed);
                context.UpdateProgress(done, pairs.Length);
                return result;
            }
            finally
            {
                semaphore.Release();
            }
        });

        // WhenAll waits for every worker before completing, so the semaphore is no longer
        // in use when it is disposed.
        var taskResults = await Task.WhenAll(tasks);
        return [.. taskResults];
    }

    /// <summary>
    /// Runs the assemble / recover / lift / fingerprint / match pipeline for one pair.
    /// Failures other than cancellation are converted into a failed
    /// <see cref="PairValidationResult"/> rather than thrown.
    /// </summary>
    private async Task<PairValidationResult> ValidateSinglePairAsync(
        SecurityPairReference pairRef,
        ValidationRunRequest request,
        CancellationToken ct)
    {
        var stopwatch = Stopwatch.StartNew();

        try
        {
            // Step 1: Assemble - Load the security pair from corpus
            var pair = await _pairService.FindByIdAsync(pairRef.PairId, ct);
            if (pair is null)
            {
                return CreateFailedPairResult(pairRef, "Security pair not found in corpus");
            }

            // Step 2: Recover symbols via ground-truth connectors
            // Placeholder: Would call ISymbolSourceConnector implementations
            var (prePatchSymbols, postPatchSymbols) = await RecoverSymbolsAsync(pair, ct);

            // Step 3: Lift to intermediate representation
            // Placeholder: Would call semantic analysis pipeline
            var (prePatchIr, postPatchIr) = await LiftToIrAsync(pair, prePatchSymbols, postPatchSymbols, ct);

            // Step 4: Generate fingerprints
            // Placeholder: Would call fingerprint generator
            var (prePatchFingerprints, postPatchFingerprints) = await GenerateFingerprintsAsync(
                prePatchIr, postPatchIr, ct);

            // Step 5: Match functions
            var matches = await MatchFunctionsAsync(
                prePatchFingerprints, postPatchFingerprints, request.Matcher, ct);

            // Step 6: Compute pair metrics
            var totalPost = postPatchFingerprints.Count;
            var matchedCount = matches.Count(m => m.Matched);
            var patchedDetected = matches.Count(m => m.WasPatched && m.PatchDetected);
            var totalPatched = pair.ChangedFunctions.Length;

            stopwatch.Stop();

            return new PairValidationResult
            {
                PairId = pairRef.PairId,
                CveId = pairRef.CveId,
                PackageName = pairRef.PackageName,
                Success = true,
                FunctionMatchRate = totalPost > 0 ? (matchedCount * 100.0 / totalPost) : 0,
                TotalFunctionsPost = totalPost,
                MatchedFunctions = matchedCount,
                PatchedFunctionsDetected = patchedDetected,
                TotalPatchedFunctions = totalPatched,
                SbomHash = ComputeSbomHash(pair),
                VerifyTimeMs = (int)stopwatch.ElapsedMilliseconds,
                FunctionMatches = [.. matches],
                Duration = stopwatch.Elapsed
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Cancellation or timeout must abort the run, not be recorded as a failed pair.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to validate pair {PairId}", pairRef.PairId);
            return CreateFailedPairResult(pairRef, ex.Message);
        }
    }

    /// <summary>
    /// Recovers symbols for both sides of a pair. Placeholder: returns empty lists.
    /// </summary>
    private Task<(IReadOnlyList<SymbolInfo> PrePatch, IReadOnlyList<SymbolInfo> PostPatch)> RecoverSymbolsAsync(
        SecurityPair pair,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with ISymbolSourceConnector implementations
        // For now, return empty symbol lists - actual implementation will come with GCF-002
        IReadOnlyList<SymbolInfo> prePatch = [];
        IReadOnlyList<SymbolInfo> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Lifts both sides of a pair to IR. Placeholder: returns empty lists.
    /// </summary>
    private Task<(IReadOnlyList<IrFunction> PrePatch, IReadOnlyList<IrFunction> PostPatch)> LiftToIrAsync(
        SecurityPair pair,
        IReadOnlyList<SymbolInfo> prePatchSymbols,
        IReadOnlyList<SymbolInfo> postPatchSymbols,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with semantic analysis pipeline
        // For now, return empty IR lists
        IReadOnlyList<IrFunction> prePatch = [];
        IReadOnlyList<IrFunction> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Generates fingerprints for both IR sets. Placeholder: returns empty lists.
    /// </summary>
    private Task<(IReadOnlyList<FunctionFingerprint> PrePatch, IReadOnlyList<FunctionFingerprint> PostPatch)> GenerateFingerprintsAsync(
        IReadOnlyList<IrFunction> prePatchIr,
        IReadOnlyList<IrFunction> postPatchIr,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with fingerprint generator
        // For now, return empty fingerprint lists
        IReadOnlyList<FunctionFingerprint> prePatch = [];
        IReadOnlyList<FunctionFingerprint> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Matches post-patch functions against pre-patch ones. Placeholder: returns no matches.
    /// </summary>
    private Task<IReadOnlyList<FunctionMatchResult>> MatchFunctionsAsync(
        IReadOnlyList<FunctionFingerprint> prePatchFingerprints,
        IReadOnlyList<FunctionFingerprint> postPatchFingerprints,
        MatcherConfiguration config,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with function matcher
        // For now, return empty match results
        IReadOnlyList<FunctionMatchResult> matches = [];
        return Task.FromResult(matches);
    }

    /// <summary>
    /// Computes the SBOM hash for a pair. Placeholder: always returns <c>null</c>.
    /// </summary>
    private static string? ComputeSbomHash(SecurityPair pair)
    {
        // Placeholder: Would compute deterministic SBOM hash
        return null;
    }

    /// <summary>
    /// Aggregates per-pair results into run-level metrics. Only successful pairs contribute
    /// to rates, timings and mismatch buckets.
    /// </summary>
    private static ValidationMetrics ComputeMetrics(
        ImmutableArray<PairValidationResult> pairResults,
        MetricsConfiguration config)
    {
        var successful = pairResults.Where(r => r.Success).ToList();

        var totalFunctionsPost = successful.Sum(r => r.TotalFunctionsPost);
        var matchedFunctions = successful.Sum(r => r.MatchedFunctions);
        var totalPatched = successful.Sum(r => r.TotalPatchedFunctions);
        var patchedDetected = successful.Sum(r => r.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        var matchRate = totalFunctionsPost > 0
            ? (matchedFunctions * 100.0 / totalFunctionsPost)
            : 0;

        var falseNegativeRate = totalPatched > 0
            ? (missedPatched * 100.0 / totalPatched)
            : 0;

        // SBOM stability: count unique hashes across successful pairs
        var uniqueHashes = successful
            .Where(r => r.SbomHash is not null)
            .Select(r => r.SbomHash)
            .Distinct()
            .Count();
        var sbomStability = uniqueHashes == 1 ? config.SbomStabilityRuns : 0;

        // Verify times, ascending, so percentiles can be read by index
        var verifyTimes = successful
            .Where(r => r.VerifyTimeMs.HasValue)
            .Select(r => r.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        int? medianMs = null;
        int? p95Ms = null;
        if (verifyTimes.Count > 0)
        {
            medianMs = verifyTimes[verifyTimes.Count / 2];
            var p95Index = (int)(verifyTimes.Count * 0.95);
            p95Ms = verifyTimes[Math.Min(p95Index, verifyTimes.Count - 1)];
        }

        // Mismatch buckets: unmatched functions counted per category
        var buckets = new Dictionary<MismatchCategory, int>();
        if (config.GenerateMismatchBuckets)
        {
            foreach (var result in successful)
            {
                if (result.FunctionMatches is null)
                {
                    continue;
                }

                foreach (var match in result.FunctionMatches)
                {
                    if (!match.Matched && match.MismatchCategory.HasValue)
                    {
                        var category = match.MismatchCategory.Value;
                        buckets[category] = buckets.GetValueOrDefault(category) + 1;
                    }
                }
            }
        }

        return new ValidationMetrics
        {
            TotalPairs = pairResults.Length,
            SuccessfulPairs = successful.Count,
            FailedPairs = pairResults.Length - successful.Count,
            FunctionMatchRate = matchRate,
            FalseNegativeRate = falseNegativeRate,
            SbomHashStability = sbomStability,
            VerifyTimeMedianMs = medianMs,
            VerifyTimeP95Ms = p95Ms,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatchedFunctions = totalPatched,
            MissedPatchedFunctions = missedPatched,
            MismatchBuckets = buckets.ToImmutableDictionary()
        };
    }

    /// <summary>
    /// Renders the run as a Markdown report: summary metrics, one row per pair, and an
    /// optional mismatch breakdown. Numbers are formatted with the invariant culture and
    /// packages are ordered ordinally so the report does not vary with host locale.
    /// </summary>
    private static string GenerateMarkdownReport(
        ValidationRunRequest request,
        ValidationMetrics metrics,
        ImmutableArray<PairValidationResult> pairResults)
    {
        var inv = CultureInfo.InvariantCulture;
        var corpusVersion = request.CorpusVersion ?? "N/A";

        // The stability score is computed against the configured run count, so the
        // denominator and target must use the same value rather than a fixed 3.
        var stabilityRuns = request.Metrics.SbomStabilityRuns;

        var sb = new StringBuilder();

        sb.AppendLine("# Validation Run Report");
        sb.AppendLine();
        sb.AppendLine(inv, $"**Corpus Version:** {corpusVersion}");
        sb.AppendLine(inv, $"**Generated:** {DateTimeOffset.UtcNow:O}");
        sb.AppendLine();

        sb.AppendLine("## Summary Metrics");
        sb.AppendLine();
        sb.AppendLine("| Metric | Value | Target |");
        sb.AppendLine("|--------|-------|--------|");
        sb.AppendLine(inv, $"| Function Match Rate | {metrics.FunctionMatchRate:F1}% | >= 90% |");
        sb.AppendLine(inv, $"| False-Negative Rate | {metrics.FalseNegativeRate:F1}% | <= 5% |");
        sb.AppendLine(inv, $"| SBOM Hash Stability | {metrics.SbomHashStability}/{stabilityRuns} | {stabilityRuns}/{stabilityRuns} |");

        if (metrics.VerifyTimeMedianMs.HasValue)
        {
            sb.AppendLine(inv, $"| Verify Time (p50) | {metrics.VerifyTimeMedianMs}ms | - |");
        }

        if (metrics.VerifyTimeP95Ms.HasValue)
        {
            sb.AppendLine(inv, $"| Verify Time (p95) | {metrics.VerifyTimeP95Ms}ms | - |");
        }

        sb.AppendLine();

        sb.AppendLine("## Pair Results");
        sb.AppendLine();
        sb.AppendLine("| Package | CVE | Match Rate | Patched Detected | Status |");
        sb.AppendLine("|---------|-----|------------|------------------|--------|");

        foreach (var result in pairResults.OrderBy(r => r.PackageName, StringComparer.Ordinal))
        {
            var status = result.Success ? "Pass" : "Fail";
            var detected = result.TotalPatchedFunctions > 0
                ? string.Create(inv, $"{result.PatchedFunctionsDetected}/{result.TotalPatchedFunctions}")
                : "N/A";

            sb.AppendLine(inv, $"| {result.PackageName} | {result.CveId} | {result.FunctionMatchRate:F1}% | {detected} | {status} |");
        }

        if (metrics.MismatchBuckets is not null && metrics.MismatchBuckets.Count > 0)
        {
            sb.AppendLine();
            sb.AppendLine("## Mismatch Analysis");
            sb.AppendLine();
            sb.AppendLine("| Category | Count |");
            sb.AppendLine("|----------|-------|");

            foreach (var (category, count) in metrics.MismatchBuckets.OrderByDescending(x => x.Value))
            {
                sb.AppendLine(inv, $"| {category} | {count} |");
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Builds a failed result for a single pair carrying the given error text.
    /// </summary>
    private static PairValidationResult CreateFailedPairResult(SecurityPairReference pairRef, string error)
    {
        return new PairValidationResult
        {
            PairId = pairRef.PairId,
            CveId = pairRef.CveId,
            PackageName = pairRef.PackageName,
            Success = false,
            Error = error
        };
    }

    /// <summary>
    /// Builds the run-level result for a cancelled, timed-out or failed run. All pairs are
    /// reported as failed and no per-pair results are included.
    /// </summary>
    private static ValidationRunResult CreateFailedResult(
        string runId,
        DateTimeOffset startedAt,
        ValidationRunContext context,
        string error)
    {
        var request = context.Request;
        var totalPairs = request.Pairs.Length;

        return new ValidationRunResult
        {
            RunId = runId,
            StartedAt = startedAt,
            CompletedAt = DateTimeOffset.UtcNow,
            Status = context.GetStatus(),
            Metrics = new ValidationMetrics
            {
                TotalPairs = totalPairs,
                SuccessfulPairs = 0,
                FailedPairs = totalPairs
            },
            PairResults = [],
            // Carry the request identity through so failed runs can be attributed the same
            // way successful ones are.
            CorpusVersion = request.CorpusVersion,
            TenantId = request.TenantId,
            MatcherConfig = request.Matcher,
            Error = error
        };
    }

    /// <summary>
    /// Context for a running validation. Holds the cancellation source used by
    /// <see cref="CancelAsync"/> and the state reported by <see cref="GetStatusAsync"/>.
    /// </summary>
    private sealed class ValidationRunContext : IDisposable
    {
        // Guards _state, _currentStage and _pairsCompleted: they are written by the run and
        // read by status queries on other threads.
        private readonly object _gate = new();
        private readonly CancellationTokenSource _cts;
        private ValidationState _state = ValidationState.Queued;
        private string? _currentStage;
        private int _pairsCompleted;

        public string RunId { get; }

        public ValidationRunRequest Request { get; }

        public DateTimeOffset StartedAt { get; }

        /// <summary>
        /// Token that is cancelled by <see cref="Cancel"/> or by the caller's token.
        /// </summary>
        public CancellationToken Token { get; }

        public bool IsCancelled => _cts.IsCancellationRequested;

        public ValidationRunContext(
            string runId,
            ValidationRunRequest request,
            DateTimeOffset startedAt,
            CancellationToken ct)
        {
            RunId = runId;
            Request = request;
            StartedAt = startedAt;
            _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);

            // Captured once: reading CancellationTokenSource.Token after disposal throws.
            Token = _cts.Token;
        }

        public void UpdateState(ValidationState state, string? stage = null)
        {
            lock (_gate)
            {
                _state = state;
                _currentStage = stage;
            }
        }

        public void UpdateProgress(int completed, int total)
        {
            lock (_gate)
            {
                // Workers may report out of order; never let progress move backwards.
                if (completed > _pairsCompleted)
                {
                    _pairsCompleted = completed;
                }
            }
        }

        public void Cancel()
        {
            try
            {
                _cts.Cancel();
            }
            catch (ObjectDisposedException)
            {
                // The run finished and disposed the context between lookup and cancel;
                // there is nothing left to cancel.
            }
        }

        public ValidationRunStatus GetStatus()
        {
            ValidationState state;
            string? stage;
            int completed;

            lock (_gate)
            {
                state = _state;
                stage = _currentStage;
                completed = _pairsCompleted;
            }

            var total = Request.Pairs.Length;

            // Widen before multiplying so very large corpora cannot overflow int.
            var progress = total > 0 ? (int)(completed * 100L / total) : 0;

            return new ValidationRunStatus
            {
                RunId = RunId,
                State = state,
                Progress = progress,
                CurrentStage = stage,
                PairsCompleted = completed,
                TotalPairs = total,
                StartedAt = StartedAt
            };
        }

        /// <summary>
        /// Releases the linked cancellation source and its registration on the caller's token.
        /// </summary>
        public void Dispose() => _cts.Dispose();
    }
}

/// <summary>
/// Symbol information recovered from ground-truth sources.
/// Placeholder for full implementation.
/// </summary>
internal sealed record SymbolInfo(
    string Name,
    ulong Address,
    int Size);

/// <summary>
/// Lifted intermediate representation of a function.
/// Placeholder for full implementation.
/// </summary>
internal sealed record IrFunction(
    string Name,
    ulong Address,
    byte[] IrBytes);

/// <summary>
/// Function fingerprint for matching.
/// Placeholder for full implementation.
/// </summary>
internal sealed record FunctionFingerprint(
    string Name,
    ulong Address,
    byte[] Hash,
    int BasicBlockCount,
    int InstructionCount);