Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.GroundTruth.Reproducible/ValidationHarnessService.cs
2026-01-22 19:08:46 +02:00

572 lines
20 KiB
C#

// -----------------------------------------------------------------------------
// ValidationHarnessService.cs
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
// Task: GCF-003 - Implement validation harness skeleton
// Description: Orchestrates end-to-end validation of patch-paired artifacts
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Implementation of <see cref="IValidationHarness"/> that orchestrates
/// end-to-end validation of patch-paired artifacts.
/// </summary>
/// <remarks>
/// A run is registered in an in-memory dictionary only while <see cref="RunAsync"/> is executing,
/// so <see cref="GetStatusAsync"/> and <see cref="CancelAsync"/> only see runs that are in flight.
/// </remarks>
public sealed class ValidationHarnessService : IValidationHarness
{
    private readonly ISecurityPairService _pairService;
    private readonly ILogger<ValidationHarnessService> _logger;
    private readonly ConcurrentDictionary<string, ValidationRunContext> _activeRuns = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="ValidationHarnessService"/> class.
    /// </summary>
    /// <param name="pairService">Service used to load security pairs by identifier.</param>
    /// <param name="logger">Logger for run-level and pair-level diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
    public ValidationHarnessService(
        ISecurityPairService pairService,
        ILogger<ValidationHarnessService> logger)
    {
        ArgumentNullException.ThrowIfNull(pairService);
        ArgumentNullException.ThrowIfNull(logger);

        _pairService = pairService;
        _logger = logger;
    }

    /// <inheritdoc/>
    public async Task<ValidationRunResult> RunAsync(
        ValidationRunRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var runId = GenerateRunId();
        var startedAt = DateTimeOffset.UtcNow;
        var stopwatch = Stopwatch.StartNew();
        var context = new ValidationRunContext(runId, request, startedAt, ct);
        CancellationTokenSource? runCts = null;

        try
        {
            // Registration and logging live inside the try block so that the finally block
            // always unregisters and disposes the context, even when an early step throws.
            _activeRuns[runId] = context;

            _logger.LogInformation(
                "Starting validation run {RunId} with {PairCount} pairs",
                runId,
                request.Pairs.Length);

            // Link to the context token (which is itself linked to the caller's token) so that
            // both the caller's token and CancelAsync stop the run; the timeout is layered on top.
            runCts = CancellationTokenSource.CreateLinkedTokenSource(context.Token);
            runCts.CancelAfter(request.Timeout);
            var runToken = runCts.Token;

            // Phase 1: Initialize
            context.UpdateState(ValidationState.Initializing, "Initializing validation environment");
            await InitializeAsync(context, runToken);

            // Phase 2: Validate pairs
            var pairResults = await ValidatePairsAsync(context, runToken);

            // Phase 3: Compute aggregate metrics
            context.UpdateState(ValidationState.ComputingMetrics, "Computing aggregate metrics");
            var metrics = ComputeMetrics(pairResults, request.Metrics);

            // Phase 4: Generate report
            context.UpdateState(ValidationState.GeneratingReport, "Generating report");
            var report = GenerateMarkdownReport(request, metrics, pairResults);

            stopwatch.Stop();
            context.UpdateState(ValidationState.Completed, "Validation completed");

            _logger.LogInformation(
                "Validation run {RunId} completed in {Duration}. Match rate: {MatchRate:F1}%",
                runId,
                stopwatch.Elapsed,
                metrics.FunctionMatchRate);

            return new ValidationRunResult
            {
                RunId = runId,
                StartedAt = startedAt,
                CompletedAt = DateTimeOffset.UtcNow,
                Status = context.GetStatus(),
                Metrics = metrics,
                PairResults = pairResults,
                CorpusVersion = request.CorpusVersion,
                TenantId = request.TenantId,
                MatcherConfig = request.Matcher,
                MarkdownReport = report
            };
        }
        catch (OperationCanceledException) when (context.IsCancelled)
        {
            _logger.LogWarning("Validation run {RunId} was cancelled", runId);
            context.UpdateState(ValidationState.Cancelled, "Validation cancelled");
            return CreateFailedResult(runId, startedAt, context, "Validation was cancelled");
        }
        catch (OperationCanceledException) when (runCts is { IsCancellationRequested: true })
        {
            // The run token fired but nobody requested cancellation: the timeout elapsed.
            var message = $"Validation timed out after {request.Timeout}";
            _logger.LogError("Validation run {RunId} timed out after {Timeout}", runId, request.Timeout);
            context.UpdateState(ValidationState.Failed, message);
            return CreateFailedResult(runId, startedAt, context, message);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Validation run {RunId} failed", runId);
            context.UpdateState(ValidationState.Failed, ex.Message);
            return CreateFailedResult(runId, startedAt, context, ex.Message);
        }
        finally
        {
            // Unregister first so that no new status/cancel lookups can find a disposed context.
            _activeRuns.TryRemove(runId, out _);
            runCts?.Dispose();
            context.Dispose();
        }
    }

    /// <inheritdoc/>
    public Task<ValidationRunStatus?> GetStatusAsync(string runId, CancellationToken ct = default)
    {
        var status = _activeRuns.TryGetValue(runId, out var context)
            ? context.GetStatus()
            : null;

        return Task.FromResult<ValidationRunStatus?>(status);
    }

    /// <inheritdoc/>
    public Task<bool> CancelAsync(string runId, CancellationToken ct = default)
    {
        // TryCancel returns false when the run finished (and was disposed) between lookup and cancel.
        var cancelled = _activeRuns.TryGetValue(runId, out var context) && context.TryCancel();
        return Task.FromResult(cancelled);
    }

    /// <summary>
    /// Builds a 32-character run identifier from a UTC timestamp and part of a fresh GUID.
    /// </summary>
    /// <returns>An identifier of the form <c>vr-yyyyMMddHHmmss-</c> followed by 14 hex digits.</returns>
    private static string GenerateRunId()
    {
        // Invariant culture keeps the timestamp on the Gregorian calendar regardless of host settings.
        // The prefix ("vr-" + 14 timestamp chars + "-") is 18 chars, so the cut keeps 14 GUID digits.
        var id = string.Create(
            CultureInfo.InvariantCulture,
            $"vr-{DateTimeOffset.UtcNow:yyyyMMddHHmmss}-{Guid.NewGuid():N}");

        return id[..32];
    }

    /// <summary>
    /// Prepares the environment for a run. Currently a placeholder that completes immediately.
    /// </summary>
    /// <param name="context">The run being initialized (unused for now).</param>
    /// <param name="ct">Cancellation token for the run (unused for now).</param>
    private Task InitializeAsync(ValidationRunContext context, CancellationToken ct)
    {
        // Placeholder: Initialize any required resources
        // - Verify corpus access
        // - Pre-warm caches
        // - Validate configuration
        return Task.CompletedTask;
    }

    /// <summary>
    /// Validates every pair in the request, running at most <c>MaxParallelism</c> pairs concurrently.
    /// </summary>
    /// <param name="context">Run context; receives state and progress updates.</param>
    /// <param name="ct">Token that aborts the whole batch when cancelled.</param>
    /// <returns>One result per requested pair, in request order.</returns>
    private async Task<ImmutableArray<PairValidationResult>> ValidatePairsAsync(
        ValidationRunContext context,
        CancellationToken ct)
    {
        var request = context.Request;
        var pairs = request.Pairs;

        context.UpdateState(ValidationState.Assembling, $"Validating {pairs.Length} pairs");

        // A non-positive limit would either throw or create a semaphore that never admits a worker,
        // stalling the run until the timeout. Fall back to sequential processing instead.
        var maxParallelism = Math.Max(1, request.MaxParallelism);

        // Safe to dispose at method exit: Task.WhenAll only completes after every worker has
        // finished, including its Release call.
        using var semaphore = new SemaphoreSlim(maxParallelism);

        var tasks = pairs.Select(async pair =>
        {
            await semaphore.WaitAsync(ct);
            try
            {
                var result = await ValidateSinglePairAsync(pair, request, ct);
                context.RecordPairCompleted();
                return result;
            }
            finally
            {
                semaphore.Release();
            }
        });

        var taskResults = await Task.WhenAll(tasks);
        return [.. taskResults];
    }

    /// <summary>
    /// Runs the assemble / recover / lift / fingerprint / match pipeline for a single pair.
    /// </summary>
    /// <param name="pairRef">Reference identifying the pair to validate.</param>
    /// <param name="request">The owning run request (supplies the matcher configuration).</param>
    /// <param name="ct">Cancellation token for the run.</param>
    /// <returns>
    /// A successful result with per-pair metrics, or a failed result carrying the error message
    /// when the pair is missing or a pipeline step throws.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<PairValidationResult> ValidateSinglePairAsync(
        SecurityPairReference pairRef,
        ValidationRunRequest request,
        CancellationToken ct)
    {
        var stopwatch = Stopwatch.StartNew();

        try
        {
            // Step 1: Assemble - Load the security pair from corpus
            var pair = await _pairService.FindByIdAsync(pairRef.PairId, ct);
            if (pair is null)
            {
                return CreateFailedPairResult(pairRef, "Security pair not found in corpus");
            }

            // Step 2: Recover symbols via ground-truth connectors
            // Placeholder: Would call ISymbolSourceConnector implementations
            var (prePatchSymbols, postPatchSymbols) = await RecoverSymbolsAsync(pair, ct);

            // Step 3: Lift to intermediate representation
            // Placeholder: Would call semantic analysis pipeline
            var (prePatchIr, postPatchIr) = await LiftToIrAsync(pair, prePatchSymbols, postPatchSymbols, ct);

            // Step 4: Generate fingerprints
            // Placeholder: Would call fingerprint generator
            var (prePatchFingerprints, postPatchFingerprints) = await GenerateFingerprintsAsync(
                prePatchIr, postPatchIr, ct);

            // Step 5: Match functions
            var matches = await MatchFunctionsAsync(
                prePatchFingerprints,
                postPatchFingerprints,
                request.Matcher,
                ct);

            // Step 6: Compute pair metrics
            var totalPost = postPatchFingerprints.Count;
            var matchedCount = matches.Count(m => m.Matched);
            var patchedDetected = matches.Count(m => m.WasPatched && m.PatchDetected);
            var totalPatched = pair.ChangedFunctions.Length;

            stopwatch.Stop();

            return new PairValidationResult
            {
                PairId = pairRef.PairId,
                CveId = pairRef.CveId,
                PackageName = pairRef.PackageName,
                Success = true,
                FunctionMatchRate = totalPost > 0 ? (matchedCount * 100.0 / totalPost) : 0,
                TotalFunctionsPost = totalPost,
                MatchedFunctions = matchedCount,
                PatchedFunctionsDetected = patchedDetected,
                TotalPatchedFunctions = totalPatched,
                SbomHash = ComputeSbomHash(pair),
                VerifyTimeMs = (int)stopwatch.ElapsedMilliseconds,
                FunctionMatches = [.. matches],
                Duration = stopwatch.Elapsed
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Let cancellation/timeout abort the run instead of being recorded as a pair failure.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to validate pair {PairId}", pairRef.PairId);
            return CreateFailedPairResult(pairRef, ex.Message);
        }
    }

    /// <summary>
    /// Recovers symbols for both sides of a pair. Placeholder: currently returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<SymbolInfo> PrePatch, IReadOnlyList<SymbolInfo> PostPatch)> RecoverSymbolsAsync(
        SecurityPair pair,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with ISymbolSourceConnector implementations
        // For now, return empty symbol lists - actual implementation will come with GCF-002
        IReadOnlyList<SymbolInfo> prePatch = [];
        IReadOnlyList<SymbolInfo> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Lifts both sides of a pair to IR. Placeholder: currently returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<IrFunction> PrePatch, IReadOnlyList<IrFunction> PostPatch)> LiftToIrAsync(
        SecurityPair pair,
        IReadOnlyList<SymbolInfo> prePatchSymbols,
        IReadOnlyList<SymbolInfo> postPatchSymbols,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with semantic analysis pipeline
        // For now, return empty IR lists
        IReadOnlyList<IrFunction> prePatch = [];
        IReadOnlyList<IrFunction> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Fingerprints the lifted functions. Placeholder: currently returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<FunctionFingerprint> PrePatch, IReadOnlyList<FunctionFingerprint> PostPatch)> GenerateFingerprintsAsync(
        IReadOnlyList<IrFunction> prePatchIr,
        IReadOnlyList<IrFunction> postPatchIr,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with fingerprint generator
        // For now, return empty fingerprint lists
        IReadOnlyList<FunctionFingerprint> prePatch = [];
        IReadOnlyList<FunctionFingerprint> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Matches pre-patch against post-patch fingerprints. Placeholder: currently returns no matches.
    /// </summary>
    private Task<IReadOnlyList<FunctionMatchResult>> MatchFunctionsAsync(
        IReadOnlyList<FunctionFingerprint> prePatchFingerprints,
        IReadOnlyList<FunctionFingerprint> postPatchFingerprints,
        MatcherConfiguration config,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with function matcher
        // For now, return empty match results
        IReadOnlyList<FunctionMatchResult> matches = [];
        return Task.FromResult(matches);
    }

    /// <summary>
    /// Computes the SBOM hash for a pair. Placeholder: currently always returns <see langword="null"/>.
    /// </summary>
    private static string? ComputeSbomHash(SecurityPair pair)
    {
        // Placeholder: Would compute deterministic SBOM hash
        return null;
    }

    /// <summary>
    /// Aggregates per-pair results into run-level metrics. Only successful pairs contribute to
    /// the rates, timings and mismatch buckets; failed pairs are counted but otherwise ignored.
    /// </summary>
    /// <param name="pairResults">Results for every pair in the run.</param>
    /// <param name="config">Controls the SBOM stability run count and mismatch bucketing.</param>
    /// <returns>The aggregated metrics.</returns>
    private static ValidationMetrics ComputeMetrics(
        ImmutableArray<PairValidationResult> pairResults,
        MetricsConfiguration config)
    {
        var successful = pairResults.Where(r => r.Success).ToList();

        var totalFunctionsPost = successful.Sum(r => r.TotalFunctionsPost);
        var matchedFunctions = successful.Sum(r => r.MatchedFunctions);
        var totalPatched = successful.Sum(r => r.TotalPatchedFunctions);
        var patchedDetected = successful.Sum(r => r.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        // Rates are percentages; an empty denominator yields 0 rather than NaN.
        var matchRate = totalFunctionsPost > 0
            ? (matchedFunctions * 100.0 / totalFunctionsPost)
            : 0;
        var falseNegativeRate = totalPatched > 0
            ? (missedPatched * 100.0 / totalPatched)
            : 0;

        // SBOM stability: count unique hashes across successful pairs
        var uniqueHashes = successful
            .Where(r => r.SbomHash is not null)
            .Select(r => r.SbomHash)
            .Distinct()
            .Count();
        var sbomStability = uniqueHashes == 1 ? config.SbomStabilityRuns : 0;

        // Verify times, ascending, so percentiles can be read by index.
        var verifyTimes = successful
            .Where(r => r.VerifyTimeMs.HasValue)
            .Select(r => r.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        int? medianMs = null;
        int? p95Ms = null;
        if (verifyTimes.Count > 0)
        {
            // For an even count this picks the upper of the two middle samples.
            medianMs = verifyTimes[verifyTimes.Count / 2];

            // floor(0.95 * n), clamped to the last sample.
            var p95Index = (int)(verifyTimes.Count * 0.95);
            p95Ms = verifyTimes[Math.Min(p95Index, verifyTimes.Count - 1)];
        }

        // Mismatch buckets
        var buckets = new Dictionary<MismatchCategory, int>();
        if (config.GenerateMismatchBuckets)
        {
            foreach (var result in successful)
            {
                if (result.FunctionMatches is null) continue;

                foreach (var match in result.FunctionMatches)
                {
                    if (!match.Matched && match.MismatchCategory.HasValue)
                    {
                        var category = match.MismatchCategory.Value;
                        buckets[category] = buckets.GetValueOrDefault(category) + 1;
                    }
                }
            }
        }

        return new ValidationMetrics
        {
            TotalPairs = pairResults.Length,
            SuccessfulPairs = successful.Count,
            FailedPairs = pairResults.Length - successful.Count,
            FunctionMatchRate = matchRate,
            FalseNegativeRate = falseNegativeRate,
            SbomHashStability = sbomStability,
            VerifyTimeMedianMs = medianMs,
            VerifyTimeP95Ms = p95Ms,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatchedFunctions = totalPatched,
            MissedPatchedFunctions = missedPatched,
            MismatchBuckets = buckets.ToImmutableDictionary()
        };
    }

    /// <summary>
    /// Renders the run as a Markdown document: summary metrics, a per-pair table and, when
    /// present, a mismatch-category breakdown.
    /// </summary>
    /// <param name="request">The run request (corpus version and metrics configuration).</param>
    /// <param name="metrics">Aggregated metrics for the run.</param>
    /// <param name="pairResults">Per-pair results to tabulate.</param>
    /// <returns>The Markdown text.</returns>
    private static string GenerateMarkdownReport(
        ValidationRunRequest request,
        ValidationMetrics metrics,
        ImmutableArray<PairValidationResult> pairResults)
    {
        // Numbers are formatted with the invariant culture so the report does not change with
        // the host's regional settings (e.g. "95,0%" vs "95.0%").
        var inv = CultureInfo.InvariantCulture;

        // The stability denominator comes from the same setting ComputeMetrics uses.
        var stabilityRuns = request.Metrics.SbomStabilityRuns;

        var sb = new StringBuilder();

        sb.AppendLine("# Validation Run Report");
        sb.AppendLine();
        sb.AppendLine(inv, $"**Corpus Version:** {request.CorpusVersion ?? "N/A"}");
        sb.AppendLine(inv, $"**Generated:** {DateTimeOffset.UtcNow:O}");
        sb.AppendLine();

        sb.AppendLine("## Summary Metrics");
        sb.AppendLine();
        sb.AppendLine("| Metric | Value | Target |");
        sb.AppendLine("|--------|-------|--------|");
        sb.AppendLine(inv, $"| Function Match Rate | {metrics.FunctionMatchRate:F1}% | >= 90% |");
        sb.AppendLine(inv, $"| False-Negative Rate | {metrics.FalseNegativeRate:F1}% | <= 5% |");
        sb.AppendLine(inv, $"| SBOM Hash Stability | {metrics.SbomHashStability}/{stabilityRuns} | {stabilityRuns}/{stabilityRuns} |");
        if (metrics.VerifyTimeMedianMs.HasValue)
        {
            sb.AppendLine(inv, $"| Verify Time (p50) | {metrics.VerifyTimeMedianMs}ms | - |");
        }
        if (metrics.VerifyTimeP95Ms.HasValue)
        {
            sb.AppendLine(inv, $"| Verify Time (p95) | {metrics.VerifyTimeP95Ms}ms | - |");
        }
        sb.AppendLine();

        sb.AppendLine("## Pair Results");
        sb.AppendLine();
        sb.AppendLine("| Package | CVE | Match Rate | Patched Detected | Status |");
        sb.AppendLine("|---------|-----|------------|------------------|--------|");

        // Ordinal ordering keeps the row order identical on every host.
        foreach (var result in pairResults.OrderBy(r => r.PackageName, StringComparer.Ordinal))
        {
            var status = result.Success ? "Pass" : "Fail";
            var detected = result.TotalPatchedFunctions > 0
                ? $"{result.PatchedFunctionsDetected}/{result.TotalPatchedFunctions}"
                : "N/A";

            sb.AppendLine(inv, $"| {result.PackageName} | {result.CveId} | {result.FunctionMatchRate:F1}% | {detected} | {status} |");
        }

        if (metrics.MismatchBuckets is not null && metrics.MismatchBuckets.Count > 0)
        {
            sb.AppendLine();
            sb.AppendLine("## Mismatch Analysis");
            sb.AppendLine();
            sb.AppendLine("| Category | Count |");
            sb.AppendLine("|----------|-------|");

            foreach (var (category, count) in metrics.MismatchBuckets.OrderByDescending(x => x.Value))
            {
                sb.AppendLine(inv, $"| {category} | {count} |");
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Builds a failed per-pair result that carries the pair's identity and an error message.
    /// </summary>
    private static PairValidationResult CreateFailedPairResult(SecurityPairReference pairRef, string error)
    {
        return new PairValidationResult
        {
            PairId = pairRef.PairId,
            CveId = pairRef.CveId,
            PackageName = pairRef.PackageName,
            Success = false,
            Error = error
        };
    }

    /// <summary>
    /// Builds the result returned when a run is cancelled, times out or throws: every requested
    /// pair is counted as failed and no per-pair results are included.
    /// </summary>
    private static ValidationRunResult CreateFailedResult(
        string runId,
        DateTimeOffset startedAt,
        ValidationRunContext context,
        string error)
    {
        return new ValidationRunResult
        {
            RunId = runId,
            StartedAt = startedAt,
            CompletedAt = DateTimeOffset.UtcNow,
            Status = context.GetStatus(),
            Metrics = new ValidationMetrics
            {
                TotalPairs = context.Request.Pairs.Length,
                SuccessfulPairs = 0,
                FailedPairs = context.Request.Pairs.Length
            },
            PairResults = [],
            Error = error
        };
    }

    /// <summary>
    /// Context for a running validation: owns the run's cancellation source and the mutable
    /// state/progress that status queries read from other threads.
    /// </summary>
    private sealed class ValidationRunContext : IDisposable
    {
        private readonly CancellationTokenSource _cts;
        private readonly object _gate = new();
        private ValidationState _state = ValidationState.Queued;
        private string? _currentStage;
        private int _pairsCompleted;

        public string RunId { get; }

        public ValidationRunRequest Request { get; }

        public DateTimeOffset StartedAt { get; }

        /// <summary>
        /// Token cancelled by either the caller's token or <see cref="TryCancel"/>.
        /// Captured once so it stays readable after <see cref="Dispose"/>.
        /// </summary>
        public CancellationToken Token { get; }

        public bool IsCancelled => Token.IsCancellationRequested;

        public ValidationRunContext(
            string runId,
            ValidationRunRequest request,
            DateTimeOffset startedAt,
            CancellationToken ct)
        {
            RunId = runId;
            Request = request;
            StartedAt = startedAt;
            _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            Token = _cts.Token;
        }

        /// <summary>Sets the state and stage together so readers never see a mixed pair.</summary>
        public void UpdateState(ValidationState state, string? stage = null)
        {
            lock (_gate)
            {
                _state = state;
                _currentStage = stage;
            }
        }

        /// <summary>Atomically counts one more finished pair; safe to call from parallel workers.</summary>
        public void RecordPairCompleted()
        {
            Interlocked.Increment(ref _pairsCompleted);
        }

        /// <summary>
        /// Requests cancellation of the run.
        /// </summary>
        /// <returns><see langword="false"/> when the context was already disposed.</returns>
        public bool TryCancel()
        {
            try
            {
                _cts.Cancel();
                return true;
            }
            catch (ObjectDisposedException)
            {
                // The run completed between the caller's lookup and this call.
                return false;
            }
        }

        /// <summary>Takes a snapshot of the current state, stage and progress.</summary>
        public ValidationRunStatus GetStatus()
        {
            ValidationState state;
            string? stage;
            lock (_gate)
            {
                state = _state;
                stage = _currentStage;
            }

            var completed = Volatile.Read(ref _pairsCompleted);
            var total = Request.Pairs.Length;

            // 64-bit intermediate avoids overflow of completed * 100 for very large runs.
            var progress = total > 0 ? (int)(completed * 100L / total) : 0;

            return new ValidationRunStatus
            {
                RunId = RunId,
                State = state,
                Progress = progress,
                CurrentStage = stage,
                PairsCompleted = completed,
                TotalPairs = total,
                StartedAt = StartedAt
            };
        }

        public void Dispose()
        {
            _cts.Dispose();
        }
    }
}
/// <summary>
/// A symbol recovered from a ground-truth source.
/// Placeholder shape until the full model is implemented.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Address of the symbol.</param>
/// <param name="Size">Size of the symbol.</param>
internal sealed record SymbolInfo(string Name, ulong Address, int Size);
/// <summary>
/// A function lifted to intermediate representation.
/// Placeholder shape until the full model is implemented.
/// </summary>
/// <remarks>
/// Record equality compares <paramref name="IrBytes"/> by array reference, not by content.
/// </remarks>
/// <param name="Name">Name of the function.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="IrBytes">The IR payload as raw bytes.</param>
internal sealed record IrFunction(string Name, ulong Address, byte[] IrBytes);
/// <summary>
/// Fingerprint of a single function, used as input to function matching.
/// Placeholder shape until the full model is implemented.
/// </summary>
/// <remarks>
/// Record equality compares <paramref name="Hash"/> by array reference, not by content.
/// </remarks>
/// <param name="Name">Name of the function.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="Hash">Fingerprint hash bytes.</param>
/// <param name="BasicBlockCount">Number of basic blocks in the function.</param>
/// <param name="InstructionCount">Number of instructions in the function.</param>
internal sealed record FunctionFingerprint(string Name, ulong Address, byte[] Hash, int BasicBlockCount, int InstructionCount);