572 lines
20 KiB
C#
572 lines
20 KiB
C#
// -----------------------------------------------------------------------------
|
|
// ValidationHarnessService.cs
|
|
// Sprint: SPRINT_20260121_034_BinaryIndex_golden_corpus_foundation
|
|
// Task: GCF-003 - Implement validation harness skeleton
|
|
// Description: Orchestrates end-to-end validation of patch-paired artifacts
|
|
// -----------------------------------------------------------------------------
|
|
|
|
using System.Collections.Concurrent;
|
|
using System.Collections.Immutable;
|
|
using System.Diagnostics;
|
|
using System.Text;
|
|
using Microsoft.Extensions.Logging;
|
|
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
|
|
|
|
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
|
|
|
|
/// <summary>
/// Implementation of <see cref="IValidationHarness"/> that orchestrates
/// end-to-end validation of patch-paired artifacts.
/// </summary>
/// <remarks>
/// A run is registered in an in-memory table only while <see cref="RunAsync"/> is executing,
/// so <see cref="GetStatusAsync"/> and <see cref="CancelAsync"/> only see in-flight runs.
/// </remarks>
public sealed class ValidationHarnessService : IValidationHarness
{
    // Run ids and reports are machine-consumed artifacts; they must not vary with the host culture.
    private static readonly IFormatProvider InvariantFormat = System.Globalization.CultureInfo.InvariantCulture;

    private readonly ISecurityPairService _pairService;
    private readonly ILogger<ValidationHarnessService> _logger;
    private readonly ConcurrentDictionary<string, ValidationRunContext> _activeRuns = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="ValidationHarnessService"/> class.
    /// </summary>
    /// <param name="pairService">Service used to load security pairs by id.</param>
    /// <param name="logger">Logger for run and pair diagnostics.</param>
    public ValidationHarnessService(
        ISecurityPairService pairService,
        ILogger<ValidationHarnessService> logger)
    {
        _pairService = pairService;
        _logger = logger;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Failures, timeouts and cancellations are reported through the returned result
    /// (see <c>Error</c> and <c>Status</c>) rather than thrown.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    public async Task<ValidationRunResult> RunAsync(
        ValidationRunRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var runId = GenerateRunId();
        var stopwatch = Stopwatch.StartNew();

        // Declared with `using` so the linked token source is released after the
        // finally block below has removed the run from the registry.
        using var context = new ValidationRunContext(runId, request, ct);
        var startedAt = context.StartedAt;
        _activeRuns[runId] = context;

        CancellationTokenSource? timeoutCts = null;
        try
        {
            _logger.LogInformation(
                "Starting validation run {RunId} with {PairCount} pairs",
                runId,
                request.Pairs.Length);

            // Link to the context token (which is itself linked to the caller's token) so that
            // CancelAsync actually interrupts the work, then layer the request timeout on top.
            timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(context.Token);
            timeoutCts.CancelAfter(request.Timeout);
            var runToken = timeoutCts.Token;

            // Phase 1: Initialize
            context.UpdateState(ValidationState.Initializing, "Initializing validation environment");
            await InitializeAsync(context, runToken);

            // Phase 2: Validate pairs
            var pairResults = await ValidatePairsAsync(context, runToken);

            // Phase 3: Compute aggregate metrics
            context.UpdateState(ValidationState.ComputingMetrics, "Computing aggregate metrics");
            var metrics = ComputeMetrics(pairResults, request.Metrics);

            // Phase 4: Generate report
            context.UpdateState(ValidationState.GeneratingReport, "Generating report");
            var report = GenerateMarkdownReport(request, metrics, pairResults);

            stopwatch.Stop();
            context.UpdateState(ValidationState.Completed, "Validation completed");

            _logger.LogInformation(
                "Validation run {RunId} completed in {Duration}. Match rate: {MatchRate:F1}%",
                runId,
                stopwatch.Elapsed,
                metrics.FunctionMatchRate);

            return new ValidationRunResult
            {
                RunId = runId,
                StartedAt = startedAt,
                CompletedAt = DateTimeOffset.UtcNow,
                Status = context.GetStatus(),
                Metrics = metrics,
                PairResults = pairResults,
                CorpusVersion = request.CorpusVersion,
                TenantId = request.TenantId,
                MatcherConfig = request.Matcher,
                MarkdownReport = report
            };
        }
        catch (OperationCanceledException) when (context.IsCancelled)
        {
            // Caller token or CancelAsync fired.
            _logger.LogWarning("Validation run {RunId} was cancelled", runId);
            context.UpdateState(ValidationState.Cancelled, "Validation cancelled");

            return CreateFailedResult(runId, startedAt, context, "Validation was cancelled");
        }
        catch (OperationCanceledException ex) when (timeoutCts is { IsCancellationRequested: true })
        {
            // Only the timeout source fired: report it as a failure with an explicit reason.
            var message = $"Validation timed out after {request.Timeout}";
            _logger.LogError(ex, "Validation run {RunId} timed out after {Timeout}", runId, request.Timeout);
            context.UpdateState(ValidationState.Failed, message);

            return CreateFailedResult(runId, startedAt, context, message);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Validation run {RunId} failed", runId);
            context.UpdateState(ValidationState.Failed, ex.Message);

            return CreateFailedResult(runId, startedAt, context, ex.Message);
        }
        finally
        {
            _activeRuns.TryRemove(runId, out _);
            timeoutCts?.Dispose();
        }
    }

    /// <inheritdoc/>
    public Task<ValidationRunStatus?> GetStatusAsync(string runId, CancellationToken ct = default)
    {
        // Only in-flight runs are tracked; finished runs yield null.
        var status = _activeRuns.TryGetValue(runId, out var context)
            ? context.GetStatus()
            : null;

        return Task.FromResult<ValidationRunStatus?>(status);
    }

    /// <inheritdoc/>
    public Task<bool> CancelAsync(string runId, CancellationToken ct = default)
    {
        // TryCancel returns false if the run finished between the lookup and the cancel call.
        var cancelled = _activeRuns.TryGetValue(runId, out var context) && context.TryCancel();
        return Task.FromResult(cancelled);
    }

    /// <summary>
    /// Builds a 32-character run id of the form <c>vr-yyyyMMddHHmmss-</c> followed by
    /// the leading hex digits of a fresh GUID.
    /// </summary>
    private static string GenerateRunId()
    {
        // Invariant culture: with the current culture a non-Gregorian calendar would change the digits.
        var timestamp = DateTimeOffset.UtcNow.ToString("yyyyMMddHHmmss", InvariantFormat);
        var full = $"vr-{timestamp}-{Guid.NewGuid():N}";
        return full[..32];
    }

    /// <summary>
    /// Prepares the validation environment. Currently a no-op placeholder.
    /// </summary>
    private Task InitializeAsync(ValidationRunContext context, CancellationToken ct)
    {
        // Placeholder: Initialize any required resources
        // - Verify corpus access
        // - Pre-warm caches
        // - Validate configuration
        return Task.CompletedTask;
    }

    /// <summary>
    /// Validates every pair of the request, running at most <c>MaxParallelism</c> pairs at once,
    /// and publishes progress to <paramref name="context"/> as pairs finish.
    /// </summary>
    /// <returns>One result per requested pair, in request order.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The request's <c>MaxParallelism</c> is less than one.</exception>
    private async Task<ImmutableArray<PairValidationResult>> ValidatePairsAsync(
        ValidationRunContext context,
        CancellationToken ct)
    {
        var request = context.Request;
        var pairs = request.Pairs;
        var completed = 0;

        context.UpdateState(ValidationState.Assembling, $"Validating {pairs.Length} pairs");

        // A semaphore with zero permits would park every worker until the run timeout,
        // so reject non-positive parallelism up front.
        if (request.MaxParallelism < 1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(context),
                request.MaxParallelism,
                "MaxParallelism must be at least 1.");
        }

        // Process pairs with controlled parallelism.
        // Safe to dispose on exit: Task.WhenAll only returns once every worker has finished.
        using var semaphore = new SemaphoreSlim(request.MaxParallelism);

        var tasks = pairs.Select(async pair =>
        {
            await semaphore.WaitAsync(ct);
            try
            {
                var result = await ValidateSinglePairAsync(pair, request, ct);

                // Publish the value returned by the atomic increment; re-reading the shared
                // counter here could observe another worker's update.
                var done = Interlocked.Increment(ref completed);
                context.UpdateProgress(done, pairs.Length);
                return result;
            }
            finally
            {
                semaphore.Release();
            }
        });

        var taskResults = await Task.WhenAll(tasks);
        return [.. taskResults];
    }

    /// <summary>
    /// Runs the assemble / recover / lift / fingerprint / match pipeline for a single pair.
    /// </summary>
    /// <returns>
    /// A successful result with per-pair metrics, or a failed result carrying the error message
    /// when the pair is missing or a pipeline step throws.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<PairValidationResult> ValidateSinglePairAsync(
        SecurityPairReference pairRef,
        ValidationRunRequest request,
        CancellationToken ct)
    {
        var stopwatch = Stopwatch.StartNew();

        try
        {
            // Step 1: Assemble - Load the security pair from corpus
            var pair = await _pairService.FindByIdAsync(pairRef.PairId, ct);
            if (pair is null)
            {
                return CreateFailedPairResult(pairRef, "Security pair not found in corpus");
            }

            // Step 2: Recover symbols via ground-truth connectors
            // Placeholder: Would call ISymbolSourceConnector implementations
            var (prePatchSymbols, postPatchSymbols) = await RecoverSymbolsAsync(pair, ct);

            // Step 3: Lift to intermediate representation
            // Placeholder: Would call semantic analysis pipeline
            var (prePatchIr, postPatchIr) = await LiftToIrAsync(pair, prePatchSymbols, postPatchSymbols, ct);

            // Step 4: Generate fingerprints
            // Placeholder: Would call fingerprint generator
            var (prePatchFingerprints, postPatchFingerprints) = await GenerateFingerprintsAsync(
                prePatchIr, postPatchIr, ct);

            // Step 5: Match functions
            var matches = await MatchFunctionsAsync(
                prePatchFingerprints,
                postPatchFingerprints,
                request.Matcher,
                ct);

            // Step 6: Compute pair metrics
            var totalPost = postPatchFingerprints.Count;
            var matchedCount = matches.Count(m => m.Matched);
            var patchedDetected = matches.Count(m => m.WasPatched && m.PatchDetected);
            var totalPatched = pair.ChangedFunctions.Length;

            stopwatch.Stop();

            return new PairValidationResult
            {
                PairId = pairRef.PairId,
                CveId = pairRef.CveId,
                PackageName = pairRef.PackageName,
                Success = true,
                FunctionMatchRate = totalPost > 0 ? (matchedCount * 100.0 / totalPost) : 0,
                TotalFunctionsPost = totalPost,
                MatchedFunctions = matchedCount,
                PatchedFunctionsDetected = patchedDetected,
                TotalPatchedFunctions = totalPatched,
                SbomHash = ComputeSbomHash(pair),
                VerifyTimeMs = (int)stopwatch.ElapsedMilliseconds,
                FunctionMatches = [.. matches],
                Duration = stopwatch.Elapsed
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Cancellation/timeout is a run-level outcome, not a pair failure. Swallowing it here
            // would let a cancelled run finish as "Completed" with every pair marked failed.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to validate pair {PairId}", pairRef.PairId);
            return CreateFailedPairResult(pairRef, ex.Message);
        }
    }

    /// <summary>
    /// Recovers pre- and post-patch symbols for a pair. Placeholder: returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<SymbolInfo> PrePatch, IReadOnlyList<SymbolInfo> PostPatch)> RecoverSymbolsAsync(
        SecurityPair pair,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with ISymbolSourceConnector implementations
        // For now, return empty symbol lists - actual implementation will come with GCF-002
        IReadOnlyList<SymbolInfo> prePatch = [];
        IReadOnlyList<SymbolInfo> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Lifts both sides of a pair to IR. Placeholder: returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<IrFunction> PrePatch, IReadOnlyList<IrFunction> PostPatch)> LiftToIrAsync(
        SecurityPair pair,
        IReadOnlyList<SymbolInfo> prePatchSymbols,
        IReadOnlyList<SymbolInfo> postPatchSymbols,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with semantic analysis pipeline
        // For now, return empty IR lists
        IReadOnlyList<IrFunction> prePatch = [];
        IReadOnlyList<IrFunction> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Generates fingerprints for both IR sets. Placeholder: returns two empty lists.
    /// </summary>
    private Task<(IReadOnlyList<FunctionFingerprint> PrePatch, IReadOnlyList<FunctionFingerprint> PostPatch)> GenerateFingerprintsAsync(
        IReadOnlyList<IrFunction> prePatchIr,
        IReadOnlyList<IrFunction> postPatchIr,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with fingerprint generator
        // For now, return empty fingerprint lists
        IReadOnlyList<FunctionFingerprint> prePatch = [];
        IReadOnlyList<FunctionFingerprint> postPatch = [];
        return Task.FromResult((prePatch, postPatch));
    }

    /// <summary>
    /// Matches pre-patch against post-patch fingerprints. Placeholder: returns an empty list.
    /// </summary>
    private Task<IReadOnlyList<FunctionMatchResult>> MatchFunctionsAsync(
        IReadOnlyList<FunctionFingerprint> prePatchFingerprints,
        IReadOnlyList<FunctionFingerprint> postPatchFingerprints,
        MatcherConfiguration config,
        CancellationToken ct)
    {
        // Placeholder: Would integrate with function matcher
        // For now, return empty match results
        IReadOnlyList<FunctionMatchResult> matches = [];
        return Task.FromResult(matches);
    }

    /// <summary>
    /// Computes the SBOM hash of a pair. Placeholder: always returns <c>null</c>.
    /// </summary>
    private static string? ComputeSbomHash(SecurityPair pair)
    {
        // Placeholder: Would compute deterministic SBOM hash
        return null;
    }

    /// <summary>
    /// Aggregates per-pair results into run-level metrics. Only successful pairs contribute
    /// to rates, timings and mismatch buckets; failed pairs are only counted.
    /// </summary>
    private static ValidationMetrics ComputeMetrics(
        ImmutableArray<PairValidationResult> pairResults,
        MetricsConfiguration config)
    {
        var successful = pairResults.Where(r => r.Success).ToList();
        var totalFunctionsPost = successful.Sum(r => r.TotalFunctionsPost);
        var matchedFunctions = successful.Sum(r => r.MatchedFunctions);
        var totalPatched = successful.Sum(r => r.TotalPatchedFunctions);
        var patchedDetected = successful.Sum(r => r.PatchedFunctionsDetected);
        var missedPatched = totalPatched - patchedDetected;

        // Rates are percentages; an empty denominator yields 0 rather than NaN.
        var matchRate = totalFunctionsPost > 0
            ? (matchedFunctions * 100.0 / totalFunctionsPost)
            : 0;

        var falseNegativeRate = totalPatched > 0
            ? (missedPatched * 100.0 / totalPatched)
            : 0;

        // SBOM stability: count unique hashes across successful pairs.
        // Exactly one distinct hash is scored as the full configured run count, anything else as 0.
        var uniqueHashes = successful
            .Where(r => r.SbomHash is not null)
            .Select(r => r.SbomHash)
            .Distinct()
            .Count();
        var sbomStability = uniqueHashes == 1 ? config.SbomStabilityRuns : 0;

        // Verify times, ascending
        var verifyTimes = successful
            .Where(r => r.VerifyTimeMs.HasValue)
            .Select(r => r.VerifyTimeMs!.Value)
            .OrderBy(t => t)
            .ToList();

        int? medianMs = null;
        int? p95Ms = null;
        if (verifyTimes.Count > 0)
        {
            // Upper median for even counts; p95 uses index floor(0.95 * n), clamped to the last element.
            medianMs = verifyTimes[verifyTimes.Count / 2];
            var p95Index = (int)(verifyTimes.Count * 0.95);
            p95Ms = verifyTimes[Math.Min(p95Index, verifyTimes.Count - 1)];
        }

        // Mismatch buckets: count unmatched functions per category (opt-in via configuration)
        var buckets = new Dictionary<MismatchCategory, int>();
        if (config.GenerateMismatchBuckets)
        {
            foreach (var result in successful)
            {
                if (result.FunctionMatches is null)
                {
                    continue;
                }

                foreach (var match in result.FunctionMatches)
                {
                    if (!match.Matched && match.MismatchCategory.HasValue)
                    {
                        var category = match.MismatchCategory.Value;
                        buckets[category] = buckets.GetValueOrDefault(category) + 1;
                    }
                }
            }
        }

        return new ValidationMetrics
        {
            TotalPairs = pairResults.Length,
            SuccessfulPairs = successful.Count,
            FailedPairs = pairResults.Length - successful.Count,
            FunctionMatchRate = matchRate,
            FalseNegativeRate = falseNegativeRate,
            SbomHashStability = sbomStability,
            VerifyTimeMedianMs = medianMs,
            VerifyTimeP95Ms = p95Ms,
            TotalFunctionsPost = totalFunctionsPost,
            MatchedFunctions = matchedFunctions,
            TotalTruePatchedFunctions = totalPatched,
            MissedPatchedFunctions = missedPatched,
            MismatchBuckets = buckets.ToImmutableDictionary()
        };
    }

    /// <summary>
    /// Renders the run as a Markdown report: summary metrics, one row per pair, and
    /// (when present) the mismatch category counts.
    /// </summary>
    /// <remarks>
    /// Numbers are formatted with the invariant culture and pairs are ordered ordinally by
    /// package name, so the report does not depend on the host's regional settings.
    /// </remarks>
    private static string GenerateMarkdownReport(
        ValidationRunRequest request,
        ValidationMetrics metrics,
        ImmutableArray<PairValidationResult> pairResults)
    {
        var sb = new StringBuilder();

        // The stability score is computed against the configured run count, so the
        // denominator and target shown must use the same value rather than a fixed 3.
        var stabilityRuns = request.Metrics.SbomStabilityRuns;

        sb.AppendLine("# Validation Run Report");
        sb.AppendLine();
        sb.AppendLine(InvariantFormat, $"**Corpus Version:** {request.CorpusVersion ?? "N/A"}");
        sb.AppendLine(InvariantFormat, $"**Generated:** {DateTimeOffset.UtcNow:O}");
        sb.AppendLine();

        sb.AppendLine("## Summary Metrics");
        sb.AppendLine();
        sb.AppendLine("| Metric | Value | Target |");
        sb.AppendLine("|--------|-------|--------|");
        sb.AppendLine(InvariantFormat, $"| Function Match Rate | {metrics.FunctionMatchRate:F1}% | >= 90% |");
        sb.AppendLine(InvariantFormat, $"| False-Negative Rate | {metrics.FalseNegativeRate:F1}% | <= 5% |");
        sb.AppendLine(InvariantFormat, $"| SBOM Hash Stability | {metrics.SbomHashStability}/{stabilityRuns} | {stabilityRuns}/{stabilityRuns} |");

        if (metrics.VerifyTimeMedianMs.HasValue)
        {
            sb.AppendLine(InvariantFormat, $"| Verify Time (p50) | {metrics.VerifyTimeMedianMs}ms | - |");
        }

        if (metrics.VerifyTimeP95Ms.HasValue)
        {
            sb.AppendLine(InvariantFormat, $"| Verify Time (p95) | {metrics.VerifyTimeP95Ms}ms | - |");
        }

        sb.AppendLine();
        sb.AppendLine("## Pair Results");
        sb.AppendLine();
        sb.AppendLine("| Package | CVE | Match Rate | Patched Detected | Status |");
        sb.AppendLine("|---------|-----|------------|------------------|--------|");

        foreach (var result in pairResults.OrderBy(r => r.PackageName, StringComparer.Ordinal))
        {
            var status = result.Success ? "Pass" : "Fail";
            var detected = result.TotalPatchedFunctions > 0
                ? string.Create(InvariantFormat, $"{result.PatchedFunctionsDetected}/{result.TotalPatchedFunctions}")
                : "N/A";

            sb.AppendLine(InvariantFormat, $"| {result.PackageName} | {result.CveId} | {result.FunctionMatchRate:F1}% | {detected} | {status} |");
        }

        if (metrics.MismatchBuckets is not null && metrics.MismatchBuckets.Count > 0)
        {
            sb.AppendLine();
            sb.AppendLine("## Mismatch Analysis");
            sb.AppendLine();
            sb.AppendLine("| Category | Count |");
            sb.AppendLine("|----------|-------|");

            foreach (var (category, count) in metrics.MismatchBuckets.OrderByDescending(x => x.Value))
            {
                sb.AppendLine(InvariantFormat, $"| {category} | {count} |");
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Builds a failed result for a single pair, carrying the pair identity and the error text.
    /// </summary>
    private static PairValidationResult CreateFailedPairResult(SecurityPairReference pairRef, string error)
    {
        return new PairValidationResult
        {
            PairId = pairRef.PairId,
            CveId = pairRef.CveId,
            PackageName = pairRef.PackageName,
            Success = false,
            Error = error
        };
    }

    /// <summary>
    /// Builds the result for a run that did not complete (failed, timed out or cancelled).
    /// All requested pairs are counted as failed and no per-pair results are included.
    /// </summary>
    private static ValidationRunResult CreateFailedResult(
        string runId,
        DateTimeOffset startedAt,
        ValidationRunContext context,
        string error)
    {
        var request = context.Request;
        var pairCount = request.Pairs.Length;

        return new ValidationRunResult
        {
            RunId = runId,
            StartedAt = startedAt,
            CompletedAt = DateTimeOffset.UtcNow,
            Status = context.GetStatus(),
            Metrics = new ValidationMetrics
            {
                TotalPairs = pairCount,
                SuccessfulPairs = 0,
                FailedPairs = pairCount
            },
            PairResults = [],
            // Carry the same request metadata as a successful result so failed runs
            // can still be attributed to a corpus, tenant and matcher configuration.
            CorpusVersion = request.CorpusVersion,
            TenantId = request.TenantId,
            MatcherConfig = request.Matcher,
            Error = error
        };
    }

    /// <summary>
    /// Context for a running validation: owns the run's cancellation source and the
    /// state/progress snapshot served to status queries from other threads.
    /// </summary>
    private sealed class ValidationRunContext : IDisposable
    {
        private readonly CancellationTokenSource _cts;
        private readonly object _gate = new();
        private ValidationState _state = ValidationState.Queued;
        private string? _currentStage;
        private int _pairsCompleted;

        public string RunId { get; }

        public ValidationRunRequest Request { get; }

        public DateTimeOffset StartedAt { get; } = DateTimeOffset.UtcNow;

        /// <summary>True once the caller's token or <see cref="TryCancel"/> has requested cancellation.</summary>
        public bool IsCancelled => _cts.IsCancellationRequested;

        /// <summary>Token the run must observe; fires on caller cancellation or <see cref="TryCancel"/>.</summary>
        public CancellationToken Token => _cts.Token;

        public ValidationRunContext(string runId, ValidationRunRequest request, CancellationToken ct)
        {
            RunId = runId;
            Request = request;
            _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        }

        /// <summary>Sets the current state and stage description together.</summary>
        public void UpdateState(ValidationState state, string? stage = null)
        {
            // Lock so a concurrent GetStatus never pairs a new state with a stale stage.
            lock (_gate)
            {
                _state = state;
                _currentStage = stage;
            }
        }

        /// <summary>Records the number of completed pairs; the stored value never decreases.</summary>
        public void UpdateProgress(int completed, int total)
        {
            // Workers may report out of order; keep the maximum seen so progress is monotonic.
            int current;
            while (completed > (current = Volatile.Read(ref _pairsCompleted)))
            {
                if (Interlocked.CompareExchange(ref _pairsCompleted, completed, current) == current)
                {
                    break;
                }
            }
        }

        /// <summary>
        /// Requests cancellation of the run.
        /// </summary>
        /// <returns><c>false</c> if the run had already finished and released its resources.</returns>
        public bool TryCancel()
        {
            try
            {
                _cts.Cancel();
                return true;
            }
            catch (ObjectDisposedException)
            {
                // The run completed between the registry lookup and this call.
                return false;
            }
        }

        /// <summary>Creates a point-in-time status snapshot of the run.</summary>
        public ValidationRunStatus GetStatus()
        {
            ValidationState state;
            string? stage;
            lock (_gate)
            {
                state = _state;
                stage = _currentStage;
            }

            var completed = Volatile.Read(ref _pairsCompleted);
            var total = Request.Pairs.Length;

            // Multiply in 64-bit so very large pair counts cannot overflow the percentage.
            var progress = total > 0 ? (int)(completed * 100L / total) : 0;

            return new ValidationRunStatus
            {
                RunId = RunId,
                State = state,
                Progress = progress,
                CurrentStage = stage,
                PairsCompleted = completed,
                TotalPairs = total,
                StartedAt = StartedAt
            };
        }

        /// <summary>Releases the linked cancellation source and its registration on the caller's token.</summary>
        public void Dispose() => _cts.Dispose();
    }
}
|
|
|
|
/// <summary>
/// A symbol recovered from a ground-truth source.
/// Placeholder shape until the full implementation lands.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Address of the symbol.</param>
/// <param name="Size">Size of the symbol (presumably in bytes; confirm with the producing connector).</param>
internal sealed record SymbolInfo(string Name, ulong Address, int Size);
|
|
|
|
/// <summary>
/// A function lifted to intermediate representation.
/// Placeholder shape until the full implementation lands.
/// </summary>
/// <param name="Name">Name of the function.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="IrBytes">
/// Raw IR payload. Being an array, it takes part in record equality by reference, not by content.
/// </param>
internal sealed record IrFunction(string Name, ulong Address, byte[] IrBytes);
|
|
|
|
/// <summary>
/// Fingerprint of a function, used as input to function matching.
/// Placeholder shape until the full implementation lands.
/// </summary>
/// <param name="Name">Name of the function.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="Hash">
/// Fingerprint hash bytes. Being an array, it takes part in record equality by reference, not by content.
/// </param>
/// <param name="BasicBlockCount">Number of basic blocks in the function.</param>
/// <param name="InstructionCount">Number of instructions in the function.</param>
internal sealed record FunctionFingerprint(
    string Name,
    ulong Address,
    byte[] Hash,
    int BasicBlockCount,
    int InstructionCount);
|