// -----------------------------------------------------------------------------
// BenchCommandBuilder.cs
// Sprint: SPRINT_3500_0003_0001_ground_truth_corpus_ci_gates
// Task: CORPUS-007 - Add `stellaops bench run --corpus <path>` CLI command
// Task: CORPUS-008 - Add `stellaops bench check --baseline <path>` regression checker
// Task: CORPUS-011 - Implement baseline update tool
// Description: CLI commands for running and managing reachability benchmarks
// -----------------------------------------------------------------------------
|
|
|
|
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using StellaOps.Scanner.Benchmarks;
|
|
using System.CommandLine;
|
|
using System.Text.Json;
|
|
|
|
namespace StellaOps.Cli.Commands;
|
|
|
|
/// <summary>
|
|
/// Builds CLI commands for benchmark operations.
|
|
/// </summary>
|
|
internal static class BenchCommandBuilder
|
|
{
|
|
/// <summary>
/// Serializer settings shared by every JSON read and write in this builder:
/// camelCase property names and indented output.
/// </summary>
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    WriteIndented = true,
};
|
|
|
|
/// <summary>
/// Creates the top-level <c>bench</c> command and attaches the <c>run</c>,
/// <c>check</c>, <c>baseline</c> and <c>report</c> subcommands, in that order.
/// </summary>
/// <param name="services">Service provider forwarded to each subcommand builder.</param>
/// <param name="verboseOption">Shared verbose flag forwarded to each subcommand builder.</param>
/// <param name="cancellationToken">Token forwarded to each subcommand builder.</param>
/// <returns>The assembled <c>bench</c> command.</returns>
internal static Command BuildBenchCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    // Build the children first; the order here is the order they are registered in.
    Command[] subcommands =
    {
        BuildRunCommand(services, verboseOption, cancellationToken),
        BuildCheckCommand(services, verboseOption, cancellationToken),
        BuildBaselineCommand(services, verboseOption, cancellationToken),
        BuildReportCommand(services, verboseOption, cancellationToken),
    };

    var root = new Command("bench", "Run and manage reachability benchmarks");
    foreach (var subcommand in subcommands)
    {
        root.Add(subcommand);
    }

    return root;
}
|
|
|
|
/// <summary>
/// Build the `bench run` command.
/// </summary>
/// <remarks>
/// The action resolves an <c>ICorpusRunner</c> from <paramref name="services"/>, runs it
/// against the corpus file, emits the result as JSON or markdown (to <c>--output</c> when
/// given, otherwise to the console) and then prints a short metric summary.
/// </remarks>
/// <param name="services">Service provider used to resolve <c>ICorpusRunner</c>.</param>
/// <param name="verboseOption">Shared verbose flag; not read by this command.</param>
/// <param name="cancellationToken">Token passed to the runner and to file writes.</param>
/// <returns>The configured <c>run</c> command.</returns>
private static Command BuildRunCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    // In the System.CommandLine 2.x API used here (Required / SetAction / GetValue) the
    // Option<T> constructor is (name, params aliases): a second string argument would be
    // registered as an alias, not as help text, and there is no (name, Func<T>, description)
    // overload. Descriptions and defaults are therefore set through properties.
    var corpusOption = new Option<string>("--corpus")
    {
        Description = "Path to corpus.json index file",
        Required = true
    };
    var outputOption = new Option<string?>("--output")
    {
        Description = "Output path for results JSON"
    };
    var categoryOption = new Option<string[]?>("--category")
    {
        Description = "Filter to specific categories"
    };
    var sampleOption = new Option<string[]?>("--sample")
    {
        Description = "Filter to specific sample IDs"
    };
    var parallelOption = new Option<int>("--parallel")
    {
        Description = "Number of parallel workers",
        DefaultValueFactory = _ => 1
    };
    var timeoutOption = new Option<int>("--timeout")
    {
        Description = "Timeout per sample in milliseconds",
        DefaultValueFactory = _ => 30000
    };
    var determinismOption = new Option<bool>("--check-determinism")
    {
        Description = "Run determinism checks",
        DefaultValueFactory = _ => true
    };
    var runsOption = new Option<int>("--determinism-runs")
    {
        Description = "Number of runs for determinism check",
        DefaultValueFactory = _ => 3
    };
    var formatOption = new Option<string>("--format")
    {
        Description = "Output format: json, markdown",
        DefaultValueFactory = _ => "json"
    };

    var run = new Command("run", "Run the ground-truth corpus benchmark");
    run.Add(corpusOption);
    run.Add(outputOption);
    run.Add(categoryOption);
    run.Add(sampleOption);
    run.Add(parallelOption);
    run.Add(timeoutOption);
    run.Add(determinismOption);
    run.Add(runsOption);
    run.Add(formatOption);

    run.SetAction(async parseResult =>
    {
        var corpusPath = parseResult.GetValue(corpusOption)!;
        var outputPath = parseResult.GetValue(outputOption);
        var categories = parseResult.GetValue(categoryOption);
        var samples = parseResult.GetValue(sampleOption);
        var parallel = parseResult.GetValue(parallelOption);
        var timeout = parseResult.GetValue(timeoutOption);
        var checkDeterminism = parseResult.GetValue(determinismOption);
        var determinismRuns = parseResult.GetValue(runsOption);
        var format = parseResult.GetValue(formatOption) ?? "json";

        if (!File.Exists(corpusPath))
        {
            throw new CommandLineException($"Corpus file not found: {corpusPath}");
        }

        // Validate the format before the (potentially long) benchmark run instead of
        // silently falling back to JSON afterwards.
        var asMarkdown = string.Equals(format, "markdown", StringComparison.OrdinalIgnoreCase);
        if (!asMarkdown && !string.Equals(format, "json", StringComparison.OrdinalIgnoreCase))
        {
            throw new CommandLineException($"Unsupported output format '{format}'. Expected: json, markdown");
        }

        var options = new CorpusRunOptions
        {
            Categories = categories,
            SampleIds = samples,
            Parallelism = parallel,
            TimeoutMs = timeout,
            CheckDeterminism = checkDeterminism,
            DeterminismRuns = determinismRuns
        };

        Console.WriteLine($"Running benchmark corpus: {corpusPath}");
        Console.WriteLine($"Options: parallel={parallel}, timeout={timeout}ms, determinism={checkDeterminism}");

        var runner = services.GetRequiredService<ICorpusRunner>();
        var result = await runner.RunAsync(corpusPath, options, cancellationToken);

        // Render once, then either write to the file or echo to the console.
        var payload = asMarkdown
            ? FormatMarkdownReport(result)
            : JsonSerializer.Serialize(result, JsonOptions);

        if (outputPath is not null)
        {
            await File.WriteAllTextAsync(outputPath, payload, cancellationToken);
            Console.WriteLine(asMarkdown
                ? $"Markdown report written to: {outputPath}"
                : $"Results written to: {outputPath}");
        }
        else
        {
            Console.WriteLine(payload);
        }

        // Print summary
        Console.WriteLine();
        Console.WriteLine("=== Benchmark Summary ===");
        Console.WriteLine($"Precision: {result.Metrics.Precision:P1}");
        Console.WriteLine($"Recall: {result.Metrics.Recall:P1}");
        Console.WriteLine($"F1 Score: {result.Metrics.F1:P1}");
        Console.WriteLine($"Determinism: {result.Metrics.DeterministicReplay:P0}");
        Console.WriteLine($"Duration: {result.DurationMs}ms");
    });

    return run;
}
|
|
|
|
/// <summary>
/// Build the `bench check` command.
/// </summary>
/// <remarks>
/// The action loads a results file and a baseline file, calls
/// <c>result.CheckRegression(baseline)</c>, prints the outcome and any issues, optionally
/// writes the check result as JSON to <c>--output</c>, and reports failure through both
/// the action's return value and <see cref="Environment.ExitCode"/>.
/// </remarks>
/// <param name="services">Service provider; not used by this command.</param>
/// <param name="verboseOption">Shared verbose flag; not read by this command.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>check</c> command.</returns>
private static Command BuildCheckCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    // In the System.CommandLine 2.x API used here the Option<T> constructor is
    // (name, params aliases); help text and defaults go through properties.
    var resultsOption = new Option<string>("--results")
    {
        Description = "Path to benchmark results JSON",
        Required = true
    };
    var baselineOption = new Option<string>("--baseline")
    {
        Description = "Path to baseline JSON",
        Required = true
    };
    var strictOption = new Option<bool>("--strict")
    {
        Description = "Fail on any metric degradation",
        DefaultValueFactory = _ => false
    };
    var outputOption = new Option<string?>("--output")
    {
        Description = "Output path for regression report"
    };

    var check = new Command("check", "Check benchmark results against baseline");
    check.Add(resultsOption);
    check.Add(baselineOption);
    check.Add(strictOption);
    check.Add(outputOption);

    // The (ParseResult, CancellationToken) => Task<int> overload lets the action hand
    // its exit code back to the invocation. The token captured from the builder is still
    // the one used for I/O, as before.
    check.SetAction(async (parseResult, _) =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var baselinePath = parseResult.GetValue(baselineOption)!;
        var strict = parseResult.GetValue(strictOption);
        var outputPath = parseResult.GetValue(outputOption);

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }
        if (!File.Exists(baselinePath))
        {
            throw new CommandLineException($"Baseline file not found: {baselinePath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
        var baselineJson = await File.ReadAllTextAsync(baselinePath, cancellationToken);

        var result = JsonSerializer.Deserialize<BenchmarkResult>(resultsJson, JsonOptions)
            ?? throw new CommandLineException("Failed to parse results JSON");
        var baseline = JsonSerializer.Deserialize<BenchmarkBaseline>(baselineJson, JsonOptions)
            ?? throw new CommandLineException("Failed to parse baseline JSON");

        var checkResult = result.CheckRegression(baseline);

        // --strict was previously parsed but ignored. It now fails the check whenever
        // any issue is reported, even if CheckRegression itself passed.
        // NOTE(review): assumes every reported issue is a "metric degradation" - confirm
        // against CheckRegression's contract.
        var failedByStrict = strict && checkResult.Passed && checkResult.Issues.Count > 0;
        var passed = checkResult.Passed && !failedByStrict;

        Console.WriteLine("=== Regression Check Results ===");
        Console.WriteLine($"Status: {(passed ? "PASSED" : "FAILED")}");
        if (failedByStrict)
        {
            Console.WriteLine("Strict mode: reported issues are treated as failures.");
        }
        Console.WriteLine();

        if (checkResult.Issues.Count > 0)
        {
            Console.WriteLine("Issues:");
            foreach (var issue in checkResult.Issues)
            {
                var icon = issue.Severity == IssueSeverity.Error ? "❌" : "⚠️";
                Console.WriteLine($" {icon} [{issue.Metric}] {issue.Message}");
                Console.WriteLine($" Baseline: {issue.BaselineValue:F4}, Current: {issue.CurrentValue:F4}");
            }
        }
        else
        {
            Console.WriteLine("No regressions detected.");
        }

        // Write report if requested. The report is the unmodified check result, so its
        // own pass/fail flag does not reflect a --strict escalation.
        if (outputPath is not null)
        {
            var report = JsonSerializer.Serialize(checkResult, JsonOptions);
            await File.WriteAllTextAsync(outputPath, report, cancellationToken);
            Console.WriteLine($"\nReport written to: {outputPath}");
        }

        if (!passed)
        {
            // Set both: Environment.ExitCode alone is overridden when the host returns
            // the invocation result from Main.
            Environment.ExitCode = 1;
            return 1;
        }

        return 0;
    });

    return check;
}
|
|
|
|
/// <summary>
/// Build the `bench baseline` command group.
/// </summary>
/// <remarks>
/// Contains two subcommands: <c>update</c>, which writes a new baseline JSON file from
/// the metrics of a results file, and <c>show</c>, which prints an existing baseline.
/// </remarks>
/// <param name="services">Service provider; not used by this command group.</param>
/// <param name="verboseOption">Shared verbose flag; not read by this command group.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>baseline</c> command with its subcommands attached.</returns>
private static Command BuildBaselineCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var baseline = new Command("baseline", "Manage benchmark baselines");

    // baseline update
    // In the System.CommandLine 2.x API used here the Option<T> constructor is
    // (name, params aliases); help text goes through the Description property.
    var resultsOption = new Option<string>("--results")
    {
        Description = "Path to benchmark results JSON",
        Required = true
    };
    var outputOption = new Option<string>("--output")
    {
        Description = "Output path for new baseline",
        Required = true
    };
    var noteOption = new Option<string?>("--note")
    {
        Description = "Note explaining the baseline update"
    };

    var update = new Command("update", "Update baseline from benchmark results");
    update.Add(resultsOption);
    update.Add(outputOption);
    update.Add(noteOption);

    update.SetAction(async parseResult =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var outputPath = parseResult.GetValue(outputOption)!;
        var note = parseResult.GetValue(noteOption);

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
        var result = JsonSerializer.Deserialize<BenchmarkResult>(resultsJson, JsonOptions)
            ?? throw new CommandLineException("Failed to parse results JSON");

        // Snapshot the run's metrics; the creation time is "now" in UTC.
        var newBaseline = new BenchmarkBaseline(
            Version: "1.0.0",
            CreatedAt: DateTimeOffset.UtcNow,
            CorpusVersion: result.CorpusVersion,
            ScannerVersion: result.ScannerVersion,
            Precision: result.Metrics.Precision,
            Recall: result.Metrics.Recall,
            F1: result.Metrics.F1,
            TtfrpP95Ms: result.Metrics.TtfrpP95Ms,
            DeterministicReplay: result.Metrics.DeterministicReplay,
            Note: note);

        var baselineJson = JsonSerializer.Serialize(newBaseline, JsonOptions);
        await File.WriteAllTextAsync(outputPath, baselineJson, cancellationToken);

        Console.WriteLine($"Baseline updated: {outputPath}");
        Console.WriteLine($" Precision: {newBaseline.Precision:P1}");
        Console.WriteLine($" Recall: {newBaseline.Recall:P1}");
        Console.WriteLine($" F1: {newBaseline.F1:P1}");
        Console.WriteLine($" TTFRP p95: {newBaseline.TtfrpP95Ms}ms");
        Console.WriteLine($" Determinism: {newBaseline.DeterministicReplay:P0}");
    });

    baseline.Add(update);

    // baseline show
    var baselinePathOption = new Option<string>("--path")
    {
        Description = "Path to baseline JSON",
        Required = true
    };

    var show = new Command("show", "Display baseline metrics");
    show.Add(baselinePathOption);

    show.SetAction(async parseResult =>
    {
        var path = parseResult.GetValue(baselinePathOption)!;

        if (!File.Exists(path))
        {
            throw new CommandLineException($"Baseline file not found: {path}");
        }

        var json = await File.ReadAllTextAsync(path, cancellationToken);

        // Named "loaded" so it does not shadow the enclosing `baseline` command variable.
        var loaded = JsonSerializer.Deserialize<BenchmarkBaseline>(json, JsonOptions)
            ?? throw new CommandLineException("Failed to parse baseline JSON");

        Console.WriteLine($"=== Baseline: {path} ===");
        Console.WriteLine($"Version: {loaded.Version}");
        Console.WriteLine($"Created: {loaded.CreatedAt:O}");
        Console.WriteLine($"Corpus: {loaded.CorpusVersion}");
        Console.WriteLine($"Scanner: {loaded.ScannerVersion}");
        Console.WriteLine();
        Console.WriteLine("Metrics:");
        Console.WriteLine($" Precision: {loaded.Precision:P1}");
        Console.WriteLine($" Recall: {loaded.Recall:P1}");
        Console.WriteLine($" F1: {loaded.F1:P1}");
        Console.WriteLine($" TTFRP p95: {loaded.TtfrpP95Ms}ms");
        Console.WriteLine($" Determinism: {loaded.DeterministicReplay:P0}");

        if (loaded.Note is not null)
        {
            Console.WriteLine();
            Console.WriteLine($"Note: {loaded.Note}");
        }
    });

    baseline.Add(show);

    return baseline;
}
|
|
|
|
/// <summary>
/// Build the `bench report` command.
/// </summary>
/// <remarks>
/// The action loads a results file, renders it with <c>FormatHtmlReport</c> or
/// <c>FormatMarkdownReport</c> depending on <c>--format</c>, and writes the text to
/// <c>--output</c> when given, otherwise to the console.
/// </remarks>
/// <param name="services">Service provider; not used by this command.</param>
/// <param name="verboseOption">Shared verbose flag; not read by this command.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>report</c> command.</returns>
private static Command BuildReportCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    // In the System.CommandLine 2.x API used here the Option<T> constructor is
    // (name, params aliases); help text and defaults go through properties.
    var resultsOption = new Option<string>("--results")
    {
        Description = "Path to benchmark results JSON",
        Required = true
    };
    var formatOption = new Option<string>("--format")
    {
        Description = "Output format: markdown, html",
        DefaultValueFactory = _ => "markdown"
    };
    var outputOption = new Option<string?>("--output")
    {
        Description = "Output path for report"
    };

    var report = new Command("report", "Generate benchmark report");
    report.Add(resultsOption);
    report.Add(formatOption);
    report.Add(outputOption);

    report.SetAction(async parseResult =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var format = parseResult.GetValue(formatOption) ?? "markdown";
        var outputPath = parseResult.GetValue(outputOption);

        // Reject unknown formats instead of silently rendering markdown for them.
        var asHtml = string.Equals(format, "html", StringComparison.OrdinalIgnoreCase);
        if (!asHtml && !string.Equals(format, "markdown", StringComparison.OrdinalIgnoreCase))
        {
            throw new CommandLineException($"Unsupported report format '{format}'. Expected: markdown, html");
        }

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
        var result = JsonSerializer.Deserialize<BenchmarkResult>(resultsJson, JsonOptions)
            ?? throw new CommandLineException("Failed to parse results JSON");

        var reportContent = asHtml
            ? FormatHtmlReport(result)
            : FormatMarkdownReport(result);

        if (outputPath is not null)
        {
            await File.WriteAllTextAsync(outputPath, reportContent, cancellationToken);
            Console.WriteLine($"Report written to: {outputPath}");
        }
        else
        {
            Console.WriteLine(reportContent);
        }
    });

    return report;
}
|
|
|
|
/// <summary>
/// Renders a benchmark result as a markdown document: a header with run metadata,
/// a summary-metrics table, and one table row per entry in <c>SampleResults</c>.
/// </summary>
/// <param name="result">The benchmark result to render.</param>
/// <returns>The markdown text.</returns>
private static string FormatMarkdownReport(BenchmarkResult result)
{
    var metrics = result.Metrics;

    var markdown = new System.Text.StringBuilder()
        // Header with run metadata
        .AppendLine("# Reachability Benchmark Report")
        .AppendLine()
        .AppendLine($"**Run ID:** {result.RunId}")
        .AppendLine($"**Timestamp:** {result.Timestamp:O}")
        .AppendLine($"**Corpus Version:** {result.CorpusVersion}")
        .AppendLine($"**Scanner Version:** {result.ScannerVersion}")
        .AppendLine($"**Duration:** {result.DurationMs}ms")
        .AppendLine()
        // Aggregate metrics table
        .AppendLine("## Summary Metrics")
        .AppendLine()
        .AppendLine("| Metric | Value |")
        .AppendLine("|--------|-------|")
        .AppendLine($"| Precision | {metrics.Precision:P1} |")
        .AppendLine($"| Recall | {metrics.Recall:P1} |")
        .AppendLine($"| F1 Score | {metrics.F1:P1} |")
        .AppendLine($"| TTFRP p50 | {metrics.TtfrpP50Ms}ms |")
        .AppendLine($"| TTFRP p95 | {metrics.TtfrpP95Ms}ms |")
        .AppendLine($"| Deterministic Replay | {metrics.DeterministicReplay:P0} |")
        .AppendLine()
        // Per-sample table header
        .AppendLine("## Sample Results")
        .AppendLine()
        .AppendLine("| Sample | Expected | Actual | Match | Duration |")
        .AppendLine("|--------|----------|--------|-------|----------|");

    foreach (var row in result.SampleResults)
    {
        string verdict;
        if (row.MatchedExpected)
        {
            verdict = "✅";
        }
        else
        {
            verdict = "❌";
        }

        markdown.AppendLine($"| {row.SampleId} | {row.ExpectedReachability} | {row.ActualReachability} | {verdict} | {row.DurationMs}ms |");
    }

    return markdown.ToString();
}
|
|
|
|
/// <summary>
/// Renders a basic standalone HTML report: inline styles, the run ID and timestamp,
/// and a summary-metrics table. Per-sample results are not included.
/// </summary>
/// <param name="result">The benchmark result to render.</param>
/// <returns>The HTML document text.</returns>
private static string FormatHtmlReport(BenchmarkResult result)
{
    // The result is deserialized from a user-supplied file, so free-form values must be
    // HTML-encoded before being placed in markup. Interpolating first keeps this
    // independent of RunId's declared type.
    var runId = System.Net.WebUtility.HtmlEncode($"{result.RunId}");

    var sb = new System.Text.StringBuilder();
    sb.AppendLine("<!DOCTYPE html>");
    sb.AppendLine("<html><head><title>Benchmark Report</title>");
    sb.AppendLine("<style>");
    sb.AppendLine("body { font-family: system-ui; max-width: 900px; margin: 0 auto; padding: 20px; }");
    sb.AppendLine("table { border-collapse: collapse; width: 100%; }");
    sb.AppendLine("th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }");
    sb.AppendLine("th { background-color: #f2f2f2; }");
    sb.AppendLine(".pass { color: green; }");
    sb.AppendLine(".fail { color: red; }");
    sb.AppendLine("</style></head><body>");

    sb.AppendLine("<h1>Reachability Benchmark Report</h1>");
    sb.AppendLine($"<p><strong>Run ID:</strong> {runId}</p>");
    sb.AppendLine($"<p><strong>Timestamp:</strong> {result.Timestamp:O}</p>");

    sb.AppendLine("<h2>Summary Metrics</h2>");
    sb.AppendLine("<table>");
    sb.AppendLine("<tr><th>Metric</th><th>Value</th></tr>");
    sb.AppendLine($"<tr><td>Precision</td><td>{result.Metrics.Precision:P1}</td></tr>");
    sb.AppendLine($"<tr><td>Recall</td><td>{result.Metrics.Recall:P1}</td></tr>");
    sb.AppendLine($"<tr><td>F1 Score</td><td>{result.Metrics.F1:P1}</td></tr>");
    sb.AppendLine($"<tr><td>Determinism</td><td>{result.Metrics.DeterministicReplay:P0}</td></tr>");
    sb.AppendLine("</table>");

    sb.AppendLine("</body></html>");
    return sb.ToString();
}
|
|
}
|
|
|