Files
git.stella-ops.org/src/Cli/StellaOps.Cli/Commands/BenchCommandBuilder.cs
2026-02-01 21:37:40 +02:00

478 lines
19 KiB
C#

// -----------------------------------------------------------------------------
// BenchCommandBuilder.cs
// Sprint: SPRINT_3500_0003_0001_ground_truth_corpus_ci_gates
// Task: CORPUS-007 - Add `stellaops bench run --corpus <path>` CLI command
// Task: CORPUS-008 - Add `stellaops bench check --baseline <path>` regression checker
// Task: CORPUS-011 - Implement baseline update tool
// Description: CLI commands for running and managing reachability benchmarks
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scanner.Benchmarks;
using System.CommandLine;
using System.Text.Json;
namespace StellaOps.Cli.Commands;
/// <summary>
/// Builds CLI commands for benchmark operations.
/// </summary>
internal static class BenchCommandBuilder
{
/// <summary>
/// Serializer settings shared by the bench subcommands for both reading and writing JSON:
/// camelCase property names and indented output.
/// </summary>
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    WriteIndented = true
};
/// <summary>
/// Creates the top-level <c>bench</c> command and attaches its <c>run</c>, <c>check</c>,
/// <c>baseline</c> and <c>report</c> subcommands, in that order.
/// </summary>
/// <param name="services">Service provider handed to every subcommand builder.</param>
/// <param name="verboseOption">Shared verbose flag handed to every subcommand builder.</param>
/// <param name="cancellationToken">Token handed to every subcommand builder.</param>
/// <returns>The assembled <c>bench</c> command.</returns>
internal static Command BuildBenchCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var root = new Command("bench", "Run and manage reachability benchmarks");

    var subcommands = new[]
    {
        BuildRunCommand(services, verboseOption, cancellationToken),
        BuildCheckCommand(services, verboseOption, cancellationToken),
        BuildBaselineCommand(services, verboseOption, cancellationToken),
        BuildReportCommand(services, verboseOption, cancellationToken)
    };

    foreach (var subcommand in subcommands)
    {
        root.Add(subcommand);
    }

    return root;
}
/// <summary>
/// Build the `bench run` command.
/// </summary>
/// <remarks>
/// The command's action checks that the corpus file exists, resolves <c>ICorpusRunner</c> from
/// <paramref name="services"/>, runs the corpus with the parsed options, emits the result as
/// markdown or JSON (to <c>--output</c> when given, otherwise to the console) and finally prints
/// a short metrics summary to the console.
/// </remarks>
/// <param name="services">Service provider used to resolve <c>ICorpusRunner</c> when the command runs.</param>
/// <param name="verboseOption">Shared verbose flag; accepted for signature parity with the other builders, not read here.</param>
/// <param name="cancellationToken">Token passed to the runner and to file writes.</param>
/// <returns>The configured <c>run</c> command.</returns>
private static Command BuildRunCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var corpusOption = new Option<string>("--corpus", "Path to corpus.json index file")
    {
        Required = true
    };
    var outputOption = new Option<string?>("--output", "Output path for results JSON");
    var categoryOption = new Option<string[]?>("--category", "Filter to specific categories");
    var sampleOption = new Option<string[]?>("--sample", "Filter to specific sample IDs");
    var parallelOption = new Option<int>("--parallel", () => 1, "Number of parallel workers");
    var timeoutOption = new Option<int>("--timeout", () => 30000, "Timeout per sample in milliseconds");
    var determinismOption = new Option<bool>("--check-determinism", () => true, "Run determinism checks");
    var runsOption = new Option<int>("--determinism-runs", () => 3, "Number of runs for determinism check");
    var formatOption = new Option<string>("--format", () => "json", "Output format: json, markdown");

    var run = new Command("run", "Run the ground-truth corpus benchmark");
    run.Add(corpusOption);
    run.Add(outputOption);
    run.Add(categoryOption);
    run.Add(sampleOption);
    run.Add(parallelOption);
    run.Add(timeoutOption);
    run.Add(determinismOption);
    run.Add(runsOption);
    run.Add(formatOption);

    run.SetAction(async parseResult =>
    {
        var corpusPath = parseResult.GetValue(corpusOption)!;
        var outputPath = parseResult.GetValue(outputOption);
        var categories = parseResult.GetValue(categoryOption);
        var samples = parseResult.GetValue(sampleOption);
        var parallel = parseResult.GetValue(parallelOption);
        var timeout = parseResult.GetValue(timeoutOption);
        var checkDeterminism = parseResult.GetValue(determinismOption);
        var determinismRuns = parseResult.GetValue(runsOption);
        var format = parseResult.GetValue(formatOption);

        if (!File.Exists(corpusPath))
        {
            throw new CommandLineException($"Corpus file not found: {corpusPath}");
        }

        var options = new CorpusRunOptions
        {
            Categories = categories,
            SampleIds = samples,
            Parallelism = parallel,
            TimeoutMs = timeout,
            CheckDeterminism = checkDeterminism,
            DeterminismRuns = determinismRuns
        };

        Console.WriteLine($"Running benchmark corpus: {corpusPath}");
        Console.WriteLine($"Options: parallel={parallel}, timeout={timeout}ms, determinism={checkDeterminism}");

        var runner = services.GetRequiredService<ICorpusRunner>();
        var result = await runner.RunAsync(corpusPath, options, cancellationToken);

        // Match the format name case-insensitively so "--format Markdown" is not silently
        // treated as JSON. Any value other than "markdown" still falls back to JSON, as before.
        var asMarkdown = string.Equals(format, "markdown", StringComparison.OrdinalIgnoreCase);
        var payload = asMarkdown
            ? FormatMarkdownReport(result)
            : JsonSerializer.Serialize(result, JsonOptions);

        if (outputPath is not null)
        {
            await File.WriteAllTextAsync(outputPath, payload, cancellationToken);
            Console.WriteLine(asMarkdown
                ? $"Markdown report written to: {outputPath}"
                : $"Results written to: {outputPath}");
        }
        else
        {
            Console.WriteLine(payload);
        }

        // Print summary
        Console.WriteLine();
        Console.WriteLine("=== Benchmark Summary ===");
        Console.WriteLine($"Precision: {result.Metrics.Precision:P1}");
        Console.WriteLine($"Recall: {result.Metrics.Recall:P1}");
        Console.WriteLine($"F1 Score: {result.Metrics.F1:P1}");
        Console.WriteLine($"Determinism: {result.Metrics.DeterministicReplay:P0}");
        Console.WriteLine($"Duration: {result.DurationMs}ms");
    });

    return run;
}
/// <summary>
/// Build the `bench check` command.
/// </summary>
/// <remarks>
/// The command's action loads a results file and a baseline file, calls
/// <c>CheckRegression</c> on the results, prints every reported issue, optionally writes the
/// check result as JSON to <c>--output</c>, and sets <see cref="Environment.ExitCode"/> to 1
/// when the check fails. With <c>--strict</c>, any reported issue (including ones that are not
/// of error severity) fails the check.
/// </remarks>
/// <param name="services">Service provider; accepted for signature parity with the other builders, not read here.</param>
/// <param name="verboseOption">Shared verbose flag; accepted for signature parity, not read here.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>check</c> command.</returns>
/// <exception cref="CommandLineException">
/// Thrown by the action when an input file is missing or its JSON cannot be parsed.
/// </exception>
private static Command BuildCheckCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var resultsOption = new Option<string>("--results", "Path to benchmark results JSON")
    {
        Required = true
    };
    var baselineOption = new Option<string>("--baseline", "Path to baseline JSON")
    {
        Required = true
    };
    var strictOption = new Option<bool>("--strict", () => false, "Fail on any metric degradation");
    var outputOption = new Option<string?>("--output", "Output path for regression report");

    var check = new Command("check", "Check benchmark results against baseline");
    check.Add(resultsOption);
    check.Add(baselineOption);
    check.Add(strictOption);
    check.Add(outputOption);

    // Deserializes JSON and reports both "null document" and malformed input as the CLI's own
    // exception type, so a broken file produces a regular command error instead of an
    // unhandled JsonException.
    static T ParseJson<T>(string json, string label) where T : class
    {
        try
        {
            return JsonSerializer.Deserialize<T>(json, JsonOptions)
                ?? throw new CommandLineException($"Failed to parse {label} JSON");
        }
        catch (JsonException ex)
        {
            throw new CommandLineException($"Failed to parse {label} JSON: {ex.Message}");
        }
    }

    check.SetAction(async parseResult =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var baselinePath = parseResult.GetValue(baselineOption)!;
        var strict = parseResult.GetValue(strictOption);
        var outputPath = parseResult.GetValue(outputOption);

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }
        if (!File.Exists(baselinePath))
        {
            throw new CommandLineException($"Baseline file not found: {baselinePath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
        var baselineJson = await File.ReadAllTextAsync(baselinePath, cancellationToken);

        var result = ParseJson<BenchmarkResult>(resultsJson, "results");
        var baseline = ParseJson<BenchmarkBaseline>(baselineJson, "baseline");

        var checkResult = result.CheckRegression(baseline);

        // --strict was previously parsed but ignored. In strict mode a check that passed with
        // issues (i.e. warnings only) is promoted to a failure.
        // NOTE(review): assumes every degradation CheckRegression detects is surfaced as an
        // issue — confirm against its implementation.
        var hasIssues = checkResult.Issues.Count > 0;
        var failedByStrict = strict && checkResult.Passed && hasIssues;
        var passed = checkResult.Passed && !failedByStrict;

        Console.WriteLine("=== Regression Check Results ===");
        Console.WriteLine($"Status: {(passed ? "PASSED" : "FAILED")}");
        if (failedByStrict)
        {
            Console.WriteLine("Strict mode: warnings are treated as failures.");
        }
        Console.WriteLine();

        if (hasIssues)
        {
            Console.WriteLine("Issues:");
            foreach (var issue in checkResult.Issues)
            {
                var icon = issue.Severity == IssueSeverity.Error ? "❌" : "⚠️";
                Console.WriteLine($" {icon} [{issue.Metric}] {issue.Message}");
                Console.WriteLine($" Baseline: {issue.BaselineValue:F4}, Current: {issue.CurrentValue:F4}");
            }
        }
        else
        {
            Console.WriteLine("No regressions detected.");
        }

        // Write report if requested
        if (outputPath is not null)
        {
            var report = JsonSerializer.Serialize(checkResult, JsonOptions);
            await File.WriteAllTextAsync(outputPath, report, cancellationToken);
            Console.WriteLine($"\nReport written to: {outputPath}");
        }

        // Exit with error if failed
        if (!passed)
        {
            Environment.ExitCode = 1;
        }
    });

    return check;
}
/// <summary>
/// Build the `bench baseline` command group.
/// </summary>
/// <remarks>
/// Contains two subcommands:
/// <list type="bullet">
/// <item><description><c>update</c> reads a results file, builds a new <c>BenchmarkBaseline</c> from its
/// metrics and versions, writes it as JSON to <c>--output</c> and prints the stored metrics.</description></item>
/// <item><description><c>show</c> reads a baseline file and prints its metadata, metrics and optional note.</description></item>
/// </list>
/// </remarks>
/// <param name="services">Service provider; accepted for signature parity with the other builders, not read here.</param>
/// <param name="verboseOption">Shared verbose flag; accepted for signature parity, not read here.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>baseline</c> command with its subcommands attached.</returns>
/// <exception cref="CommandLineException">
/// Thrown by the actions when an input file is missing or its JSON cannot be parsed.
/// </exception>
private static Command BuildBaselineCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var baseline = new Command("baseline", "Manage benchmark baselines");

    // Deserializes JSON and reports both "null document" and malformed input as the CLI's own
    // exception type, so a broken file produces a regular command error instead of an
    // unhandled JsonException.
    static T ParseJson<T>(string json, string label) where T : class
    {
        try
        {
            return JsonSerializer.Deserialize<T>(json, JsonOptions)
                ?? throw new CommandLineException($"Failed to parse {label} JSON");
        }
        catch (JsonException ex)
        {
            throw new CommandLineException($"Failed to parse {label} JSON: {ex.Message}");
        }
    }

    // baseline update
    var resultsOption = new Option<string>("--results", "Path to benchmark results JSON")
    {
        Required = true
    };
    var outputOption = new Option<string>("--output", "Output path for new baseline")
    {
        Required = true
    };
    var noteOption = new Option<string?>("--note", "Note explaining the baseline update");

    var update = new Command("update", "Update baseline from benchmark results");
    update.Add(resultsOption);
    update.Add(outputOption);
    update.Add(noteOption);

    update.SetAction(async parseResult =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var outputPath = parseResult.GetValue(outputOption)!;
        var note = parseResult.GetValue(noteOption);

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
        var result = ParseJson<BenchmarkResult>(resultsJson, "results");

        var newBaseline = new BenchmarkBaseline(
            Version: "1.0.0",
            CreatedAt: DateTimeOffset.UtcNow,
            CorpusVersion: result.CorpusVersion,
            ScannerVersion: result.ScannerVersion,
            Precision: result.Metrics.Precision,
            Recall: result.Metrics.Recall,
            F1: result.Metrics.F1,
            TtfrpP95Ms: result.Metrics.TtfrpP95Ms,
            DeterministicReplay: result.Metrics.DeterministicReplay,
            Note: note);

        var baselineJson = JsonSerializer.Serialize(newBaseline, JsonOptions);
        await File.WriteAllTextAsync(outputPath, baselineJson, cancellationToken);

        Console.WriteLine($"Baseline updated: {outputPath}");
        Console.WriteLine($" Precision: {newBaseline.Precision:P1}");
        Console.WriteLine($" Recall: {newBaseline.Recall:P1}");
        Console.WriteLine($" F1: {newBaseline.F1:P1}");
        Console.WriteLine($" TTFRP p95: {newBaseline.TtfrpP95Ms}ms");
        Console.WriteLine($" Determinism: {newBaseline.DeterministicReplay:P0}");
    });
    baseline.Add(update);

    // baseline show
    var baselinePathOption = new Option<string>("--path", "Path to baseline JSON")
    {
        Required = true
    };

    var show = new Command("show", "Display baseline metrics");
    show.Add(baselinePathOption);

    show.SetAction(async parseResult =>
    {
        var path = parseResult.GetValue(baselinePathOption)!;

        if (!File.Exists(path))
        {
            throw new CommandLineException($"Baseline file not found: {path}");
        }

        var json = await File.ReadAllTextAsync(path, cancellationToken);

        // Named "loaded" rather than "baseline" so it does not shadow the enclosing
        // "baseline" command variable.
        var loaded = ParseJson<BenchmarkBaseline>(json, "baseline");

        Console.WriteLine($"=== Baseline: {path} ===");
        Console.WriteLine($"Version: {loaded.Version}");
        Console.WriteLine($"Created: {loaded.CreatedAt:O}");
        Console.WriteLine($"Corpus: {loaded.CorpusVersion}");
        Console.WriteLine($"Scanner: {loaded.ScannerVersion}");
        Console.WriteLine();
        Console.WriteLine("Metrics:");
        Console.WriteLine($" Precision: {loaded.Precision:P1}");
        Console.WriteLine($" Recall: {loaded.Recall:P1}");
        Console.WriteLine($" F1: {loaded.F1:P1}");
        Console.WriteLine($" TTFRP p95: {loaded.TtfrpP95Ms}ms");
        Console.WriteLine($" Determinism: {loaded.DeterministicReplay:P0}");

        if (loaded.Note is not null)
        {
            Console.WriteLine();
            Console.WriteLine($"Note: {loaded.Note}");
        }
    });
    baseline.Add(show);

    return baseline;
}
/// <summary>
/// Build the `bench report` command.
/// </summary>
/// <remarks>
/// The command's action reads a results file, renders it as HTML when <c>--format</c> is
/// <c>html</c> (compared case-insensitively) and as markdown for any other value, then writes
/// the report to <c>--output</c> when given or prints it to the console otherwise.
/// </remarks>
/// <param name="services">Service provider; accepted for signature parity with the other builders, not read here.</param>
/// <param name="verboseOption">Shared verbose flag; accepted for signature parity, not read here.</param>
/// <param name="cancellationToken">Token passed to file reads and writes.</param>
/// <returns>The configured <c>report</c> command.</returns>
/// <exception cref="CommandLineException">
/// Thrown by the action when the results file is missing or its JSON cannot be parsed.
/// </exception>
private static Command BuildReportCommand(
    IServiceProvider services,
    Option<bool> verboseOption,
    CancellationToken cancellationToken)
{
    var resultsOption = new Option<string>("--results", "Path to benchmark results JSON")
    {
        Required = true
    };
    var formatOption = new Option<string>("--format", () => "markdown", "Output format: markdown, html");
    var outputOption = new Option<string?>("--output", "Output path for report");

    var report = new Command("report", "Generate benchmark report");
    report.Add(resultsOption);
    report.Add(formatOption);
    report.Add(outputOption);

    report.SetAction(async parseResult =>
    {
        var resultsPath = parseResult.GetValue(resultsOption)!;
        var format = parseResult.GetValue(formatOption);
        var outputPath = parseResult.GetValue(outputOption);

        if (!File.Exists(resultsPath))
        {
            throw new CommandLineException($"Results file not found: {resultsPath}");
        }

        var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);

        BenchmarkResult result;
        try
        {
            result = JsonSerializer.Deserialize<BenchmarkResult>(resultsJson, JsonOptions)
                ?? throw new CommandLineException("Failed to parse results JSON");
        }
        catch (JsonException ex)
        {
            // Malformed input should surface as a regular command error, not as an
            // unhandled JsonException.
            throw new CommandLineException($"Failed to parse results JSON: {ex.Message}");
        }

        // Case-insensitive so "--format HTML" is not silently rendered as markdown.
        var asHtml = string.Equals(format, "html", StringComparison.OrdinalIgnoreCase);
        var reportContent = asHtml
            ? FormatHtmlReport(result)
            : FormatMarkdownReport(result);

        if (outputPath is not null)
        {
            await File.WriteAllTextAsync(outputPath, reportContent, cancellationToken);
            Console.WriteLine($"Report written to: {outputPath}");
        }
        else
        {
            Console.WriteLine(reportContent);
        }
    });

    return report;
}
/// <summary>
/// Renders a benchmark result as a Markdown document: a header with run metadata, a summary
/// metrics table, and one table row per sample result.
/// </summary>
/// <remarks>
/// All values are formatted with the invariant culture so the generated report does not depend
/// on the locale of the machine that produced it (percentages render like <c>95.0 %</c>).
/// </remarks>
/// <param name="result">The benchmark result to render.</param>
/// <returns>The Markdown text, with lines terminated by <see cref="Environment.NewLine"/>.</returns>
private static string FormatMarkdownReport(BenchmarkResult result)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var sb = new System.Text.StringBuilder();

    sb.AppendLine("# Reachability Benchmark Report");
    sb.AppendLine();
    sb.AppendLine(invariant, $"**Run ID:** {result.RunId}");
    sb.AppendLine(invariant, $"**Timestamp:** {result.Timestamp:O}");
    sb.AppendLine(invariant, $"**Corpus Version:** {result.CorpusVersion}");
    sb.AppendLine(invariant, $"**Scanner Version:** {result.ScannerVersion}");
    sb.AppendLine(invariant, $"**Duration:** {result.DurationMs}ms");
    sb.AppendLine();

    sb.AppendLine("## Summary Metrics");
    sb.AppendLine();
    sb.AppendLine("| Metric | Value |");
    sb.AppendLine("|--------|-------|");
    sb.AppendLine(invariant, $"| Precision | {result.Metrics.Precision:P1} |");
    sb.AppendLine(invariant, $"| Recall | {result.Metrics.Recall:P1} |");
    sb.AppendLine(invariant, $"| F1 Score | {result.Metrics.F1:P1} |");
    sb.AppendLine(invariant, $"| TTFRP p50 | {result.Metrics.TtfrpP50Ms}ms |");
    sb.AppendLine(invariant, $"| TTFRP p95 | {result.Metrics.TtfrpP95Ms}ms |");
    sb.AppendLine(invariant, $"| Deterministic Replay | {result.Metrics.DeterministicReplay:P0} |");
    sb.AppendLine();

    sb.AppendLine("## Sample Results");
    sb.AppendLine();
    sb.AppendLine("| Sample | Expected | Actual | Match | Duration |");
    sb.AppendLine("|--------|----------|--------|-------|----------|");
    foreach (var sample in result.SampleResults)
    {
        var match = sample.MatchedExpected ? "✅" : "❌";
        sb.AppendLine(invariant, $"| {sample.SampleId} | {sample.ExpectedReachability} | {sample.ActualReachability} | {match} | {sample.DurationMs}ms |");
    }

    return sb.ToString();
}
/// <summary>
/// Renders a benchmark result as a standalone HTML page: run metadata, a summary metrics table,
/// and a per-sample results table (the same fields the Markdown report shows).
/// </summary>
/// <remarks>
/// Values taken from the result are HTML-encoded before being embedded, and numbers are
/// formatted with the invariant culture so the page does not depend on the machine locale.
/// The page declares UTF-8, which is the default encoding of <c>File.WriteAllTextAsync</c>.
/// </remarks>
/// <param name="result">The benchmark result to render.</param>
/// <returns>The HTML text, with lines terminated by <see cref="Environment.NewLine"/>.</returns>
private static string FormatHtmlReport(BenchmarkResult result)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Converts any value to text and encodes it for safe use as HTML text content.
    static string Html(object? value) =>
        System.Net.WebUtility.HtmlEncode(
            Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture)) ?? string.Empty;

    var sb = new System.Text.StringBuilder();
    sb.AppendLine("<!DOCTYPE html>");
    sb.AppendLine("<html><head><meta charset=\"utf-8\"><title>Benchmark Report</title>");
    sb.AppendLine("<style>");
    sb.AppendLine("body { font-family: system-ui; max-width: 900px; margin: 0 auto; padding: 20px; }");
    sb.AppendLine("table { border-collapse: collapse; width: 100%; }");
    sb.AppendLine("th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }");
    sb.AppendLine("th { background-color: #f2f2f2; }");
    sb.AppendLine(".pass { color: green; }");
    sb.AppendLine(".fail { color: red; }");
    sb.AppendLine("</style></head><body>");

    sb.AppendLine("<h1>Reachability Benchmark Report</h1>");
    sb.AppendLine(invariant, $"<p><strong>Run ID:</strong> {Html(result.RunId)}</p>");
    sb.AppendLine(invariant, $"<p><strong>Timestamp:</strong> {result.Timestamp:O}</p>");
    sb.AppendLine(invariant, $"<p><strong>Corpus Version:</strong> {Html(result.CorpusVersion)}</p>");
    sb.AppendLine(invariant, $"<p><strong>Scanner Version:</strong> {Html(result.ScannerVersion)}</p>");
    sb.AppendLine(invariant, $"<p><strong>Duration:</strong> {result.DurationMs}ms</p>");

    sb.AppendLine("<h2>Summary Metrics</h2>");
    sb.AppendLine("<table>");
    sb.AppendLine("<tr><th>Metric</th><th>Value</th></tr>");
    sb.AppendLine(invariant, $"<tr><td>Precision</td><td>{result.Metrics.Precision:P1}</td></tr>");
    sb.AppendLine(invariant, $"<tr><td>Recall</td><td>{result.Metrics.Recall:P1}</td></tr>");
    sb.AppendLine(invariant, $"<tr><td>F1 Score</td><td>{result.Metrics.F1:P1}</td></tr>");
    sb.AppendLine(invariant, $"<tr><td>TTFRP p50</td><td>{result.Metrics.TtfrpP50Ms}ms</td></tr>");
    sb.AppendLine(invariant, $"<tr><td>TTFRP p95</td><td>{result.Metrics.TtfrpP95Ms}ms</td></tr>");
    sb.AppendLine(invariant, $"<tr><td>Determinism</td><td>{result.Metrics.DeterministicReplay:P0}</td></tr>");
    sb.AppendLine("</table>");

    // Per-sample table; the pass/fail stylesheet classes above colour the match column.
    sb.AppendLine("<h2>Sample Results</h2>");
    sb.AppendLine("<table>");
    sb.AppendLine("<tr><th>Sample</th><th>Expected</th><th>Actual</th><th>Match</th><th>Duration</th></tr>");
    foreach (var sample in result.SampleResults)
    {
        var cssClass = sample.MatchedExpected ? "pass" : "fail";
        var mark = sample.MatchedExpected ? "✅" : "❌";
        sb.AppendLine(invariant, $"<tr><td>{Html(sample.SampleId)}</td><td>{Html(sample.ExpectedReachability)}</td><td>{Html(sample.ActualReachability)}</td><td class=\"{cssClass}\">{mark}</td><td>{sample.DurationMs}ms</td></tr>");
    }
    sb.AppendLine("</table>");

    sb.AppendLine("</body></html>");
    return sb.ToString();
}
}