// -----------------------------------------------------------------------------
// BenchCommandBuilder.cs
// Sprint: SPRINT_3500_0003_0001_ground_truth_corpus_ci_gates
// Task: CORPUS-007 - Add `stellaops bench run --corpus <path>` CLI command
// Task: CORPUS-008 - Add `stellaops bench check --baseline <path>` regression checker
// Task: CORPUS-011 - Implement baseline update tool
// Description: CLI commands for running and managing reachability benchmarks
// -----------------------------------------------------------------------------

using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scanner.Benchmarks;
using System.CommandLine;
using System.Text.Json;

namespace StellaOps.Cli.Commands;

/// <summary>
/// Builds CLI commands for benchmark operations.
/// </summary>
/// <remarks>
/// NOTE(review): this file was recovered from a copy in which every angle-bracketed token
/// (generic type arguments, XML doc tags, HTML markup) had been stripped. The generic
/// arguments and HTML tags below were reconstructed from how each value is used; the spots
/// that could not be derived from usage carry their own NOTE(review) and must be confirmed.
/// </remarks>
internal static class BenchCommandBuilder
{
    // Shared by every read and write so files produced by `bench run` / `bench baseline update`
    // can be read back by `bench check`, `bench baseline show` and `bench report`.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    /// <summary>
    /// Build the root `bench` command with its `run`, `check`, `baseline` and `report` subcommands.
    /// </summary>
    /// <param name="services">Service provider handed to the subcommands.</param>
    /// <param name="verboseOption">Shared verbose option handed to the subcommands.</param>
    /// <param name="cancellationToken">Token captured by the command actions and passed to their async I/O.</param>
    /// <returns>The configured `bench` command.</returns>
    internal static Command BuildBenchCommand(
        IServiceProvider services,
        Option<bool> verboseOption,
        CancellationToken cancellationToken)
    {
        var bench = new Command("bench", "Run and manage reachability benchmarks");

        bench.Add(BuildRunCommand(services, verboseOption, cancellationToken));
        bench.Add(BuildCheckCommand(services, verboseOption, cancellationToken));
        bench.Add(BuildBaselineCommand(services, verboseOption, cancellationToken));
        bench.Add(BuildReportCommand(services, verboseOption, cancellationToken));

        return bench;
    }

    /// <summary>
    /// Build the `bench run` command.
    /// </summary>
    /// <remarks>
    /// The action runs the corpus through the runner resolved from <paramref name="services"/>,
    /// emits the result as JSON or markdown (to <c>--output</c> when given, otherwise to the
    /// console) and then prints a short metric summary.
    /// </remarks>
    private static Command BuildRunCommand(
        IServiceProvider services,
        Option<bool> verboseOption,
        CancellationToken cancellationToken)
    {
        var corpusOption = new Option<string>("--corpus", "Path to corpus.json index file") { Required = true };
        var outputOption = new Option<string?>("--output", "Output path for results JSON");
        // NOTE(review): element type of the two filter options assumed to be string[] - confirm
        // against the declared types of CorpusRunOptions.Categories / SampleIds.
        var categoryOption = new Option<string[]?>("--category", "Filter to specific categories");
        var sampleOption = new Option<string[]?>("--sample", "Filter to specific sample IDs");
        var parallelOption = new Option<int>("--parallel", () => 1, "Number of parallel workers");
        var timeoutOption = new Option<int>("--timeout", () => 30000, "Timeout per sample in milliseconds");
        var determinismOption = new Option<bool>("--check-determinism", () => true, "Run determinism checks");
        var runsOption = new Option<int>("--determinism-runs", () => 3, "Number of runs for determinism check");
        var formatOption = new Option<string>("--format", () => "json", "Output format: json, markdown");

        var run = new Command("run", "Run the ground-truth corpus benchmark");
        run.Add(corpusOption);
        run.Add(outputOption);
        run.Add(categoryOption);
        run.Add(sampleOption);
        run.Add(parallelOption);
        run.Add(timeoutOption);
        run.Add(determinismOption);
        run.Add(runsOption);
        run.Add(formatOption);

        run.SetAction(async parseResult =>
        {
            var corpusPath = parseResult.GetValue(corpusOption)!;
            var outputPath = parseResult.GetValue(outputOption);
            var categories = parseResult.GetValue(categoryOption);
            var samples = parseResult.GetValue(sampleOption);
            var parallel = parseResult.GetValue(parallelOption);
            var timeout = parseResult.GetValue(timeoutOption);
            var checkDeterminism = parseResult.GetValue(determinismOption);
            var determinismRuns = parseResult.GetValue(runsOption);
            var format = parseResult.GetValue(formatOption);

            if (!File.Exists(corpusPath))
            {
                throw new CommandLineException($"Corpus file not found: {corpusPath}");
            }

            var options = new CorpusRunOptions
            {
                Categories = categories,
                SampleIds = samples,
                Parallelism = parallel,
                TimeoutMs = timeout,
                CheckDeterminism = checkDeterminism,
                DeterminismRuns = determinismRuns
            };

            Console.WriteLine($"Running benchmark corpus: {corpusPath}");
            Console.WriteLine($"Options: parallel={parallel}, timeout={timeout}ms, determinism={checkDeterminism}");

            // NOTE(review): the service type argument was lost; ICorpusRunner is assumed - confirm
            // against the type registered by StellaOps.Scanner.Benchmarks.
            var runner = services.GetRequiredService<ICorpusRunner>();
            var result = await runner.RunAsync(corpusPath, options, cancellationToken);

            // Output results: anything other than "markdown" falls back to JSON.
            if (format == "markdown")
            {
                await WriteOrPrintAsync(outputPath, FormatMarkdownReport(result), "Markdown report", cancellationToken);
            }
            else
            {
                var json = JsonSerializer.Serialize(result, JsonOptions);
                await WriteOrPrintAsync(outputPath, json, "Results", cancellationToken);
            }

            // Print summary
            Console.WriteLine();
            Console.WriteLine("=== Benchmark Summary ===");
            Console.WriteLine($"Precision: {result.Metrics.Precision:P1}");
            Console.WriteLine($"Recall: {result.Metrics.Recall:P1}");
            Console.WriteLine($"F1 Score: {result.Metrics.F1:P1}");
            Console.WriteLine($"Determinism: {result.Metrics.DeterministicReplay:P0}");
            Console.WriteLine($"Duration: {result.DurationMs}ms");
        });

        return run;
    }

    /// <summary>
    /// Build the `bench check` command.
    /// </summary>
    /// <remarks>
    /// The action loads a results file and a baseline file, calls
    /// <c>result.CheckRegression(baseline)</c>, prints every reported issue, optionally writes
    /// the check result as JSON to <c>--output</c>, and sets <see cref="Environment.ExitCode"/>
    /// to 1 when the check fails.
    /// </remarks>
    private static Command BuildCheckCommand(
        IServiceProvider services,
        Option<bool> verboseOption,
        CancellationToken cancellationToken)
    {
        var resultsOption = new Option<string>("--results", "Path to benchmark results JSON") { Required = true };
        var baselineOption = new Option<string>("--baseline", "Path to baseline JSON") { Required = true };
        var strictOption = new Option<bool>("--strict", () => false, "Fail on any metric degradation");
        var outputOption = new Option<string?>("--output", "Output path for regression report");

        var check = new Command("check", "Check benchmark results against baseline");
        check.Add(resultsOption);
        check.Add(baselineOption);
        check.Add(strictOption);
        check.Add(outputOption);

        check.SetAction(async parseResult =>
        {
            var resultsPath = parseResult.GetValue(resultsOption)!;
            var baselinePath = parseResult.GetValue(baselineOption)!;
            var strict = parseResult.GetValue(strictOption);
            var outputPath = parseResult.GetValue(outputOption);

            // Both paths are validated before either file is read.
            if (!File.Exists(resultsPath))
            {
                throw new CommandLineException($"Results file not found: {resultsPath}");
            }

            if (!File.Exists(baselinePath))
            {
                throw new CommandLineException($"Baseline file not found: {baselinePath}");
            }

            var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
            var baselineJson = await File.ReadAllTextAsync(baselinePath, cancellationToken);

            var result = DeserializeOrThrow<BenchmarkResult>(resultsJson, "Failed to parse results JSON");
            var baseline = DeserializeOrThrow<BenchmarkBaseline>(baselineJson, "Failed to parse baseline JSON");

            var checkResult = result.CheckRegression(baseline);

            // --strict was previously parsed and then ignored. It now fails the check as soon
            // as any issue is reported, including non-error ones.
            // NOTE(review): this reads "Fail on any metric degradation" as "every reported issue
            // is fatal" - confirm that matches what CheckRegression reports as an issue.
            var passed = checkResult.Passed && !(strict && checkResult.Issues.Count > 0);

            Console.WriteLine("=== Regression Check Results ===");
            Console.WriteLine($"Status: {(passed ? "PASSED" : "FAILED")}");
            Console.WriteLine();

            if (checkResult.Issues.Count > 0)
            {
                Console.WriteLine("Issues:");
                foreach (var issue in checkResult.Issues)
                {
                    var icon = issue.Severity == IssueSeverity.Error ? "❌" : "⚠️";
                    Console.WriteLine($" {icon} [{issue.Metric}] {issue.Message}");
                    Console.WriteLine($" Baseline: {issue.BaselineValue:F4}, Current: {issue.CurrentValue:F4}");
                }
            }
            else
            {
                Console.WriteLine("No regressions detected.");
            }

            // Write report if requested
            if (outputPath is not null)
            {
                var report = JsonSerializer.Serialize(checkResult, JsonOptions);
                await File.WriteAllTextAsync(outputPath, report, cancellationToken);
                Console.WriteLine($"\nReport written to: {outputPath}");
            }

            // Exit with error if failed
            if (!passed)
            {
                Environment.ExitCode = 1;
            }
        });

        return check;
    }

    /// <summary>
    /// Build the `bench baseline` command group.
    /// </summary>
    /// <remarks>
    /// Contains two subcommands: <c>update</c>, which writes a new baseline file from the
    /// metrics of a results file, and <c>show</c>, which prints an existing baseline file.
    /// </remarks>
    private static Command BuildBaselineCommand(
        IServiceProvider services,
        Option<bool> verboseOption,
        CancellationToken cancellationToken)
    {
        var baseline = new Command("baseline", "Manage benchmark baselines");

        // baseline update
        var resultsOption = new Option<string>("--results", "Path to benchmark results JSON") { Required = true };
        var outputOption = new Option<string>("--output", "Output path for new baseline") { Required = true };
        var noteOption = new Option<string?>("--note", "Note explaining the baseline update");

        var update = new Command("update", "Update baseline from benchmark results");
        update.Add(resultsOption);
        update.Add(outputOption);
        update.Add(noteOption);

        update.SetAction(async parseResult =>
        {
            var resultsPath = parseResult.GetValue(resultsOption)!;
            var outputPath = parseResult.GetValue(outputOption)!;
            var note = parseResult.GetValue(noteOption);

            if (!File.Exists(resultsPath))
            {
                throw new CommandLineException($"Results file not found: {resultsPath}");
            }

            var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
            var result = DeserializeOrThrow<BenchmarkResult>(resultsJson, "Failed to parse results JSON");

            var newBaseline = new BenchmarkBaseline(
                Version: "1.0.0",
                CreatedAt: DateTimeOffset.UtcNow,
                CorpusVersion: result.CorpusVersion,
                ScannerVersion: result.ScannerVersion,
                Precision: result.Metrics.Precision,
                Recall: result.Metrics.Recall,
                F1: result.Metrics.F1,
                TtfrpP95Ms: result.Metrics.TtfrpP95Ms,
                DeterministicReplay: result.Metrics.DeterministicReplay,
                Note: note);

            var baselineJson = JsonSerializer.Serialize(newBaseline, JsonOptions);
            await File.WriteAllTextAsync(outputPath, baselineJson, cancellationToken);

            Console.WriteLine($"Baseline updated: {outputPath}");
            Console.WriteLine($" Precision: {newBaseline.Precision:P1}");
            Console.WriteLine($" Recall: {newBaseline.Recall:P1}");
            Console.WriteLine($" F1: {newBaseline.F1:P1}");
            Console.WriteLine($" TTFRP p95: {newBaseline.TtfrpP95Ms}ms");
            Console.WriteLine($" Determinism: {newBaseline.DeterministicReplay:P0}");
        });

        baseline.Add(update);

        // baseline show
        var baselinePathOption = new Option<string>("--path", "Path to baseline JSON") { Required = true };

        var show = new Command("show", "Display baseline metrics");
        show.Add(baselinePathOption);

        show.SetAction(async parseResult =>
        {
            var path = parseResult.GetValue(baselinePathOption)!;

            if (!File.Exists(path))
            {
                throw new CommandLineException($"Baseline file not found: {path}");
            }

            var json = await File.ReadAllTextAsync(path, cancellationToken);
            // Named differently from the enclosing `baseline` command so the two are not confused.
            var loadedBaseline = DeserializeOrThrow<BenchmarkBaseline>(json, "Failed to parse baseline JSON");

            Console.WriteLine($"=== Baseline: {path} ===");
            Console.WriteLine($"Version: {loadedBaseline.Version}");
            Console.WriteLine($"Created: {loadedBaseline.CreatedAt:O}");
            Console.WriteLine($"Corpus: {loadedBaseline.CorpusVersion}");
            Console.WriteLine($"Scanner: {loadedBaseline.ScannerVersion}");
            Console.WriteLine();
            Console.WriteLine("Metrics:");
            Console.WriteLine($" Precision: {loadedBaseline.Precision:P1}");
            Console.WriteLine($" Recall: {loadedBaseline.Recall:P1}");
            Console.WriteLine($" F1: {loadedBaseline.F1:P1}");
            Console.WriteLine($" TTFRP p95: {loadedBaseline.TtfrpP95Ms}ms");
            Console.WriteLine($" Determinism: {loadedBaseline.DeterministicReplay:P0}");

            if (loadedBaseline.Note is not null)
            {
                Console.WriteLine();
                Console.WriteLine($"Note: {loadedBaseline.Note}");
            }
        });

        baseline.Add(show);

        return baseline;
    }

    /// <summary>
    /// Build the `bench report` command.
    /// </summary>
    /// <remarks>
    /// The action loads a results file and renders it as HTML when <c>--format</c> is
    /// <c>html</c>, otherwise as markdown; the report goes to <c>--output</c> when given,
    /// otherwise to the console.
    /// </remarks>
    private static Command BuildReportCommand(
        IServiceProvider services,
        Option<bool> verboseOption,
        CancellationToken cancellationToken)
    {
        var resultsOption = new Option<string>("--results", "Path to benchmark results JSON") { Required = true };
        var formatOption = new Option<string>("--format", () => "markdown", "Output format: markdown, html");
        var outputOption = new Option<string?>("--output", "Output path for report");

        var report = new Command("report", "Generate benchmark report");
        report.Add(resultsOption);
        report.Add(formatOption);
        report.Add(outputOption);

        report.SetAction(async parseResult =>
        {
            var resultsPath = parseResult.GetValue(resultsOption)!;
            var format = parseResult.GetValue(formatOption);
            var outputPath = parseResult.GetValue(outputOption);

            if (!File.Exists(resultsPath))
            {
                throw new CommandLineException($"Results file not found: {resultsPath}");
            }

            var resultsJson = await File.ReadAllTextAsync(resultsPath, cancellationToken);
            var result = DeserializeOrThrow<BenchmarkResult>(resultsJson, "Failed to parse results JSON");

            var reportContent = format == "html"
                ? FormatHtmlReport(result)
                : FormatMarkdownReport(result);

            await WriteOrPrintAsync(outputPath, reportContent, "Report", cancellationToken);
        });

        return report;
    }

    /// <summary>
    /// Deserialize <paramref name="json"/> with the shared <see cref="JsonOptions"/>.
    /// </summary>
    /// <param name="json">JSON text to parse.</param>
    /// <param name="failureMessage">Message used for the exception raised on failure.</param>
    /// <returns>The deserialized instance; never <c>null</c>.</returns>
    /// <exception cref="CommandLineException">
    /// The text is a JSON <c>null</c> (message is exactly <paramref name="failureMessage"/>) or is
    /// not valid JSON for <typeparamref name="T"/> (message is <paramref name="failureMessage"/>
    /// followed by the parser's detail).
    /// </exception>
    private static T DeserializeOrThrow<T>(string json, string failureMessage)
        where T : class
    {
        try
        {
            return JsonSerializer.Deserialize<T>(json, JsonOptions)
                ?? throw new CommandLineException(failureMessage);
        }
        catch (JsonException ex)
        {
            // Malformed input surfaces as JsonException, not as a null result; translate it so
            // the user gets the same command-line error type as for the null case.
            throw new CommandLineException($"{failureMessage}: {ex.Message}");
        }
    }

    /// <summary>
    /// Write <paramref name="content"/> to <paramref name="outputPath"/> and print a
    /// "<paramref name="label"/> written to: ..." confirmation, or print the content itself
    /// to the console when no path was given.
    /// </summary>
    private static async Task WriteOrPrintAsync(
        string? outputPath,
        string content,
        string label,
        CancellationToken cancellationToken)
    {
        if (outputPath is null)
        {
            Console.WriteLine(content);
            return;
        }

        await File.WriteAllTextAsync(outputPath, content, cancellationToken);
        Console.WriteLine($"{label} written to: {outputPath}");
    }

    /// <summary>
    /// Render a benchmark result as a markdown document: run header, a summary metrics table
    /// and one table row per entry in <c>result.SampleResults</c>.
    /// </summary>
    private static string FormatMarkdownReport(BenchmarkResult result)
    {
        var sb = new System.Text.StringBuilder();

        sb.AppendLine("# Reachability Benchmark Report");
        sb.AppendLine();
        sb.AppendLine($"**Run ID:** {result.RunId}");
        sb.AppendLine($"**Timestamp:** {result.Timestamp:O}");
        sb.AppendLine($"**Corpus Version:** {result.CorpusVersion}");
        sb.AppendLine($"**Scanner Version:** {result.ScannerVersion}");
        sb.AppendLine($"**Duration:** {result.DurationMs}ms");
        sb.AppendLine();

        sb.AppendLine("## Summary Metrics");
        sb.AppendLine();
        sb.AppendLine("| Metric | Value |");
        sb.AppendLine("|--------|-------|");
        sb.AppendLine($"| Precision | {result.Metrics.Precision:P1} |");
        sb.AppendLine($"| Recall | {result.Metrics.Recall:P1} |");
        sb.AppendLine($"| F1 Score | {result.Metrics.F1:P1} |");
        sb.AppendLine($"| TTFRP p50 | {result.Metrics.TtfrpP50Ms}ms |");
        sb.AppendLine($"| TTFRP p95 | {result.Metrics.TtfrpP95Ms}ms |");
        sb.AppendLine($"| Deterministic Replay | {result.Metrics.DeterministicReplay:P0} |");
        sb.AppendLine();

        sb.AppendLine("## Sample Results");
        sb.AppendLine();
        sb.AppendLine("| Sample | Expected | Actual | Match | Duration |");
        sb.AppendLine("|--------|----------|--------|-------|----------|");

        foreach (var sample in result.SampleResults)
        {
            var match = sample.MatchedExpected ? "✅" : "❌";
            sb.AppendLine($"| {sample.SampleId} | {sample.ExpectedReachability} | {sample.ActualReachability} | {match} | {sample.DurationMs}ms |");
        }

        return sb.ToString();
    }

    /// <summary>
    /// Render a benchmark result as a basic HTML page: run header plus a summary metrics table.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the HTML tags were stripped from the recovered source; only the text
    /// content and the number of <c>AppendLine</c> calls survived. The tags below are a
    /// reconstruction - confirm against the original, in particular the third line, which may
    /// have carried a style block.
    /// </remarks>
    private static string FormatHtmlReport(BenchmarkResult result)
    {
        // Basic HTML report
        var sb = new System.Text.StringBuilder();

        // RunId comes from a results file on disk, so encode it before embedding it in markup.
        var runId = System.Net.WebUtility.HtmlEncode($"{result.RunId}");

        sb.AppendLine("<!DOCTYPE html>");
        sb.AppendLine("<html><head><title>Benchmark Report</title>");
        sb.AppendLine("</head><body>");
        sb.AppendLine($"<h1>Reachability Benchmark Report</h1>");
        sb.AppendLine($"<p>Run ID: {runId}</p>");
        sb.AppendLine($"<p>Timestamp: {result.Timestamp:O}</p>");
        sb.AppendLine("<h2>Summary Metrics</h2>");
        sb.AppendLine("<table>");
        sb.AppendLine("<tr><th>Metric</th><th>Value</th></tr>");
        sb.AppendLine($"<tr><td>Precision</td><td>{result.Metrics.Precision:P1}</td></tr>");
        sb.AppendLine($"<tr><td>Recall</td><td>{result.Metrics.Recall:P1}</td></tr>");
        sb.AppendLine($"<tr><td>F1 Score</td><td>{result.Metrics.F1:P1}</td></tr>");
        sb.AppendLine($"<tr><td>Determinism</td><td>{result.Metrics.DeterministicReplay:P0}</td></tr>");
        sb.AppendLine("</table>");
        sb.AppendLine("</body></html>");

        return sb.ToString();
    }
}