Files
git.stella-ops.org/src/Scanner/__Tests/StellaOps.Scanner.SmartDiff.Tests/Benchmarks/SmartDiffPerfSmokeTests.cs
StellaOps Bot 7503c19b8f Add determinism tests for verdict artifact generation and update SHA256 sums script
- Implemented comprehensive tests for verdict artifact generation to ensure deterministic outputs across various scenarios, including identical inputs, parallel execution, and change ordering.
- Created helper methods for generating sample verdict inputs and computing canonical hashes.
- Added tests to validate the stability of canonical hashes, proof spine ordering, and summary statistics.
- Introduced a new PowerShell script to update SHA256 sums for files, ensuring accurate hash generation and file integrity checks.
2025-12-24 02:17:34 +02:00

596 lines
21 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// -----------------------------------------------------------------------------
// SmartDiffPerfSmokeTests.cs
// Sprint: SPRINT_5100_0009_0001 - Scanner Module Test Implementation
// Task: SCANNER-5100-024 - Add perf smoke tests for smart diff (2× regression gate)
// Description: Performance smoke tests for SmartDiff with 2× regression gate.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Text.Json;
using FluentAssertions;
using Xunit;
using Xunit.Abstractions;
namespace StellaOps.Scanner.SmartDiffTests.Benchmarks;
/// <summary>
/// Performance smoke tests for SmartDiff calculation.
/// These tests enforce a 2× regression gate: if performance regresses to more than
/// twice the baseline, the test fails.
///
/// Baselines are conservative estimates based on expected behavior.
/// Run periodically in CI to detect performance regressions.
/// </summary>
[Trait("Category", "Perf")]
[Trait("Category", "PERF")]
[Trait("Category", "Smoke")]
public sealed class SmartDiffPerfSmokeTests
{
// Test output sink assigned in the constructor; every test writes its timings and thresholds to it.
private readonly ITestOutputHelper _output;
// Regression gate multiplier: 2× means test fails if time exceeds 2× baseline.
// Timing tests compute their threshold as (long)(baseline * RegressionGateMultiplier).
private const double RegressionGateMultiplier = 2.0;
// Baselines (in milliseconds) - conservative estimates.
// The trailing comment on each constant gives the input size used by the matching test.
private const long BaselineSmallDiffMs = 25; // 50 pkgs, 10 vulns
private const long BaselineMediumDiffMs = 100; // 500 pkgs, 100 vulns
private const long BaselineLargeDiffMs = 500; // 5000 pkgs, 1000 vulns
private const long BaselineXLargeDiffMs = 2000; // 10000 pkgs, 2000 vulns
private const long BaselineSarifGenerationMs = 50; // SARIF output generation
private const long BaselineScoringSingleMs = 5; // Single finding scoring
private const long BaselineScoringBatchMs = 100; // Batch scoring (100 findings)
/// <summary>
/// Stores the output helper that the tests use for logging measurements.
/// </summary>
/// <param name="output">Per-test output sink.</param>
public SmartDiffPerfSmokeTests(ITestOutputHelper output) => _output = output;
#region Diff Computation Performance
/// <summary>
/// Times one diff of a 50-package / 10-vulnerability scan pair and fails when the
/// elapsed time exceeds the small-diff baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void SmallDiff_Computation_Under2xBaseline()
{
    // Arrange
    const int packageCount = 50;
    const int vulnCount = 10;
    long baseline = BaselineSmallDiffMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffScanData before = GenerateScanData(packageCount, vulnCount, seed: 42);
    SmartDiffScanData after = GenerateScanData(packageCount + 5, vulnCount + 2, seed: 43);

    // Warm up: the first call is deliberately left out of the timed measurement.
    ComputeDiff(before, after);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    SmartDiffResult result = ComputeDiff(before, after);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"Small diff ({packageCount} pkgs, {vulnCount} vulns): {elapsedMs}ms");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");
    _output.WriteLine($"Added: {result.Added.Count}, Removed: {result.Removed.Count}");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Small diff exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
/// <summary>
/// Times one diff of a 500-package / 100-vulnerability scan pair and fails when the
/// elapsed time exceeds the medium-diff baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void MediumDiff_Computation_Under2xBaseline()
{
    // Arrange
    const int packageCount = 500;
    const int vulnCount = 100;
    long baseline = BaselineMediumDiffMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffScanData before = GenerateScanData(packageCount, vulnCount, seed: 42);
    SmartDiffScanData after = GenerateScanData(packageCount + 20, vulnCount + 10, seed: 43);

    // Warm up: the first call is deliberately left out of the timed measurement.
    ComputeDiff(before, after);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    _ = ComputeDiff(before, after);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"Medium diff ({packageCount} pkgs, {vulnCount} vulns): {elapsedMs}ms");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Medium diff exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
/// <summary>
/// Times one diff of a 5000-package / 1000-vulnerability scan pair and fails when the
/// elapsed time exceeds the large-diff baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void LargeDiff_Computation_Under2xBaseline()
{
    // Arrange
    const int packageCount = 5000;
    const int vulnCount = 1000;
    long baseline = BaselineLargeDiffMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffScanData before = GenerateScanData(packageCount, vulnCount, seed: 42);
    SmartDiffScanData after = GenerateScanData(packageCount + 100, vulnCount + 50, seed: 43);

    // Warm up: the first call is deliberately left out of the timed measurement.
    ComputeDiff(before, after);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    _ = ComputeDiff(before, after);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"Large diff ({packageCount} pkgs, {vulnCount} vulns): {elapsedMs}ms");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Large diff exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
/// <summary>
/// Times one diff of a 10000-package / 2000-vulnerability scan pair and fails when the
/// elapsed time exceeds the extra-large baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void XLargeDiff_Computation_Under2xBaseline()
{
    // Arrange
    const int packageCount = 10000;
    const int vulnCount = 2000;
    long baseline = BaselineXLargeDiffMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffScanData before = GenerateScanData(packageCount, vulnCount, seed: 42);
    SmartDiffScanData after = GenerateScanData(packageCount + 200, vulnCount + 100, seed: 43);

    // Warm up on a smaller, separately generated scan pair.
    SmartDiffScanData warmUpBefore = GenerateScanData(1000, 200, seed: 100);
    SmartDiffScanData warmUpAfter = GenerateScanData(1050, 220, seed: 101);
    ComputeDiff(warmUpBefore, warmUpAfter);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    _ = ComputeDiff(before, after);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"XLarge diff ({packageCount} pkgs, {vulnCount} vulns): {elapsedMs}ms");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"XLarge diff exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
#endregion
#region SARIF Generation Performance
/// <summary>
/// Times SARIF generation for the diff of a 500/550-package scan pair and fails when
/// the elapsed time exceeds the SARIF baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void SarifGeneration_Under2xBaseline()
{
    // Arrange
    long baseline = BaselineSarifGenerationMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffResult diff = ComputeDiff(
        GenerateScanData(500, 100, seed: 42),
        GenerateScanData(550, 120, seed: 43));

    // Warm up: the first call is deliberately left out of the timed measurement.
    GenerateSarif(diff);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    string sarif = GenerateSarif(diff);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"SARIF generation ({diff.Added.Count} added, {diff.Removed.Count} removed): {elapsedMs}ms");
    _output.WriteLine($"Output size: {sarif.Length / 1024.0:F1}KB");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"SARIF generation exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
/// <summary>
/// Times SARIF generation for the diff of a 5000/5200-package scan pair. The budget is
/// five times the regular SARIF baseline, multiplied by the regression gate.
/// </summary>
[Fact]
public void SarifGeneration_LargeDiff_Under2xBaseline()
{
    // Arrange
    var baseline = BaselineSarifGenerationMs * 5; // Scale up for larger diff
    var threshold = (long)(baseline * RegressionGateMultiplier);
    var baselineScan = GenerateScanData(5000, 1000, seed: 42);
    var currentScan = GenerateScanData(5200, 1100, seed: 43);
    var diff = ComputeDiff(baselineScan, currentScan);

    // Warm up, as the other timing tests do. Without it the timed run includes the
    // first-ever serialization whenever this test executes before the other SARIF test.
    _ = GenerateSarif(diff);

    // Act
    var sw = Stopwatch.StartNew();
    var sarif = GenerateSarif(diff);
    sw.Stop();

    // Log
    _output.WriteLine($"SARIF generation large ({diff.Added.Count} added): {sw.ElapsedMilliseconds}ms");
    _output.WriteLine($"Output size: {sarif.Length / 1024.0:F1}KB");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    sw.ElapsedMilliseconds.Should().BeLessThanOrEqualTo(threshold,
        $"Large SARIF generation exceeded 2× regression gate ({sw.ElapsedMilliseconds}ms > {threshold}ms)");
}
#endregion
#region Scoring Performance
/// <summary>
/// Scores one finding 1000 times and fails when the average time per call exceeds
/// the single-scoring baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void SingleFindingScoring_Under2xBaseline()
{
    // Arrange
    const int warmUpIterations = 100;
    const int iterations = 1000;
    long baseline = BaselineScoringSingleMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffVuln finding = CreateFinding("CVE-2024-1234", "HIGH", true, "executed");

    // Warm up
    int warmUpsDone = 0;
    while (warmUpsDone < warmUpIterations)
    {
        ScoreFinding(finding);
        warmUpsDone++;
    }

    // Act - many iterations so the average is measurable
    var timer = new Stopwatch();
    timer.Start();
    int completed = 0;
    while (completed < iterations)
    {
        ScoreFinding(finding);
        completed++;
    }
    timer.Stop();
    double avgMs = timer.Elapsed.TotalMilliseconds / iterations;

    // Log
    _output.WriteLine($"Single finding scoring: {avgMs:F4}ms average over {iterations} iterations");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    avgMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Single scoring exceeded 2× regression gate ({avgMs:F4}ms > {threshold}ms)");
}
/// <summary>
/// Scores a batch of 100 findings with rotating severities, reachability flags and
/// tiers, and fails when the elapsed time exceeds the batch baseline multiplied by
/// the regression gate or when the number of scores differs from the input count.
/// </summary>
[Fact]
public void BatchScoring_Under2xBaseline()
{
    // Arrange
    const int findingCount = 100;
    long baseline = BaselineScoringBatchMs;
    long threshold = (long)(baseline * RegressionGateMultiplier);

    var findings = new List<SmartDiffVuln>();
    for (int index = 0; index < findingCount; index++)
    {
        string severity = (index % 4) switch
        {
            0 => "CRITICAL",
            1 => "HIGH",
            2 => "MEDIUM",
            _ => "LOW"
        };
        bool reachable = index % 3 != 0;
        string tier = index % 2 == 0 ? "executed" : "called";
        findings.Add(CreateFinding($"CVE-2024-{index:D4}", severity, reachable, tier));
    }

    // Warm up: the first call is deliberately left out of the timed measurement.
    ScoreBatch(findings);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    List<double> scores = ScoreBatch(findings);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"Batch scoring ({findingCount} findings): {elapsedMs}ms");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Batch scoring exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
    scores.Should().HaveCount(findingCount);
}
#endregion
#region Scaling Behavior
/// <summary>
/// Checks that diff time grows roughly in proportion to input size by comparing
/// consecutive input sizes (100, 500, 1000, 2000 packages).
/// </summary>
/// <remarks>
/// For each step, scale factor = time ratio / size ratio. A linear algorithm gives
/// about 1; a quadratic one gives about the size ratio itself. With the 2.5 bound the
/// 5× step (100 → 500) rejects quadratic growth, while the 2× steps only reject
/// growth worse than quadratic.
/// </remarks>
[Fact]
public void DiffComputation_ScalesLinearlyWithSize()
{
    // Arrange
    const int runsPerSize = 3;
    var sizes = new[] { 100, 500, 1000, 2000 };
    var times = new List<(int size, double ms)>();

    // Warm up so one-time start-up cost is not charged to the first (smallest) size,
    // which would skew the first ratio.
    _ = ComputeDiff(
        GenerateScanData(100, 20, seed: 42),
        GenerateScanData(110, 22, seed: 43));

    // Act
    foreach (var size in sizes)
    {
        var baselineScan = GenerateScanData(size, size / 5, seed: 42);
        var currentScan = GenerateScanData(size + size / 10, size / 5 + size / 50, seed: 43);

        // Keep the fastest of a few runs to damp scheduler/GC noise, and use fractional
        // milliseconds: whole milliseconds truncate these sub-millisecond runs to 0.
        var bestMs = double.MaxValue;
        for (int run = 0; run < runsPerSize; run++)
        {
            var sw = Stopwatch.StartNew();
            _ = ComputeDiff(baselineScan, currentScan);
            sw.Stop();
            bestMs = Math.Min(bestMs, sw.Elapsed.TotalMilliseconds);
        }

        times.Add((size, bestMs));
        _output.WriteLine($"Size {size}: {bestMs:F3}ms");
    }

    // Assert
    for (int i = 1; i < times.Count; i++)
    {
        var sizeRatio = times[i].size / (double)times[i - 1].size;
        // The 1 ms floor on the denominator keeps tiny timings from inflating the ratio.
        var timeRatio = times[i].ms / Math.Max(1.0, times[i - 1].ms);
        var scaleFactor = timeRatio / sizeRatio;
        _output.WriteLine($"Size ratio: {sizeRatio:F1}×, Time ratio: {timeRatio:F1}×, Scale factor: {scaleFactor:F2}");

        // Allow some variance around linear (scale factor 1)
        scaleFactor.Should().BeLessThan(2.5,
            $"Diff computation shows non-linear scaling at size {times[i].size}");
    }
}
/// <summary>
/// Times a diff that also detects reachability flips. Both scans use the same seed and
/// differ only in the reachable ratio (0.3 vs 0.5). Fails when the elapsed time exceeds
/// a 150 ms baseline multiplied by the regression gate.
/// </summary>
[Fact]
public void DiffComputation_WithReachabilityFlips_UnderBaseline()
{
    // Arrange
    const int packageCount = 1000;
    const int vulnCount = 200;
    long baseline = 150L; // ms
    long threshold = (long)(baseline * RegressionGateMultiplier);
    SmartDiffScanData before = GenerateScanDataWithReachability(packageCount, vulnCount, reachableRatio: 0.3, seed: 42);
    SmartDiffScanData after = GenerateScanDataWithReachability(packageCount, vulnCount, reachableRatio: 0.5, seed: 42);

    // Warm up: the first call is deliberately left out of the timed measurement.
    ComputeDiffWithReachability(before, after);

    // Act
    var timer = new Stopwatch();
    timer.Start();
    SmartDiffResult result = ComputeDiffWithReachability(before, after);
    timer.Stop();
    long elapsedMs = timer.ElapsedMilliseconds;

    // Log
    _output.WriteLine($"Diff with reachability flips: {elapsedMs}ms");
    _output.WriteLine($"Reachability flips: {result.ReachabilityFlips.Count}");
    _output.WriteLine($"Baseline: {baseline}ms, Threshold (2×): {threshold}ms");

    // Assert
    elapsedMs.Should().BeLessThanOrEqualTo(
        threshold,
        $"Diff with reachability exceeded 2× regression gate ({elapsedMs}ms > {threshold}ms)");
}
#endregion
#region Memory Efficiency
/// <summary>
/// Measures how much managed memory stays allocated after generating two large scans,
/// diffing them and producing SARIF, and fails when the growth reaches 50 MB.
/// </summary>
/// <remarks>
/// The figure is the difference between two <see cref="GC.GetTotalMemory(bool)"/>
/// readings taken after forced collections, so it counts only what is still reachable
/// at the second reading.
/// </remarks>
[Fact]
public void LargeDiff_MemoryEfficient_Under50MB()
{
    // Arrange
    const int packageCount = 5000;
    const int vulnCount = 1000;
    GC.Collect();
    GC.WaitForPendingFinalizers();
    var beforeMem = GC.GetTotalMemory(true);

    // Act
    var baselineScan = GenerateScanData(packageCount, vulnCount, seed: 42);
    var currentScan = GenerateScanData(packageCount + 200, vulnCount + 100, seed: 43);
    var diff = ComputeDiff(baselineScan, currentScan);
    var sarif = GenerateSarif(diff);
    GC.Collect();
    GC.WaitForPendingFinalizers();
    var afterMem = GC.GetTotalMemory(true);

    // Pin every produced object past the second reading. Without this, `diff` has no
    // later use and could be collected before the measurement, making the result
    // depend on build configuration.
    GC.KeepAlive(baselineScan);
    GC.KeepAlive(currentScan);
    GC.KeepAlive(diff);
    GC.KeepAlive(sarif);

    var memoryUsedMB = (afterMem - beforeMem) / (1024.0 * 1024.0);

    // Log
    _output.WriteLine($"Large diff memory usage: {memoryUsedMB:F2}MB");
    _output.WriteLine($"SARIF output size: {sarif.Length / 1024.0:F1}KB");

    // Assert
    memoryUsedMB.Should().BeLessThan(50,
        $"Large diff memory usage ({memoryUsedMB:F2}MB) exceeds 50MB threshold");
    // Sanity check that the generated scans actually contain packages
    (baselineScan.Packages.Count + currentScan.Packages.Count).Should().BeGreaterThan(0);
}
#endregion
#region Test Infrastructure
/// <summary>
/// Builds a pseudo-random scan from a seeded <see cref="Random"/>, so the same
/// arguments always produce the same data.
/// </summary>
/// <param name="packageCount">Number of packages to generate.</param>
/// <param name="vulnCount">Number of vulnerabilities to generate; each is attached to a randomly chosen package.</param>
/// <param name="seed">Seed for the random number generator.</param>
/// <returns>A scan holding the generated packages and vulnerabilities.</returns>
private static SmartDiffScanData GenerateScanData(int packageCount, int vulnCount, int seed)
{
    // Lookup tables indexed by the random draw; the draw order below must not change,
    // otherwise the generated data changes.
    string[] ecosystems = { "npm", "nuget", "pypi" };
    string[] severities = { "LOW", "MEDIUM", "HIGH", "CRITICAL" };
    string[] tiers = { "imported", "called", "executed" };

    var rng = new Random(seed);
    var generatedPackages = new List<SmartDiffPackage>();
    var generatedVulns = new List<SmartDiffVuln>();

    for (int p = 0; p < packageCount; p++)
    {
        int minor = rng.Next(0, 10);
        int patch = rng.Next(0, 100);
        string ecosystem = ecosystems[rng.Next(0, 3)];
        var package = new SmartDiffPackage
        {
            Name = $"package-{p:D5}",
            Version = $"1.{minor}.{patch}",
            Ecosystem = ecosystem
        };
        generatedPackages.Add(package);
    }

    for (int v = 0; v < vulnCount; v++)
    {
        SmartDiffPackage owner = generatedPackages[rng.Next(0, generatedPackages.Count)];
        string severity = severities[rng.Next(0, 4)];
        bool reachable = rng.NextDouble() > 0.5;
        string tier = tiers[rng.Next(0, 3)];
        var vuln = new SmartDiffVuln
        {
            CveId = $"CVE-2024-{10000 + v}",
            Package = owner.Name,
            Version = owner.Version,
            Severity = severity,
            IsReachable = reachable,
            ReachabilityTier = tier
        };
        generatedVulns.Add(vuln);
    }

    return new SmartDiffScanData
    {
        Packages = generatedPackages,
        Vulnerabilities = generatedVulns
    };
}
/// <summary>
/// Generates scan data and then reassigns each vulnerability's reachability flag from a
/// second generator seeded with <c>seed + 1000</c>.
/// </summary>
/// <param name="packageCount">Number of packages to generate.</param>
/// <param name="vulnCount">Number of vulnerabilities to generate.</param>
/// <param name="reachableRatio">A vulnerability is marked reachable when its random draw is below this value.</param>
/// <param name="seed">Seed for the base data; the reachability pass uses <c>seed + 1000</c>.</param>
/// <returns>The generated scan with reassigned reachability flags.</returns>
private static SmartDiffScanData GenerateScanDataWithReachability(
    int packageCount, int vulnCount, double reachableRatio, int seed)
{
    SmartDiffScanData scan = GenerateScanData(packageCount, vulnCount, seed);
    var reachabilityRng = new Random(seed + 1000);
    scan.Vulnerabilities.ForEach(
        entry => entry.IsReachable = reachabilityRng.NextDouble() < reachableRatio);
    return scan;
}
/// <summary>
/// Computes which vulnerabilities were added or removed between two scans, identifying
/// a vulnerability by its (CVE id, package, version) triple.
/// </summary>
/// <param name="baseline">The earlier scan.</param>
/// <param name="current">The later scan.</param>
/// <returns>
/// A result whose <c>Added</c> holds current vulnerabilities absent from the baseline and
/// whose <c>Removed</c> holds baseline vulnerabilities absent from the current scan;
/// <c>ReachabilityFlips</c> is left empty.
/// </returns>
private static SmartDiffResult ComputeDiff(SmartDiffScanData baseline, SmartDiffScanData current)
{
    static (string, string, string) KeyOf(SmartDiffVuln vuln) => (vuln.CveId, vuln.Package, vuln.Version);

    var knownBefore = new HashSet<(string, string, string)>(baseline.Vulnerabilities.Select(KeyOf));
    var knownNow = new HashSet<(string, string, string)>(current.Vulnerabilities.Select(KeyOf));

    var added = new List<SmartDiffVuln>();
    foreach (SmartDiffVuln candidate in current.Vulnerabilities)
    {
        if (!knownBefore.Contains(KeyOf(candidate)))
        {
            added.Add(candidate);
        }
    }

    var removed = new List<SmartDiffVuln>();
    foreach (SmartDiffVuln candidate in baseline.Vulnerabilities)
    {
        if (!knownNow.Contains(KeyOf(candidate)))
        {
            removed.Add(candidate);
        }
    }

    return new SmartDiffResult
    {
        Added = added,
        Removed = removed,
        ReachabilityFlips = new List<SmartDiffVuln>()
    };
}
/// <summary>
/// Computes the added/removed diff and additionally records reachability flips:
/// vulnerabilities present in both scans whose reachability flag differs.
/// </summary>
/// <param name="baseline">The earlier scan; its (CVE id, package, version) keys must be unique, because they are indexed in a dictionary.</param>
/// <param name="current">The later scan.</param>
/// <returns>The diff with <c>ReachabilityFlips</c> populated from the current scan's entries.</returns>
private static SmartDiffResult ComputeDiffWithReachability(SmartDiffScanData baseline, SmartDiffScanData current)
{
    SmartDiffResult result = ComputeDiff(baseline, current);

    // Index the baseline by identity so each current entry is matched with one lookup.
    var baselineByKey = baseline.Vulnerabilities
        .ToDictionary(entry => (entry.CveId, entry.Package, entry.Version));

    var flips = new List<SmartDiffVuln>();
    foreach (SmartDiffVuln candidate in current.Vulnerabilities)
    {
        var key = (candidate.CveId, candidate.Package, candidate.Version);
        if (!baselineByKey.TryGetValue(key, out var previous))
        {
            continue;
        }

        if (previous.IsReachable != candidate.IsReachable)
        {
            flips.Add(candidate);
        }
    }

    result.ReachabilityFlips = flips;
    return result;
}
/// <summary>
/// Serializes the added vulnerabilities of a diff into a minimal SARIF 2.1.0 log
/// (compact JSON, one result per added vulnerability).
/// </summary>
/// <param name="diff">The diff whose <c>Added</c> entries become SARIF results.</param>
/// <returns>The SARIF document as a JSON string.</returns>
private static string GenerateSarif(SmartDiffResult diff)
{
    var results = diff.Added.Select(v => new
    {
        ruleId = v.CveId,
        message = new { text = $"New vulnerability: {v.CveId} in {v.Package}@{v.Version}" },
        // CRITICAL and HIGH map to "error"; every other severity maps to "warning".
        level = v.Severity switch { "CRITICAL" => "error", "HIGH" => "error", _ => "warning" },
        properties = new { severity = v.Severity, reachable = v.IsReachable }
    }).ToArray();

    // The top level is a dictionary because SARIF names the schema property "$schema",
    // which is not a valid C# member name for an anonymous type.
    var sarif = new Dictionary<string, object>
    {
        ["version"] = "2.1.0",
        ["$schema"] = "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        ["runs"] = new[]
        {
            new
            {
                tool = new { driver = new { name = "StellaOps.SmartDiff", version = "1.0.0" } },
                results
            }
        }
    };

    // The default options already write compact JSON, so no per-call
    // JsonSerializerOptions instance is allocated (CA1869).
    return JsonSerializer.Serialize(sarif);
}
/// <summary>
/// Creates a finding for the fixed package <c>test-package</c> at version <c>1.0.0</c>.
/// </summary>
/// <param name="cveId">CVE identifier of the finding.</param>
/// <param name="severity">Severity label.</param>
/// <param name="reachable">Whether the finding is reachable.</param>
/// <param name="tier">Reachability tier label.</param>
/// <returns>The new finding.</returns>
private static SmartDiffVuln CreateFinding(string cveId, string severity, bool reachable, string tier) =>
    new()
    {
        CveId = cveId,
        Package = "test-package",
        Version = "1.0.0",
        Severity = severity,
        IsReachable = reachable,
        ReachabilityTier = tier
    };
/// <summary>
/// Simplified scoring: a severity base score multiplied by a reachability factor and a
/// reachability-tier factor.
/// </summary>
/// <param name="finding">The finding to score.</param>
/// <returns>The product <c>base * reachability * tier</c>.</returns>
private static double ScoreFinding(SmartDiffVuln finding)
{
    double severityScore;
    switch (finding.Severity)
    {
        case "CRITICAL":
            severityScore = 10.0;
            break;
        case "HIGH":
            severityScore = 7.5;
            break;
        case "MEDIUM":
            severityScore = 5.0;
            break;
        case "LOW":
            severityScore = 2.5;
            break;
        default:
            severityScore = 1.0;
            break;
    }

    double reachabilityFactor = 1.0;
    if (finding.IsReachable)
    {
        reachabilityFactor = 1.5;
    }

    double tierFactor;
    switch (finding.ReachabilityTier)
    {
        case "executed":
            tierFactor = 1.5;
            break;
        case "called":
            tierFactor = 1.2;
            break;
        case "imported":
            tierFactor = 1.0;
            break;
        default:
            tierFactor = 0.8;
            break;
    }

    return severityScore * reachabilityFactor * tierFactor;
}
/// <summary>
/// Scores every finding in order.
/// </summary>
/// <param name="findings">Findings to score.</param>
/// <returns>One score per finding, in input order.</returns>
private static List<double> ScoreBatch(List<SmartDiffVuln> findings) =>
    (from finding in findings
     select ScoreFinding(finding)).ToList();
#endregion
#region Test Models
/// <summary>
/// One scan: the packages found and the vulnerabilities reported against them.
/// </summary>
private sealed class SmartDiffScanData
{
    /// <summary>Packages in the scan; empty unless set at initialization.</summary>
    public List<SmartDiffPackage> Packages { get; init; } = new List<SmartDiffPackage>();

    /// <summary>Vulnerabilities in the scan; empty unless set at initialization.</summary>
    public List<SmartDiffVuln> Vulnerabilities { get; init; } = new List<SmartDiffVuln>();
}
/// <summary>
/// A package entry in a scan.
/// </summary>
private sealed class SmartDiffPackage
{
    /// <summary>Package name.</summary>
    public required string Name { get; init; }

    /// <summary>Package version string.</summary>
    public required string Version { get; init; }

    /// <summary>Ecosystem label; the generator in this file emits "npm", "nuget" or "pypi".</summary>
    public required string Ecosystem { get; init; }
}
/// <summary>
/// A vulnerability finding. The diff helpers in this file identify it by the
/// (<see cref="CveId"/>, <see cref="Package"/>, <see cref="Version"/>) triple.
/// </summary>
private sealed class SmartDiffVuln
{
    /// <summary>CVE identifier.</summary>
    public required string CveId { get; init; }

    /// <summary>Name of the affected package.</summary>
    public required string Package { get; init; }

    /// <summary>Version of the affected package.</summary>
    public required string Version { get; init; }

    /// <summary>Severity label; scoring recognizes "CRITICAL", "HIGH", "MEDIUM" and "LOW".</summary>
    public required string Severity { get; set; }

    /// <summary>Whether the vulnerability is reachable; settable so generated data can be adjusted.</summary>
    public bool IsReachable { get; set; }

    /// <summary>Reachability tier label; scoring recognizes "executed", "called" and "imported".</summary>
    public required string ReachabilityTier { get; set; }
}
/// <summary>
/// Result of diffing two scans.
/// </summary>
private sealed class SmartDiffResult
{
    /// <summary>Vulnerabilities present only in the current scan.</summary>
    public List<SmartDiffVuln> Added { get; init; } = new List<SmartDiffVuln>();

    /// <summary>Vulnerabilities present only in the baseline scan.</summary>
    public List<SmartDiffVuln> Removed { get; init; } = new List<SmartDiffVuln>();

    /// <summary>
    /// Vulnerabilities present in both scans whose reachability differs. Settable because
    /// it is filled in after the added/removed diff has been built.
    /// </summary>
    public List<SmartDiffVuln> ReachabilityFlips { get; set; } = new List<SmartDiffVuln>();
}
#endregion
}