save work

This commit is contained in:
StellaOps Bot
2025-12-19 07:28:23 +02:00
parent 6410a6d082
commit 2eafe98d44
97 changed files with 5040 additions and 1443 deletions

View File

@@ -0,0 +1,82 @@
using System.Globalization;
using System.IO.Compression;
using System.Text;
namespace StellaOps.Scanner.Storage.Epss.Perf;
/// <summary>
/// A synthetic EPSS CSV dataset in gzip form, as produced by <see cref="EpssDatasetGenerator"/>.
/// </summary>
/// <param name="GzipBytes">The complete gzip member: header, deflate data and CRC32/ISIZE trailer.</param>
/// <param name="DecompressedBytes">
/// UTF-8 byte count of the CSV payload before compression, including the leading
/// comment line and the column header line.
/// </param>
internal sealed record GeneratedEpssDataset(byte[] GzipBytes, long DecompressedBytes);

/// <summary>
/// Generates deterministic, gzip-compressed EPSS-style CSV datasets for performance runs.
/// The same (<c>modelDate</c>, <c>rowCount</c>, <c>seed</c>) triple always yields the same CSV text.
/// </summary>
internal static class EpssDatasetGenerator
{
    // Upper bound for the initial buffer; the stream still grows on demand beyond it.
    private const int MaxInitialCapacityBytes = 64 * 1024 * 1024;

    // Per-row sizing hint used only to pre-size the output buffer.
    private const int CapacityBytesPerRow = 48;

    private const int WriterBufferSize = 64 * 1024;

    /// <summary>
    /// Builds a gzip-compressed CSV consisting of a <c># EPSS model ...</c> comment line,
    /// a <c>cve,epss,percentile</c> header line and <paramref name="rowCount"/> data rows.
    /// </summary>
    /// <param name="modelDate">Date used for both the version tag (<c>vyyyy.MM.dd</c>) and the published date.</param>
    /// <param name="rowCount">Number of data rows to emit; must be at least 1.</param>
    /// <param name="seed">Seed for the pseudo-random score/percentile values.</param>
    /// <returns>The compressed bytes together with the uncompressed payload size.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="rowCount"/> is less than 1.</exception>
    public static GeneratedEpssDataset GenerateGzip(DateOnly modelDate, int rowCount, ulong seed)
    {
        if (rowCount < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(rowCount), rowCount, "Row count must be positive.");
        }

        // Compute the hint in 64-bit arithmetic: rowCount * 48 overflows int for large row
        // counts and would hand MemoryStream a negative capacity.
        var initialCapacity = (int)Math.Min(MaxInitialCapacityBytes, (long)rowCount * CapacityBytesPerRow);
        var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
        long decompressedBytes = 0;

        using var raw = new MemoryStream(initialCapacity);
        using (var gzip = new GZipStream(raw, CompressionLevel.SmallestSize, leaveOpen: true))
        using (var writer = new StreamWriter(gzip, utf8NoBom, bufferSize: WriterBufferSize, leaveOpen: true))
        {
            writer.NewLine = "\n";

            // Invariant culture keeps the tag and date Gregorian regardless of the host locale.
            var header = string.Create(
                CultureInfo.InvariantCulture,
                $"# EPSS model v{modelDate:yyyy.MM.dd} published {modelDate:yyyy-MM-dd}\ncve,epss,percentile\n");
            decompressedBytes += utf8NoBom.GetByteCount(header);
            writer.Write(header);

            var prng = new XorShift64Star(seed);
            for (var i = 0; i < rowCount; i++)
            {
                // Draw order (score first, then percentile) is part of the deterministic output.
                var score = prng.NextDouble();
                var percentile = prng.NextDouble();

                // Keep formatting deterministic and compact.
                var line = string.Create(
                    CultureInfo.InvariantCulture,
                    $"CVE-2024-{(i + 1):D7},{score:0.000000},{percentile:0.000000}\n");
                decompressedBytes += utf8NoBom.GetByteCount(line);
                writer.Write(line);
            }
        }

        // Snapshot the buffer only after the writer and the GZipStream are disposed:
        // disposing the GZipStream is what emits the final deflate block and the gzip
        // trailer. Flushing alone leaves a truncated gzip member.
        return new GeneratedEpssDataset(raw.ToArray(), decompressedBytes);
    }

    /// <summary>
    /// Minimal xorshift64* pseudo-random generator; not suitable for cryptographic use.
    /// </summary>
    private sealed class XorShift64Star
    {
        private ulong _state;

        /// <summary>Creates the generator; a zero seed is replaced because zero is a fixed point of xorshift.</summary>
        public XorShift64Star(ulong seed)
        {
            _state = seed == 0 ? 0x9E3779B97F4A7C15UL : seed;
        }

        private ulong NextUInt64()
        {
            // xorshift64*
            var x = _state;
            x ^= x >> 12;
            x ^= x << 25;
            x ^= x >> 27;
            _state = x;
            return x * 0x2545F4914F6CDD1DUL;
        }

        /// <summary>Returns the next value in [0, 1).</summary>
        public double NextDouble()
        {
            // Build a double in [0,1) with 53 bits of precision.
            var value = NextUInt64() >> 11;
            return value * (1.0 / (1UL << 53));
        }
    }
}

View File

@@ -0,0 +1,282 @@
using System.Diagnostics;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Testing;
using StellaOps.Scanner.Storage;
using StellaOps.Scanner.Storage.Epss.Perf;
using StellaOps.Scanner.Storage.Epss;
using StellaOps.Scanner.Storage.Postgres;
using Testcontainers.PostgreSql;
// Entry point: parse the command line, make sure the output directory exists,
// run the ingest benchmark and persist the result as indented camelCase JSON
// encoded as UTF-8 without a byte-order mark.
var perfOptions = PerfOptions.Parse(args);

var targetDirectory = Path.GetDirectoryName(perfOptions.OutputPath);
if (!string.IsNullOrWhiteSpace(targetDirectory))
{
    Directory.CreateDirectory(targetDirectory);
}

var serializerOptions = new JsonSerializerOptions
{
    WriteIndented = true,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

var perfResult = await RunAsync(perfOptions, CancellationToken.None).ConfigureAwait(false);
var payload = JsonSerializer.Serialize(perfResult, serializerOptions);

await File.WriteAllTextAsync(perfOptions.OutputPath, payload, utf8NoBom).ConfigureAwait(false);
// Runs one end-to-end ingest measurement:
//   1. generate the synthetic gzip dataset,
//   2. start a throwaway Postgres container,
//   3. create a random-schema fixture and run the Scanner.Storage migrations,
//   4. register an import run, stream the parsed rows into the repository and mark the run succeeded,
//   5. return the collected timings and metadata.
// The fixture is disposed even when a step throws; the container is disposed by its `await using`.
static async Task<EpssIngestPerfResult> RunAsync(PerfOptions options, CancellationToken cancellationToken)
{
    // Dates end up in machine-readable JSON, so never format them with the host culture
    // (non-Gregorian default calendars would change the year).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var overallStopwatch = Stopwatch.StartNew();

    var datasetStopwatch = Stopwatch.StartNew();
    var dataset = EpssDatasetGenerator.GenerateGzip(
        options.ModelDate,
        options.RowCount,
        options.Seed);
    datasetStopwatch.Stop();

    var compressedSha256 = "sha256:" + Convert.ToHexString(SHA256.HashData(dataset.GzipBytes)).ToLowerInvariant();

    var containerStopwatch = Stopwatch.StartNew();
    await using var container = new PostgreSqlBuilder()
        .WithImage(options.PostgresImage)
        .Build();
    await container.StartAsync(cancellationToken).ConfigureAwait(false);
    containerStopwatch.Stop();

    var fixture = PostgresFixtureFactory.CreateRandom(container.GetConnectionString(), NullLogger.Instance);
    await fixture.InitializeAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        var migrationsStopwatch = Stopwatch.StartNew();
        await fixture.RunMigrationsFromAssemblyAsync(
            typeof(ScannerStorageOptions).Assembly,
            moduleName: "Scanner.Storage",
            resourcePrefix: null,
            cancellationToken: cancellationToken).ConfigureAwait(false);
        migrationsStopwatch.Stop();

        var storageOptions = new ScannerStorageOptions
        {
            Postgres = new StellaOps.Infrastructure.Postgres.Options.PostgresOptions
            {
                ConnectionString = container.GetConnectionString(),
                SchemaName = fixture.SchemaName
            }
        };

        // NOTE(review): ScannerDataSource is never disposed here; confirm whether it owns
        // pooled connections that should be released before the container goes away.
        var dataSource = new ScannerDataSource(
            Options.Create(storageOptions),
            NullLogger<ScannerDataSource>.Instance);

        var repository = new PostgresEpssRepository(dataSource);
        var parser = new EpssCsvStreamParser();

        var retrievedAt = DateTimeOffset.UtcNow;
        var importRun = await repository.BeginImportAsync(
            options.ModelDate,
            sourceUri: $"perf://generated?rows={options.RowCount}",
            retrievedAtUtc: retrievedAt,
            fileSha256: compressedSha256,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        // The write timing covers gzip parsing as well, because rows are streamed from the
        // parse session while the snapshot is being written.
        var writeStopwatch = Stopwatch.StartNew();
        await using var parseSession = parser.ParseGzip(new MemoryStream(dataset.GzipBytes, writable: false));
        var writeResult = await repository.WriteSnapshotAsync(
            importRun.ImportRunId,
            options.ModelDate,
            updatedAtUtc: retrievedAt,
            rows: parseSession,
            cancellationToken: cancellationToken).ConfigureAwait(false);
        writeStopwatch.Stop();

        await repository.MarkImportSucceededAsync(
            importRun.ImportRunId,
            rowCount: writeResult.RowCount,
            decompressedSha256: parseSession.DecompressedSha256,
            modelVersionTag: parseSession.ModelVersionTag,
            publishedDate: parseSession.PublishedDate,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        // Total deliberately excludes fixture/container teardown.
        overallStopwatch.Stop();

        return new EpssIngestPerfResult
        {
            Tool = new PerfToolInfo
            {
                Name = "StellaOps.Scanner.Storage.Epss.Perf",
                Schema = 1
            },
            Dataset = new PerfDatasetInfo
            {
                ModelDate = options.ModelDate.ToString("yyyy-MM-dd", invariant),
                Rows = options.RowCount,
                Seed = options.Seed,
                CompressedSha256 = compressedSha256,
                DecompressedSha256 = parseSession.DecompressedSha256,
                ModelVersionTag = parseSession.ModelVersionTag,
                PublishedDate = parseSession.PublishedDate?.ToString("yyyy-MM-dd", invariant),
                CompressedBytes = dataset.GzipBytes.LongLength,
                DecompressedBytes = dataset.DecompressedBytes
            },
            Environment = new PerfEnvironmentInfo
            {
                Os = Environment.OSVersion.ToString(),
                Framework = System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription,
                ProcessArchitecture = System.Runtime.InteropServices.RuntimeInformation.ProcessArchitecture.ToString(),
                PostgresImage = options.PostgresImage
            },
            TimingsMs = new PerfTimingInfo
            {
                DatasetGenerate = datasetStopwatch.ElapsedMilliseconds,
                ContainerStart = containerStopwatch.ElapsedMilliseconds,
                Migrations = migrationsStopwatch.ElapsedMilliseconds,
                WriteSnapshot = writeStopwatch.ElapsedMilliseconds,
                Total = overallStopwatch.ElapsedMilliseconds
            },
            Result = new PerfWriteResultInfo
            {
                ImportRunId = importRun.ImportRunId,
                RowCount = writeResult.RowCount,
                DistinctCveCount = writeResult.DistinctCveCount
            }
        };
    }
    finally
    {
        // Previously this ran only on the success path, so a failed run skipped fixture cleanup.
        await fixture.DisposeAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Command-line options of the EPSS ingest perf harness.
/// </summary>
/// <param name="ModelDate">EPSS model date of the generated dataset.</param>
/// <param name="RowCount">Number of CSV rows to generate; always at least 1 after <see cref="Parse"/>.</param>
/// <param name="Seed">Seed for the deterministic dataset generator.</param>
/// <param name="PostgresImage">Container image used for the throwaway Postgres instance.</param>
/// <param name="OutputPath">Path of the JSON result file.</param>
internal sealed record PerfOptions(DateOnly ModelDate, int RowCount, ulong Seed, string PostgresImage, string OutputPath)
{
    /// <summary>
    /// Parses the command line. Option names are matched case-insensitively; unrecognised
    /// arguments are ignored. <c>--help</c>/<c>-h</c> prints usage and terminates the process
    /// with exit code 0.
    /// </summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>The parsed options, with defaults for everything not supplied.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The row count is less than 1.</exception>
    /// <exception cref="ArgumentException">
    /// An option is the last argument and has no value, or the Postgres image or output path is blank.
    /// </exception>
    /// <exception cref="FormatException">A numeric, hexadecimal or date value cannot be parsed.</exception>
    public static PerfOptions Parse(string[] args)
    {
        // Values are machine-oriented (documented as plain integers, hex and YYYY-MM-DD),
        // so parse them independently of the host culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var modelDate = DateOnly.FromDateTime(DateTime.UtcNow.Date);
        var rowCount = 310_000;
        ulong seed = 0x5EED_2025_12_19;
        var postgresImage = "postgres:16-alpine";
        var outputPath = Path.Combine("bench", "results", "epss-ingest-perf.json");

        for (var i = 0; i < args.Length; i++)
        {
            var arg = args[i];

            if (IsOption(arg, "--rows"))
            {
                rowCount = int.Parse(TakeValue(args, ref i), invariant);
                continue;
            }

            if (IsOption(arg, "--seed"))
            {
                seed = Convert.ToUInt64(TakeValue(args, ref i), 16);
                continue;
            }

            if (IsOption(arg, "--model-date"))
            {
                modelDate = DateOnly.Parse(TakeValue(args, ref i), invariant);
                continue;
            }

            if (IsOption(arg, "--postgres-image"))
            {
                postgresImage = TakeValue(args, ref i);
                continue;
            }

            if (IsOption(arg, "--output"))
            {
                outputPath = TakeValue(args, ref i);
                continue;
            }

            if (IsOption(arg, "--help") || IsOption(arg, "-h"))
            {
                Console.WriteLine("""
                    Usage:
                    dotnet run --project src/Scanner/__Benchmarks/StellaOps.Scanner.Storage.Epss.Perf -c Release -- --rows 310000 --output bench/results/epss-ingest-perf.json
                    Options:
                    --rows <int> Row count (default: 310000)
                    --seed <hex> 64-bit seed in hex without 0x (default: 5EED20251219)
                    --model-date <date> Model date (YYYY-MM-DD, default: today)
                    --postgres-image <str> Postgres image (default: postgres:16-alpine)
                    --output <path> Output JSON path (default: bench/results/epss-ingest-perf.json)
                    """);
                Environment.Exit(0);
            }
        }

        if (rowCount < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(rowCount), rowCount, "Row count must be positive.");
        }

        if (string.IsNullOrWhiteSpace(postgresImage))
        {
            throw new ArgumentException("Postgres image must be provided.", nameof(postgresImage));
        }

        if (string.IsNullOrWhiteSpace(outputPath))
        {
            throw new ArgumentException("Output path must be provided.", nameof(outputPath));
        }

        return new PerfOptions(modelDate, rowCount, seed, postgresImage, outputPath);
    }

    // Case-insensitive option-name comparison.
    private static bool IsOption(string arg, string name) =>
        string.Equals(arg, name, StringComparison.OrdinalIgnoreCase);

    // Consumes and returns the value following the option at `index`. A trailing option
    // without a value used to be ignored silently, which made the benchmark run with
    // defaults the caller did not ask for; it is now reported.
    private static string TakeValue(string[] args, ref int index)
    {
        if (index + 1 >= args.Length)
        {
            throw new ArgumentException($"Missing value for option '{args[index]}'.", nameof(args));
        }

        return args[++index];
    }
}
/// <summary>
/// Root object of the perf report. The entry point serializes it to the output file as
/// indented JSON with camelCase property names.
/// </summary>
internal sealed record EpssIngestPerfResult
{
/// <summary>Identifies the tool that produced the report.</summary>
public required PerfToolInfo Tool { get; init; }
/// <summary>Describes the generated dataset that was ingested.</summary>
public required PerfDatasetInfo Dataset { get; init; }
/// <summary>Describes the host runtime and the Postgres image used for the run.</summary>
public required PerfEnvironmentInfo Environment { get; init; }
/// <summary>Stopwatch timings of the individual phases, in milliseconds.</summary>
public required PerfTimingInfo TimingsMs { get; init; }
/// <summary>Outcome reported by the repository for the snapshot write.</summary>
public required PerfWriteResultInfo Result { get; init; }
}
/// <summary>Identity of the tool that wrote the report.</summary>
internal sealed record PerfToolInfo
{
/// <summary>Tool name; the harness writes <c>StellaOps.Scanner.Storage.Epss.Perf</c>.</summary>
public required string Name { get; init; }
/// <summary>Integer schema marker; the harness writes <c>1</c> (presumably the report format version — confirm).</summary>
public required int Schema { get; init; }
}
/// <summary>Metadata about the synthetic dataset used for the run.</summary>
internal sealed record PerfDatasetInfo
{
/// <summary>Model date option, formatted as <c>yyyy-MM-dd</c>.</summary>
public required string ModelDate { get; init; }
/// <summary>Requested number of generated rows.</summary>
public required int Rows { get; init; }
/// <summary>Seed passed to the dataset generator.</summary>
public required ulong Seed { get; init; }
/// <summary>Lower-case hex SHA-256 of the gzip bytes, prefixed with <c>sha256:</c>.</summary>
public required string CompressedSha256 { get; init; }
/// <summary>Hash of the decompressed content as reported by the parse session; may be null.</summary>
public string? DecompressedSha256 { get; init; }
/// <summary>Model version tag as reported by the parse session; may be null.</summary>
public string? ModelVersionTag { get; init; }
/// <summary>Published date as reported by the parse session, formatted as <c>yyyy-MM-dd</c>; null when the session has none.</summary>
public string? PublishedDate { get; init; }
/// <summary>Length of the gzip byte array.</summary>
public required long CompressedBytes { get; init; }
/// <summary>Uncompressed size in bytes as reported by the dataset generator.</summary>
public required long DecompressedBytes { get; init; }
}
/// <summary>Host and database environment the run executed in.</summary>
internal sealed record PerfEnvironmentInfo
{
/// <summary>Text form of <c>Environment.OSVersion</c>.</summary>
public required string Os { get; init; }
/// <summary>Value of <c>RuntimeInformation.FrameworkDescription</c>.</summary>
public required string Framework { get; init; }
/// <summary>Text form of <c>RuntimeInformation.ProcessArchitecture</c>.</summary>
public required string ProcessArchitecture { get; init; }
/// <summary>Postgres container image the run was started with.</summary>
public required string PostgresImage { get; init; }
}
/// <summary>Phase timings in whole milliseconds, taken from <c>Stopwatch.ElapsedMilliseconds</c>.</summary>
internal sealed record PerfTimingInfo
{
/// <summary>Time spent generating the gzip dataset.</summary>
public required long DatasetGenerate { get; init; }
/// <summary>Time spent building and starting the Postgres container.</summary>
public required long ContainerStart { get; init; }
/// <summary>Time spent running the storage migrations.</summary>
public required long Migrations { get; init; }
/// <summary>Time spent opening the parse session and writing the snapshot.</summary>
public required long WriteSnapshot { get; init; }
/// <summary>Time from the start of the run until the import was marked succeeded; fixture disposal is not included.</summary>
public required long Total { get; init; }
}
/// <summary>Values copied from the import run and the repository's snapshot write result.</summary>
internal sealed record PerfWriteResultInfo
{
/// <summary>Identifier of the import run created for this measurement.</summary>
public required Guid ImportRunId { get; init; }
/// <summary>Row count reported by the snapshot write.</summary>
public required int RowCount { get; init; }
/// <summary>Distinct CVE count reported by the snapshot write.</summary>
public required int DistinctCveCount { get; init; }
}

View File

@@ -0,0 +1,32 @@
# EPSS Ingest Perf Harness
Sprint: `SPRINT_3410_0001_0001_epss_ingestion_storage` (Task `EPSS-3410-013A` / `EPSS-3410-014`)
## Local Run
Prereqs:
- Docker available to Testcontainers
- .NET 10 SDK (preview, per repo `global.json`)
Run (310k rows, default):
```bash
dotnet run --project src/Scanner/__Benchmarks/StellaOps.Scanner.Storage.Epss.Perf/StellaOps.Scanner.Storage.Epss.Perf.csproj -c Release -- --rows 310000 --output bench/results/epss-ingest-perf.json
```
Options:
- `--rows <int>`: dataset rows (default: `310000`)
- `--seed <hex>`: 64-bit seed in hex without `0x` (default: `5EED20251219`)
- `--model-date <YYYY-MM-DD>`: model date (default: today UTC)
- `--postgres-image <image>`: Postgres image (default: `postgres:16-alpine`)
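- `--output <path>`: output JSON path (default: `bench/results/epss-ingest-perf.json`, relative to the working directory; the parent directory is created if it does not exist)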
## Output Format
The harness writes a single JSON file:
- `tool`: `{ name, schema }`
- `dataset`: `{ modelDate, rows, seed, compressedSha256, decompressedSha256, modelVersionTag, publishedDate, compressedBytes, decompressedBytes }`
- `environment`: `{ os, framework, processArchitecture, postgresImage }`
- `timingsMs`: `{ datasetGenerate, containerStart, migrations, writeSnapshot, total }`
- `result`: `{ importRunId, rowCount, distinctCveCount }`

View File

@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console executable for the EPSS ingest perf harness. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<!-- The sources rely on implicit global usings (System, System.IO, System.Threading.Tasks, ...). -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Compiler warnings do not fail the build of this project. -->
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<!-- Provides PostgreSqlBuilder, used to start the throwaway Postgres container. -->
<PackageReference Include="Testcontainers.PostgreSql" Version="4.1.0" />
</ItemGroup>
<ItemGroup>
<!-- Storage library under measurement. -->
<ProjectReference Include="..\..\__Libraries\StellaOps.Scanner.Storage\StellaOps.Scanner.Storage.csproj" />
</ItemGroup>
</Project>