save work
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
using System.Globalization;
|
||||
using System.IO.Compression;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.Scanner.Storage.Epss.Perf;
|
||||
|
||||
/// <summary>
/// A synthetic, gzip-compressed EPSS CSV dataset together with the size of its uncompressed payload.
/// </summary>
/// <param name="GzipBytes">The complete gzip stream, including the final deflate block and trailer.</param>
/// <param name="DecompressedBytes">UTF-8 byte count of the uncompressed CSV, header lines included.</param>
internal sealed record GeneratedEpssDataset(byte[] GzipBytes, long DecompressedBytes);

/// <summary>
/// Produces deterministic, pseudo-random EPSS CSV datasets for the ingest benchmark.
/// The same (<c>modelDate</c>, <c>rowCount</c>, <c>seed</c>) triple always yields the same CSV text,
/// independent of the host culture.
/// </summary>
internal static class EpssDatasetGenerator
{
    // Upper bound for the initial buffer so that huge row counts do not pre-allocate unbounded memory.
    private const int MaxInitialCapacityBytes = 64 * 1024 * 1024;

    // Rough per-row size hint used only to pre-size the output buffer.
    private const int CapacityHintBytesPerRow = 48;

    /// <summary>
    /// Generates a gzip-compressed CSV consisting of a <c># EPSS model ...</c> comment line, a
    /// <c>cve,epss,percentile</c> header and <paramref name="rowCount"/> data rows.
    /// </summary>
    /// <param name="modelDate">Date written into the leading comment line (version tag and published date).</param>
    /// <param name="rowCount">Number of data rows to emit; must be at least 1.</param>
    /// <param name="seed">Seed for the pseudo-random score/percentile values.</param>
    /// <returns>The finished gzip bytes and the exact size of the uncompressed CSV.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="rowCount"/> is less than 1.</exception>
    public static GeneratedEpssDataset GenerateGzip(DateOnly modelDate, int rowCount, ulong seed)
    {
        if (rowCount < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(rowCount), rowCount, "Row count must be positive.");
        }

        // Multiply as long: rowCount * 48 overflows int for large row counts and would
        // hand MemoryStream a negative capacity.
        var initialCapacity = (int)Math.Min(MaxInitialCapacityBytes, (long)rowCount * CapacityHintBytesPerRow);

        using var raw = new MemoryStream(initialCapacity);
        long decompressedBytes = 0;

        using (var gzip = new GZipStream(raw, CompressionLevel.SmallestSize, leaveOpen: true))
        using (var writer = new StreamWriter(gzip, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false), bufferSize: 64 * 1024, leaveOpen: true))
        {
            writer.NewLine = "\n";

            // Format with the invariant culture so the output never depends on the host's calendar.
            var header = string.Create(
                CultureInfo.InvariantCulture,
                $"# EPSS model v{modelDate:yyyy.MM.dd} published {modelDate:yyyy-MM-dd}\ncve,epss,percentile\n");
            decompressedBytes += Encoding.UTF8.GetByteCount(header);
            writer.Write(header);

            var prng = new XorShift64Star(seed);

            for (var i = 0; i < rowCount; i++)
            {
                // Draw order (score first, then percentile) is part of the deterministic output.
                var score = prng.NextDouble();
                var percentile = prng.NextDouble();

                // Keep formatting deterministic and compact.
                var line = string.Create(
                    CultureInfo.InvariantCulture,
                    $"CVE-2024-{i + 1:D7},{score:0.000000},{percentile:0.000000}\n");
                decompressedBytes += Encoding.UTF8.GetByteCount(line);
                writer.Write(line);
            }
        }

        // The writer and the gzip stream are disposed at this point, so the final deflate block and
        // the gzip trailer have been written; only now is the buffer a complete gzip file.
        return new GeneratedEpssDataset(raw.ToArray(), decompressedBytes);
    }

    /// <summary>
    /// Minimal xorshift64* pseudo-random generator; used instead of <see cref="Random"/> so the
    /// sequence is fully defined by this file.
    /// </summary>
    private sealed class XorShift64Star
    {
        private ulong _state;

        /// <summary>Initialises the generator; a zero seed is replaced by a fixed non-zero constant.</summary>
        public XorShift64Star(ulong seed)
        {
            // An all-zero state would stay zero forever under the xor/shift steps below.
            _state = seed == 0 ? 0x9E3779B97F4A7C15UL : seed;
        }

        /// <summary>Advances the state and returns the next 64-bit output.</summary>
        private ulong NextUInt64()
        {
            // xorshift64*
            var x = _state;
            x ^= x >> 12;
            x ^= x << 25;
            x ^= x >> 27;
            _state = x;
            return x * 0x2545F4914F6CDD1DUL;
        }

        /// <summary>Returns the next value in [0, 1).</summary>
        public double NextDouble()
        {
            // Build a double in [0,1) with 53 bits of precision.
            var value = NextUInt64() >> 11;
            return value * (1.0 / (1UL << 53));
        }
    }
}
|
||||
|
||||
@@ -0,0 +1,282 @@
|
||||
using System.Diagnostics;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Infrastructure.Postgres.Testing;
|
||||
using StellaOps.Scanner.Storage;
|
||||
using StellaOps.Scanner.Storage.Epss.Perf;
|
||||
using StellaOps.Scanner.Storage.Epss;
|
||||
using StellaOps.Scanner.Storage.Postgres;
|
||||
|
||||
using Testcontainers.PostgreSql;
|
||||
|
||||
// Entry point: parse the command line, run the benchmark once, and write the report as JSON.
var perfOptions = PerfOptions.Parse(args);

// Create the destination folder up front; the directory part may be null or empty
// (for example when the output path is a bare file name), in which case there is nothing to create.
if (Path.GetDirectoryName(perfOptions.OutputPath) is { } targetFolder && !string.IsNullOrWhiteSpace(targetFolder))
{
    Directory.CreateDirectory(targetFolder);
}

var serializerOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    WriteIndented = true
};

var perfResult = await RunAsync(perfOptions, CancellationToken.None).ConfigureAwait(false);
var payload = JsonSerializer.Serialize(perfResult, serializerOptions);

// Write UTF-8 without a byte-order mark.
var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
await File.WriteAllTextAsync(perfOptions.OutputPath, payload, utf8NoBom).ConfigureAwait(false);
|
||||
|
||||
// Runs one end-to-end EPSS ingest benchmark and returns its measurements:
//   1. generate the synthetic gzip dataset,
//   2. start a PostgreSQL container and create a fixture against it,
//   3. run the Scanner.Storage migrations,
//   4. register an import run, stream the parsed rows into the repository, mark the run succeeded.
// Each phase is timed with its own stopwatch; "Total" stops before the fixture is disposed.
static async Task<EpssIngestPerfResult> RunAsync(PerfOptions options, CancellationToken cancellationToken)
{
    // Dates in the report are machine-readable; format them with the invariant culture so the
    // host culture's calendar cannot change the output.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var overallStopwatch = Stopwatch.StartNew();

    var datasetStopwatch = Stopwatch.StartNew();
    var dataset = EpssDatasetGenerator.GenerateGzip(
        options.ModelDate,
        options.RowCount,
        options.Seed);
    datasetStopwatch.Stop();

    var compressedSha256 = "sha256:" + Convert.ToHexString(SHA256.HashData(dataset.GzipBytes)).ToLowerInvariant();

    var containerStopwatch = Stopwatch.StartNew();
    await using var container = new PostgreSqlBuilder()
        .WithImage(options.PostgresImage)
        .Build();
    await container.StartAsync(cancellationToken).ConfigureAwait(false);
    containerStopwatch.Stop();

    var fixture = PostgresFixtureFactory.CreateRandom(container.GetConnectionString(), NullLogger.Instance);
    await fixture.InitializeAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        var migrationsStopwatch = Stopwatch.StartNew();
        await fixture.RunMigrationsFromAssemblyAsync(
            typeof(ScannerStorageOptions).Assembly,
            moduleName: "Scanner.Storage",
            resourcePrefix: null,
            cancellationToken: cancellationToken).ConfigureAwait(false);
        migrationsStopwatch.Stop();

        var storageOptions = new ScannerStorageOptions
        {
            Postgres = new StellaOps.Infrastructure.Postgres.Options.PostgresOptions
            {
                ConnectionString = container.GetConnectionString(),
                SchemaName = fixture.SchemaName
            }
        };

        // NOTE(review): the data source is never disposed here — confirm whether ScannerDataSource
        // owns resources that should be released before the container is torn down.
        var dataSource = new ScannerDataSource(
            Options.Create(storageOptions),
            NullLogger<ScannerDataSource>.Instance);

        var repository = new PostgresEpssRepository(dataSource);
        var parser = new EpssCsvStreamParser();

        var retrievedAt = DateTimeOffset.UtcNow;
        var importRun = await repository.BeginImportAsync(
            options.ModelDate,
            sourceUri: $"perf://generated?rows={options.RowCount}",
            retrievedAtUtc: retrievedAt,
            fileSha256: compressedSha256,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        // The timed section covers creating the parse session and writing the snapshot.
        var writeStopwatch = Stopwatch.StartNew();
        await using var parseSession = parser.ParseGzip(new MemoryStream(dataset.GzipBytes, writable: false));
        var writeResult = await repository.WriteSnapshotAsync(
            importRun.ImportRunId,
            options.ModelDate,
            updatedAtUtc: retrievedAt,
            rows: parseSession,
            cancellationToken: cancellationToken).ConfigureAwait(false);
        writeStopwatch.Stop();

        await repository.MarkImportSucceededAsync(
            importRun.ImportRunId,
            rowCount: writeResult.RowCount,
            decompressedSha256: parseSession.DecompressedSha256,
            modelVersionTag: parseSession.ModelVersionTag,
            publishedDate: parseSession.PublishedDate,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        overallStopwatch.Stop();

        // Build the report while the parse session is still alive; it is disposed when this
        // block exits, before the fixture.
        return new EpssIngestPerfResult
        {
            Tool = new PerfToolInfo
            {
                Name = "StellaOps.Scanner.Storage.Epss.Perf",
                Schema = 1
            },
            Dataset = new PerfDatasetInfo
            {
                ModelDate = options.ModelDate.ToString("yyyy-MM-dd", invariant),
                Rows = options.RowCount,
                Seed = options.Seed,
                CompressedSha256 = compressedSha256,
                DecompressedSha256 = parseSession.DecompressedSha256,
                ModelVersionTag = parseSession.ModelVersionTag,
                PublishedDate = parseSession.PublishedDate?.ToString("yyyy-MM-dd", invariant),
                CompressedBytes = dataset.GzipBytes.LongLength,
                DecompressedBytes = dataset.DecompressedBytes
            },
            Environment = new PerfEnvironmentInfo
            {
                Os = Environment.OSVersion.ToString(),
                Framework = System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription,
                ProcessArchitecture = System.Runtime.InteropServices.RuntimeInformation.ProcessArchitecture.ToString(),
                PostgresImage = options.PostgresImage
            },
            TimingsMs = new PerfTimingInfo
            {
                DatasetGenerate = datasetStopwatch.ElapsedMilliseconds,
                ContainerStart = containerStopwatch.ElapsedMilliseconds,
                Migrations = migrationsStopwatch.ElapsedMilliseconds,
                WriteSnapshot = writeStopwatch.ElapsedMilliseconds,
                Total = overallStopwatch.ElapsedMilliseconds
            },
            Result = new PerfWriteResultInfo
            {
                ImportRunId = importRun.ImportRunId,
                RowCount = writeResult.RowCount,
                DistinctCveCount = writeResult.DistinctCveCount
            }
        };
    }
    finally
    {
        // Dispose the fixture on failure as well as on success.
        await fixture.DisposeAsync().ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Command-line options of the EPSS ingest perf harness.
/// </summary>
/// <param name="ModelDate">EPSS model date used for the generated dataset and the import run.</param>
/// <param name="RowCount">Number of data rows to generate; always at least 1.</param>
/// <param name="Seed">Seed for the dataset's pseudo-random values.</param>
/// <param name="PostgresImage">Container image used for the PostgreSQL instance.</param>
/// <param name="OutputPath">Path of the JSON report to write.</param>
internal sealed record PerfOptions(DateOnly ModelDate, int RowCount, ulong Seed, string PostgresImage, string OutputPath)
{
    /// <summary>
    /// Parses the harness command line. Option names are matched case-insensitively. Arguments that
    /// are not recognised, and value-taking options that appear last without a value, are ignored.
    /// <c>--help</c> / <c>-h</c> prints usage and terminates the process with exit code 0.
    /// </summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>The parsed options, with defaults for everything not specified.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The row count is less than 1.</exception>
    /// <exception cref="ArgumentException">The Postgres image or output path is empty or whitespace.</exception>
    /// <exception cref="FormatException">A numeric or date value cannot be parsed.</exception>
    public static PerfOptions Parse(string[] args)
    {
        // Command-line values are machine-style input: parse them with the invariant culture so
        // that e.g. "2025-12-19" is not read in the host culture's calendar.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var modelDate = DateOnly.FromDateTime(DateTime.UtcNow.Date);
        var rowCount = 310_000;
        ulong seed = 0x5EED_2025_12_19;
        var postgresImage = "postgres:16-alpine";
        var outputPath = Path.Combine("bench", "results", "epss-ingest-perf.json");

        for (var i = 0; i < args.Length; i++)
        {
            var arg = args[i];
            var hasValue = i + 1 < args.Length;

            if (hasValue && IsOption(arg, "--rows"))
            {
                rowCount = int.Parse(args[++i], invariant);
                continue;
            }

            if (hasValue && IsOption(arg, "--seed"))
            {
                seed = Convert.ToUInt64(args[++i], 16);
                continue;
            }

            if (hasValue && IsOption(arg, "--model-date"))
            {
                modelDate = DateOnly.Parse(args[++i], invariant);
                continue;
            }

            if (hasValue && IsOption(arg, "--postgres-image"))
            {
                postgresImage = args[++i];
                continue;
            }

            if (hasValue && IsOption(arg, "--output"))
            {
                outputPath = args[++i];
                continue;
            }

            if (IsOption(arg, "--help") || IsOption(arg, "-h"))
            {
                Console.WriteLine("""
                    Usage:
                    dotnet run --project src/Scanner/__Benchmarks/StellaOps.Scanner.Storage.Epss.Perf -c Release -- --rows 310000 --output bench/results/epss-ingest-perf.json

                    Options:
                    --rows <int> Row count (default: 310000)
                    --seed <hex> 64-bit seed in hex without 0x (default: 5EED20251219)
                    --model-date <date> Model date (YYYY-MM-DD, default: today)
                    --postgres-image <str> Postgres image (default: postgres:16-alpine)
                    --output <path> Output JSON path (default: bench/results/epss-ingest-perf.json)
                    """);
                Environment.Exit(0);
            }
        }

        if (rowCount < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(rowCount), rowCount, "Row count must be positive.");
        }

        if (string.IsNullOrWhiteSpace(postgresImage))
        {
            throw new ArgumentException("Postgres image must be provided.", nameof(postgresImage));
        }

        if (string.IsNullOrWhiteSpace(outputPath))
        {
            throw new ArgumentException("Output path must be provided.", nameof(outputPath));
        }

        return new PerfOptions(modelDate, rowCount, seed, postgresImage, outputPath);
    }

    // Case-insensitive, ordinal comparison of an argument against an option name.
    private static bool IsOption(string arg, string name)
        => string.Equals(arg, name, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Root object of the perf report. It groups tool identification, dataset description,
/// host environment, phase timings and the write outcome.
/// </summary>
internal sealed record EpssIngestPerfResult
{
    /// <summary>Identification of the tool that produced the report.</summary>
    public required PerfToolInfo Tool { get; init; }

    /// <summary>Description of the dataset that was ingested.</summary>
    public required PerfDatasetInfo Dataset { get; init; }

    /// <summary>Host and database environment the run executed in.</summary>
    public required PerfEnvironmentInfo Environment { get; init; }

    /// <summary>Per-phase timings, in milliseconds.</summary>
    public required PerfTimingInfo TimingsMs { get; init; }

    /// <summary>Outcome reported for the snapshot write.</summary>
    public required PerfWriteResultInfo Result { get; init; }
}
|
||||
|
||||
/// <summary>
/// Identifies the tool that wrote a perf report and the version of the report layout.
/// </summary>
internal sealed record PerfToolInfo
{
    /// <summary>Name of the producing tool.</summary>
    public required string Name { get; init; }

    /// <summary>Version number of the report schema.</summary>
    public required int Schema { get; init; }
}
|
||||
|
||||
/// <summary>
/// Describes the dataset used for a perf run: how it was generated, its digests and its sizes.
/// </summary>
internal sealed record PerfDatasetInfo
{
    /// <summary>Model date of the dataset, as text.</summary>
    public required string ModelDate { get; init; }

    /// <summary>Number of data rows requested for the dataset.</summary>
    public required int Rows { get; init; }

    /// <summary>Seed the dataset was generated from.</summary>
    public required ulong Seed { get; init; }

    /// <summary>Digest of the compressed dataset bytes.</summary>
    public required string CompressedSha256 { get; init; }

    /// <summary>Digest of the decompressed content, when available.</summary>
    public string? DecompressedSha256 { get; init; }

    /// <summary>Model version tag, when available.</summary>
    public string? ModelVersionTag { get; init; }

    /// <summary>Published date, as text, when available.</summary>
    public string? PublishedDate { get; init; }

    /// <summary>Size of the compressed dataset in bytes.</summary>
    public required long CompressedBytes { get; init; }

    /// <summary>Size of the decompressed dataset in bytes.</summary>
    public required long DecompressedBytes { get; init; }
}
|
||||
|
||||
/// <summary>
/// Describes the environment a perf run executed in.
/// </summary>
internal sealed record PerfEnvironmentInfo
{
    /// <summary>Operating system description.</summary>
    public required string Os { get; init; }

    /// <summary>Runtime framework description.</summary>
    public required string Framework { get; init; }

    /// <summary>Architecture of the running process.</summary>
    public required string ProcessArchitecture { get; init; }

    /// <summary>PostgreSQL container image used for the run.</summary>
    public required string PostgresImage { get; init; }
}
|
||||
|
||||
/// <summary>
/// Timings of the individual phases of a perf run. The enclosing report exposes this
/// object as <c>TimingsMs</c>, i.e. the values are milliseconds.
/// </summary>
internal sealed record PerfTimingInfo
{
    /// <summary>Time spent generating the dataset.</summary>
    public required long DatasetGenerate { get; init; }

    /// <summary>Time spent starting the database container.</summary>
    public required long ContainerStart { get; init; }

    /// <summary>Time spent running migrations.</summary>
    public required long Migrations { get; init; }

    /// <summary>Time spent writing the snapshot.</summary>
    public required long WriteSnapshot { get; init; }

    /// <summary>Overall time of the run.</summary>
    public required long Total { get; init; }
}
|
||||
|
||||
/// <summary>
/// Outcome of the snapshot write performed during a perf run.
/// </summary>
internal sealed record PerfWriteResultInfo
{
    /// <summary>Identifier of the import run the snapshot was written under.</summary>
    public required Guid ImportRunId { get; init; }

    /// <summary>Number of rows reported by the write.</summary>
    public required int RowCount { get; init; }

    /// <summary>Number of distinct CVEs reported by the write.</summary>
    public required int DistinctCveCount { get; init; }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
# EPSS Ingest Perf Harness
|
||||
|
||||
Sprint: `SPRINT_3410_0001_0001_epss_ingestion_storage` (Task `EPSS-3410-013A` / `EPSS-3410-014`)
|
||||
|
||||
## Local Run
|
||||
|
||||
Prereqs:
|
||||
- Docker available to Testcontainers
|
||||
- .NET 10 SDK (preview, per repo `global.json`)
|
||||
|
||||
Run (310k rows, default):
|
||||
|
||||
```bash
|
||||
dotnet run --project src/Scanner/__Benchmarks/StellaOps.Scanner.Storage.Epss.Perf/StellaOps.Scanner.Storage.Epss.Perf.csproj -c Release -- --rows 310000 --output bench/results/epss-ingest-perf.json
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--rows <int>`: dataset rows (default: `310000`)
|
||||
- `--seed <hex>`: 64-bit seed in hex without `0x` (default: `5EED20251219`)
|
||||
- `--model-date <YYYY-MM-DD>`: model date (default: today UTC)
|
||||
- `--postgres-image <image>`: Postgres image (default: `postgres:16-alpine`)
|
||||
- `--output <path>`: output JSON path (default: `bench/results/epss-ingest-perf.json`)
|
||||
|
||||
## Output Format
|
||||
|
||||
The harness writes a single JSON file:
|
||||
- `tool`: `{ name, schema }`
|
||||
- `dataset`: `{ modelDate, rows, seed, compressedSha256, decompressedSha256, modelVersionTag, publishedDate, compressedBytes, decompressedBytes }`
|
||||
- `environment`: `{ os, framework, processArchitecture, postgresImage }`
|
||||
- `timingsMs`: `{ datasetGenerate, containerStart, migrations, writeSnapshot, total }`
|
||||
- `result`: `{ importRunId, rowCount, distinctCveCount }`
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Testcontainers.PostgreSql" Version="4.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\__Libraries\StellaOps.Scanner.Storage\StellaOps.Scanner.Storage.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user