doctor enhancements, setup enhancements, UI functionality and design consolidation, test project fixes, product advisory attestation/Rekor and delta verification enhancements

This commit is contained in:
master
2026-01-19 09:02:59 +02:00
parent 8c4bf54aed
commit 17419ba7c4
809 changed files with 170738 additions and 12244 deletions

View File

@@ -0,0 +1,173 @@
namespace StellaOps.Scanner.Cache.LayerSbomCas;
/// <summary>
/// Content-addressable storage for per-layer SBOMs keyed by diffID.
/// Layer SBOMs are immutable (same diffID = same content = same SBOM),
/// so entries can be cached indefinitely; eviction is a space/TTL policy,
/// never a correctness concern.
/// </summary>
public interface ILayerSbomCas
{
    /// <summary>
    /// Stores a per-layer SBOM. Storing again under the same (diffId, format)
    /// key replaces the previous content.
    /// </summary>
    /// <param name="diffId">Layer diffID (sha256:...).</param>
    /// <param name="format">SBOM format (cyclonedx or spdx).</param>
    /// <param name="sbom">SBOM content.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task StoreAsync(string diffId, string format, string sbom, CancellationToken cancellationToken = default);
    /// <summary>
    /// Retrieves a per-layer SBOM.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="format">SBOM format.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>SBOM content or null if not found.</returns>
    Task<string?> GetAsync(string diffId, string format, CancellationToken cancellationToken = default);
    /// <summary>
    /// Checks if a per-layer SBOM exists without retrieving its content.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="format">SBOM format.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if exists.</returns>
    Task<bool> ExistsAsync(string diffId, string format, CancellationToken cancellationToken = default);
    /// <summary>
    /// Deletes a per-layer SBOM.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="format">SBOM format.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if an entry was deleted; false if none existed.</returns>
    Task<bool> DeleteAsync(string diffId, string format, CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets statistics about the CAS (entry counts, sizes, hit/miss counters).
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>CAS statistics.</returns>
    Task<LayerSbomCasStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default);
    /// <summary>
    /// Evicts entries older than the specified age.
    /// </summary>
    /// <param name="maxAge">Maximum age for entries.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Number of entries evicted.</returns>
    Task<int> EvictOlderThanAsync(TimeSpan maxAge, CancellationToken cancellationToken = default);
}
/// <summary>
/// Point-in-time snapshot of the Layer SBOM CAS: stored entries plus the
/// in-process hit/miss counters accumulated since startup.
/// </summary>
public sealed record LayerSbomCasStatistics
{
    /// <summary>Total number of stored entries.</summary>
    public long EntryCount { get; init; }

    /// <summary>Total stored size in bytes (compressed).</summary>
    public long TotalSizeBytes { get; init; }

    /// <summary>Number of entries stored in CycloneDX format.</summary>
    public long CycloneDxCount { get; init; }

    /// <summary>Number of entries stored in SPDX format.</summary>
    public long SpdxCount { get; init; }

    /// <summary>Timestamp of the oldest entry, if any entries exist.</summary>
    public DateTimeOffset? OldestEntry { get; init; }

    /// <summary>Timestamp of the newest entry, if any entries exist.</summary>
    public DateTimeOffset? NewestEntry { get; init; }

    /// <summary>Cache hits recorded since process startup.</summary>
    public long HitCount { get; init; }

    /// <summary>Cache misses recorded since process startup.</summary>
    public long MissCount { get; init; }

    /// <summary>
    /// Fraction of lookups that were hits, in [0.0, 1.0].
    /// Returns 0 when no lookups have been recorded yet.
    /// </summary>
    public double HitRatio
    {
        get
        {
            var total = HitCount + MissCount;
            return total > 0 ? HitCount / (double)total : 0;
        }
    }
}
/// <summary>
/// Tuning options for the Layer SBOM CAS (compression, TTL, storage backend).
/// </summary>
public sealed class LayerSbomCasOptions
{
    /// <summary>Configuration section name.</summary>
    public const string SectionName = "Scanner:LayerSbomCas";

    /// <summary>
    /// Whether to compress SBOMs before storage. Default is true.
    /// </summary>
    public bool CompressSboms { get; set; } = true;

    /// <summary>
    /// TTL for entries in days; entries older than this are eligible for
    /// eviction. Default is 90 days.
    /// </summary>
    public int TtlDays { get; set; } = 90;

    /// <summary>
    /// Maximum total storage size in bytes. Default is 10 GiB.
    /// </summary>
    public long MaxStorageSizeBytes { get; set; } = 10L << 30;

    /// <summary>
    /// Storage backend type. Default is PostgresBlob.
    /// </summary>
    public StorageBackendType StorageBackend { get; set; } = StorageBackendType.PostgresBlob;

    /// <summary>
    /// Path for filesystem storage (only used by the Filesystem backend).
    /// </summary>
    public string? FilesystemPath { get; set; }
}
/// <summary>
/// Storage backend types for the Layer SBOM CAS.
/// </summary>
public enum StorageBackendType
{
    /// <summary>
    /// Store SBOMs as compressed blobs in PostgreSQL (default backend).
    /// </summary>
    PostgresBlob,
    /// <summary>
    /// Store SBOMs on the local filesystem (requires LayerSbomCasOptions.FilesystemPath).
    /// </summary>
    Filesystem,
    /// <summary>
    /// Store SBOMs in S3-compatible object storage.
    /// </summary>
    S3
}

View File

@@ -0,0 +1,292 @@
using System.IO.Compression;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
namespace StellaOps.Scanner.Cache.LayerSbomCas;
/// <summary>
/// PostgreSQL-backed implementation of <see cref="ILayerSbomCas"/>.
/// Stores SBOMs as (optionally gzip-compressed) blobs in the database.
/// Every query tolerates a missing table (SQL state 42P01, undefined_table)
/// so the cache degrades to a no-op until migrations have created it.
/// </summary>
public sealed class PostgresLayerSbomCas : ILayerSbomCas
{
    private const string TableName = "scanner.layer_sbom_cas";

    // PostgreSQL "undefined_table" error code; raised before migrations run.
    private const string UndefinedTableSqlState = "42P01";

    private readonly NpgsqlDataSource _dataSource;
    private readonly LayerSbomCasOptions _options;
    private readonly ILogger<PostgresLayerSbomCas> _logger;

    // In-process hit/miss counters; reset on restart, accessed via Interlocked.
    private long _hitCount;
    private long _missCount;

    /// <summary>
    /// Creates the CAS over an existing Npgsql data source.
    /// </summary>
    /// <param name="dataSource">Pooled data source; required.</param>
    /// <param name="options">CAS options; defaults are used when null.</param>
    /// <param name="logger">Logger; required.</param>
    public PostgresLayerSbomCas(
        NpgsqlDataSource dataSource,
        IOptions<LayerSbomCasOptions> options,
        ILogger<PostgresLayerSbomCas> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _options = options?.Value ?? new LayerSbomCasOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Inserts or overwrites the SBOM for (diffId, format). Content is gzip-compressed
    /// when <see cref="LayerSbomCasOptions.CompressSboms"/> is set. On conflict the
    /// original created_at is preserved; last_accessed_at is refreshed.
    /// </summary>
    public async Task StoreAsync(string diffId, string format, string sbom, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        ArgumentException.ThrowIfNullOrWhiteSpace(format);
        ArgumentException.ThrowIfNullOrWhiteSpace(sbom);
        var normalizedFormat = NormalizeFormat(format);
        var content = _options.CompressSboms ? Compress(sbom) : Encoding.UTF8.GetBytes(sbom);
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            INSERT INTO {TableName} (diff_id, format, content, size_bytes, compressed, created_at, last_accessed_at)
            VALUES (@diffId, @format, @content, @sizeBytes, @compressed, @now, @now)
            ON CONFLICT (diff_id, format) DO UPDATE SET
                content = EXCLUDED.content,
                size_bytes = EXCLUDED.size_bytes,
                compressed = EXCLUDED.compressed,
                last_accessed_at = EXCLUDED.last_accessed_at
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("format", normalizedFormat);
        cmd.Parameters.AddWithValue("content", content);
        cmd.Parameters.AddWithValue("sizeBytes", content.Length);
        cmd.Parameters.AddWithValue("compressed", _options.CompressSboms);
        cmd.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
        try
        {
            await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogDebug("Stored SBOM for {DiffId} ({Format}), {Size} bytes",
                diffId, normalizedFormat, content.Length);
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("Layer SBOM CAS table does not exist. Run migrations.");
        }
    }

    /// <summary>
    /// Retrieves a stored SBOM, bumping last_accessed_at in the same statement.
    /// Returns null (and counts a miss) when no entry exists or the table is missing.
    /// </summary>
    public async Task<string?> GetAsync(string diffId, string format, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        ArgumentException.ThrowIfNullOrWhiteSpace(format);
        var normalizedFormat = NormalizeFormat(format);
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        // Single round trip: update the access timestamp and fetch the content
        // atomically via RETURNING. On a miss the UPDATE matches no rows, so no
        // write happens and the reader yields no rows (the previous version ran
        // a separate UPDATE + SELECT: two round trips and a no-op UPDATE on miss).
        cmd.CommandText = $"""
            UPDATE {TableName}
            SET last_accessed_at = @now
            WHERE diff_id = @diffId AND format = @format
            RETURNING content, compressed
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("format", normalizedFormat);
        cmd.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
        try
        {
            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                var content = (byte[])reader["content"];
                var compressed = reader.GetBoolean(reader.GetOrdinal("compressed"));
                Interlocked.Increment(ref _hitCount);
                return compressed ? Decompress(content) : Encoding.UTF8.GetString(content);
            }
            Interlocked.Increment(ref _missCount);
            return null;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            Interlocked.Increment(ref _missCount);
            return null;
        }
    }

    /// <summary>
    /// Checks whether an entry exists without reading its content or touching
    /// last_accessed_at.
    /// </summary>
    public async Task<bool> ExistsAsync(string diffId, string format, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        ArgumentException.ThrowIfNullOrWhiteSpace(format);
        var normalizedFormat = NormalizeFormat(format);
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            SELECT 1 FROM {TableName}
            WHERE diff_id = @diffId AND format = @format
            LIMIT 1
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("format", normalizedFormat);
        try
        {
            var result = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            return result is not null;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            return false;
        }
    }

    /// <summary>
    /// Deletes a single entry. Returns true only when a row was removed.
    /// </summary>
    public async Task<bool> DeleteAsync(string diffId, string format, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        ArgumentException.ThrowIfNullOrWhiteSpace(format);
        var normalizedFormat = NormalizeFormat(format);
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            DELETE FROM {TableName}
            WHERE diff_id = @diffId AND format = @format
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("format", normalizedFormat);
        try
        {
            var rowsAffected = await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            return rowsAffected > 0;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            return false;
        }
    }

    /// <summary>
    /// Aggregates table-level statistics in one query and merges in the
    /// in-process hit/miss counters. When the table is missing, returns a
    /// snapshot containing only the counters.
    /// </summary>
    public async Task<LayerSbomCasStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default)
    {
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT
                    COUNT(*) as entry_count,
                    COALESCE(SUM(size_bytes), 0) as total_size,
                    COUNT(*) FILTER (WHERE format = 'cyclonedx') as cyclonedx_count,
                    COUNT(*) FILTER (WHERE format = 'spdx') as spdx_count,
                    MIN(created_at) as oldest,
                    MAX(created_at) as newest
                FROM {TableName}
                """;
            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                return new LayerSbomCasStatistics
                {
                    EntryCount = reader.GetInt64(0),
                    TotalSizeBytes = reader.GetInt64(1),
                    CycloneDxCount = reader.GetInt64(2),
                    SpdxCount = reader.GetInt64(3),
                    OldestEntry = reader.IsDBNull(4) ? null : reader.GetFieldValue<DateTimeOffset>(4),
                    NewestEntry = reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
                    HitCount = Interlocked.Read(ref _hitCount),
                    MissCount = Interlocked.Read(ref _missCount)
                };
            }
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogDebug("Statistics table does not exist");
        }
        return new LayerSbomCasStatistics
        {
            HitCount = Interlocked.Read(ref _hitCount),
            MissCount = Interlocked.Read(ref _missCount)
        };
    }

    /// <summary>
    /// Evicts entries whose last access is older than <paramref name="maxAge"/>.
    /// Returns the number of rows removed (0 when the table is missing).
    /// </summary>
    public async Task<int> EvictOlderThanAsync(TimeSpan maxAge, CancellationToken cancellationToken = default)
    {
        var cutoff = DateTimeOffset.UtcNow - maxAge;
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            DELETE FROM {TableName}
            WHERE last_accessed_at < @cutoff
            """;
        cmd.Parameters.AddWithValue("cutoff", cutoff);
        try
        {
            var rowsDeleted = await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            if (rowsDeleted > 0)
            {
                _logger.LogInformation("Evicted {Count} layer SBOMs older than {Cutoff}", rowsDeleted, cutoff);
            }
            return rowsDeleted;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            return 0;
        }
    }

    // Maps format aliases ("cdx", "cyclone") to their canonical lowercase name;
    // unknown formats are passed through lowercased.
    private static string NormalizeFormat(string format)
    {
        return format.ToLowerInvariant() switch
        {
            "cyclonedx" or "cdx" or "cyclone" => "cyclonedx",
            "spdx" => "spdx",
            _ => format.ToLowerInvariant()
        };
    }

    // Gzip-compresses UTF-8 text. leaveOpen keeps the MemoryStream readable so
    // ToArray() after the gzip stream is disposed (and flushed) is valid.
    private static byte[] Compress(string text)
    {
        var bytes = Encoding.UTF8.GetBytes(text);
        using var output = new MemoryStream();
        using (var gzip = new GZipStream(output, CompressionLevel.Optimal, leaveOpen: true))
        {
            gzip.Write(bytes, 0, bytes.Length);
        }
        return output.ToArray();
    }

    // Inverse of Compress: gzip-decompresses to UTF-8 text.
    private static string Decompress(byte[] compressed)
    {
        using var input = new MemoryStream(compressed);
        using var gzip = new GZipStream(input, CompressionMode.Decompress);
        using var output = new MemoryStream();
        gzip.CopyTo(output);
        return Encoding.UTF8.GetString(output.ToArray());
    }
}

View File

@@ -6,6 +6,7 @@ using Microsoft.Extensions.Options;
using StellaOps.Scanner.Cache.Abstractions;
using StellaOps.Scanner.Cache.FileCas;
using StellaOps.Scanner.Cache.LayerCache;
using StellaOps.Scanner.Cache.LayerSbomCas;
using StellaOps.Scanner.Cache.Maintenance;
namespace StellaOps.Scanner.Cache;
@@ -48,4 +49,23 @@ public static class ScannerCacheServiceCollectionExtensions
return services;
}
/// <summary>
/// Registers the Layer SBOM CAS: binds <see cref="LayerSbomCasOptions"/> from
/// the given configuration section and adds the PostgreSQL-backed
/// <see cref="ILayerSbomCas"/> singleton (unless one is already registered).
/// </summary>
public static IServiceCollection AddLayerSbomCas(
    this IServiceCollection services,
    IConfiguration configuration,
    string sectionName = LayerSbomCasOptions.SectionName)
{
    ArgumentNullException.ThrowIfNull(services);
    ArgumentNullException.ThrowIfNull(configuration);
    var section = configuration.GetSection(sectionName);
    services.AddOptions<LayerSbomCasOptions>().Bind(section);
    services.TryAddSingleton<ILayerSbomCas, PostgresLayerSbomCas>();
    return services;
}
}

View File

@@ -0,0 +1,197 @@
namespace StellaOps.Scanner.Core.Normalization;
/// <summary>
/// Service for normalizing package names across different ecosystems.
/// Handles aliasing issues like apt↔dpkg, npm scopes, pip eggs/wheels,
/// producing a canonical PURL plus a confidence score for each input.
/// </summary>
public interface IPackageNameNormalizer
{
    /// <summary>
    /// Normalizes a package reference to its canonical PURL form.
    /// </summary>
    /// <param name="packageRef">The original package reference (PURL or ecosystem-specific).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Normalized package identity with confidence score.</returns>
    Task<NormalizedPackageIdentity> NormalizeAsync(
        string packageRef,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Checks if two package references are equivalent (same package, possibly
    /// different names). Implementations may ignore versions when comparing.
    /// </summary>
    /// <param name="package1">First package reference.</param>
    /// <param name="package2">Second package reference.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if the packages are equivalent.</returns>
    Task<bool> AreEquivalentAsync(
        string package1,
        string package2,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets all known aliases for a package; empty when none are known.
    /// </summary>
    /// <param name="packageRef">The package reference.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>List of known aliases.</returns>
    Task<IReadOnlyList<string>> GetAliasesAsync(
        string packageRef,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of normalizing a package name: the canonical PURL together with how
/// (and how confidently) it was derived from the original reference.
/// </summary>
public sealed record NormalizedPackageIdentity
{
    /// <summary>
    /// The canonical PURL for this package.
    /// </summary>
    public required string CanonicalPurl { get; init; }
    /// <summary>
    /// The original reference that was normalized.
    /// </summary>
    public required string OriginalRef { get; init; }
    /// <summary>
    /// The package ecosystem (npm, pypi, deb, golang, etc.).
    /// </summary>
    public required string Ecosystem { get; init; }
    /// <summary>
    /// Confidence score for the normalization (0.0 to 1.0).
    /// 1.0 = exact match or well-known alias
    /// 0.5-0.9 = heuristic match
    /// 0.0-0.5 = fallback/fingerprint
    /// </summary>
    public required double Confidence { get; init; }
    /// <summary>
    /// Method used for normalization.
    /// </summary>
    public required NormalizationMethod Method { get; init; }
    /// <summary>
    /// The namespace/scope extracted from the package reference, when present.
    /// </summary>
    public string? Namespace { get; init; }
    /// <summary>
    /// The package name (without namespace/scope), when it could be parsed.
    /// </summary>
    public string? Name { get; init; }
    /// <summary>
    /// The version if present in the original reference.
    /// </summary>
    public string? Version { get; init; }
    /// <summary>
    /// File hash fingerprint (only set for the file-hash fallback method).
    /// </summary>
    public string? FileHash { get; init; }
    /// <summary>
    /// The alias (or identity) this was normalized from, when an alias lookup was used.
    /// </summary>
    public string? NormalizedFrom { get; init; }
}
/// <summary>
/// Method used to normalize a package name, roughly ordered from strongest
/// (exact/alias) to weakest (passthrough) evidence.
/// </summary>
public enum NormalizationMethod
{
    /// <summary>
    /// Exact match - no transformation needed.
    /// </summary>
    ExactMatch,
    /// <summary>
    /// Transformed using ecosystem-specific rules.
    /// </summary>
    EcosystemRule,
    /// <summary>
    /// Matched via known alias mapping.
    /// </summary>
    AliasLookup,
    /// <summary>
    /// Heuristic matching (case normalization, scope handling, etc.).
    /// </summary>
    Heuristic,
    /// <summary>
    /// Fallback to file hash fingerprint.
    /// </summary>
    FileHashFallback,
    /// <summary>
    /// Unable to normalize, returned as-is.
    /// </summary>
    Passthrough
}
/// <summary>
/// Alias entry in the vendor alias map: one canonical PURL plus every name
/// that should resolve to it.
/// </summary>
public sealed record PackageAlias
{
    /// <summary>
    /// The canonical PURL for this package.
    /// </summary>
    public required string CanonicalPurl { get; init; }
    /// <summary>
    /// List of known aliases for this package.
    /// </summary>
    public required IReadOnlyList<string> Aliases { get; init; }
    /// <summary>
    /// The ecosystem this alias belongs to.
    /// </summary>
    public required string Ecosystem { get; init; }
    /// <summary>
    /// Optional notes about the aliasing (provenance, caveats).
    /// </summary>
    public string? Notes { get; init; }
}
/// <summary>
/// Options controlling package name normalization (alias map location,
/// fallback behavior, and result caching).
/// </summary>
public sealed class PackageNameNormalizerOptions
{
    /// <summary>Configuration section name.</summary>
    public const string SectionName = "Scanner:PackageNormalization";

    /// <summary>
    /// Path to the vendor alias map JSON file; null to use built-ins only.
    /// </summary>
    public string? AliasMapPath { get; set; }

    /// <summary>
    /// Whether to use file hash fingerprinting as fallback. Default is true.
    /// </summary>
    public bool EnableFileHashFallback { get; set; } = true;

    /// <summary>
    /// Minimum confidence threshold for alias matching. Default is 0.5.
    /// </summary>
    public double MinConfidenceThreshold { get; set; } = 0.5;

    /// <summary>
    /// Whether to cache normalization results. Default is true.
    /// </summary>
    public bool EnableCaching { get; set; } = true;

    /// <summary>
    /// Cache size limit (number of entries). Default is 10000.
    /// </summary>
    public int CacheSize { get; set; } = 10000;
}

View File

@@ -0,0 +1,46 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
namespace StellaOps.Scanner.Core.Normalization;
/// <summary>
/// Extension methods for registering package normalization services.
/// </summary>
public static class NormalizationServiceCollectionExtensions
{
    /// <summary>
    /// Registers package name normalization, binding options from the given
    /// configuration section (defaults to
    /// <see cref="PackageNameNormalizerOptions.SectionName"/>).
    /// </summary>
    public static IServiceCollection AddPackageNameNormalization(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionName = PackageNameNormalizerOptions.SectionName)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);
        var section = configuration.GetSection(sectionName);
        services.AddOptions<PackageNameNormalizerOptions>().Bind(section);
        services.TryAddSingleton<IPackageNameNormalizer, PackageNameNormalizer>();
        return services;
    }

    /// <summary>
    /// Registers package name normalization with options configured in code.
    /// </summary>
    public static IServiceCollection AddPackageNameNormalization(
        this IServiceCollection services,
        Action<PackageNameNormalizerOptions> configureOptions)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configureOptions);
        services.Configure(configureOptions);
        services.TryAddSingleton<IPackageNameNormalizer, PackageNameNormalizer>();
        return services;
    }
}

View File

@@ -0,0 +1,623 @@
using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scanner.Core.Normalization;
/// <summary>
/// Implementation of IPackageNameNormalizer that handles ecosystem-specific
/// package name aliasing and normalization.
/// </summary>
public sealed partial class PackageNameNormalizer : IPackageNameNormalizer
{
private readonly PackageNameNormalizerOptions _options;
private readonly ILogger<PackageNameNormalizer> _logger;
// Vendor alias map: canonical PURL -> alias entry (case-insensitive keys).
private readonly Dictionary<string, PackageAlias> _canonicalToAlias = new(StringComparer.OrdinalIgnoreCase);
// Reverse map: any alias -> canonical PURL (case-insensitive keys).
private readonly Dictionary<string, string> _aliasToCanonical = new(StringComparer.OrdinalIgnoreCase);
// Memoized normalization results; growth bounded by _options.CacheSize.
private readonly ConcurrentDictionary<string, NormalizedPackageIdentity> _cache = new(StringComparer.OrdinalIgnoreCase);
// Source-generated PURL parser: pkg:type[/namespace]/name[@version][?qualifiers][#subpath]
[GeneratedRegex(@"^pkg:(?<type>[a-z]+)(/(?<namespace>[^/]+))?/(?<name>[^@?#]+)(@(?<version>[^?#]+))?(\?(?<qualifiers>[^#]+))?(#(?<subpath>.+))?$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex PurlPattern();
/// <summary>
/// Creates the normalizer: loads the built-in alias set, then (when
/// configured) an external alias map from
/// <see cref="PackageNameNormalizerOptions.AliasMapPath"/>.
/// </summary>
/// <param name="options">Normalizer options; defaults are used when null.</param>
/// <param name="logger">Logger; required.</param>
public PackageNameNormalizer(
    IOptions<PackageNameNormalizerOptions> options,
    ILogger<PackageNameNormalizer> logger)
{
    _options = options?.Value ?? new PackageNameNormalizerOptions();
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    // Built-ins first, file map second. NOTE(review): presumably file entries
    // override built-ins on key collision — confirm in LoadAliasMapFromFile.
    LoadBuiltInAliases();
    if (!string.IsNullOrWhiteSpace(_options.AliasMapPath))
    {
        LoadAliasMapFromFile(_options.AliasMapPath);
    }
}
/// <summary>
/// Normalizes a package reference, consulting the in-memory cache first when
/// caching is enabled. Synchronous internally; wrapped in a completed Task.
/// </summary>
/// <param name="packageRef">PURL or ecosystem-specific reference; must be non-blank.</param>
/// <param name="cancellationToken">Unused; present to satisfy the interface.</param>
public Task<NormalizedPackageIdentity> NormalizeAsync(
    string packageRef,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(packageRef);
    if (_options.EnableCaching && _cache.TryGetValue(packageRef, out var hit))
    {
        return Task.FromResult(hit);
    }
    var identity = NormalizeInternal(packageRef);
    // Best-effort size bound: the count check and the add are not atomic, so the
    // cache may slightly exceed CacheSize under concurrency.
    if (_options.EnableCaching && _cache.Count < _options.CacheSize)
    {
        _cache.TryAdd(packageRef, identity);
    }
    return Task.FromResult(identity);
}
/// <summary>
/// Determines whether two references denote the same package by normalizing
/// both and comparing their canonical identities, versions excluded.
/// </summary>
public async Task<bool> AreEquivalentAsync(
    string package1,
    string package2,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(package1);
    ArgumentException.ThrowIfNullOrWhiteSpace(package2);
    var first = await NormalizeAsync(package1, cancellationToken).ConfigureAwait(false);
    var second = await NormalizeAsync(package2, cancellationToken).ConfigureAwait(false);
    // Versions are deliberately ignored: same package at any version is equivalent.
    var firstIdentity = GetPackageIdentity(first.CanonicalPurl);
    var secondIdentity = GetPackageIdentity(second.CanonicalPurl);
    return firstIdentity.Equals(secondIdentity, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Returns the known aliases for the reference's canonical PURL, checking the
/// exact PURL first and then its version-free identity. Empty when unknown.
/// </summary>
public Task<IReadOnlyList<string>> GetAliasesAsync(
    string packageRef,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(packageRef);
    var canonical = NormalizeInternal(packageRef).CanonicalPurl;
    // Exact canonical PURL first, then the version-stripped identity.
    if (_canonicalToAlias.TryGetValue(canonical, out var entry) ||
        _canonicalToAlias.TryGetValue(GetPackageIdentity(canonical), out entry))
    {
        return Task.FromResult(entry.Aliases);
    }
    return Task.FromResult<IReadOnlyList<string>>([]);
}
/// <summary>
/// Dispatches a raw reference: PURL-shaped inputs go through the PURL
/// normalizer; anything else has its ecosystem guessed and is handled by the
/// non-PURL path.
/// </summary>
private NormalizedPackageIdentity NormalizeInternal(string packageRef)
{
    var purlMatch = PurlPattern().Match(packageRef);
    return purlMatch.Success
        ? NormalizePurl(packageRef, purlMatch)
        : NormalizeNonPurl(packageRef, DetectEcosystem(packageRef));
}
/// <summary>
/// Normalizes a parsed PURL: alias lookup first (full confidence), otherwise
/// ecosystem-specific name/namespace rules.
/// </summary>
/// <param name="purl">The original PURL string.</param>
/// <param name="match">Successful match of <see cref="PurlPattern"/> against <paramref name="purl"/>.</param>
private NormalizedPackageIdentity NormalizePurl(string purl, Match match)
{
    var type = match.Groups["type"].Value.ToLowerInvariant();
    var ns = match.Groups["namespace"].Success ? match.Groups["namespace"].Value : null;
    var name = match.Groups["name"].Value;
    var version = match.Groups["version"].Success ? match.Groups["version"].Value : null;
    // Alias lookup: version-free identity first, then the verbatim PURL.
    var identity = GetPackageIdentity(purl);
    if (_aliasToCanonical.TryGetValue(identity, out var canonical) ||
        _aliasToCanonical.TryGetValue(purl, out canonical))
    {
        // Re-attach the input's version unless the canonical entry already has one.
        // NOTE(review): Contains('@') also fires on an unencoded namespace '@'
        // (e.g. pkg:npm/@scope/name), which would skip appending the version —
        // confirm alias-map entries always percent-encode '@' in namespaces.
        var canonicalWithVersion = version != null && !canonical.Contains('@')
            ? $"{canonical}@{version}"
            : canonical;
        return new NormalizedPackageIdentity
        {
            CanonicalPurl = canonicalWithVersion,
            OriginalRef = purl,
            Ecosystem = type,
            Confidence = 1.0,
            Method = NormalizationMethod.AliasLookup,
            Namespace = ns,
            Name = name,
            Version = version,
            NormalizedFrom = identity
        };
    }
    // No alias: apply ecosystem-specific normalization and rebuild the PURL.
    var (normalizedName, normalizedNs, confidence, method) = NormalizeByEcosystem(type, name, ns);
    var normalizedPurl = BuildPurl(type, normalizedNs, normalizedName, version, null, null);
    return new NormalizedPackageIdentity
    {
        CanonicalPurl = normalizedPurl,
        OriginalRef = purl,
        Ecosystem = type,
        Confidence = confidence,
        Method = method,
        Namespace = normalizedNs,
        Name = normalizedName,
        Version = version
    };
}
/// <summary>
/// Normalizes a non-PURL reference in descending order of confidence:
/// alias lookup, ecosystem-specific parsing, hash-based generic PURL
/// (when enabled and path-like), and finally passthrough.
/// </summary>
private NormalizedPackageIdentity NormalizeNonPurl(string packageRef, string ecosystem)
{
    // 1) Exact alias hit wins with full confidence.
    if (_aliasToCanonical.TryGetValue(packageRef, out var canonical))
    {
        return new NormalizedPackageIdentity
        {
            CanonicalPurl = canonical,
            OriginalRef = packageRef,
            Ecosystem = ecosystem,
            Confidence = 1.0,
            Method = NormalizationMethod.AliasLookup,
            NormalizedFrom = packageRef
        };
    }
    // 2) Ecosystem-specific reference grammar (npm/pypi/deb/golang).
    if (ParseByEcosystem(packageRef, ecosystem) is { } parsed)
    {
        return parsed;
    }
    // 3) Fingerprint fallback for path-like references, when enabled.
    if (_options.EnableFileHashFallback && LooksLikeFilePath(packageRef))
    {
        // NOTE(review): ComputeFileHash is defined elsewhere; presumably it hashes
        // the reference string rather than file contents — confirm.
        var hash = ComputeFileHash(packageRef);
        return new NormalizedPackageIdentity
        {
            CanonicalPurl = $"pkg:generic/{Uri.EscapeDataString(packageRef)}?checksum=sha256:{hash}",
            OriginalRef = packageRef,
            Ecosystem = "generic",
            Confidence = 0.3,
            Method = NormalizationMethod.FileHashFallback,
            FileHash = hash
        };
    }
    // 4) Last resort: wrap as-is in a generic PURL with minimal confidence.
    return new NormalizedPackageIdentity
    {
        CanonicalPurl = $"pkg:generic/{Uri.EscapeDataString(packageRef)}",
        OriginalRef = packageRef,
        Ecosystem = ecosystem,
        Confidence = 0.1,
        Method = NormalizationMethod.Passthrough
    };
}
/// <summary>
/// Routes to the per-ecosystem normalization rule; ecosystems without a
/// dedicated rule get a 0.9-confidence heuristic passthrough.
/// </summary>
private (string Name, string? Namespace, double Confidence, NormalizationMethod Method) NormalizeByEcosystem(
    string ecosystem,
    string name,
    string? ns)
{
    switch (ecosystem)
    {
        case "npm":
            return NormalizeNpm(name, ns);
        case "pypi":
            return NormalizePypi(name, ns);
        case "deb":
            return NormalizeDebian(name, ns);
        case "golang":
            return NormalizeGolang(name, ns);
        case "maven":
        case "nuget":
            // Both registries resolve names case-insensitively; lowercase only.
            return (name.ToLowerInvariant(), ns, 0.95, NormalizationMethod.EcosystemRule);
        default:
            return (name, ns, 0.9, NormalizationMethod.Heuristic);
    }
}
/// <summary>
/// Normalizes an npm name/scope: lowercases both (the registry resolves names
/// case-insensitively) and strips a leftover leading '@' from the scope.
/// </summary>
private static (string Name, string? Namespace, double Confidence, NormalizationMethod Method) NormalizeNpm(
    string name,
    string? ns)
{
    var loweredName = name.ToLowerInvariant();
    var loweredScope = ns?.ToLowerInvariant();
    // PURL parsing usually strips the '@' scope marker already; drop it if present.
    if (loweredScope?.StartsWith('@') == true)
    {
        loweredScope = loweredScope.Substring(1);
    }
    return (loweredName, loweredScope, 0.98, NormalizationMethod.EcosystemRule);
}
/// <summary>
/// Normalizes a PyPI name per PEP 503: lowercase; '_' and '.' fold to '-';
/// runs of '-' collapse to a single '-'.
/// </summary>
private static (string Name, string? Namespace, double Confidence, NormalizationMethod Method) NormalizePypi(
    string name,
    string? ns)
{
    var normalized = name.ToLowerInvariant().Replace('_', '-').Replace('.', '-');
    // Collapse until a fixed point: "a---b" -> "a--b" -> "a-b".
    string collapsed;
    while ((collapsed = normalized.Replace("--", "-")).Length != normalized.Length)
    {
        normalized = collapsed;
    }
    return (normalized, ns, 0.95, NormalizationMethod.EcosystemRule);
}
/// <summary>
/// Normalizes a Debian package name: lowercases it, defaults the namespace to
/// "debian", and prefers a known alias entry (full confidence) when present.
/// </summary>
/// <param name="name">Package name as reported by dpkg/apt.</param>
/// <param name="ns">Optional namespace (distro); defaults to "debian".</param>
private (string Name, string? Namespace, double Confidence, NormalizationMethod Method) NormalizeDebian(
    string name,
    string? ns)
{
    var dpkgName = name.ToLowerInvariant();
    // (The previous version also computed a "-dev"/"-dbg"/"-doc"-stripped base
    // name that was never used; that dead computation has been removed.)
    var debPurl = $"pkg:deb/debian/{dpkgName}";
    if (_aliasToCanonical.TryGetValue(debPurl, out _))
    {
        return (dpkgName, ns ?? "debian", 1.0, NormalizationMethod.AliasLookup);
    }
    return (dpkgName, ns ?? "debian", 0.85, NormalizationMethod.EcosystemRule);
}
/// <summary>
/// Normalizes a Go module path by lowercasing name and namespace. Major-version
/// suffixes such as "/v2" are part of the module path and are deliberately kept.
/// </summary>
/// <param name="name">Module/package path component.</param>
/// <param name="ns">Optional namespace (e.g. host/org prefix).</param>
private static (string Name, string? Namespace, double Confidence, NormalizationMethod Method) NormalizeGolang(
    string name,
    string? ns)
{
    var normalizedName = name.ToLowerInvariant();
    var normalizedNs = ns?.ToLowerInvariant();
    // (The previous version contained an empty `if (EndsWith("/v2") || EndsWith("/v3"))`
    // branch that did nothing; removed as dead code — suffixes are kept as-is anyway.)
    return (normalizedName, normalizedNs, 0.9, NormalizationMethod.EcosystemRule);
}
/// <summary>
/// Attempts ecosystem-specific parsing of a non-PURL reference; returns null
/// when no parser exists for the ecosystem.
/// </summary>
private NormalizedPackageIdentity? ParseByEcosystem(string packageRef, string ecosystem)
{
    if (ecosystem == "npm") { return ParseNpmRef(packageRef); }
    if (ecosystem == "pypi") { return ParsePypiRef(packageRef); }
    if (ecosystem == "deb") { return ParseDebianRef(packageRef); }
    if (ecosystem == "golang") { return ParseGolangRef(packageRef); }
    return null;
}
/// <summary>
/// Parses an npm-style reference ("name", "name@version", "@scope/name@version")
/// into a canonical pkg:npm PURL. Name AND scope are lowercased, matching
/// NormalizeNpm — previously the scope kept its original casing, so
/// "@Scope/x" and "@scope/x" produced different canonical PURLs.
/// </summary>
private NormalizedPackageIdentity? ParseNpmRef(string packageRef)
{
    string? scope = null;
    string name;
    string? version = null;
    var remaining = packageRef;
    if (remaining.StartsWith('@'))
    {
        var slashIndex = remaining.IndexOf('/');
        if (slashIndex > 0)
        {
            scope = remaining[1..slashIndex];
            remaining = remaining[(slashIndex + 1)..];
        }
    }
    // The last '@' separates the version (the scope's '@' was already consumed).
    var atIndex = remaining.LastIndexOf('@');
    if (atIndex > 0)
    {
        name = remaining[..atIndex];
        version = remaining[(atIndex + 1)..];
    }
    else
    {
        name = remaining;
    }
    var normalizedScope = scope?.ToLowerInvariant();
    var normalizedName = name.ToLowerInvariant();
    var purl = BuildPurl("npm", normalizedScope, normalizedName, version, null, null);
    return new NormalizedPackageIdentity
    {
        CanonicalPurl = purl,
        OriginalRef = packageRef,
        Ecosystem = "npm",
        Confidence = 0.9,
        Method = NormalizationMethod.EcosystemRule,
        Namespace = normalizedScope,
        Name = normalizedName,
        Version = version
    };
}
private NormalizedPackageIdentity? ParsePypiRef(string packageRef)
{
    // Split "name==1.2", "name>=1.2", "name~=1.2" etc. at the first specifier
    // character; a bare "name" yields no version.
    var pieces = packageRef.Split(['=', '>', '<', '~', '!'], 2);
    var rawName = pieces[0].Trim();
    string? version = null;
    if (pieces.Length > 1)
    {
        version = pieces[1].TrimStart('=', '>', '<', '~', '!').Trim();
    }

    // Normalize the distribution name: lowercase with '_' and '.' folded to '-'.
    var canonicalName = rawName.ToLowerInvariant()
        .Replace('_', '-')
        .Replace('.', '-');

    return new NormalizedPackageIdentity
    {
        CanonicalPurl = BuildPurl("pypi", null, canonicalName, version, null, null),
        OriginalRef = packageRef,
        Ecosystem = "pypi",
        Confidence = 0.9,
        Method = NormalizationMethod.EcosystemRule,
        Name = canonicalName,
        Version = version
    };
}
private NormalizedPackageIdentity? ParseDebianRef(string packageRef)
{
    // Accepts "name" or dpkg-style "name=version".
    var pieces = packageRef.Split('=', 2);
    var packageName = pieces[0].Trim().ToLowerInvariant();
    string? version = pieces.Length > 1 ? pieces[1].Trim() : null;
    return new NormalizedPackageIdentity
    {
        CanonicalPurl = BuildPurl("deb", "debian", packageName, version, null, null),
        OriginalRef = packageRef,
        Ecosystem = "deb",
        Confidence = 0.85,
        Method = NormalizationMethod.EcosystemRule,
        Namespace = "debian",
        Name = packageName,
        Version = version
    };
}
private static NormalizedPackageIdentity? ParseGolangRef(string packageRef)
{
    // Accepts "github.com/org/repo@version" or a bare module/package path.
    // '@' is treated as a version separator ONLY when it follows a slash-
    // containing path; "user@host"-style strings stay intact as the name.
    var modulePath = packageRef;
    string? version = null;
    var at = packageRef.LastIndexOf('@');
    if (at > 0 && packageRef[..at].Contains('/'))
    {
        modulePath = packageRef[..at];
        version = packageRef[(at + 1)..];
    }

    var loweredPath = modulePath.ToLowerInvariant();
    return new NormalizedPackageIdentity
    {
        CanonicalPurl = BuildPurl("golang", null, loweredPath, version, null, null),
        OriginalRef = packageRef,
        Ecosystem = "golang",
        Confidence = 0.85,
        Method = NormalizationMethod.EcosystemRule,
        Name = loweredPath,
        Version = version
    };
}
private static string DetectEcosystem(string packageRef)
{
    // Heuristic ecosystem detection from the reference's shape; the first
    // matching rule wins, so ordering matters. All string comparisons are
    // ordinal: StartsWith(string)/EndsWith(string) default to culture-sensitive
    // comparison, which could misclassify under some locales (e.g. Turkish
    // casing rules) — fixed by passing StringComparison.Ordinal explicitly.
    if (packageRef.StartsWith('@') || packageRef.Contains("/node_modules/", StringComparison.Ordinal))
        return "npm";
    if (packageRef.Contains("==", StringComparison.Ordinal) || packageRef.Contains(">=", StringComparison.Ordinal) ||
        packageRef.EndsWith(".whl", StringComparison.Ordinal) || packageRef.EndsWith(".egg", StringComparison.Ordinal))
        return "pypi";
    if (packageRef.Contains(".deb", StringComparison.Ordinal) || packageRef.StartsWith("lib", StringComparison.Ordinal))
        return "deb";
    if (packageRef.StartsWith("github.com/", StringComparison.Ordinal) || packageRef.StartsWith("golang.org/", StringComparison.Ordinal) ||
        packageRef.Contains("/v2", StringComparison.Ordinal) || packageRef.Contains("/v3", StringComparison.Ordinal))
        return "golang";
    // "group:artifact/..." shapes (a ':' plus a '/') are assumed to be Maven.
    if (packageRef.Contains(':') && packageRef.Contains('/'))
        return "maven";
    return "generic";
}
private static string BuildPurl(
    string type,
    string? ns,
    string name,
    string? version,
    string? qualifiers,
    string? subpath)
{
    // Assembles "pkg:type[/namespace]/name[@version][?qualifiers][#subpath]".
    // Namespace, name and version are percent-encoded with EscapeDataString;
    // qualifiers and subpath are appended verbatim (callers pass them pre-formed).
    // NOTE(review): EscapeDataString also encodes '/', so slash-containing names
    // (Go module paths) come out percent-encoded — confirm downstream consumers
    // expect that form.
    var builder = new StringBuilder("pkg:").Append(type);
    if (!string.IsNullOrEmpty(ns))
    {
        builder.Append('/').Append(Uri.EscapeDataString(ns));
    }
    builder.Append('/').Append(Uri.EscapeDataString(name));
    if (!string.IsNullOrEmpty(version))
    {
        builder.Append('@').Append(Uri.EscapeDataString(version));
    }
    if (!string.IsNullOrEmpty(qualifiers))
    {
        builder.Append('?').Append(qualifiers);
    }
    if (!string.IsNullOrEmpty(subpath))
    {
        builder.Append('#').Append(subpath);
    }
    return builder.ToString();
}
private static string GetPackageIdentity(string purl)
{
    // Strips the version segment ("@x.y.z") from a purl while preserving
    // qualifiers ("?...") and subpath ("#...").
    // Fix: the original used IndexOf('@'), which cut at the FIRST '@' — for a
    // scoped npm purl like "pkg:npm/@scope/name@1.0" that is the scope marker,
    // producing "pkg:npm/". We instead take the LAST '@' within the part before
    // any qualifier/subpath, and ignore an '@' that starts a path segment.
    var queryIndex = purl.IndexOf('?');
    var hashIndex = purl.IndexOf('#');
    var suffixIndex = queryIndex >= 0 ? queryIndex : hashIndex;
    var core = suffixIndex >= 0 ? purl[..suffixIndex] : purl;

    var atIndex = core.LastIndexOf('@');
    // '@' at position 0 or directly after '/' marks a namespace (npm scope),
    // not a version separator.
    if (atIndex > 0 && core[atIndex - 1] != '/')
    {
        core = core[..atIndex];
    }

    return suffixIndex >= 0 ? core + purl[suffixIndex..] : core;
}
private static bool LooksLikeFilePath(string reference)
{
    // Path separators or well-known binary/archive extensions indicate a file
    // reference rather than a package name. Ordinal comparison keeps the check
    // culture-independent (EndsWith(string) defaults to culture-sensitive
    // comparison — CA1310).
    return reference.Contains('/') || reference.Contains('\\') ||
           reference.EndsWith(".so", StringComparison.Ordinal) ||
           reference.EndsWith(".dll", StringComparison.Ordinal) ||
           reference.EndsWith(".jar", StringComparison.Ordinal) ||
           reference.EndsWith(".whl", StringComparison.Ordinal) ||
           reference.EndsWith(".egg", StringComparison.Ordinal);
}
private static string ComputeFileHash(string reference)
{
    // Deterministic identity for path-like references: lowercase hex SHA-256
    // of the UTF-8 encoded reference string.
    Span<byte> digest = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(Encoding.UTF8.GetBytes(reference), digest);
    return Convert.ToHexString(digest).ToLowerInvariant();
}
private void LoadBuiltInAliases()
{
    // Seed table of well-known cross-naming aliases; deliberately small —
    // larger maps are loaded from file via LoadAliasMapFromFile.
    (string Canonical, string[] Aliases, string Ecosystem, string? Notes)[] seeds =
    [
        // Debian: dpkg binary-package names vs upstream/source names.
        ("pkg:deb/debian/libc6", ["pkg:deb/debian/glibc", "libc", "glibc"], "deb", null),
        ("pkg:deb/debian/libssl3", ["pkg:deb/debian/openssl", "openssl"], "deb", null),
        ("pkg:deb/debian/libpython3.11", ["pkg:deb/debian/python3", "python3", "python"], "deb", null),
        // Python: egg/wheel naming variations.
        ("pkg:pypi/pillow", ["PIL", "python-imaging", "pillow-simd"], "pypi", null),
        ("pkg:pypi/pyyaml", ["yaml", "PyYAML"], "pypi", null),
        ("pkg:pypi/beautifulsoup4", ["bs4", "BeautifulSoup"], "pypi", null),
        ("pkg:pypi/scikit-learn", ["sklearn"], "pypi", null),
        // npm: common variations.
        ("pkg:npm/lodash", ["underscore"], "npm", "Similar utility libraries, not identical"),
        // Go: module path variations.
        ("pkg:golang/github.com/golang/protobuf", ["google.golang.org/protobuf"], "golang", null),
    ];

    foreach (var (canonical, aliases, ecosystem, notes) in seeds)
    {
        AddAlias(canonical, aliases, ecosystem, notes);
    }

    _logger.LogDebug("Loaded {Count} built-in package aliases", _canonicalToAlias.Count);
}
private void AddAlias(string canonical, IReadOnlyList<string> aliases, string ecosystem, string? notes = null)
{
    // Register the canonical entry, then point every alias — and the canonical
    // purl itself — back at the canonical form. Re-registering an alias
    // overwrites the earlier mapping (last writer wins).
    _canonicalToAlias[canonical] = new PackageAlias
    {
        CanonicalPurl = canonical,
        Aliases = aliases,
        Ecosystem = ecosystem,
        Notes = notes
    };

    _aliasToCanonical[canonical] = canonical;
    foreach (var alias in aliases)
    {
        _aliasToCanonical[alias] = canonical;
    }
}
private void LoadAliasMapFromFile(string path)
{
    // Best-effort load of an external alias map: failures are logged, never
    // thrown, so a missing or malformed map cannot break construction.
    try
    {
        if (!File.Exists(path))
        {
            _logger.LogWarning("Alias map file not found: {Path}", path);
            return;
        }

        var json = File.ReadAllText(path);
        // Fix: shipped alias-map files use camelCase keys ("canonicalPurl"),
        // but System.Text.Json binds properties case-sensitively by default,
        // so those files would deserialize to empty records. Opt into
        // case-insensitive matching (harmless if PackageAlias already carries
        // [JsonPropertyName] attributes — TODO confirm).
        var options = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
        var aliases = JsonSerializer.Deserialize<List<PackageAlias>>(json, options);
        if (aliases != null)
        {
            foreach (var alias in aliases)
            {
                AddAlias(alias.CanonicalPurl, alias.Aliases, alias.Ecosystem, alias.Notes);
            }
            _logger.LogInformation("Loaded {Count} package aliases from {Path}", aliases.Count, path);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to load alias map from {Path}", path);
    }
}
}

View File

@@ -0,0 +1,326 @@
[
{
"canonicalPurl": "pkg:deb/debian/libc6",
"aliases": ["pkg:deb/debian/glibc", "glibc", "libc", "eglibc"],
"ecosystem": "deb",
"notes": "GNU C Library"
},
{
"canonicalPurl": "pkg:deb/debian/libssl3",
"aliases": ["pkg:deb/debian/openssl", "openssl", "libssl", "libssl1.1"],
"ecosystem": "deb",
"notes": "OpenSSL library"
},
{
"canonicalPurl": "pkg:deb/debian/zlib1g",
"aliases": ["pkg:deb/debian/zlib", "zlib", "libz"],
"ecosystem": "deb",
"notes": "Compression library"
},
{
"canonicalPurl": "pkg:deb/debian/libcurl4",
"aliases": ["pkg:deb/debian/curl", "curl", "libcurl", "libcurl3"],
"ecosystem": "deb",
"notes": "URL transfer library"
},
{
"canonicalPurl": "pkg:deb/debian/libxml2",
"aliases": ["xml2", "libxml"],
"ecosystem": "deb",
"notes": "GNOME XML library"
},
{
"canonicalPurl": "pkg:deb/debian/libpng16-16",
"aliases": ["pkg:deb/debian/libpng", "libpng", "png"],
"ecosystem": "deb",
"notes": "PNG library"
},
{
"canonicalPurl": "pkg:deb/debian/libjpeg62-turbo",
"aliases": ["pkg:deb/debian/libjpeg", "libjpeg", "jpeg", "libjpeg-turbo"],
"ecosystem": "deb",
"notes": "JPEG library"
},
{
"canonicalPurl": "pkg:deb/debian/libsqlite3-0",
"aliases": ["pkg:deb/debian/sqlite3", "sqlite3", "sqlite", "libsqlite"],
"ecosystem": "deb",
"notes": "SQLite database"
},
{
"canonicalPurl": "pkg:pypi/pillow",
"aliases": ["PIL", "Pillow", "python-imaging", "pillow-simd", "pillow-avif-plugin"],
"ecosystem": "pypi",
"notes": "Python Imaging Library fork"
},
{
"canonicalPurl": "pkg:pypi/pyyaml",
"aliases": ["yaml", "PyYAML", "pyyaml", "ruamel.yaml"],
"ecosystem": "pypi",
"notes": "YAML parser"
},
{
"canonicalPurl": "pkg:pypi/beautifulsoup4",
"aliases": ["bs4", "BeautifulSoup", "BeautifulSoup4", "beautifulsoup"],
"ecosystem": "pypi",
"notes": "HTML/XML parser"
},
{
"canonicalPurl": "pkg:pypi/scikit-learn",
"aliases": ["sklearn", "scikitlearn", "scikit_learn"],
"ecosystem": "pypi",
"notes": "Machine learning library"
},
{
"canonicalPurl": "pkg:pypi/opencv-python",
"aliases": ["cv2", "opencv", "opencv-contrib-python", "opencv-python-headless"],
"ecosystem": "pypi",
"notes": "OpenCV computer vision"
},
{
"canonicalPurl": "pkg:pypi/numpy",
"aliases": ["np", "numpy"],
"ecosystem": "pypi",
"notes": "Numerical Python"
},
{
"canonicalPurl": "pkg:pypi/pandas",
"aliases": ["pd", "pandas"],
"ecosystem": "pypi",
"notes": "Data analysis library"
},
{
"canonicalPurl": "pkg:pypi/tensorflow",
"aliases": ["tf", "tensorflow-gpu", "tensorflow-cpu", "tf-nightly"],
"ecosystem": "pypi",
"notes": "Machine learning framework"
},
{
"canonicalPurl": "pkg:pypi/torch",
"aliases": ["pytorch", "torch-gpu"],
"ecosystem": "pypi",
"notes": "PyTorch deep learning"
},
{
"canonicalPurl": "pkg:pypi/cryptography",
"aliases": ["pyopenssl", "OpenSSL"],
"ecosystem": "pypi",
"notes": "Cryptographic library"
},
{
"canonicalPurl": "pkg:pypi/requests",
"aliases": ["urllib3", "httpx"],
"ecosystem": "pypi",
"notes": "HTTP library (related, not identical)"
},
{
"canonicalPurl": "pkg:npm/lodash",
"aliases": ["lodash-es", "lodash.debounce", "lodash.throttle", "lodash.merge"],
"ecosystem": "npm",
"notes": "Utility library and its modular versions"
},
{
"canonicalPurl": "pkg:npm/@babel/core",
"aliases": ["babel-core", "@babel/cli", "@babel/preset-env"],
"ecosystem": "npm",
"notes": "Babel JavaScript compiler"
},
{
"canonicalPurl": "pkg:npm/webpack",
"aliases": ["webpack-cli", "webpack-dev-server"],
"ecosystem": "npm",
"notes": "Module bundler"
},
{
"canonicalPurl": "pkg:npm/react",
"aliases": ["react-dom", "react-scripts"],
"ecosystem": "npm",
"notes": "React library and related packages"
},
{
"canonicalPurl": "pkg:npm/express",
"aliases": ["express-session", "express-validator"],
"ecosystem": "npm",
"notes": "Web framework"
},
{
"canonicalPurl": "pkg:npm/typescript",
"aliases": ["ts", "tsc"],
"ecosystem": "npm",
"notes": "TypeScript compiler"
},
{
"canonicalPurl": "pkg:npm/@types/node",
"aliases": ["node-types", "@types/node-fetch"],
"ecosystem": "npm",
"notes": "Node.js type definitions"
},
{
"canonicalPurl": "pkg:golang/github.com/golang/protobuf",
"aliases": ["google.golang.org/protobuf", "protobuf-go"],
"ecosystem": "golang",
"notes": "Protocol Buffers for Go"
},
{
"canonicalPurl": "pkg:golang/github.com/sirupsen/logrus",
"aliases": ["logrus", "github.com/Sirupsen/logrus"],
"ecosystem": "golang",
"notes": "Structured logger (case sensitivity)"
},
{
"canonicalPurl": "pkg:golang/github.com/stretchr/testify",
"aliases": ["testify", "github.com/stretchr/testify/assert"],
"ecosystem": "golang",
"notes": "Testing toolkit"
},
{
"canonicalPurl": "pkg:golang/github.com/gin-gonic/gin",
"aliases": ["gin", "gin-gonic"],
"ecosystem": "golang",
"notes": "HTTP web framework"
},
{
"canonicalPurl": "pkg:golang/github.com/gorilla/mux",
"aliases": ["gorilla-mux", "mux"],
"ecosystem": "golang",
"notes": "HTTP router"
},
{
"canonicalPurl": "pkg:maven/org.apache.logging.log4j/log4j-core",
"aliases": ["log4j", "log4j2", "org.apache.logging.log4j:log4j-api"],
"ecosystem": "maven",
"notes": "Apache Log4j"
},
{
"canonicalPurl": "pkg:maven/com.fasterxml.jackson.core/jackson-databind",
"aliases": ["jackson", "jackson-core", "jackson-annotations"],
"ecosystem": "maven",
"notes": "Jackson JSON library"
},
{
"canonicalPurl": "pkg:maven/org.springframework/spring-core",
"aliases": ["spring", "spring-framework", "spring-beans"],
"ecosystem": "maven",
"notes": "Spring Framework"
},
{
"canonicalPurl": "pkg:maven/org.apache.commons/commons-lang3",
"aliases": ["commons-lang", "apache-commons-lang"],
"ecosystem": "maven",
"notes": "Apache Commons Lang"
},
{
"canonicalPurl": "pkg:maven/com.google.guava/guava",
"aliases": ["guava", "google-guava"],
"ecosystem": "maven",
"notes": "Google Guava"
},
{
"canonicalPurl": "pkg:nuget/newtonsoft.json",
"aliases": ["json.net", "Newtonsoft.Json", "newtonsoft"],
"ecosystem": "nuget",
"notes": "JSON library for .NET"
},
{
"canonicalPurl": "pkg:nuget/microsoft.extensions.logging",
"aliases": ["ilogger", "Microsoft.Extensions.Logging.Abstractions"],
"ecosystem": "nuget",
"notes": ".NET logging abstractions"
},
{
"canonicalPurl": "pkg:nuget/system.text.json",
"aliases": ["stj", "System.Text.Json"],
"ecosystem": "nuget",
"notes": "Built-in .NET JSON"
},
{
"canonicalPurl": "pkg:nuget/xunit",
"aliases": ["xunit.core", "xunit.assert", "xunit.runner.visualstudio"],
"ecosystem": "nuget",
"notes": "xUnit testing framework"
},
{
"canonicalPurl": "pkg:nuget/moq",
"aliases": ["Moq", "moq4"],
"ecosystem": "nuget",
"notes": "Mocking library"
},
{
"canonicalPurl": "pkg:cargo/serde",
"aliases": ["serde_json", "serde_derive"],
"ecosystem": "cargo",
"notes": "Rust serialization framework"
},
{
"canonicalPurl": "pkg:cargo/tokio",
"aliases": ["tokio-runtime", "tokio-macros"],
"ecosystem": "cargo",
"notes": "Async runtime for Rust"
},
{
"canonicalPurl": "pkg:cargo/reqwest",
"aliases": ["hyper", "http"],
"ecosystem": "cargo",
"notes": "HTTP client (related)"
},
{
"canonicalPurl": "pkg:gem/rails",
"aliases": ["ruby-on-rails", "railties", "actionpack"],
"ecosystem": "gem",
"notes": "Ruby on Rails framework"
},
{
"canonicalPurl": "pkg:gem/rspec",
"aliases": ["rspec-core", "rspec-expectations", "rspec-mocks"],
"ecosystem": "gem",
"notes": "Ruby testing framework"
},
{
"canonicalPurl": "pkg:hex/phoenix",
"aliases": ["phoenix_html", "phoenix_live_view"],
"ecosystem": "hex",
"notes": "Elixir web framework"
},
{
"canonicalPurl": "pkg:hex/ecto",
"aliases": ["ecto_sql", "ecto_dev_logger"],
"ecosystem": "hex",
"notes": "Elixir database wrapper"
},
{
"canonicalPurl": "pkg:composer/symfony/symfony",
"aliases": ["symfony", "symfony/console", "symfony/http-foundation"],
"ecosystem": "composer",
"notes": "Symfony PHP framework"
},
{
"canonicalPurl": "pkg:composer/laravel/framework",
"aliases": ["laravel", "illuminate/support"],
"ecosystem": "composer",
"notes": "Laravel PHP framework"
},
{
"canonicalPurl": "pkg:alpine/busybox",
"aliases": ["busybox-static", "busybox-suid"],
"ecosystem": "alpine",
"notes": "BusyBox utilities"
},
{
"canonicalPurl": "pkg:alpine/musl",
"aliases": ["musl-libc", "libc-musl"],
"ecosystem": "alpine",
"notes": "musl C library"
},
{
"canonicalPurl": "pkg:rpm/fedora/glibc",
"aliases": ["glibc-common", "glibc-headers", "glibc-devel"],
"ecosystem": "rpm",
"notes": "GNU C Library for RPM"
},
{
"canonicalPurl": "pkg:rpm/fedora/openssl",
"aliases": ["openssl-libs", "openssl-devel"],
"ecosystem": "rpm",
"notes": "OpenSSL for RPM"
}
]

View File

@@ -0,0 +1,196 @@
// -----------------------------------------------------------------------------
// EnrichmentProvenanceCapture.cs
// Sprint: SPRINT_20260118_031_LIB_input_pinning_trusted_vex_keys
// Task: TASK-031-007 - Input Pinning Integration with Scanner
// Description: Implementation of provenance capture for enrichment operations
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.Core.Provenance;
/// <summary>
/// Default <see cref="IEnrichmentProvenanceCapture"/> implementation: records
/// source digests, retrieval timestamps and data-point counts for enrichment
/// inputs so a scan's enrichment stage can be replayed deterministically.
/// </summary>
public sealed class EnrichmentProvenanceCapture : IEnrichmentProvenanceCapture
{
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EnrichmentProvenanceCapture> _logger;

    /// <summary>
    /// Creates a new EnrichmentProvenanceCapture.
    /// </summary>
    public EnrichmentProvenanceCapture(
        TimeProvider timeProvider,
        ILogger<EnrichmentProvenanceCapture> logger)
    {
        ArgumentNullException.ThrowIfNull(timeProvider);
        ArgumentNullException.ThrowIfNull(logger);
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public EpssProvenanceRecord CaptureEpss(
        IEnumerable<string> cveIds,
        IEnumerable<EpssDataPoint> epssData,
        DateOnly modelDate)
    {
        // cveIds is part of the interface contract; the digest itself is
        // computed from the data points.
        var points = epssData.ToList();
        var record = EpssProvenanceRecord.Create(points, modelDate, _timeProvider.GetUtcNow());
        _logger.LogDebug(
            "Captured EPSS provenance: model_date={ModelDate}, cve_count={CveCount}, digest={Digest}",
            modelDate,
            points.Count,
            record.SourceDigest[..16]);
        return record;
    }

    /// <inheritdoc />
    public CvssProvenanceRecord CaptureCvss(
        IEnumerable<string> cveIds,
        IEnumerable<CvssDataPoint> cvssData,
        string source)
    {
        var points = cvssData.ToList();
        var record = CvssProvenanceRecord.Create(points, source, _timeProvider.GetUtcNow());
        _logger.LogDebug(
            "Captured CVSS provenance: source={Source}, cve_count={CveCount}, digest={Digest}",
            source,
            points.Count,
            record.SourceDigest[..16]);
        return record;
    }

    /// <inheritdoc />
    public ReachabilityProvenanceRecord CaptureReachability(
        IEnumerable<string> componentPurls,
        IEnumerable<ReachabilityDataPoint> reachabilityData,
        string analyzerVersion)
    {
        var points = reachabilityData.ToList();
        var record = ReachabilityProvenanceRecord.Create(points, analyzerVersion, _timeProvider.GetUtcNow());
        _logger.LogDebug(
            "Captured reachability provenance: analyzer_version={AnalyzerVersion}, component_count={ComponentCount}, digest={Digest}",
            analyzerVersion,
            points.Count,
            record.SourceDigest[..16]);
        return record;
    }
}
/// <summary>
/// Aggregated enrichment provenance for a scan job.
/// </summary>
public sealed record EnrichmentProvenance
{
    /// <summary>EPSS provenance record.</summary>
    public EpssProvenanceRecord? Epss { get; init; }

    /// <summary>CVSS provenance record.</summary>
    public CvssProvenanceRecord? Cvss { get; init; }

    /// <summary>Reachability provenance record.</summary>
    public ReachabilityProvenanceRecord? Reachability { get; init; }

    /// <summary>Whether all expected provenance is captured.</summary>
    public bool IsComplete => Epss is not null && Cvss is not null && Reachability is not null;

    /// <summary>
    /// Combined lowercase-hex SHA-256 over the three source digests joined
    /// with '|', or null until <see cref="IsComplete"/>. Recomputed on every
    /// access.
    /// </summary>
    public string? CombinedDigest
    {
        get
        {
            if (!IsComplete)
            {
                return null;
            }

            // One-shot HashData avoids allocating and disposing a SHA256
            // instance on every property access.
            var combined = $"{Epss!.SourceDigest}|{Cvss!.SourceDigest}|{Reachability!.SourceDigest}";
            var hash = System.Security.Cryptography.SHA256.HashData(
                System.Text.Encoding.UTF8.GetBytes(combined));
            return Convert.ToHexString(hash).ToLowerInvariant();
        }
    }
}
/// <summary>
/// Thread-safe accumulator for enrichment provenance captured during a scan job.
/// </summary>
public sealed class EnrichmentProvenanceContext
{
    private readonly object _gate = new();
    private EpssProvenanceRecord? _epss;
    private CvssProvenanceRecord? _cvss;
    private ReachabilityProvenanceRecord? _reachability;

    /// <summary>
    /// Sets the EPSS provenance record.
    /// </summary>
    public void SetEpss(EpssProvenanceRecord record)
    {
        lock (_gate) { _epss = record; }
    }

    /// <summary>
    /// Sets the CVSS provenance record.
    /// </summary>
    public void SetCvss(CvssProvenanceRecord record)
    {
        lock (_gate) { _cvss = record; }
    }

    /// <summary>
    /// Sets the reachability provenance record.
    /// </summary>
    public void SetReachability(ReachabilityProvenanceRecord record)
    {
        lock (_gate) { _reachability = record; }
    }

    /// <summary>
    /// Gets a consistent snapshot of the aggregated enrichment provenance.
    /// </summary>
    public EnrichmentProvenance GetProvenance()
    {
        lock (_gate)
        {
            return new EnrichmentProvenance
            {
                Epss = _epss,
                Cvss = _cvss,
                Reachability = _reachability
            };
        }
    }

    /// <summary>
    /// Clears all captured provenance.
    /// </summary>
    public void Clear()
    {
        lock (_gate)
        {
            _epss = null;
            _cvss = null;
            _reachability = null;
        }
    }
}

View File

@@ -0,0 +1,284 @@
// -----------------------------------------------------------------------------
// IEnrichmentProvenanceCapture.cs
// Sprint: SPRINT_20260118_031_LIB_input_pinning_trusted_vex_keys
// Task: TASK-031-007 - Input Pinning Integration with Scanner
// Description: Interface for capturing provenance during enrichment operations
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Security.Cryptography;
namespace StellaOps.Scanner.Core.Provenance;
/// <summary>
/// Captures provenance data during enrichment operations.
/// Enables deterministic replay by recording source digests and timestamps.
/// </summary>
/// <remarks>
/// Each capture method returns an immutable record carrying a digest of the
/// retrieved data, the retrieval timestamp, and a data source identifier.
/// </remarks>
public interface IEnrichmentProvenanceCapture
{
/// <summary>
/// Captures EPSS enrichment provenance.
/// </summary>
/// <param name="cveIds">CVE IDs being enriched.</param>
/// <param name="epssData">EPSS data retrieved.</param>
/// <param name="modelDate">EPSS model date.</param>
/// <returns>Captured provenance record.</returns>
EpssProvenanceRecord CaptureEpss(
IEnumerable<string> cveIds,
IEnumerable<EpssDataPoint> epssData,
DateOnly modelDate);
/// <summary>
/// Captures CVSS enrichment provenance.
/// </summary>
/// <param name="cveIds">CVE IDs being enriched.</param>
/// <param name="cvssData">CVSS data retrieved.</param>
/// <param name="source">Data source (e.g., "nvd").</param>
/// <returns>Captured provenance record.</returns>
CvssProvenanceRecord CaptureCvss(
IEnumerable<string> cveIds,
IEnumerable<CvssDataPoint> cvssData,
string source);
/// <summary>
/// Captures reachability enrichment provenance.
/// </summary>
/// <param name="componentPurls">Component PURLs analyzed.</param>
/// <param name="reachabilityData">Reachability results.</param>
/// <param name="analyzerVersion">Analyzer version used.</param>
/// <returns>Captured provenance record.</returns>
ReachabilityProvenanceRecord CaptureReachability(
IEnumerable<string> componentPurls,
IEnumerable<ReachabilityDataPoint> reachabilityData,
string analyzerVersion);
}
/// <summary>
/// EPSS data point for provenance capture. Immutable; all fields are required.
/// </summary>
public sealed record EpssDataPoint
{
/// <summary>CVE identifier.</summary>
public required string CveId { get; init; }
/// <summary>EPSS score [0, 1].</summary>
public required double Score { get; init; }
/// <summary>EPSS percentile [0, 1].</summary>
public required double Percentile { get; init; }
}
/// <summary>
/// CVSS data point for provenance capture. Immutable; the vector is optional.
/// </summary>
public sealed record CvssDataPoint
{
/// <summary>CVE identifier.</summary>
public required string CveId { get; init; }
/// <summary>CVSS base score [0, 10].</summary>
public required double BaseScore { get; init; }
/// <summary>CVSS version (3.0, 3.1, 4.0).</summary>
public required string Version { get; init; }
/// <summary>Full CVSS vector string. Optional; null when not available.</summary>
public string? Vector { get; init; }
}
/// <summary>
/// Reachability data point for provenance capture.
/// </summary>
public sealed record ReachabilityDataPoint
{
/// <summary>Component PURL.</summary>
public required string ComponentPurl { get; init; }
/// <summary>Reachability state.</summary>
public required string State { get; init; }
/// <summary>Confidence [0, 1]. Optional; defaults to 0 when not supplied.</summary>
public double Confidence { get; init; }
}
/// <summary>
/// Base provenance record with common fields shared by all enrichment
/// provenance types (EPSS, CVSS, reachability).
/// </summary>
public abstract record ProvenanceRecord
{
/// <summary>SHA-256 digest of the source data (lowercase hex).</summary>
public required string SourceDigest { get; init; }
/// <summary>When the data was retrieved.</summary>
public required DateTimeOffset RetrievedAt { get; init; }
/// <summary>Data source identifier.</summary>
public required string Source { get; init; }
/// <summary>Number of data points captured. Optional; defaults to 0.</summary>
public int DataPointCount { get; init; }
}
/// <summary>
/// EPSS enrichment provenance record.
/// </summary>
public sealed record EpssProvenanceRecord : ProvenanceRecord
{
    /// <summary>EPSS model date.</summary>
    public required DateOnly ModelDate { get; init; }

    /// <summary>
    /// Creates a provenance record for EPSS enrichment. The digest is
    /// order-independent (data points are sorted by CVE id before hashing).
    /// </summary>
    public static EpssProvenanceRecord Create(
        IEnumerable<EpssDataPoint> data,
        DateOnly modelDate,
        DateTimeOffset? retrievedAt = null)
    {
        var dataList = data.ToList();
        var digest = ComputeDigest(dataList, modelDate);
        return new EpssProvenanceRecord
        {
            SourceDigest = digest,
            RetrievedAt = retrievedAt ?? DateTimeOffset.UtcNow,
            Source = "first.org/epss",
            DataPointCount = dataList.Count,
            ModelDate = modelDate
        };
    }

    /// <summary>
    /// Computes a deterministic SHA-256 digest over the model date and the
    /// CVE-sorted data points. Fix: numeric formatting now uses the invariant
    /// culture — the previous culture-sensitive ToString("F6") produced
    /// different digests depending on the host locale's decimal separator
    /// (e.g. "0,5" vs "0.5"), breaking cross-machine reproducibility.
    /// </summary>
    private static string ComputeDigest(IReadOnlyList<EpssDataPoint> data, DateOnly modelDate)
    {
        using var sha256 = SHA256.Create();
        using var stream = new MemoryStream();
        using var writer = new StreamWriter(stream);

        // Write model date (invariant culture for stable digit/separator output)
        writer.Write(modelDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
        writer.Write('|');

        // Write sorted CVE data for determinism
        foreach (var point in data.OrderBy(d => d.CveId, StringComparer.Ordinal))
        {
            writer.Write(point.CveId);
            writer.Write(':');
            writer.Write(point.Score.ToString("F6", CultureInfo.InvariantCulture));
            writer.Write(':');
            writer.Write(point.Percentile.ToString("F6", CultureInfo.InvariantCulture));
            writer.Write('|');
        }

        writer.Flush();
        stream.Position = 0;
        var hash = sha256.ComputeHash(stream);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
/// <summary>
/// CVSS enrichment provenance record.
/// </summary>
public sealed record CvssProvenanceRecord : ProvenanceRecord
{
    /// <summary>CVSS version used ("unknown" when no data points were supplied).</summary>
    public required string CvssVersion { get; init; }

    /// <summary>
    /// Creates a provenance record for CVSS enrichment. The digest is
    /// order-independent (data points are sorted by CVE id before hashing).
    /// </summary>
    public static CvssProvenanceRecord Create(
        IEnumerable<CvssDataPoint> data,
        string source,
        DateTimeOffset? retrievedAt = null)
    {
        var dataList = data.ToList();
        var digest = ComputeDigest(dataList);
        // NOTE(review): only the first data point's CVSS version is recorded —
        // assumes all points share one version; confirm with callers.
        var version = dataList.FirstOrDefault()?.Version ?? "unknown";
        return new CvssProvenanceRecord
        {
            SourceDigest = digest,
            RetrievedAt = retrievedAt ?? DateTimeOffset.UtcNow,
            Source = source,
            DataPointCount = dataList.Count,
            CvssVersion = version
        };
    }

    /// <summary>
    /// Computes a deterministic SHA-256 digest over the CVE-sorted data points.
    /// Fix: scores are formatted with the invariant culture — the previous
    /// culture-sensitive ToString("F1") made the digest depend on the host
    /// locale's decimal separator, breaking reproducibility.
    /// </summary>
    private static string ComputeDigest(IReadOnlyList<CvssDataPoint> data)
    {
        using var sha256 = SHA256.Create();
        using var stream = new MemoryStream();
        using var writer = new StreamWriter(stream);

        // Write sorted CVE data for determinism
        foreach (var point in data.OrderBy(d => d.CveId, StringComparer.Ordinal))
        {
            writer.Write(point.CveId);
            writer.Write(':');
            writer.Write(point.BaseScore.ToString("F1", CultureInfo.InvariantCulture));
            writer.Write(':');
            writer.Write(point.Version);
            writer.Write('|');
        }

        writer.Flush();
        stream.Position = 0;
        var hash = sha256.ComputeHash(stream);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
/// <summary>
/// Reachability enrichment provenance record.
/// </summary>
public sealed record ReachabilityProvenanceRecord : ProvenanceRecord
{
    /// <summary>Analyzer version used.</summary>
    public required string AnalyzerVersion { get; init; }

    /// <summary>
    /// Creates a provenance record for reachability enrichment. The digest is
    /// order-independent (data points are sorted by component PURL).
    /// </summary>
    public static ReachabilityProvenanceRecord Create(
        IEnumerable<ReachabilityDataPoint> data,
        string analyzerVersion,
        DateTimeOffset? retrievedAt = null)
    {
        var dataList = data.ToList();
        var digest = ComputeDigest(dataList, analyzerVersion);
        return new ReachabilityProvenanceRecord
        {
            SourceDigest = digest,
            RetrievedAt = retrievedAt ?? DateTimeOffset.UtcNow,
            Source = "stellaops/reachability",
            DataPointCount = dataList.Count,
            AnalyzerVersion = analyzerVersion
        };
    }

    /// <summary>
    /// Computes a deterministic SHA-256 digest over the analyzer version and
    /// the PURL-sorted data points. Fix: confidence values are formatted with
    /// the invariant culture — the previous culture-sensitive ToString("F2")
    /// made the digest depend on the host locale's decimal separator,
    /// breaking reproducibility.
    /// </summary>
    private static string ComputeDigest(IReadOnlyList<ReachabilityDataPoint> data, string analyzerVersion)
    {
        using var sha256 = SHA256.Create();
        using var stream = new MemoryStream();
        using var writer = new StreamWriter(stream);

        // Write analyzer version
        writer.Write(analyzerVersion);
        writer.Write('|');

        // Write sorted component data for determinism
        foreach (var point in data.OrderBy(d => d.ComponentPurl, StringComparer.Ordinal))
        {
            writer.Write(point.ComponentPurl);
            writer.Write(':');
            writer.Write(point.State);
            writer.Write(':');
            writer.Write(point.Confidence.ToString("F2", CultureInfo.InvariantCulture));
            writer.Write('|');
        }

        writer.Flush();
        stream.Position = 0;
        var hash = sha256.ComputeHash(stream);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,343 @@
using System.Collections.Immutable;
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Manifest.Resolution;
namespace StellaOps.Scanner.Delta;
/// <summary>
/// Implementation of IDeltaLayerScanner that scans only changed layers between image versions.
/// Reduces scan time and CVE churn by reusing cached per-layer SBOMs.
/// </summary>
public sealed class DeltaLayerScanner : IDeltaLayerScanner
{
// Collaborators: layer/diffID resolution, per-layer SBOM cache (keyed by diffID),
// per-layer scanning pipeline, and the composer that merges layer SBOMs.
private readonly ILayerDigestResolver _layerResolver;
private readonly ILayerSbomCas _sbomCas;
private readonly ILayerScannerPipeline _scannerPipeline;
private readonly ISbomComposer _sbomComposer;
private readonly ILogger<DeltaLayerScanner> _logger;
/// <summary>
/// Wires the delta scanner's dependencies.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any dependency is null.</exception>
public DeltaLayerScanner(
ILayerDigestResolver layerResolver,
ILayerSbomCas sbomCas,
ILayerScannerPipeline scannerPipeline,
ISbomComposer sbomComposer,
ILogger<DeltaLayerScanner> logger)
{
_layerResolver = layerResolver ?? throw new ArgumentNullException(nameof(layerResolver));
_sbomCas = sbomCas ?? throw new ArgumentNullException(nameof(sbomCas));
_scannerPipeline = scannerPipeline ?? throw new ArgumentNullException(nameof(scannerPipeline));
_sbomComposer = sbomComposer ?? throw new ArgumentNullException(nameof(sbomComposer));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<DeltaScanResult> ScanDeltaAsync(
string oldImage,
string newImage,
DeltaScanOptions? options = null,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(oldImage);
ArgumentException.ThrowIfNullOrWhiteSpace(newImage);
options ??= new DeltaScanOptions();
var totalStopwatch = Stopwatch.StartNew();
_logger.LogInformation("Starting delta scan: {OldImage} -> {NewImage}", oldImage, newImage);
// Resolve layers for both images in parallel
var resolveOptions = new LayerResolutionOptions
{
ComputeDiffIds = true,
Platform = options.Platform
};
var oldResolvedTask = _layerResolver.ResolveLayersAsync(oldImage, resolveOptions, cancellationToken);
var newResolvedTask = _layerResolver.ResolveLayersAsync(newImage, resolveOptions, cancellationToken);
var oldResolved = await oldResolvedTask.ConfigureAwait(false);
var newResolved = await newResolvedTask.ConfigureAwait(false);
// Identify layer changes using diffID (content hash)
var oldDiffIds = oldResolved.Layers
.Where(l => !string.IsNullOrWhiteSpace(l.DiffId))
.Select(l => l.DiffId!)
.ToHashSet(StringComparer.OrdinalIgnoreCase);
var addedLayers = new List<LayerChangeInfo>();
var unchangedLayers = new List<LayerChangeInfo>();
var removedLayers = new List<LayerChangeInfo>();
// Categorize new image layers
foreach (var layer in newResolved.Layers)
{
if (string.IsNullOrWhiteSpace(layer.DiffId))
{
// Treat as added if diffID unknown
addedLayers.Add(CreateLayerInfo(layer));
}
else if (oldDiffIds.Contains(layer.DiffId))
{
unchangedLayers.Add(CreateLayerInfo(layer));
}
else
{
addedLayers.Add(CreateLayerInfo(layer));
}
}
// Find removed layers
var newDiffIds = newResolved.Layers
.Where(l => !string.IsNullOrWhiteSpace(l.DiffId))
.Select(l => l.DiffId!)
.ToHashSet(StringComparer.OrdinalIgnoreCase);
foreach (var layer in oldResolved.Layers)
{
if (!string.IsNullOrWhiteSpace(layer.DiffId) && !newDiffIds.Contains(layer.DiffId))
{
removedLayers.Add(CreateLayerInfo(layer));
}
}
_logger.LogInformation(
"Layer analysis: {Added} added, {Removed} removed, {Unchanged} unchanged",
addedLayers.Count, removedLayers.Count, unchangedLayers.Count);
// Collect SBOMs for unchanged layers from cache
var layerSboms = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
var cachedComponentCount = 0;
if (options.UseCachedSboms && !options.ForceFullScan)
{
for (var i = 0; i < unchangedLayers.Count; i++)
{
var layer = unchangedLayers[i];
var cachedSbom = await _sbomCas.GetAsync(layer.DiffId, options.SbomFormat, cancellationToken)
.ConfigureAwait(false);
if (!string.IsNullOrWhiteSpace(cachedSbom))
{
layerSboms[layer.DiffId] = cachedSbom;
var componentCount = CountComponents(cachedSbom);
unchangedLayers[i] = layer with { FromCache = true, ComponentCount = componentCount };
cachedComponentCount += componentCount;
}
else
{
// Layer not in cache - need to scan it
_logger.LogDebug("Unchanged layer {DiffId} not in cache, will scan", layer.DiffId);
addedLayers.Add(layer);
unchangedLayers.RemoveAt(i);
i--;
}
}
_logger.LogDebug("Retrieved {Count} cached layer SBOMs", layerSboms.Count);
}
// Scan added layers
var addedScanStopwatch = Stopwatch.StartNew();
var addedComponentCount = 0;
for (var i = 0; i < addedLayers.Count; i++)
{
var layer = addedLayers[i];
cancellationToken.ThrowIfCancellationRequested();
var layerSbom = await _scannerPipeline.ScanLayerAsync(
newResolved.ImageReference,
layer.DiffId,
layer.LayerDigest,
options.SbomFormat,
cancellationToken).ConfigureAwait(false);
if (!string.IsNullOrWhiteSpace(layerSbom))
{
layerSboms[layer.DiffId] = layerSbom;
// Store in cache for future use
await _sbomCas.StoreAsync(layer.DiffId, options.SbomFormat, layerSbom, cancellationToken)
.ConfigureAwait(false);
var componentCount = CountComponents(layerSbom);
addedLayers[i] = layer with { ComponentCount = componentCount };
addedComponentCount += componentCount;
}
}
addedScanStopwatch.Stop();
_logger.LogInformation("Scanned {Count} added layers in {Duration:N2}s",
addedLayers.Count, addedScanStopwatch.Elapsed.TotalSeconds);
// Compose final SBOM in layer order
var orderedSboms = newResolved.Layers
.Where(l => !string.IsNullOrWhiteSpace(l.DiffId) && layerSboms.ContainsKey(l.DiffId!))
.Select(l => layerSboms[l.DiffId!])
.ToList();
var compositeSbom = await _sbomComposer.ComposeAsync(
orderedSboms,
newResolved.ImageReference,
options.SbomFormat,
options.IncludeLayerAttribution,
cancellationToken).ConfigureAwait(false);
totalStopwatch.Stop();
var result = new DeltaScanResult
{
OldImage = oldImage,
OldManifestDigest = oldResolved.ManifestDigest,
NewImage = newImage,
NewManifestDigest = newResolved.ManifestDigest,
AddedLayers = [.. addedLayers],
RemovedLayers = [.. removedLayers],
UnchangedLayers = [.. unchangedLayers],
CompositeSbom = compositeSbom,
SbomFormat = options.SbomFormat,
ScanDuration = totalStopwatch.Elapsed,
AddedLayersScanDuration = addedScanStopwatch.Elapsed,
AddedComponentCount = addedComponentCount,
CachedComponentCount = cachedComponentCount,
UsedCache = options.UseCachedSboms && !options.ForceFullScan
};
_logger.LogInformation(
"Delta scan complete: {LayerReuse:P1} reuse, {Duration:N2}s total, {AddedDuration:N2}s scanning added layers",
result.LayerReuseRatio, result.ScanDuration.TotalSeconds, result.AddedLayersScanDuration.TotalSeconds);
return result;
}
/// <summary>
/// Computes which layers differ between two images by diffID, without scanning anything.
/// </summary>
/// <param name="oldImage">Baseline image reference.</param>
/// <param name="newImage">Candidate image reference.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Counts plus the added/removed/unchanged diffID sets.</returns>
public async Task<LayerChangeSummary> IdentifyLayerChangesAsync(
    string oldImage,
    string newImage,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(oldImage);
    ArgumentException.ThrowIfNullOrWhiteSpace(newImage);

    var resolveOptions = new LayerResolutionOptions { ComputeDiffIds = true };

    // Start both resolutions before awaiting either so they run concurrently.
    var oldPending = _layerResolver.ResolveLayersAsync(oldImage, resolveOptions, cancellationToken);
    var newPending = _layerResolver.ResolveLayersAsync(newImage, resolveOptions, cancellationToken);
    var oldResolved = await oldPending.ConfigureAwait(false);
    var newResolved = await newPending.ConfigureAwait(false);

    // Build case-insensitive diffID sets; layers with no diffID are ignored.
    var oldDiffIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var layer in oldResolved.Layers)
    {
        if (!string.IsNullOrWhiteSpace(layer.DiffId))
        {
            oldDiffIds.Add(layer.DiffId!);
        }
    }

    var newDiffIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var layer in newResolved.Layers)
    {
        if (!string.IsNullOrWhiteSpace(layer.DiffId))
        {
            newDiffIds.Add(layer.DiffId!);
        }
    }

    // Set arithmetic: added = new-only, removed = old-only, unchanged = intersection.
    var added = newDiffIds.Where(id => !oldDiffIds.Contains(id)).ToList();
    var removed = oldDiffIds.Where(id => !newDiffIds.Contains(id)).ToList();
    var unchanged = newDiffIds.Where(oldDiffIds.Contains).ToList();

    return new LayerChangeSummary
    {
        OldImage = oldImage,
        NewImage = newImage,
        OldLayerCount = oldResolved.LayerCount,
        NewLayerCount = newResolved.LayerCount,
        AddedCount = added.Count,
        RemovedCount = removed.Count,
        UnchangedCount = unchanged.Count,
        AddedDiffIds = [.. added],
        RemovedDiffIds = [.. removed],
        UnchangedDiffIds = [.. unchanged]
    };
}
/// <summary>
/// Maps resolved layer provenance into a <see cref="LayerChangeInfo"/>;
/// a missing diffID is normalized to an empty string.
/// </summary>
private static LayerChangeInfo CreateLayerInfo(LayerProvenance layer)
{
    return new LayerChangeInfo
    {
        LayerDigest = layer.LayerDigest,
        DiffId = layer.DiffId ?? string.Empty,
        LayerIndex = layer.LayerIndex,
        Size = layer.Size,
        MediaType = layer.MediaType
    };
}
/// <summary>
/// Heuristically estimates the component count of an SBOM document by counting
/// format-specific JSON markers and taking whichever format matched.
/// </summary>
/// <param name="sbom">Raw SBOM JSON (CycloneDX or SPDX); null/blank yields 0.</param>
private static int CountComponents(string sbom)
{
    if (string.IsNullOrWhiteSpace(sbom))
    {
        return 0;
    }

    // "bom-ref" marks CycloneDX components; "SPDXID" marks SPDX elements.
    // A document in one format has ~0 markers of the other, so Max picks the right one.
    var cycloneDxMarkers = CountOccurrences(sbom, "\"bom-ref\"");
    var spdxMarkers = CountOccurrences(sbom, "\"SPDXID\"");
    return cycloneDxMarkers > spdxMarkers ? cycloneDxMarkers : spdxMarkers;
}
/// <summary>
/// Counts non-overlapping occurrences of <paramref name="pattern"/> in
/// <paramref name="text"/> using ordinal comparison.
/// </summary>
private static int CountOccurrences(string text, string pattern)
{
    var occurrences = 0;

    // Advance past each match by the full pattern length so matches never overlap.
    for (var searchFrom = text.IndexOf(pattern, StringComparison.Ordinal);
         searchFrom >= 0;
         searchFrom = text.IndexOf(pattern, searchFrom + pattern.Length, StringComparison.Ordinal))
    {
        occurrences++;
    }

    return occurrences;
}
}
/// <summary>
/// Interface for per-layer SBOM content-addressable storage.
/// Entries are keyed by layer diffID, so identical layer content always maps to the same SBOM.
/// NOTE(review): a near-identical, more fully documented ILayerSbomCas (including a
/// DeleteAsync member) exists in StellaOps.Scanner.Cache.LayerSbomCas — consider
/// consolidating the two declarations.
/// </summary>
public interface ILayerSbomCas
{
    /// <summary>
    /// Stores a per-layer SBOM.
    /// </summary>
    /// <param name="diffId">Layer diffID (sha256:...).</param>
    /// <param name="format">SBOM format (e.g. cyclonedx or spdx).</param>
    /// <param name="sbom">SBOM content.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task StoreAsync(string diffId, string format, string sbom, CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves a per-layer SBOM.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="format">SBOM format.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>SBOM content, or null when not cached.</returns>
    Task<string?> GetAsync(string diffId, string format, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks whether a per-layer SBOM exists.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="format">SBOM format.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True when an entry exists for the diffID/format pair.</returns>
    Task<bool> ExistsAsync(string diffId, string format, CancellationToken cancellationToken = default);
}
/// <summary>
/// Interface for scanning individual layers.
/// </summary>
public interface ILayerScannerPipeline
{
    /// <summary>
    /// Scans a single layer and produces an SBOM.
    /// </summary>
    /// <param name="imageReference">Image the layer belongs to.</param>
    /// <param name="diffId">Uncompressed layer content hash (diffID).</param>
    /// <param name="layerDigest">Compressed layer digest.</param>
    /// <param name="sbomFormat">Output SBOM format (e.g. cyclonedx or spdx).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>SBOM content, or null when the layer produced nothing scannable.</returns>
    Task<string?> ScanLayerAsync(
        string imageReference,
        string diffId,
        string layerDigest,
        string sbomFormat,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Interface for composing multiple layer SBOMs into a single image SBOM.
/// </summary>
public interface ISbomComposer
{
    /// <summary>
    /// Composes multiple layer SBOMs into a single image SBOM.
    /// </summary>
    /// <param name="layerSboms">Per-layer SBOMs, expected in image layer order.</param>
    /// <param name="imageReference">Image the composite SBOM describes.</param>
    /// <param name="sbomFormat">SBOM format of inputs and output.</param>
    /// <param name="includeLayerAttribution">Whether to annotate components with their originating layer.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Composite SBOM content, or null when composition produced nothing.</returns>
    Task<string?> ComposeAsync(
        IReadOnlyList<string> layerSboms,
        string imageReference,
        string sbomFormat,
        bool includeLayerAttribution,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,352 @@
// -----------------------------------------------------------------------------
// DeltaEvidenceComposer.cs
// Sprint: SPRINT_20260118_026_Scanner_delta_scanning_engine
// Task: TASK-026-05 - Delta Evidence Composer
// Description: Implementation of delta scan evidence composition
// -----------------------------------------------------------------------------
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.Delta.Evidence;
/// <summary>
/// Composes signed, DSSE-wrapped delta evidence for policy gates.
/// Produces deterministic, canonical JSON for consistent hashing.
/// </summary>
public sealed class DeltaEvidenceComposer : IDeltaEvidenceComposer
{
    // Shared serializer settings so statement and predicate hashing always see
    // the same byte stream for the same object (within one scanner version).
    private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        // NOTE(review): PropertyNameCaseInsensitive only affects deserialization;
        // serialized key order actually comes from record property declaration
        // order — confirm that is an acceptable definition of "canonical" here.
        PropertyNameCaseInsensitive = false
    };

    // Optional collaborators: composition degrades gracefully when either is absent.
    private readonly IEvidenceSigningService? _signingService;
    private readonly IRekorSubmissionService? _rekorService;
    private readonly ILogger<DeltaEvidenceComposer> _logger;
    private readonly TimeProvider _timeProvider;

    // Version embedded in the predicate; falls back to "1.0.0" when the assembly
    // carries no informational version attribute.
    private static readonly string ScannerVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()?.InformationalVersion ?? "1.0.0";

    /// <summary>
    /// Creates a composer.
    /// </summary>
    /// <param name="logger">Logger (required).</param>
    /// <param name="timeProvider">Clock source; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <param name="signingService">Optional signing backend; when null, signing requests log a warning and are skipped.</param>
    /// <param name="rekorService">Optional Rekor client; when null, transparency-log submission is skipped.</param>
    public DeltaEvidenceComposer(
        ILogger<DeltaEvidenceComposer> logger,
        TimeProvider? timeProvider = null,
        IEvidenceSigningService? signingService = null,
        IRekorSubmissionService? rekorService = null)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _signingService = signingService;
        _rekorService = rekorService;
    }

    /// <inheritdoc />
    public async Task<DeltaScanEvidence> ComposeAsync(
        DeltaScanResult scanResult,
        EvidenceCompositionOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(scanResult);
        options ??= new EvidenceCompositionOptions();
        // NOTE(review): options.IncludeLayerDetails is never consulted in this class;
        // layer diffIDs are always embedded in the predicate — confirm intended.
        var composedAt = _timeProvider.GetUtcNow();
        var scanId = options.ScanId ?? Guid.NewGuid().ToString("N");
        // Create the in-toto statement
        var statement = CreateStatement(scanResult, scanId, composedAt);
        // Serialize to canonical JSON once; payload bytes feed base64, hash, and signing.
        var statementJson = SerializeCanonical(statement);
        var payloadBytes = Encoding.UTF8.GetBytes(statementJson);
        var payloadBase64 = Convert.ToBase64String(payloadBytes);
        var payloadHash = ComputeHash(payloadBytes);
        // Sign if requested; a missing signing service downgrades to a warning,
        // producing an unsigned envelope.
        var signatures = new List<DsseSignature>();
        if (options.Sign && _signingService != null)
        {
            var signature = await _signingService.SignAsync(
                payloadBytes,
                options.SigningKeyId,
                cancellationToken).ConfigureAwait(false);
            signatures.Add(new DsseSignature
            {
                KeyId = signature.KeyId,
                Sig = signature.SignatureBase64
            });
            _logger.LogDebug("Signed delta scan evidence with key {KeyId}", signature.KeyId);
        }
        else if (options.Sign)
        {
            _logger.LogWarning("Signing requested but no signing service available");
        }
        var envelope = new DeltaScanDsseEnvelope
        {
            PayloadType = "application/vnd.in-toto+json",
            Payload = payloadBase64,
            Signatures = signatures
        };
        // Compute idempotency key from the old+new manifest digest pair.
        var idempotencyKey = ComputeIdempotencyKey(
            scanResult.OldManifestDigest,
            scanResult.NewManifestDigest);
        // Submit to Rekor if requested; submission is best-effort and never fails the compose.
        RekorEntryInfo? rekorEntry = null;
        if (options.SubmitToRekor && _rekorService != null)
        {
            try
            {
                rekorEntry = await _rekorService.SubmitAsync(
                    envelope,
                    idempotencyKey,
                    cancellationToken).ConfigureAwait(false);
                _logger.LogInformation(
                    "Submitted delta scan evidence to Rekor, logIndex={LogIndex}",
                    rekorEntry.LogIndex);
            }
            // NOTE(review): this broad catch also swallows OperationCanceledException —
            // consider letting cancellation propagate.
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to submit to Rekor, continuing without transparency log");
            }
        }
        return new DeltaScanEvidence
        {
            Envelope = envelope,
            Statement = statement,
            PayloadHash = payloadHash,
            RekorEntry = rekorEntry,
            IdempotencyKey = idempotencyKey,
            ComposedAt = composedAt
        };
    }

    /// <inheritdoc />
    /// <remarks>Uses a freshly generated scan ID and the current clock value each call.</remarks>
    public InTotoStatement CreateStatement(DeltaScanResult scanResult)
    {
        return CreateStatement(scanResult, Guid.NewGuid().ToString("N"), _timeProvider.GetUtcNow());
    }

    /// <summary>
    /// Builds the in-toto statement: subjects are the new image (always) and the
    /// old image (when its manifest digest is present), wrapped around the predicate.
    /// </summary>
    private InTotoStatement CreateStatement(
        DeltaScanResult scanResult,
        string scanId,
        DateTimeOffset scannedAt)
    {
        var predicate = CreatePredicate(scanResult, scanId, scannedAt);
        // Create subjects for both old and new images
        var subjects = new List<InTotoSubject>
        {
            new()
            {
                Name = scanResult.NewImage,
                Digest = new Dictionary<string, string>
                {
                    ["sha256"] = ExtractDigestValue(scanResult.NewManifestDigest)
                }
            }
        };
        // Add old image as second subject
        if (!string.IsNullOrWhiteSpace(scanResult.OldManifestDigest))
        {
            subjects.Add(new InTotoSubject
            {
                Name = scanResult.OldImage,
                Digest = new Dictionary<string, string>
                {
                    ["sha256"] = ExtractDigestValue(scanResult.OldManifestDigest)
                }
            });
        }
        return new InTotoStatement
        {
            Subject = subjects,
            PredicateType = DeltaScanPredicate.PredicateType,
            Predicate = predicate
        };
    }

    /// <summary>
    /// Maps a <see cref="DeltaScanResult"/> into the attestation predicate.
    /// Component-change detail beyond added/cached counts is not tracked yet
    /// (see inline comments on the zeroed fields).
    /// </summary>
    private static DeltaScanPredicate CreatePredicate(
        DeltaScanResult scanResult,
        string scanId,
        DateTimeOffset scannedAt)
    {
        // Calculate layer reuse ratio over all layers (added + removed + unchanged).
        var totalLayers = scanResult.AddedLayers.Count +
                          scanResult.RemovedLayers.Count +
                          scanResult.UnchangedLayers.Count;
        var reuseRatio = totalLayers > 0
            ? (double)scanResult.UnchangedLayers.Count / totalLayers
            : 0.0;
        return new DeltaScanPredicate
        {
            ScanId = scanId,
            ScannedAt = scannedAt,
            Scanner = new ScannerInfo
            {
                Name = "StellaOps.Scanner.Delta",
                Version = ScannerVersion,
                // NOTE(review): SBOM tool name/version are hardcoded here — confirm
                // they should instead be sourced from the actual pipeline in use.
                SbomTool = "syft",
                SbomToolVersion = "0.100.0"
            },
            OldImage = new ImageSubject
            {
                Reference = scanResult.OldImage,
                ManifestDigest = scanResult.OldManifestDigest,
                // Old image layer count reconstructed as unchanged + removed.
                LayerCount = scanResult.UnchangedLayers.Count + scanResult.RemovedLayers.Count
            },
            NewImage = new ImageSubject
            {
                Reference = scanResult.NewImage,
                ManifestDigest = scanResult.NewManifestDigest,
                // New image layer count reconstructed as unchanged + added.
                LayerCount = scanResult.UnchangedLayers.Count + scanResult.AddedLayers.Count
            },
            LayerChanges = new LayerChangesInfo
            {
                Added = scanResult.AddedLayers.Count,
                Removed = scanResult.RemovedLayers.Count,
                Unchanged = scanResult.UnchangedLayers.Count,
                ReuseRatio = Math.Round(reuseRatio, 4),
                AddedDiffIds = scanResult.AddedLayers.Select(l => l.DiffId).ToList(),
                RemovedDiffIds = scanResult.RemovedLayers.Select(l => l.DiffId).ToList()
            },
            ComponentChanges = new ComponentChangesInfo
            {
                Added = scanResult.AddedComponentCount,
                Removed = 0, // Would need to track this in DeltaScanResult
                VersionChanged = 0,
                OtherModified = 0,
                Unchanged = scanResult.CachedComponentCount,
                TotalComponents = scanResult.AddedComponentCount + scanResult.CachedComponentCount,
                IsBreaking = false, // Would need to determine from SBOM diff
                CachedComponentCount = scanResult.CachedComponentCount,
                ScannedComponentCount = scanResult.AddedComponentCount
            },
            Metrics = new ScanMetrics
            {
                TotalDurationMs = (long)scanResult.ScanDuration.TotalMilliseconds,
                AddedLayersScanDurationMs = (long)scanResult.AddedLayersScanDuration.TotalMilliseconds,
                UsedCache = scanResult.UsedCache,
                // Cache hit ratio is omitted (null) when nothing came from cache.
                CacheHitRatio = scanResult.CachedComponentCount > 0
                    ? (double)scanResult.CachedComponentCount /
                      (scanResult.AddedComponentCount + scanResult.CachedComponentCount)
                    : null
            },
            SbomFormat = scanResult.SbomFormat,
            // SBOM digest carries a "sha256:" prefix; PayloadHash elsewhere does not.
            SbomDigest = !string.IsNullOrWhiteSpace(scanResult.CompositeSbom)
                ? "sha256:" + ComputeHash(Encoding.UTF8.GetBytes(scanResult.CompositeSbom))
                : null
        };
    }

    /// <inheritdoc />
    public string ComputePredicateHash(DeltaScanPredicate predicate)
    {
        var json = SerializeCanonical(predicate);
        return ComputeHash(Encoding.UTF8.GetBytes(json));
    }

    /// <summary>Serializes with the shared canonical options (see notes on <see cref="CanonicalJsonOptions"/>).</summary>
    private static string SerializeCanonical<T>(T obj)
    {
        // For truly canonical JSON, we need sorted keys
        // System.Text.Json doesn't natively support this, but for our use case
        // the predictable property order from the record definitions is sufficient
        return JsonSerializer.Serialize(obj, CanonicalJsonOptions);
    }

    /// <summary>SHA-256 of <paramref name="data"/> as lowercase hex (no "sha256:" prefix).</summary>
    private static string ComputeHash(byte[] data)
    {
        var hashBytes = SHA256.HashData(data);
        return Convert.ToHexStringLower(hashBytes);
    }

    /// <summary>Deterministic dedup key derived from the old+new manifest digest pair.</summary>
    private static string ComputeIdempotencyKey(string oldDigest, string newDigest)
    {
        var combined = $"{oldDigest}:{newDigest}";
        return ComputeHash(Encoding.UTF8.GetBytes(combined));
    }

    /// <summary>
    /// Strips a leading "sha256:" prefix, returning the bare hex value.
    /// Other algorithm prefixes (if any) are passed through unchanged.
    /// </summary>
    private static string ExtractDigestValue(string digest)
    {
        // Extract the hex value from "sha256:abc123..."
        if (digest.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase))
        {
            return digest[7..];
        }
        return digest;
    }
}
/// <summary>
/// Interface for evidence signing service.
/// Implementations may use various signing backends (local key, KMS, HSM, etc.).
/// </summary>
public interface IEvidenceSigningService
{
    /// <summary>
    /// Signs the payload and returns the signature.
    /// </summary>
    /// <param name="payload">Payload bytes to sign (the raw DSSE payload).</param>
    /// <param name="keyId">Optional key ID (null selects the implementation's default key).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Signature result.</returns>
    Task<SignatureResult> SignAsync(
        byte[] payload,
        string? keyId,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of signing operation.
/// </summary>
public sealed record SignatureResult
{
    /// <summary>
    /// Key ID used for signing; null when the backend's default key was used
    /// and it does not report an identifier.
    /// </summary>
    public string? KeyId { get; init; }

    /// <summary>
    /// Base64-encoded signature.
    /// </summary>
    public required string SignatureBase64 { get; init; }

    /// <summary>
    /// Signature algorithm used (implementation-defined naming).
    /// </summary>
    public string? Algorithm { get; init; }
}
/// <summary>
/// Interface for Rekor transparency log submission.
/// </summary>
public interface IRekorSubmissionService
{
    /// <summary>
    /// Submits an envelope to Rekor.
    /// </summary>
    /// <param name="envelope">DSSE envelope to submit.</param>
    /// <param name="idempotencyKey">Idempotency key for deduplication of repeated submissions.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Rekor entry information.</returns>
    Task<RekorEntryInfo> SubmitAsync(
        DeltaScanDsseEnvelope envelope,
        string idempotencyKey,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,293 @@
// -----------------------------------------------------------------------------
// DeltaScanPredicate.cs
// Sprint: SPRINT_20260118_026_Scanner_delta_scanning_engine
// Task: TASK-026-05 - Delta Evidence Composer
// Description: DSSE predicate type for delta scan attestations
// -----------------------------------------------------------------------------
using System.Text.Json.Serialization;
namespace StellaOps.Scanner.Delta.Evidence;
/// <summary>
/// DSSE predicate for delta scan attestations.
/// Predicate type: https://stellaops.io/attestations/delta-scan/v1
/// </summary>
public sealed record DeltaScanPredicate
{
    /// <summary>
    /// Predicate type URI.
    /// </summary>
    // NOTE(review): [JsonIgnore] is redundant on a const — System.Text.Json never
    // serializes const fields.
    [JsonIgnore]
    public const string PredicateType = "https://stellaops.io/attestations/delta-scan/v1";

    /// <summary>
    /// Unique scan ID (caller-supplied or a generated GUID "N" string).
    /// </summary>
    [JsonPropertyName("scanId")]
    public required string ScanId { get; init; }

    /// <summary>
    /// When the scan was performed (UTC).
    /// </summary>
    [JsonPropertyName("scannedAt")]
    public required DateTimeOffset ScannedAt { get; init; }

    /// <summary>
    /// Scanner tool information.
    /// </summary>
    [JsonPropertyName("scanner")]
    public required ScannerInfo Scanner { get; init; }

    /// <summary>
    /// The old (baseline) image being compared.
    /// </summary>
    [JsonPropertyName("oldImage")]
    public required ImageSubject OldImage { get; init; }

    /// <summary>
    /// The new image being scanned.
    /// </summary>
    [JsonPropertyName("newImage")]
    public required ImageSubject NewImage { get; init; }

    /// <summary>
    /// Layer changes between images.
    /// </summary>
    [JsonPropertyName("layerChanges")]
    public required LayerChangesInfo LayerChanges { get; init; }

    /// <summary>
    /// Component changes (SBOM diff).
    /// </summary>
    [JsonPropertyName("componentChanges")]
    public required ComponentChangesInfo ComponentChanges { get; init; }

    /// <summary>
    /// Scan performance metrics.
    /// </summary>
    [JsonPropertyName("metrics")]
    public required ScanMetrics Metrics { get; init; }

    /// <summary>
    /// SBOM format used (e.g. cyclonedx or spdx).
    /// </summary>
    [JsonPropertyName("sbomFormat")]
    public required string SbomFormat { get; init; }

    /// <summary>
    /// Content hash of the composite SBOM ("sha256:" + lowercase hex); null when no composite SBOM exists.
    /// </summary>
    [JsonPropertyName("sbomDigest")]
    public string? SbomDigest { get; init; }
}
/// <summary>
/// Scanner tool information.
/// </summary>
public sealed record ScannerInfo
{
    /// <summary>Scanner name (e.g. "StellaOps.Scanner.Delta").</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Scanner version (assembly informational version).</summary>
    [JsonPropertyName("version")]
    public required string Version { get; init; }

    /// <summary>SBOM generation tool (e.g. "syft").</summary>
    [JsonPropertyName("sbomTool")]
    public string? SbomTool { get; init; }

    /// <summary>SBOM tool version.</summary>
    [JsonPropertyName("sbomToolVersion")]
    public string? SbomToolVersion { get; init; }
}
/// <summary>
/// Image subject information.
/// </summary>
public sealed record ImageSubject
{
    /// <summary>Image reference (registry/repository:tag).</summary>
    [JsonPropertyName("reference")]
    public required string Reference { get; init; }

    /// <summary>Manifest digest (sha256:...).</summary>
    [JsonPropertyName("manifestDigest")]
    public required string ManifestDigest { get; init; }

    /// <summary>Config digest (sha256:...); null when not resolved.</summary>
    [JsonPropertyName("configDigest")]
    public string? ConfigDigest { get; init; }

    /// <summary>Total number of layers.</summary>
    [JsonPropertyName("layerCount")]
    public required int LayerCount { get; init; }

    /// <summary>Platform (os/arch); null when not resolved.</summary>
    [JsonPropertyName("platform")]
    public string? Platform { get; init; }
}
/// <summary>
/// Summary of layer changes.
/// </summary>
public sealed record LayerChangesInfo
{
    /// <summary>Number of layers added.</summary>
    [JsonPropertyName("added")]
    public required int Added { get; init; }

    /// <summary>Number of layers removed.</summary>
    [JsonPropertyName("removed")]
    public required int Removed { get; init; }

    /// <summary>Number of unchanged layers.</summary>
    [JsonPropertyName("unchanged")]
    public required int Unchanged { get; init; }

    /// <summary>Layer reuse ratio (0.0 to 1.0, unchanged / total layers, rounded to 4 places).</summary>
    [JsonPropertyName("reuseRatio")]
    public required double ReuseRatio { get; init; }

    /// <summary>DiffIDs of added layers.</summary>
    [JsonPropertyName("addedDiffIds")]
    public IReadOnlyList<string>? AddedDiffIds { get; init; }

    /// <summary>DiffIDs of removed layers.</summary>
    [JsonPropertyName("removedDiffIds")]
    public IReadOnlyList<string>? RemovedDiffIds { get; init; }
}
/// <summary>
/// Summary of component changes.
/// NOTE(review): the composer currently populates only Added/Unchanged/Total and the
/// cache counters; Removed/VersionChanged/OtherModified/IsBreaking are emitted as
/// zero/false placeholders — confirm consumers treat them as "not computed".
/// </summary>
public sealed record ComponentChangesInfo
{
    /// <summary>Number of components added.</summary>
    [JsonPropertyName("added")]
    public required int Added { get; init; }

    /// <summary>Number of components removed.</summary>
    [JsonPropertyName("removed")]
    public required int Removed { get; init; }

    /// <summary>Number of components with version changes.</summary>
    [JsonPropertyName("versionChanged")]
    public required int VersionChanged { get; init; }

    /// <summary>Number of components with other modifications.</summary>
    [JsonPropertyName("otherModified")]
    public required int OtherModified { get; init; }

    /// <summary>Number of unchanged components.</summary>
    [JsonPropertyName("unchanged")]
    public required int Unchanged { get; init; }

    /// <summary>Total components in new image.</summary>
    [JsonPropertyName("totalComponents")]
    public required int TotalComponents { get; init; }

    /// <summary>Whether this is a breaking change.</summary>
    [JsonPropertyName("isBreaking")]
    public required bool IsBreaking { get; init; }

    /// <summary>Components added from cache (not scanned).</summary>
    [JsonPropertyName("cachedComponentCount")]
    public int CachedComponentCount { get; init; }

    /// <summary>Components added from fresh scans.</summary>
    [JsonPropertyName("scannedComponentCount")]
    public int ScannedComponentCount { get; init; }
}
/// <summary>
/// Scan performance metrics.
/// </summary>
public sealed record ScanMetrics
{
    /// <summary>Total scan duration in milliseconds.</summary>
    [JsonPropertyName("totalDurationMs")]
    public required long TotalDurationMs { get; init; }

    /// <summary>Time spent scanning added layers, in milliseconds.</summary>
    [JsonPropertyName("addedLayersScanDurationMs")]
    public required long AddedLayersScanDurationMs { get; init; }

    /// <summary>Whether cached SBOMs were used.</summary>
    [JsonPropertyName("usedCache")]
    public required bool UsedCache { get; init; }

    /// <summary>Cache hit ratio for components (0.0 to 1.0); null when no components came from cache.</summary>
    [JsonPropertyName("cacheHitRatio")]
    public double? CacheHitRatio { get; init; }
}
/// <summary>
/// DSSE envelope structure for delta scan attestations.
/// Field names follow the DSSE specification (payloadType/payload/signatures).
/// </summary>
public sealed record DeltaScanDsseEnvelope
{
    /// <summary>Payload type (e.g. "application/vnd.in-toto+json").</summary>
    [JsonPropertyName("payloadType")]
    public required string PayloadType { get; init; }

    /// <summary>Base64-encoded payload (the serialized in-toto statement).</summary>
    [JsonPropertyName("payload")]
    public required string Payload { get; init; }

    /// <summary>Signatures over the payload; may be empty when unsigned.</summary>
    [JsonPropertyName("signatures")]
    public required IReadOnlyList<DsseSignature> Signatures { get; init; }
}
/// <summary>
/// Signature in a DSSE envelope.
/// </summary>
public sealed record DsseSignature
{
    /// <summary>Key ID (serialized as "keyid" per the DSSE spec).</summary>
    [JsonPropertyName("keyid")]
    public string? KeyId { get; init; }

    /// <summary>Base64-encoded signature.</summary>
    [JsonPropertyName("sig")]
    public required string Sig { get; init; }
}
/// <summary>
/// In-toto statement wrapper for the predicate.
/// </summary>
public sealed record InTotoStatement
{
    /// <summary>Statement type; fixed to the in-toto Statement v1 URI.</summary>
    [JsonPropertyName("_type")]
    public string Type { get; init; } = "https://in-toto.io/Statement/v1";

    /// <summary>Subjects (what the attestation is about).</summary>
    [JsonPropertyName("subject")]
    public required IReadOnlyList<InTotoSubject> Subject { get; init; }

    /// <summary>Predicate type URI.</summary>
    [JsonPropertyName("predicateType")]
    public required string PredicateType { get; init; }

    /// <summary>The predicate payload.</summary>
    [JsonPropertyName("predicate")]
    public required DeltaScanPredicate Predicate { get; init; }
}
/// <summary>
/// In-toto subject (artifact being attested).
/// </summary>
public sealed record InTotoSubject
{
    /// <summary>Subject name (image reference).</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Subject digests, keyed by algorithm (e.g. "sha256" -> bare hex value).</summary>
    [JsonPropertyName("digest")]
    public required IReadOnlyDictionary<string, string> Digest { get; init; }
}

View File

@@ -0,0 +1,137 @@
// -----------------------------------------------------------------------------
// IDeltaEvidenceComposer.cs
// Sprint: SPRINT_20260118_026_Scanner_delta_scanning_engine
// Task: TASK-026-05 - Delta Evidence Composer
// Description: Interface for composing signed delta scan evidence
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Delta.Evidence;
/// <summary>
/// Composes signed, DSSE-wrapped delta evidence for policy gates.
/// Evidence is deterministic and can be submitted to Rekor for transparency logging.
/// </summary>
public interface IDeltaEvidenceComposer
{
    /// <summary>
    /// Composes a DSSE-wrapped attestation from a delta scan result.
    /// </summary>
    /// <param name="scanResult">The delta scan result to attest.</param>
    /// <param name="options">Optional composition options (defaults apply when null).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Composed evidence with optional Rekor entry.</returns>
    Task<DeltaScanEvidence> ComposeAsync(
        DeltaScanResult scanResult,
        EvidenceCompositionOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Creates an unsigned in-toto statement for the delta scan.
    /// Useful for preview or when signing is handled separately.
    /// </summary>
    /// <param name="scanResult">The delta scan result.</param>
    /// <returns>Unsigned in-toto statement.</returns>
    InTotoStatement CreateStatement(DeltaScanResult scanResult);

    /// <summary>
    /// Computes the canonical hash of a delta scan predicate.
    /// Used for idempotency key generation.
    /// </summary>
    /// <param name="predicate">The predicate to hash.</param>
    /// <returns>SHA256 hash as lowercase hex string.</returns>
    string ComputePredicateHash(DeltaScanPredicate predicate);
}
/// <summary>
/// Options for evidence composition.
/// </summary>
public sealed class EvidenceCompositionOptions
{
    /// <summary>
    /// Whether to sign the evidence. When true but no signing service is registered,
    /// the composer logs a warning and emits an unsigned envelope.
    /// </summary>
    public bool Sign { get; set; } = true;

    /// <summary>
    /// Whether to submit to Rekor transparency log. Submission failures are non-fatal.
    /// </summary>
    public bool SubmitToRekor { get; set; } = true;

    /// <summary>
    /// Key ID for signing (null for default key).
    /// </summary>
    public string? SigningKeyId { get; set; }

    /// <summary>
    /// Custom scan ID (auto-generated if null).
    /// </summary>
    public string? ScanId { get; set; }

    /// <summary>
    /// Include detailed layer diffIDs in evidence.
    /// NOTE(review): not currently consulted by DeltaEvidenceComposer — diffIDs are
    /// always included; confirm intended.
    /// </summary>
    public bool IncludeLayerDetails { get; set; } = true;
}
/// <summary>
/// Composed delta scan evidence with metadata.
/// </summary>
public sealed record DeltaScanEvidence
{
    /// <summary>
    /// The DSSE-wrapped attestation envelope.
    /// </summary>
    public required DeltaScanDsseEnvelope Envelope { get; init; }

    /// <summary>
    /// The in-toto statement (for reference; same content as the envelope payload).
    /// </summary>
    public required InTotoStatement Statement { get; init; }

    /// <summary>
    /// SHA256 hash of the canonical JSON payload, as lowercase hex (no "sha256:" prefix).
    /// </summary>
    public required string PayloadHash { get; init; }

    /// <summary>
    /// Rekor log entry information (null when submission was skipped or failed).
    /// </summary>
    public RekorEntryInfo? RekorEntry { get; init; }

    /// <summary>
    /// Idempotency key for deduplication.
    /// Based on old+new image digests.
    /// </summary>
    public required string IdempotencyKey { get; init; }

    /// <summary>
    /// When the evidence was composed (UTC).
    /// </summary>
    public required DateTimeOffset ComposedAt { get; init; }
}
/// <summary>
/// Information about the Rekor log entry.
/// </summary>
public sealed record RekorEntryInfo
{
    /// <summary>
    /// Log index in Rekor.
    /// </summary>
    public required long LogIndex { get; init; }

    /// <summary>
    /// Entry UUID.
    /// </summary>
    public required string EntryUuid { get; init; }

    /// <summary>
    /// Integrated timestamp from Rekor.
    /// </summary>
    public required DateTimeOffset IntegratedTime { get; init; }

    /// <summary>
    /// Rekor log ID (null when the log does not report one).
    /// </summary>
    public string? LogId { get; init; }
}

View File

@@ -0,0 +1,270 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Delta;
/// <summary>
/// Scans only changed layers between two image versions for efficient delta scanning.
/// </summary>
public interface IDeltaLayerScanner
{
    /// <summary>
    /// Performs a delta scan comparing two image versions.
    /// </summary>
    /// <param name="oldImage">Reference to the old/baseline image.</param>
    /// <param name="newImage">Reference to the new image to scan.</param>
    /// <param name="options">Delta scan options (defaults apply when null).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Delta scan result with changed layers and composite SBOM.</returns>
    Task<DeltaScanResult> ScanDeltaAsync(
        string oldImage,
        string newImage,
        DeltaScanOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Identifies layer changes between two images without scanning.
    /// Comparison is by layer diffID (uncompressed content hash).
    /// </summary>
    /// <param name="oldImage">Reference to the old image.</param>
    /// <param name="newImage">Reference to the new image.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Layer change summary.</returns>
    Task<LayerChangeSummary> IdentifyLayerChangesAsync(
        string oldImage,
        string newImage,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Options for delta scanning.
/// </summary>
public sealed record DeltaScanOptions
{
    /// <summary>
    /// Whether to force a full scan even for unchanged layers.
    /// Default is false.
    /// </summary>
    public bool ForceFullScan { get; init; } = false;

    /// <summary>
    /// Whether to use cached per-layer SBOMs for unchanged layers.
    /// Default is true.
    /// </summary>
    public bool UseCachedSboms { get; init; } = true;

    /// <summary>
    /// Maximum age of cached SBOMs to consider valid (in days).
    /// Default is 30 days.
    /// NOTE(review): not visibly enforced by the delta scanner in this file — confirm
    /// the cache layer honors it.
    /// </summary>
    public int MaxCacheAgeDays { get; init; } = 30;

    /// <summary>
    /// SBOM format to produce (CycloneDX or SPDX).
    /// Default is CycloneDX ("cyclonedx").
    /// </summary>
    public string SbomFormat { get; init; } = "cyclonedx";

    /// <summary>
    /// Whether to include layer attribution in the SBOM.
    /// Default is true.
    /// </summary>
    public bool IncludeLayerAttribution { get; init; } = true;

    /// <summary>
    /// Target platform for multi-arch images.
    /// If null, uses default platform.
    /// </summary>
    public string? Platform { get; init; }
}
/// <summary>
/// Result of a delta scan operation.
/// </summary>
public sealed record DeltaScanResult
{
    /// <summary>
    /// Old image reference.
    /// </summary>
    public required string OldImage { get; init; }

    /// <summary>
    /// Old image manifest digest.
    /// </summary>
    public required string OldManifestDigest { get; init; }

    /// <summary>
    /// New image reference.
    /// </summary>
    public required string NewImage { get; init; }

    /// <summary>
    /// New image manifest digest.
    /// </summary>
    public required string NewManifestDigest { get; init; }

    /// <summary>
    /// Layers that were added in the new image.
    /// </summary>
    public ImmutableArray<LayerChangeInfo> AddedLayers { get; init; } = [];

    /// <summary>
    /// Layers that were removed from the old image.
    /// </summary>
    public ImmutableArray<LayerChangeInfo> RemovedLayers { get; init; } = [];

    /// <summary>
    /// Layers that are unchanged between images.
    /// </summary>
    public ImmutableArray<LayerChangeInfo> UnchangedLayers { get; init; } = [];

    /// <summary>
    /// Composite SBOM for the new image (combining cached + newly scanned).
    /// </summary>
    public string? CompositeSbom { get; init; }

    /// <summary>
    /// SBOM format (cyclonedx or spdx).
    /// </summary>
    public string? SbomFormat { get; init; }

    /// <summary>
    /// Total scan duration.
    /// </summary>
    public TimeSpan ScanDuration { get; init; }

    /// <summary>
    /// Duration spent scanning only added layers.
    /// </summary>
    public TimeSpan AddedLayersScanDuration { get; init; }

    /// <summary>
    /// Number of components found in added layers.
    /// </summary>
    public int AddedComponentCount { get; init; }

    /// <summary>
    /// Number of components from cached layers.
    /// </summary>
    public int CachedComponentCount { get; init; }

    /// <summary>
    /// Whether the scan used cached SBOMs.
    /// </summary>
    public bool UsedCache { get; init; }

    /// <summary>
    /// Fraction of the new image's layers that were reused (unchanged / (added + unchanged));
    /// removed layers do not participate in this ratio.
    /// </summary>
    public double LayerReuseRatio =>
        (AddedLayers.Length + UnchangedLayers.Length) > 0
            ? (double)UnchangedLayers.Length / (AddedLayers.Length + UnchangedLayers.Length)
            : 0;

    /// <summary>
    /// When the scan was performed.
    /// NOTE(review): defaults to DateTimeOffset.UtcNow at construction, which is
    /// non-deterministic — consider having the scanner stamp this via TimeProvider.
    /// </summary>
    public DateTimeOffset ScannedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Describes a single layer involved in a delta comparison.
/// </summary>
public sealed record LayerChangeInfo
{
    /// <summary>Digest of the compressed layer blob.</summary>
    public required string LayerDigest { get; init; }

    /// <summary>Hash of the uncompressed layer content (diffID).</summary>
    public required string DiffId { get; init; }

    /// <summary>Position of the layer within the image.</summary>
    public int LayerIndex { get; init; }

    /// <summary>Layer size in bytes.</summary>
    public long Size { get; init; }

    /// <summary>Layer media type, if known.</summary>
    public string? MediaType { get; init; }

    /// <summary>True when this layer's SBOM came from the cache rather than a fresh scan.</summary>
    public bool FromCache { get; init; }

    /// <summary>Count of components found in this layer.</summary>
    public int ComponentCount { get; init; }
}
/// <summary>
/// Aggregated view of the layer-level differences between two images.
/// </summary>
public sealed record LayerChangeSummary
{
    /// <summary>Reference of the older image.</summary>
    public required string OldImage { get; init; }

    /// <summary>Reference of the newer image.</summary>
    public required string NewImage { get; init; }

    /// <summary>Layer count of the old image.</summary>
    public int OldLayerCount { get; init; }

    /// <summary>Layer count of the new image.</summary>
    public int NewLayerCount { get; init; }

    /// <summary>How many layers were added.</summary>
    public int AddedCount { get; init; }

    /// <summary>How many layers were removed.</summary>
    public int RemovedCount { get; init; }

    /// <summary>How many layers are identical in both images.</summary>
    public int UnchangedCount { get; init; }

    /// <summary>
    /// Estimated fraction (0..1) of the new image's layers that do not need scanning.
    /// </summary>
    public double EstimatedSavingsRatio
    {
        get
        {
            return NewLayerCount > 0
                ? (double)UnchangedCount / NewLayerCount
                : 0;
        }
    }

    /// <summary>DiffIDs of the added layers.</summary>
    public ImmutableArray<string> AddedDiffIds { get; init; } = [];

    /// <summary>DiffIDs of the removed layers.</summary>
    public ImmutableArray<string> RemovedDiffIds { get; init; } = [];

    /// <summary>DiffIDs of the unchanged layers.</summary>
    public ImmutableArray<string> UnchangedDiffIds { get; init; } = [];
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Library project: nullable enabled with warnings-as-errors; preview LangVersion on net10.0. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <LangVersion>preview</LangVersion>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
  </PropertyGroup>
  <!-- Package versions are unpinned here; presumably managed centrally (e.g. Directory.Packages.props) — confirm. -->
  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Options" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\StellaOps.Scanner.Contracts\StellaOps.Scanner.Contracts.csproj" />
    <ProjectReference Include="..\StellaOps.Scanner.Manifest\StellaOps.Scanner.Manifest.csproj" />
    <ProjectReference Include="..\StellaOps.Scanner.Emit\StellaOps.Scanner.Emit.csproj" />
    <ProjectReference Include="..\StellaOps.Scanner.Cache\StellaOps.Scanner.Cache.csproj" />
  </ItemGroup>
</Project>

View File

@@ -57,6 +57,19 @@ public sealed record ComponentDelta
/// List of fields that changed (for modified components).
/// </summary>
public ImmutableArray<string> ChangedFields { get; init; } = [];
/// <summary>
/// The layer diffID that introduced this change (null if not layer-attributed).
/// For Added: the layer where the component was added.
/// For Removed: the layer where the component was last present.
/// For Modified: the layer where the new version appeared.
/// </summary>
public string? SourceLayerDiffId { get; init; }
/// <summary>
/// The index of the source layer in the image (0-indexed).
/// </summary>
public int? SourceLayerIndex { get; init; }
}
/// <summary>
@@ -166,3 +179,102 @@ public sealed record DiffSummary
/// </summary>
public bool IsBreaking { get; init; }
}
/// <summary>
/// One layer's worth of SBOM data fed into layer-attributed diff computation.
/// </summary>
public sealed record LayerSbomInput
{
    /// <summary>DiffID of the layer (sha256 of the uncompressed content).</summary>
    public required string DiffId { get; init; }

    /// <summary>Zero-based position of the layer in the image.</summary>
    public required int LayerIndex { get; init; }

    /// <summary>Components discovered in this layer.</summary>
    public required IReadOnlyList<ComponentRef> Components { get; init; }
}
/// <summary>
/// Diff result enriched with per-layer attribution of each change.
/// </summary>
public sealed record LayerAttributedDiff
{
    /// <summary>The underlying diff without layer attribution.</summary>
    public required SbomDiff BaseDiff { get; init; }

    /// <summary>Changes bucketed by the layer that introduced them.</summary>
    public required ImmutableArray<LayerChangeGroup> ChangesByLayer { get; init; }

    /// <summary>Per-layer change counts.</summary>
    public required ImmutableArray<LayerChangeSummary> LayerSummaries { get; init; }
}
/// <summary>
/// All changes attributed to one specific layer.
/// </summary>
public sealed record LayerChangeGroup
{
    /// <summary>DiffID of the attributed layer.</summary>
    public required string DiffId { get; init; }

    /// <summary>Zero-based index of the attributed layer.</summary>
    public required int LayerIndex { get; init; }

    /// <summary>Component deltas this layer is responsible for.</summary>
    public required ImmutableArray<ComponentDelta> Changes { get; init; }
}
/// <summary>
/// Change counts for one layer within a layer-attributed diff.
/// </summary>
public sealed record LayerChangeSummary
{
    /// <summary>DiffID of the layer.</summary>
    public required string DiffId { get; init; }

    /// <summary>Zero-based index of the layer.</summary>
    public required int LayerIndex { get; init; }

    /// <summary>Components added in this layer.</summary>
    public required int Added { get; init; }

    /// <summary>Components removed from this layer.</summary>
    public required int Removed { get; init; }

    /// <summary>Components modified in this layer.</summary>
    public required int Modified { get; init; }

    /// <summary>Total number of components in this layer.</summary>
    public required int TotalComponents { get; init; }
}

View File

@@ -201,6 +201,165 @@ public sealed class SbomDiffEngine
return Convert.ToHexStringLower(hashBytes);
}
/// <summary>
/// Computes a layer-attributed diff between two images' per-layer SBOMs.
/// Attributes each change to the specific layer that introduced it.
/// </summary>
/// <param name="fromId">Identity of the SBOM for the old image.</param>
/// <param name="fromLayers">Per-layer component lists for the old image; assumed ordered base-to-top — TODO confirm with callers.</param>
/// <param name="toId">Identity of the SBOM for the new image.</param>
/// <param name="toLayers">Per-layer component lists for the new image; same ordering assumption.</param>
/// <returns>The base diff with attributed deltas, plus per-layer change groups and summaries.</returns>
public LayerAttributedDiff ComputeLayerAttributedDiff(
    SbomId fromId,
    IReadOnlyList<LayerSbomInput> fromLayers,
    SbomId toId,
    IReadOnlyList<LayerSbomInput> toLayers)
{
    // Build component→layer mapping for old image.
    // Identity comparison is case-insensitive throughout this method.
    var oldComponentLayers = new Dictionary<string, (string DiffId, int LayerIndex)>(StringComparer.OrdinalIgnoreCase);
    foreach (var layer in fromLayers)
    {
        foreach (var component in layer.Components)
        {
            var identity = GetPackageIdentity(component);
            // First layer wins (components installed in lower layers)
            if (!oldComponentLayers.ContainsKey(identity))
            {
                oldComponentLayers[identity] = (layer.DiffId, layer.LayerIndex);
            }
        }
    }
    // Build component→layer mapping for new image (same first-layer-wins rule).
    var newComponentLayers = new Dictionary<string, (string DiffId, int LayerIndex)>(StringComparer.OrdinalIgnoreCase);
    foreach (var layer in toLayers)
    {
        foreach (var component in layer.Components)
        {
            var identity = GetPackageIdentity(component);
            if (!newComponentLayers.ContainsKey(identity))
            {
                newComponentLayers[identity] = (layer.DiffId, layer.LayerIndex);
            }
        }
    }
    // Flatten components for base diff computation
    var fromComponents = fromLayers.SelectMany(l => l.Components).ToList();
    var toComponents = toLayers.SelectMany(l => l.Components).ToList();
    // Compute base diff (reuse existing logic)
    var baseDiff = ComputeDiff(fromId, fromComponents, toId, toComponents);
    // Attribute each delta to its source layer:
    // Added/changed deltas map to the new image's layer, Removed deltas to the old image's layer.
    var attributedDeltas = baseDiff.Deltas.Select(delta =>
    {
        // For Removed deltas only Before is set; otherwise use After.
        var identity = GetPackageIdentity(delta.After ?? delta.Before!);
        (string DiffId, int LayerIndex)? sourceLayer = delta.Type switch
        {
            ComponentDeltaType.Added when newComponentLayers.TryGetValue(identity, out var nl) => nl,
            ComponentDeltaType.Removed when oldComponentLayers.TryGetValue(identity, out var ol) => ol,
            ComponentDeltaType.VersionChanged when newComponentLayers.TryGetValue(identity, out var nl2) => nl2,
            ComponentDeltaType.LicenseChanged when newComponentLayers.TryGetValue(identity, out var nl3) => nl3,
            ComponentDeltaType.MetadataChanged when newComponentLayers.TryGetValue(identity, out var nl4) => nl4,
            _ => null
        };
        return delta with
        {
            SourceLayerDiffId = sourceLayer?.DiffId,
            SourceLayerIndex = sourceLayer?.LayerIndex
        };
    }).ToImmutableArray();
    // Group by layer; deltas that could not be attributed (null diffID) are excluded here
    // but remain present in the attributed base diff below.
    var changesByLayer = attributedDeltas
        .Where(d => d.SourceLayerDiffId != null)
        .GroupBy(d => (d.SourceLayerDiffId!, d.SourceLayerIndex!.Value))
        .Select(g => new LayerChangeGroup
        {
            DiffId = g.Key.Item1,
            LayerIndex = g.Key.Item2,
            Changes = [.. g]
        })
        .OrderBy(g => g.LayerIndex)
        .ToImmutableArray();
    // Build layer summaries.
    // NOTE(review): summaries are built from toLayers only, so Removed deltas attributed to an
    // old-image layer whose diffID no longer exists in the new image do not appear in any summary
    // (they are still visible in ChangesByLayer) — confirm this is intended.
    var allLayerDiffIds = toLayers
        .Select(l => (l.DiffId, l.LayerIndex, l.Components.Count))
        .ToList();
    // NOTE(review): FirstOrDefault inside this Select is O(layers × groups); fine for typical
    // image layer counts, but a dictionary lookup would be cheaper for very deep images.
    var layerSummaries = allLayerDiffIds.Select(layer =>
    {
        var layerChanges = changesByLayer.FirstOrDefault(g => g.DiffId == layer.DiffId);
        var added = layerChanges?.Changes.Count(c => c.Type == ComponentDeltaType.Added) ?? 0;
        var removed = layerChanges?.Changes.Count(c => c.Type == ComponentDeltaType.Removed) ?? 0;
        var modified = layerChanges?.Changes.Count(c =>
            c.Type is ComponentDeltaType.VersionChanged or ComponentDeltaType.LicenseChanged or ComponentDeltaType.MetadataChanged) ?? 0;
        return new LayerChangeSummary
        {
            DiffId = layer.DiffId,
            LayerIndex = layer.LayerIndex,
            Added = added,
            Removed = removed,
            Modified = modified,
            TotalComponents = layer.Count
        };
    }).ToImmutableArray();
    // Rebuild the base diff with attributed deltas
    var attributedBaseDiff = baseDiff with
    {
        Deltas = attributedDeltas
    };
    return new LayerAttributedDiff
    {
        BaseDiff = attributedBaseDiff,
        ChangesByLayer = changesByLayer,
        LayerSummaries = layerSummaries
    };
}
/// <summary>
/// Identifies which layer introduced a specific component in an image.
/// Useful for "blame" queries: which layer introduced vulnerability X?
/// </summary>
/// <param name="layers">Per-layer component lists, ordered base-to-top.</param>
/// <param name="componentPurl">PURL of the component to locate.</param>
/// <returns>DiffID of the first (lowest) layer containing the component, or null if absent.</returns>
public static string? FindComponentLayer(
    IReadOnlyList<LayerSbomInput> layers,
    string componentPurl)
{
    var targetIdentity = GetPackageIdentityFromPurl(componentPurl);
    // Layers are scanned in order, so the first hit is the lowest layer that ships the component.
    foreach (var layer in layers)
    {
        var containsTarget = layer.Components.Any(
            component => GetPackageIdentity(component).Equals(targetIdentity, StringComparison.OrdinalIgnoreCase));
        if (containsTarget)
        {
            return layer.DiffId;
        }
    }
    return null;
}
/// <summary>
/// Gets the package identity from a PURL string by stripping the version segment
/// while preserving qualifiers ("?...") and subpath ("#...").
/// </summary>
/// <param name="purl">A package URL, e.g. "pkg:deb/debian/curl@7.88.1?arch=amd64".</param>
/// <returns>The PURL without its version, or the input unchanged when no version is present.</returns>
private static string GetPackageIdentityFromPurl(string purl)
{
    // Locate the qualifiers/subpath suffix first so any '@' inside qualifier
    // values is never mistaken for the version separator. Per the PURL spec,
    // qualifiers ('?') precede the subpath ('#'); take whichever comes first.
    var queryIndex = purl.IndexOf('?');
    var hashIndex = purl.IndexOf('#');
    var suffixIndex = queryIndex >= 0 && (hashIndex < 0 || queryIndex < hashIndex)
        ? queryIndex
        : hashIndex;
    var core = suffixIndex >= 0 ? purl[..suffixIndex] : purl;

    // The version separator is the LAST '@' in the core, and it must follow the
    // final '/'. Using the first '@' (the previous behavior) broke npm-scoped
    // names such as "pkg:npm/@babel/core@7.20.0", truncating them to "pkg:npm/".
    var atIndex = core.LastIndexOf('@');
    var lastSlash = core.LastIndexOf('/');
    if (atIndex <= 0 || atIndex < lastSlash)
    {
        return purl; // no version segment present
    }

    var identity = core[..atIndex];
    return suffixIndex >= 0 ? identity + purl[suffixIndex..] : identity;
}
/// <summary>
/// Gets the package identity (PURL without version) for matching.
/// </summary>

View File

@@ -0,0 +1,133 @@
using StellaOps.Scanner.Manifest.Models;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Service for capturing, storing, and comparing OCI image manifest snapshots.
/// Provides the foundation for delta-based CVE scanning by tracking layer diffIDs.
/// </summary>
/// <remarks>
/// Snapshots are keyed by manifest digest; lookups by image reference return the most
/// recent capture for that reference. Persistence is delegated to the backing repository.
/// </remarks>
public interface IOciManifestSnapshotService
{
    /// <summary>
    /// Captures a snapshot of an OCI image manifest from a registry.
    /// </summary>
    /// <param name="imageReference">The image reference (e.g., docker.io/library/alpine:3.19).</param>
    /// <param name="options">Optional capture options; defaults apply when null.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The captured manifest snapshot, or null if the image could not be found.</returns>
    Task<OciManifestSnapshot?> CaptureAsync(
        string imageReference,
        ManifestCaptureOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a previously captured manifest snapshot by manifest digest.
    /// </summary>
    /// <param name="manifestDigest">The manifest digest to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found, null otherwise.</returns>
    Task<OciManifestSnapshot?> GetByDigestAsync(
        string manifestDigest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a previously captured manifest snapshot by image reference.
    /// Returns the most recent snapshot for the reference.
    /// </summary>
    /// <param name="imageReference">The image reference to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The most recent snapshot if found, null otherwise.</returns>
    Task<OciManifestSnapshot?> GetByReferenceAsync(
        string imageReference,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a manifest snapshot by its unique ID.
    /// </summary>
    /// <param name="snapshotId">The snapshot ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found, null otherwise.</returns>
    Task<OciManifestSnapshot?> GetByIdAsync(
        Guid snapshotId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Compares two manifest snapshots to identify layer changes.
    /// Both snapshots must already exist in the store; this does not re-capture.
    /// </summary>
    /// <param name="oldManifestDigest">The digest of the older manifest.</param>
    /// <param name="newManifestDigest">The digest of the newer manifest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The comparison result showing layer changes, or null if either manifest not found.</returns>
    Task<ManifestComparisonResult?> CompareAsync(
        string oldManifestDigest,
        string newManifestDigest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Compares two manifest snapshots directly (no storage lookup).
    /// </summary>
    /// <param name="oldSnapshot">The older manifest snapshot.</param>
    /// <param name="newSnapshot">The newer manifest snapshot.</param>
    /// <returns>The comparison result showing layer changes.</returns>
    ManifestComparisonResult Compare(OciManifestSnapshot oldSnapshot, OciManifestSnapshot newSnapshot);

    /// <summary>
    /// Lists all snapshots for a given repository.
    /// </summary>
    /// <param name="registry">The registry hostname.</param>
    /// <param name="repository">The repository path.</param>
    /// <param name="limit">Maximum number of snapshots to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The list of snapshots ordered by capture time descending.</returns>
    Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes old snapshots to free storage space.
    /// </summary>
    /// <param name="olderThan">Delete snapshots captured before this time.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The number of snapshots deleted.</returns>
    Task<int> PruneAsync(
        DateTimeOffset olderThan,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Controls how a manifest snapshot is captured.
/// </summary>
public sealed record ManifestCaptureOptions
{
    /// <summary>
    /// Compute the diffID of every layer during capture. Off by default for faster
    /// capture; when false, diffIDs are null until computed separately.
    /// </summary>
    public bool ComputeDiffIds { get; init; }

    /// <summary>
    /// Platform selector for multi-arch images (e.g., "linux/amd64");
    /// null picks the system's default platform.
    /// </summary>
    public string? PlatformFilter { get; init; }

    /// <summary>
    /// Optional time limit for the capture operation.
    /// </summary>
    public TimeSpan? Timeout { get; init; }

    /// <summary>
    /// Persist the snapshot to the database. When false, the snapshot is
    /// returned to the caller without being stored. Defaults to true.
    /// </summary>
    public bool Persist { get; init; } = true;

    /// <summary>
    /// Layers larger than this byte count are skipped when computing diffIDs
    /// (to avoid timeouts). Defaults to 1 GiB.
    /// </summary>
    public long MaxLayerSizeForDiffId { get; init; } = 1024L * 1024L * 1024L;
}

View File

@@ -0,0 +1,53 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scanner.Manifest.Persistence;
using StellaOps.Scanner.Manifest.Resolution;
using StellaOps.Scanner.Manifest.Reuse;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Extension methods for registering Scanner.Manifest services.
/// </summary>
public static class ManifestServiceCollectionExtensions
{
    /// <summary>
    /// Registers the OCI manifest snapshot repository and service as singletons.
    /// </summary>
    public static IServiceCollection AddOciManifestSnapshotServices(this IServiceCollection services) =>
        services
            .AddSingleton<IManifestSnapshotRepository, ManifestSnapshotRepository>()
            .AddSingleton<IOciManifestSnapshotService, OciManifestSnapshotService>();

    /// <summary>
    /// Registers the layer digest resolution services (diffID cache, base-image
    /// detection, and digest resolution) as singletons.
    /// </summary>
    public static IServiceCollection AddLayerDigestResolutionServices(this IServiceCollection services) =>
        services
            .AddSingleton<IDiffIdCache, DiffIdCache>()
            .AddSingleton<IBaseImageDetector, BaseImageDetector>()
            .AddSingleton<ILayerDigestResolver, LayerDigestResolver>();

    /// <summary>
    /// Registers the layer reuse detector as a singleton.
    /// </summary>
    public static IServiceCollection AddLayerReuseDetectionServices(this IServiceCollection services) =>
        services.AddSingleton<ILayerReuseDetector, LayerReuseDetector>();

    /// <summary>
    /// Registers all Scanner.Manifest services (snapshots, resolution, and reuse detection).
    /// </summary>
    public static IServiceCollection AddScannerManifestServices(this IServiceCollection services) =>
        services
            .AddOciManifestSnapshotServices()
            .AddLayerDigestResolutionServices()
            .AddLayerReuseDetectionServices();
}

View File

@@ -0,0 +1,123 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Result of comparing two OCI manifest snapshots to identify layer changes.
/// </summary>
public sealed record ManifestComparisonResult
{
    /// <summary>The older manifest snapshot (comparison baseline).</summary>
    public required OciManifestSnapshot OldSnapshot { get; init; }

    /// <summary>The newer manifest snapshot being compared against the baseline.</summary>
    public required OciManifestSnapshot NewSnapshot { get; init; }

    /// <summary>Layers present in both manifests with identical diffIDs.</summary>
    public ImmutableArray<LayerChange> UnchangedLayers { get; init; } = [];

    /// <summary>Layers present only in the new manifest.</summary>
    public ImmutableArray<LayerChange> AddedLayers { get; init; } = [];

    /// <summary>Layers present only in the old manifest.</summary>
    public ImmutableArray<LayerChange> RemovedLayers { get; init; } = [];

    /// <summary>Layers at the same index whose diffID differs.</summary>
    public ImmutableArray<LayerChange> ModifiedLayers { get; init; } = [];

    /// <summary>True when both snapshots carry the same manifest digest.</summary>
    public bool IsIdentical =>
        string.Equals(OldSnapshot.ManifestDigest, NewSnapshot.ManifestDigest, StringComparison.OrdinalIgnoreCase);

    /// <summary>True when at least one layer diffID is shared between the images.</summary>
    public bool HasSharedLayers => UnchangedLayers.Length > 0;

    /// <summary>
    /// Percentage (0-100) of the new image's layers that are unchanged;
    /// 100 for an image with no layers.
    /// </summary>
    public double ReusePercentage =>
        NewSnapshot.LayerCount == 0
            ? 100.0
            : UnchangedLayers.Length * 100.0 / NewSnapshot.LayerCount;

    /// <summary>Count of layers requiring a scan (added plus modified).</summary>
    public int LayersToScan => AddedLayers.Length + ModifiedLayers.Length;
}
/// <summary>
/// Represents a change to a single layer between two manifest versions.
/// </summary>
public sealed record LayerChange
{
    /// <summary>Kind of change this entry describes.</summary>
    public required LayerChangeType ChangeType { get; init; }

    /// <summary>Descriptor from the old manifest; null for added layers.</summary>
    public OciLayerDescriptor? OldLayer { get; init; }

    /// <summary>Descriptor from the new manifest; null for removed layers.</summary>
    public OciLayerDescriptor? NewLayer { get; init; }

    /// <summary>
    /// DiffID of the layer, taken from the new descriptor when available,
    /// otherwise from the old one.
    /// </summary>
    public string? DiffId => NewLayer is { DiffId: { } newDiffId } ? newDiffId : OldLayer?.DiffId;

    /// <summary>
    /// Index of the layer: the new manifest's index when present, otherwise the
    /// old manifest's, or -1 when neither descriptor is set.
    /// </summary>
    public int LayerIndex => NewLayer is not null
        ? NewLayer.LayerIndex
        : OldLayer?.LayerIndex ?? -1;
}
/// <summary>
/// The type of change for a layer between manifest versions.
/// Values are stable integers; do not reorder (they may be persisted).
/// </summary>
public enum LayerChangeType
{
    /// <summary>
    /// Layer exists in both manifests with the same diffID.
    /// </summary>
    Unchanged = 0,
    /// <summary>
    /// Layer exists only in the new manifest.
    /// </summary>
    Added = 1,
    /// <summary>
    /// Layer exists only in the old manifest.
    /// </summary>
    Removed = 2,
    /// <summary>
    /// Layer at same index has different diffID.
    /// </summary>
    Modified = 3
}

View File

@@ -0,0 +1,47 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Represents an OCI image layer descriptor with both compressed digest and uncompressed diffID.
/// </summary>
public sealed record OciLayerDescriptor
{
    /// <summary>
    /// Registry digest (sha256:...) of the compressed blob — the
    /// content-addressable identifier for the layer as stored.
    /// </summary>
    public required string Digest { get; init; }

    /// <summary>
    /// Hash (sha256:...) of the decompressed content, identifying the layer
    /// regardless of compression. Null until computed.
    /// </summary>
    public string? DiffId { get; init; }

    /// <summary>
    /// Compressed blob size in bytes.
    /// </summary>
    public long Size { get; init; }

    /// <summary>
    /// Layer media type (e.g., application/vnd.oci.image.layer.v1.tar+gzip).
    /// </summary>
    public required string MediaType { get; init; }

    /// <summary>
    /// Zero-based layer position (0 = base layer).
    /// </summary>
    public int LayerIndex { get; init; }

    /// <summary>
    /// Annotations attached to the descriptor, if any.
    /// </summary>
    public ImmutableDictionary<string, string>? Annotations { get; init; }

    /// <summary>
    /// True for zero-size layers or layers whose media type indicates an
    /// empty blob (used for scratch base images).
    /// </summary>
    public bool IsEmpty =>
        Size == 0 ||
        MediaType.Contains("empty", StringComparison.OrdinalIgnoreCase);
}

View File

@@ -0,0 +1,122 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Represents a point-in-time snapshot of an OCI image manifest including layer diffIDs.
/// Used for delta scanning to identify changed layers between image versions.
/// </summary>
public sealed record OciManifestSnapshot
{
    /// <summary>
    /// Unique identifier for this snapshot.
    /// </summary>
    public Guid Id { get; init; } = Guid.NewGuid();
    /// <summary>
    /// The original image reference used to fetch this manifest (e.g., registry/repo:tag).
    /// </summary>
    public required string ImageReference { get; init; }
    /// <summary>
    /// The registry hostname (e.g., docker.io, ghcr.io).
    /// </summary>
    public required string Registry { get; init; }
    /// <summary>
    /// The repository path (e.g., library/alpine, myorg/myapp).
    /// </summary>
    public required string Repository { get; init; }
    /// <summary>
    /// The tag if specified in the original reference, null for digest-only references.
    /// </summary>
    public string? Tag { get; init; }
    /// <summary>
    /// The manifest digest (sha256:...) - the content-addressable identifier for the manifest.
    /// </summary>
    public required string ManifestDigest { get; init; }
    /// <summary>
    /// The config blob digest (sha256:...) - identifies the image configuration.
    /// </summary>
    public required string ConfigDigest { get; init; }
    /// <summary>
    /// The media type of the manifest (e.g., application/vnd.oci.image.manifest.v1+json).
    /// </summary>
    public required string MediaType { get; init; }
    /// <summary>
    /// The ordered list of layer descriptors from base to top.
    /// </summary>
    public ImmutableArray<OciLayerDescriptor> Layers { get; init; } = ImmutableArray<OciLayerDescriptor>.Empty;
    /// <summary>
    /// The ordered list of diffIDs corresponding to each layer.
    /// May contain nulls for layers where diffID has not been computed.
    /// </summary>
    public ImmutableArray<string?> DiffIds { get; init; } = ImmutableArray<string?>.Empty;
    /// <summary>
    /// Platform information (OS/architecture) for this manifest.
    /// </summary>
    public OciPlatformInfo? Platform { get; init; }
    /// <summary>
    /// Total compressed size of all layers in bytes.
    /// </summary>
    public long TotalSize { get; init; }
    /// <summary>
    /// The timestamp when this snapshot was captured.
    /// </summary>
    public DateTimeOffset CapturedAt { get; init; }
    /// <summary>
    /// Version of the snapshot service that created this record.
    /// </summary>
    public string? SnapshotVersion { get; init; }
    /// <summary>
    /// Whether all diffIDs have been computed for this snapshot.
    /// Requires one diffID per layer: an empty or partially-populated
    /// <see cref="DiffIds"/> array on a layered image reports false
    /// (previously, an empty array was treated as "complete" because
    /// All() over an empty sequence is vacuously true).
    /// </summary>
    public bool DiffIdsComplete =>
        DiffIds.Length == Layers.Length &&
        DiffIds.All(id => !string.IsNullOrWhiteSpace(id));
    /// <summary>
    /// The number of layers in this image.
    /// </summary>
    public int LayerCount => Layers.Length;
}
/// <summary>
/// Platform information for an OCI image.
/// </summary>
public sealed record OciPlatformInfo
{
    /// <summary>Operating system (e.g., linux, windows).</summary>
    public required string Os { get; init; }

    /// <summary>CPU architecture (e.g., amd64, arm64).</summary>
    public required string Architecture { get; init; }

    /// <summary>Architecture variant, if any (e.g., v8 for arm64).</summary>
    public string? Variant { get; init; }

    /// <summary>OS version, primarily used for Windows images.</summary>
    public string? OsVersion { get; init; }

    /// <summary>
    /// Formats the platform as "os/arch" or "os/arch/variant" when a variant is set.
    /// </summary>
    public override string ToString()
    {
        var platformId = $"{Os}/{Architecture}";
        return string.IsNullOrWhiteSpace(Variant) ? platformId : $"{platformId}/{Variant}";
    }
}

View File

@@ -0,0 +1,314 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Contracts;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Manifest.Persistence;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Service for capturing, storing, and comparing OCI image manifest snapshots.
/// </summary>
public sealed class OciManifestSnapshotService : IOciManifestSnapshotService
{
private readonly IOciImageInspector _imageInspector;
private readonly IManifestSnapshotRepository _repository;
private readonly TimeProvider _timeProvider;
private readonly ILogger<OciManifestSnapshotService> _logger;
/// <summary>
/// Creates the snapshot service.
/// </summary>
/// <param name="imageInspector">Registry inspector used to fetch manifests and layers.</param>
/// <param name="repository">Store for persisted snapshots.</param>
/// <param name="logger">Logger for capture/compare diagnostics.</param>
/// <param name="timeProvider">Clock abstraction; defaults to the system clock when null (injectable for tests).</param>
public OciManifestSnapshotService(
    IOciImageInspector imageInspector,
    IManifestSnapshotRepository repository,
    ILogger<OciManifestSnapshotService> logger,
    TimeProvider? timeProvider = null)
{
    _imageInspector = imageInspector ?? throw new ArgumentNullException(nameof(imageInspector));
    _repository = repository ?? throw new ArgumentNullException(nameof(repository));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _timeProvider = timeProvider ?? TimeProvider.System;
}
/// <inheritdoc />
/// <remarks>
/// Returns null (rather than throwing) for a blank reference, a failed inspection,
/// or an inspection with no platforms. Only the first platform of a multi-arch
/// image is captured — presumably the inspector applies the platform filter
/// upstream; confirm against <c>IOciImageInspector</c>.
/// </remarks>
public async Task<OciManifestSnapshot?> CaptureAsync(
    string imageReference,
    ManifestCaptureOptions? options = null,
    CancellationToken cancellationToken = default)
{
    // Blank references are treated as "not found", not as an error.
    if (string.IsNullOrWhiteSpace(imageReference))
    {
        return null;
    }
    options ??= new ManifestCaptureOptions();
    _logger.LogDebug("Capturing manifest snapshot for {ImageReference}", imageReference);
    var inspectionOptions = new ImageInspectionOptions
    {
        IncludeLayers = true,
        ResolveIndex = true,
        PlatformFilter = options.PlatformFilter,
        Timeout = options.Timeout
    };
    var inspection = await _imageInspector.InspectAsync(imageReference, inspectionOptions, cancellationToken)
        .ConfigureAwait(false);
    if (inspection is null)
    {
        _logger.LogWarning("Failed to inspect image {ImageReference}", imageReference);
        return null;
    }
    // Take the first platform reported by the inspection.
    var platform = inspection.Platforms.FirstOrDefault();
    if (platform is null)
    {
        _logger.LogWarning("No platforms found for image {ImageReference}", imageReference);
        return null;
    }
    var layers = BuildLayerDescriptors(platform.Layers);
    var snapshot = new OciManifestSnapshot
    {
        Id = Guid.NewGuid(),
        ImageReference = imageReference,
        Registry = inspection.Registry,
        Repository = inspection.Repository,
        Tag = ExtractTag(imageReference),
        ManifestDigest = inspection.ResolvedDigest,
        ConfigDigest = platform.ConfigDigest,
        MediaType = platform.ManifestMediaType,
        Layers = layers,
        // Keep the flat DiffIds list in lock-step with Layers (entries may be null
        // when diffIDs were not computed during inspection).
        DiffIds = layers.Select(l => l.DiffId).ToImmutableArray(),
        Platform = new OciPlatformInfo
        {
            Os = platform.Os,
            Architecture = platform.Architecture,
            Variant = platform.Variant,
            OsVersion = platform.OsVersion
        },
        TotalSize = platform.TotalSize,
        CapturedAt = _timeProvider.GetUtcNow(),
        SnapshotVersion = GetSnapshotVersion()
    };
    // Persist unless the caller asked for a transient (in-memory only) capture.
    if (options.Persist)
    {
        await _repository.UpsertAsync(snapshot, cancellationToken).ConfigureAwait(false);
        _logger.LogInformation(
            "Captured manifest snapshot for {ImageReference} with {LayerCount} layers",
            imageReference,
            snapshot.LayerCount);
    }
    return snapshot;
}
/// <inheritdoc />
/// <remarks>A blank digest short-circuits to null without hitting the repository.</remarks>
public Task<OciManifestSnapshot?> GetByDigestAsync(
    string manifestDigest,
    CancellationToken cancellationToken = default) =>
    string.IsNullOrWhiteSpace(manifestDigest)
        ? Task.FromResult<OciManifestSnapshot?>(null)
        : _repository.GetByDigestAsync(manifestDigest, cancellationToken);
/// <inheritdoc />
/// <remarks>A blank reference short-circuits to null without hitting the repository.</remarks>
public Task<OciManifestSnapshot?> GetByReferenceAsync(
    string imageReference,
    CancellationToken cancellationToken = default) =>
    string.IsNullOrWhiteSpace(imageReference)
        ? Task.FromResult<OciManifestSnapshot?>(null)
        : _repository.GetByReferenceAsync(imageReference, cancellationToken);
/// <summary>
/// Retrieves a snapshot by its unique identifier; pure passthrough to the repository.
/// </summary>
public Task<OciManifestSnapshot?> GetByIdAsync(
    Guid snapshotId,
    CancellationToken cancellationToken = default)
{
    return _repository.GetByIdAsync(snapshotId, cancellationToken);
}
/// <summary>
/// Loads both snapshots by manifest digest and compares them.
/// Returns null (after logging a warning) when either digest has no stored snapshot;
/// the new digest is only fetched if the old one was found.
/// </summary>
public async Task<ManifestComparisonResult?> CompareAsync(
    string oldManifestDigest,
    string newManifestDigest,
    CancellationToken cancellationToken = default)
{
    var previous = await GetByDigestAsync(oldManifestDigest, cancellationToken).ConfigureAwait(false);
    if (previous is null)
    {
        _logger.LogWarning("Old manifest {Digest} not found for comparison", oldManifestDigest);
        return null;
    }

    var current = await GetByDigestAsync(newManifestDigest, cancellationToken).ConfigureAwait(false);
    if (current is null)
    {
        _logger.LogWarning("New manifest {Digest} not found for comparison", newManifestDigest);
        return null;
    }

    return Compare(previous, current);
}
/// <summary>
/// Compares two manifest snapshots layer-by-layer using diffIDs.
/// A layer is Unchanged when its diffID appears in both snapshots, Modified when
/// the old layer's diffID is gone but the new snapshot has a (different) layer at
/// the same index, Removed when neither holds, and Added when a new layer's diffID
/// is absent from the old snapshot and was not already paired as the new side of a
/// Modified entry. Layers without a diffID are ignored entirely.
/// </summary>
/// <param name="oldSnapshot">Baseline snapshot.</param>
/// <param name="newSnapshot">Snapshot to compare against the baseline.</param>
/// <returns>Per-category layer changes, each list ordered by layer index.</returns>
public ManifestComparisonResult Compare(OciManifestSnapshot oldSnapshot, OciManifestSnapshot newSnapshot)
{
    ArgumentNullException.ThrowIfNull(oldSnapshot);
    ArgumentNullException.ThrowIfNull(newSnapshot);

    var oldDiffIds = BuildDiffIdSet(oldSnapshot);
    var newDiffIds = BuildDiffIdSet(newSnapshot);

    var unchanged = new List<LayerChange>();
    var added = new List<LayerChange>();
    var removed = new List<LayerChange>();
    var modified = new List<LayerChange>();

    foreach (var oldLayer in oldSnapshot.Layers)
    {
        var diffId = oldLayer.DiffId;
        if (string.IsNullOrWhiteSpace(diffId))
        {
            continue;
        }

        if (newDiffIds.TryGetValue(diffId, out var newLayer))
        {
            unchanged.Add(new LayerChange
            {
                ChangeType = LayerChangeType.Unchanged,
                OldLayer = oldLayer,
                NewLayer = newLayer
            });
        }
        else
        {
            // Same position, different content => treat as a modification rather
            // than an unrelated remove/add pair.
            var newLayerAtIndex = newSnapshot.Layers.FirstOrDefault(l => l.LayerIndex == oldLayer.LayerIndex);
            if (newLayerAtIndex is not null && !string.IsNullOrWhiteSpace(newLayerAtIndex.DiffId))
            {
                modified.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Modified,
                    OldLayer = oldLayer,
                    NewLayer = newLayerAtIndex
                });
            }
            else
            {
                removed.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Removed,
                    OldLayer = oldLayer,
                    NewLayer = null
                });
            }
        }
    }

    // New-snapshot indices already consumed as the "new" side of a Modified pair.
    // Precomputing this set avoids re-reporting those layers as Added and replaces
    // the previous O(n) scan of `modified` per new layer with an O(1) lookup.
    var modifiedNewIndices = modified
        .Where(m => m.NewLayer is not null)
        .Select(m => m.NewLayer!.LayerIndex)
        .ToHashSet();

    foreach (var newLayer in newSnapshot.Layers)
    {
        var diffId = newLayer.DiffId;
        if (string.IsNullOrWhiteSpace(diffId))
        {
            continue;
        }

        if (!oldDiffIds.ContainsKey(diffId) && !modifiedNewIndices.Contains(newLayer.LayerIndex))
        {
            added.Add(new LayerChange
            {
                ChangeType = LayerChangeType.Added,
                OldLayer = null,
                NewLayer = newLayer
            });
        }
    }

    return new ManifestComparisonResult
    {
        OldSnapshot = oldSnapshot,
        NewSnapshot = newSnapshot,
        UnchangedLayers = unchanged.OrderBy(l => l.LayerIndex).ToImmutableArray(),
        AddedLayers = added.OrderBy(l => l.LayerIndex).ToImmutableArray(),
        RemovedLayers = removed.OrderBy(l => l.LayerIndex).ToImmutableArray(),
        ModifiedLayers = modified.OrderBy(l => l.LayerIndex).ToImmutableArray()
    };
}
/// <summary>
/// Lists stored snapshots for a registry/repository pair; pure passthrough to the repository.
/// </summary>
public Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
    string registry,
    string repository,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    return _repository.ListByRepositoryAsync(registry, repository, limit, cancellationToken);
}
/// <summary>
/// Deletes snapshots captured before <paramref name="olderThan"/>; returns the number removed.
/// </summary>
public Task<int> PruneAsync(
    DateTimeOffset olderThan,
    CancellationToken cancellationToken = default)
{
    return _repository.PruneAsync(olderThan, cancellationToken);
}
/// <summary>
/// Projects registry layer info into ordered layer descriptors.
/// DiffId is left null here: the manifest alone does not carry diffIDs, so they
/// are presumably resolved later by a separate pipeline — TODO confirm.
/// </summary>
private static ImmutableArray<OciLayerDescriptor> BuildLayerDescriptors(ImmutableArray<LayerInfo> layers)
{
    return layers.Select((layer, index) => new OciLayerDescriptor
    {
        Digest = layer.Digest,
        DiffId = null, // unknown at capture time; see summary
        Size = layer.Size,
        MediaType = layer.MediaType,
        LayerIndex = index,
        Annotations = layer.Annotations
    }).ToImmutableArray();
}
/// <summary>
/// Builds a case-insensitive map of diffID to layer descriptor for a snapshot.
/// Layers without a diffID are skipped; on duplicate diffIDs the first layer wins.
/// </summary>
private static Dictionary<string, OciLayerDescriptor> BuildDiffIdSet(OciManifestSnapshot snapshot)
{
    var index = new Dictionary<string, OciLayerDescriptor>(StringComparer.OrdinalIgnoreCase);

    foreach (var layer in snapshot.Layers)
    {
        var diffId = layer.DiffId;
        if (string.IsNullOrWhiteSpace(diffId))
        {
            continue;
        }

        // TryAdd keeps the first occurrence, matching first-wins semantics
        // without a separate ContainsKey lookup.
        index.TryAdd(diffId, layer);
    }

    return index;
}
/// <summary>
/// Extracts the tag from an image reference, or returns null when no tag is present.
/// Handles digest references ("repo:tag@sha256:..." yields "tag", "repo@sha256:..."
/// yields null) and avoids mistaking a registry port for a tag
/// ("localhost:5000/repo" yields null because a '/' follows the colon).
/// </summary>
private static string? ExtractTag(string imageReference)
{
    var atIndex = imageReference.IndexOf('@');
    if (atIndex >= 0)
    {
        // Digest reference: the tag, if any, sits between the last ':' and the '@'.
        var namePart = imageReference[..atIndex];
        var tagSeparator = namePart.LastIndexOf(':');
        if (tagSeparator <= 0)
        {
            return null;
        }

        var candidate = namePart[(tagSeparator + 1)..];
        return candidate.Contains('/') ? null : candidate;
    }

    var separator = imageReference.LastIndexOf(':');
    if (separator <= 0)
    {
        return null;
    }

    // A '/' after the colon means the colon separates a registry port, not a tag.
    return imageReference[separator..].Contains('/')
        ? null
        : imageReference[(separator + 1)..];
}
/// <summary>
/// Returns the snapshot producer version: the scanner assembly version, falling
/// back to "1.0.0" when the assembly carries no version metadata.
/// </summary>
private static string GetSnapshotVersion()
{
    return typeof(OciManifestSnapshotService).Assembly.GetName().Version?.ToString() ?? "1.0.0";
}
}

View File

@@ -0,0 +1,48 @@
using StellaOps.Scanner.Manifest.Models;
namespace StellaOps.Scanner.Manifest.Persistence;
/// <summary>
/// Repository interface for persisting OCI manifest snapshots.
/// Snapshots are keyed by manifest digest; implementations are expected to
/// upsert on that key.
/// </summary>
public interface IManifestSnapshotRepository
{
    /// <summary>
    /// Inserts or updates a manifest snapshot (keyed by manifest digest).
    /// </summary>
    /// <param name="snapshot">Snapshot to persist.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task UpsertAsync(OciManifestSnapshot snapshot, CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets a snapshot by manifest digest.
    /// </summary>
    /// <param name="manifestDigest">Manifest digest (sha256:...).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Matching snapshot or null.</returns>
    Task<OciManifestSnapshot?> GetByDigestAsync(string manifestDigest, CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets the most recent snapshot for an image reference.
    /// </summary>
    /// <param name="imageReference">Full image reference.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Most recently captured snapshot or null.</returns>
    Task<OciManifestSnapshot?> GetByReferenceAsync(string imageReference, CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets a snapshot by its unique ID.
    /// </summary>
    /// <param name="snapshotId">Snapshot identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Matching snapshot or null.</returns>
    Task<OciManifestSnapshot?> GetByIdAsync(Guid snapshotId, CancellationToken cancellationToken = default);
    /// <summary>
    /// Lists snapshots for a repository, newest first.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="limit">Maximum number of snapshots to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Deletes snapshots older than the specified time.
    /// </summary>
    /// <param name="olderThan">Cut-off; snapshots captured before this are removed.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Number of snapshots deleted.</returns>
    Task<int> PruneAsync(DateTimeOffset olderThan, CancellationToken cancellationToken = default);
    /// <summary>
    /// Checks if a snapshot exists for the given manifest digest.
    /// </summary>
    /// <param name="manifestDigest">Manifest digest (sha256:...).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if a snapshot with that digest is stored.</returns>
    Task<bool> ExistsAsync(string manifestDigest, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,249 @@
using System.Collections.Immutable;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Storage.Postgres;
namespace StellaOps.Scanner.Manifest.Persistence;
/// <summary>
/// PostgreSQL repository for OCI manifest snapshots.
/// Rows are keyed by manifest digest (the upsert conflict target) and stored with
/// layers/diff-ids/platform serialized into jsonb columns.
/// </summary>
public sealed class ManifestSnapshotRepository : RepositoryBase<ScannerDataSource>, IManifestSnapshotRepository
{
    // Empty tenant sentinel passed to the RepositoryBase helpers.
    // NOTE(review): presumably means "no tenant scoping" for this table —
    // confirm against RepositoryBase semantics.
    private const string Tenant = "";

    // Fully-qualified table name, resolved against the configured schema.
    private string Table => $"{SchemaName}.manifest_snapshots";
    private string SchemaName => DataSource.SchemaName ?? ScannerDataSource.DefaultSchema;

    // camelCase, compact JSON for the jsonb columns; must stay stable so
    // MapSnapshot can round-trip what UpsertAsync wrote.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false
    };

    public ManifestSnapshotRepository(ScannerDataSource dataSource, ILogger<ManifestSnapshotRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <summary>
    /// Inserts or updates a snapshot keyed on manifest_digest.
    /// On conflict only the mutable columns are refreshed; id, registry,
    /// repository, config_digest, media_type and captured_at keep their original
    /// values — presumably because the same digest implies the same manifest
    /// content and first-capture time (TODO confirm intent for captured_at).
    /// </summary>
    public Task UpsertAsync(OciManifestSnapshot snapshot, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(snapshot);
        // Serialize jsonb payloads up front so a serialization failure surfaces
        // before any command is issued.
        var layersJson = JsonSerializer.Serialize(snapshot.Layers, JsonOptions);
        var diffIdsJson = JsonSerializer.Serialize(snapshot.DiffIds, JsonOptions);
        var platformJson = snapshot.Platform is not null
            ? JsonSerializer.Serialize(snapshot.Platform, JsonOptions)
            : null;
        var sql = $"""
            INSERT INTO {Table} (
                id, image_reference, registry, repository, tag,
                manifest_digest, config_digest, media_type,
                layers_json, diff_ids_json, platform_json,
                total_size, captured_at, snapshot_version
            )
            VALUES (
                @id, @image_reference, @registry, @repository, @tag,
                @manifest_digest, @config_digest, @media_type,
                @layers_json::jsonb, @diff_ids_json::jsonb, @platform_json::jsonb,
                @total_size, @captured_at, @snapshot_version
            )
            ON CONFLICT (manifest_digest) DO UPDATE SET
                image_reference = EXCLUDED.image_reference,
                tag = EXCLUDED.tag,
                layers_json = EXCLUDED.layers_json,
                diff_ids_json = EXCLUDED.diff_ids_json,
                platform_json = EXCLUDED.platform_json,
                total_size = EXCLUDED.total_size,
                snapshot_version = EXCLUDED.snapshot_version
            """;
        return ExecuteAsync(
            Tenant,
            sql,
            cmd =>
            {
                AddParameter(cmd, "id", snapshot.Id);
                AddParameter(cmd, "image_reference", snapshot.ImageReference);
                AddParameter(cmd, "registry", snapshot.Registry);
                AddParameter(cmd, "repository", snapshot.Repository);
                AddParameter(cmd, "tag", snapshot.Tag ?? (object)DBNull.Value);
                AddParameter(cmd, "manifest_digest", snapshot.ManifestDigest);
                AddParameter(cmd, "config_digest", snapshot.ConfigDigest);
                AddParameter(cmd, "media_type", snapshot.MediaType);
                AddParameter(cmd, "layers_json", layersJson);
                AddParameter(cmd, "diff_ids_json", diffIdsJson);
                AddParameter(cmd, "platform_json", platformJson ?? (object)DBNull.Value);
                AddParameter(cmd, "total_size", snapshot.TotalSize);
                // Stored as UTC DateTime; MapSnapshot reattaches a zero offset.
                AddParameter(cmd, "captured_at", snapshot.CapturedAt.UtcDateTime);
                AddParameter(cmd, "snapshot_version", snapshot.SnapshotVersion ?? (object)DBNull.Value);
            },
            cancellationToken);
    }

    /// <summary>Gets a snapshot by manifest digest, or null.</summary>
    public Task<OciManifestSnapshot?> GetByDigestAsync(string manifestDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(manifestDigest);
        var sql = $"""
            SELECT id, image_reference, registry, repository, tag,
                   manifest_digest, config_digest, media_type,
                   layers_json, diff_ids_json, platform_json,
                   total_size, captured_at, snapshot_version
            FROM {Table}
            WHERE manifest_digest = @manifest_digest
            """;
        return QuerySingleOrDefaultAsync(
            Tenant,
            sql,
            cmd => AddParameter(cmd, "manifest_digest", manifestDigest),
            MapSnapshot,
            cancellationToken);
    }

    /// <summary>Gets the most recently captured snapshot for an image reference, or null.</summary>
    public Task<OciManifestSnapshot?> GetByReferenceAsync(string imageReference, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);
        var sql = $"""
            SELECT id, image_reference, registry, repository, tag,
                   manifest_digest, config_digest, media_type,
                   layers_json, diff_ids_json, platform_json,
                   total_size, captured_at, snapshot_version
            FROM {Table}
            WHERE image_reference = @image_reference
            ORDER BY captured_at DESC
            LIMIT 1
            """;
        return QuerySingleOrDefaultAsync(
            Tenant,
            sql,
            cmd => AddParameter(cmd, "image_reference", imageReference),
            MapSnapshot,
            cancellationToken);
    }

    /// <summary>Gets a snapshot by primary key, or null.</summary>
    public Task<OciManifestSnapshot?> GetByIdAsync(Guid snapshotId, CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT id, image_reference, registry, repository, tag,
                   manifest_digest, config_digest, media_type,
                   layers_json, diff_ids_json, platform_json,
                   total_size, captured_at, snapshot_version
            FROM {Table}
            WHERE id = @id
            """;
        return QuerySingleOrDefaultAsync(
            Tenant,
            sql,
            cmd => AddParameter(cmd, "id", snapshotId),
            MapSnapshot,
            cancellationToken);
    }

    /// <summary>Lists snapshots for a registry/repository pair, newest first, up to <paramref name="limit"/>.</summary>
    public async Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(registry);
        ArgumentException.ThrowIfNullOrWhiteSpace(repository);
        var sql = $"""
            SELECT id, image_reference, registry, repository, tag,
                   manifest_digest, config_digest, media_type,
                   layers_json, diff_ids_json, platform_json,
                   total_size, captured_at, snapshot_version
            FROM {Table}
            WHERE registry = @registry AND repository = @repository
            ORDER BY captured_at DESC
            LIMIT @limit
            """;
        var results = await QueryAsync(
            Tenant,
            sql,
            cmd =>
            {
                AddParameter(cmd, "registry", registry);
                AddParameter(cmd, "repository", repository);
                AddParameter(cmd, "limit", limit);
            },
            MapSnapshot,
            cancellationToken).ConfigureAwait(false);
        return results;
    }

    /// <summary>Deletes snapshots captured before the cut-off; returns the number of rows removed.</summary>
    public async Task<int> PruneAsync(DateTimeOffset olderThan, CancellationToken cancellationToken = default)
    {
        var sql = $"""
            DELETE FROM {Table}
            WHERE captured_at < @older_than
            """;
        return await ExecuteWithRowCountAsync(
            Tenant,
            sql,
            cmd => AddParameter(cmd, "older_than", olderThan.UtcDateTime),
            cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Returns true when a snapshot with the given manifest digest exists.</summary>
    public async Task<bool> ExistsAsync(string manifestDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(manifestDigest);
        var sql = $"""
            SELECT EXISTS(SELECT 1 FROM {Table} WHERE manifest_digest = @manifest_digest)
            """;
        return await QueryScalarAsync(
            Tenant,
            sql,
            cmd => AddParameter(cmd, "manifest_digest", manifestDigest),
            reader => reader.GetBoolean(0),
            cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Materializes a row into an <see cref="OciManifestSnapshot"/>.
    /// Deserializes the jsonb columns with the same options used on write and
    /// reattaches a UTC offset to captured_at (written as UtcDateTime above).
    /// </summary>
    private static OciManifestSnapshot MapSnapshot(NpgsqlDataReader reader)
    {
        var layersJson = reader.GetString(reader.GetOrdinal("layers_json"));
        var diffIdsJson = reader.GetString(reader.GetOrdinal("diff_ids_json"));
        var platformOrdinal = reader.GetOrdinal("platform_json");
        var platformJson = reader.IsDBNull(platformOrdinal) ? null : reader.GetString(platformOrdinal);
        // NOTE(review): a JSON literal "null" in these columns deserializes to a
        // default ImmutableArray — verify upstream writes never store "null".
        var layers = JsonSerializer.Deserialize<ImmutableArray<OciLayerDescriptor>>(layersJson, JsonOptions);
        var diffIds = JsonSerializer.Deserialize<ImmutableArray<string?>>(diffIdsJson, JsonOptions);
        var platform = !string.IsNullOrWhiteSpace(platformJson)
            ? JsonSerializer.Deserialize<OciPlatformInfo>(platformJson, JsonOptions)
            : null;
        var tagOrdinal = reader.GetOrdinal("tag");
        var versionOrdinal = reader.GetOrdinal("snapshot_version");
        return new OciManifestSnapshot
        {
            Id = reader.GetGuid(reader.GetOrdinal("id")),
            ImageReference = reader.GetString(reader.GetOrdinal("image_reference")),
            Registry = reader.GetString(reader.GetOrdinal("registry")),
            Repository = reader.GetString(reader.GetOrdinal("repository")),
            Tag = reader.IsDBNull(tagOrdinal) ? null : reader.GetString(tagOrdinal),
            ManifestDigest = reader.GetString(reader.GetOrdinal("manifest_digest")),
            ConfigDigest = reader.GetString(reader.GetOrdinal("config_digest")),
            MediaType = reader.GetString(reader.GetOrdinal("media_type")),
            Layers = layers,
            DiffIds = diffIds,
            Platform = platform,
            TotalSize = reader.GetInt64(reader.GetOrdinal("total_size")),
            CapturedAt = new DateTimeOffset(reader.GetDateTime(reader.GetOrdinal("captured_at")), TimeSpan.Zero),
            SnapshotVersion = reader.IsDBNull(versionOrdinal) ? null : reader.GetString(versionOrdinal)
        };
    }
}

View File

@@ -0,0 +1,257 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Detects well-known base images from layer diffIDs using fingerprinting.
/// Fingerprints are persisted in PostgreSQL and mirrored into an in-memory index
/// that is lazily loaded on first use (double-checked via a semaphore).
/// </summary>
public sealed class BaseImageDetector : IBaseImageDetector
{
    private const string TableName = "scanner_base_image_fingerprints";
    private const string LayerTableName = "scanner_base_image_layers";
    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<BaseImageDetector> _logger;
    // In-memory index: diffId -> (baseImage, layerIndex).
    // NOTE(review): the dictionary itself is concurrent, but the List values are
    // mutated in RegisterBaseImageAsync without a lock while reads may be in
    // flight — confirm registration only happens at startup / single-threaded.
    private readonly ConcurrentDictionary<string, List<(string BaseImage, int LayerIndex)>> _diffIdIndex = new(StringComparer.OrdinalIgnoreCase);
    // Known base images loaded at startup: imageRef -> ordered diffIDs.
    private readonly ConcurrentDictionary<string, ImmutableArray<string>> _knownBaseImages = new(StringComparer.OrdinalIgnoreCase);
    private bool _indexLoaded;
    private readonly SemaphoreSlim _loadLock = new(1, 1);

    public BaseImageDetector(NpgsqlDataSource dataSource, ILogger<BaseImageDetector> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Attempts to detect the base image that introduced a layer.
    /// Prefers a fingerprint whose layer index matches; otherwise falls back to
    /// any base image containing the diffID. Returns null when unknown.
    /// </summary>
    public async Task<string?> DetectBaseImageAsync(string diffId, int layerIndex, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);
        if (_diffIdIndex.TryGetValue(diffId, out var matches))
        {
            // Prefer exact layer index match
            var exactMatch = matches.FirstOrDefault(m => m.LayerIndex == layerIndex);
            if (!string.IsNullOrEmpty(exactMatch.BaseImage))
            {
                return exactMatch.BaseImage;
            }
            // Return any matching base image
            // (FirstOrDefault on an empty list yields a default tuple -> null).
            return matches.FirstOrDefault().BaseImage;
        }
        return null;
    }

    /// <summary>Returns true when the diffID belongs to any registered base image.</summary>
    public async Task<bool> IsKnownBaseLayerAsync(string diffId, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);
        return _diffIdIndex.ContainsKey(diffId);
    }

    /// <summary>
    /// Registers (or re-registers) a base image fingerprint: upserts the
    /// fingerprint row, replaces its layer rows in a single transaction, then
    /// updates the in-memory index. No-op for an empty diffID list.
    /// </summary>
    public async Task RegisterBaseImageAsync(
        string baseImageRef,
        IEnumerable<string> layerDiffIds,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(baseImageRef);
        var diffIds = layerDiffIds.ToArray();
        if (diffIds.Length == 0)
        {
            return;
        }
        _logger.LogInformation("Registering base image {BaseImage} with {LayerCount} layers", baseImageRef, diffIds.Length);
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var transaction = await conn.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            // Upsert base image fingerprint
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"""
                    INSERT INTO {TableName} (image_reference, layer_count, registered_at)
                    VALUES (@imageRef, @layerCount, @registeredAt)
                    ON CONFLICT (image_reference) DO UPDATE SET
                        layer_count = @layerCount,
                        registered_at = @registeredAt
                    """;
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                cmd.Parameters.AddWithValue("layerCount", diffIds.Length);
                cmd.Parameters.AddWithValue("registeredAt", DateTimeOffset.UtcNow);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }
            // Delete existing layers (replace-all semantics for re-registration)
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"DELETE FROM {LayerTableName} WHERE image_reference = @imageRef";
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }
            // Insert layer diffIds in bulk via paired unnest of index/diffId arrays
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"""
                    INSERT INTO {LayerTableName} (image_reference, layer_index, diff_id)
                    SELECT @imageRef, unnest(@indices::int[]), unnest(@diffIds::varchar[])
                    """;
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                cmd.Parameters.AddWithValue("indices", Enumerable.Range(0, diffIds.Length).ToArray());
                cmd.Parameters.AddWithValue("diffIds", diffIds);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }
            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
            // Update in-memory index only after a successful commit so the index
            // never reflects a rolled-back registration.
            _knownBaseImages[baseImageRef] = [.. diffIds];
            for (var i = 0; i < diffIds.Length; i++)
            {
                var diffId = diffIds[i];
                if (!_diffIdIndex.TryGetValue(diffId, out var list))
                {
                    list = [];
                    _diffIdIndex[diffId] = list;
                }
                // Remove existing entry for this base image and add new one
                list.RemoveAll(e => e.BaseImage.Equals(baseImageRef, StringComparison.OrdinalIgnoreCase));
                list.Add((baseImageRef, i));
            }
            _logger.LogInformation("Registered base image {BaseImage}", baseImageRef);
        }
        catch (Exception ex)
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogError(ex, "Failed to register base image {BaseImage}", baseImageRef);
            throw;
        }
    }

    /// <summary>Returns all registered base image references (unspecified order).</summary>
    public async Task<IReadOnlyList<string>> GetRegisteredBaseImagesAsync(CancellationToken cancellationToken = default)
    {
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);
        return [.. _knownBaseImages.Keys];
    }

    /// <summary>
    /// Lazily loads the fingerprint index exactly once, using a semaphore-guarded
    /// double check (fast path skips the lock once loaded).
    /// </summary>
    private async Task EnsureIndexLoadedAsync(CancellationToken cancellationToken)
    {
        if (_indexLoaded)
        {
            return;
        }
        await _loadLock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (_indexLoaded)
            {
                return;
            }
            await LoadIndexFromDatabaseAsync(cancellationToken).ConfigureAwait(false);
            LoadBuiltInBaseImages();
            _indexLoaded = true;
        }
        finally
        {
            _loadLock.Release();
        }
    }

    /// <summary>
    /// Streams all fingerprint layers (ordered by image then layer index) and
    /// rebuilds both in-memory indexes. Missing tables (42P01) downgrade to a
    /// warning so the detector still works with an empty index.
    /// </summary>
    private async Task LoadIndexFromDatabaseAsync(CancellationToken cancellationToken)
    {
        try
        {
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT l.image_reference, l.layer_index, l.diff_id
                FROM {LayerTableName} l
                INNER JOIN {TableName} f ON l.image_reference = f.image_reference
                ORDER BY l.image_reference, l.layer_index
                """;
            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            // Group-by-image while streaming: flush the accumulated layer list
            // whenever the image reference changes.
            var currentImage = "";
            var currentLayers = new List<string>();
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                var imageRef = reader.GetString(0);
                var layerIndex = reader.GetInt32(1);
                var diffId = reader.GetString(2);
                if (!imageRef.Equals(currentImage, StringComparison.OrdinalIgnoreCase))
                {
                    if (!string.IsNullOrEmpty(currentImage) && currentLayers.Count > 0)
                    {
                        _knownBaseImages[currentImage] = [.. currentLayers];
                    }
                    currentImage = imageRef;
                    currentLayers.Clear();
                }
                currentLayers.Add(diffId);
                if (!_diffIdIndex.TryGetValue(diffId, out var list))
                {
                    list = [];
                    _diffIdIndex[diffId] = list;
                }
                list.Add((imageRef, layerIndex));
            }
            // Don't forget the last image
            if (!string.IsNullOrEmpty(currentImage) && currentLayers.Count > 0)
            {
                _knownBaseImages[currentImage] = [.. currentLayers];
            }
            _logger.LogInformation("Loaded {Count} base image fingerprints from database", _knownBaseImages.Count);
        }
        catch (PostgresException ex) when (ex.SqlState == "42P01") // Table doesn't exist
        {
            _logger.LogWarning("Base image fingerprint tables do not exist. Run migrations to create them.");
        }
    }

    /// <summary>
    /// Placeholder for shipping built-in fingerprints; currently the database is
    /// the only fingerprint source.
    /// </summary>
    private void LoadBuiltInBaseImages()
    {
        // These are well-known base image layer patterns.
        // In a real implementation, these would be periodically updated from a registry scan.
        // For now, we'll rely on the database to store actual fingerprints.
        // Note: Real diffIDs would be computed from actual base images.
        // This is a placeholder to show the pattern.
        _logger.LogDebug("Built-in base image index ready for population");
    }
}

View File

@@ -0,0 +1,182 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
using Npgsql;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// PostgreSQL-backed cache for layer diffIDs with a bounded in-memory
/// first-level cache. DiffIDs are immutable (the same compressed layer digest
/// always decompresses to the same diffID), so entries never need invalidation.
/// All operations degrade gracefully (treated as a cache miss) when the backing
/// table has not been created yet, matching the behavior SetAsync already had.
/// </summary>
public sealed class DiffIdCache : IDiffIdCache
{
    private const int MemoryCacheMaxSize = 10000;
    private const string TableName = "scanner_diffid_cache";
    // PostgreSQL SQLSTATE for "relation does not exist".
    private const string UndefinedTableSqlState = "42P01";
    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<DiffIdCache> _logger;
    private readonly ConcurrentDictionary<string, string> _memoryCache = new(StringComparer.OrdinalIgnoreCase);

    public DiffIdCache(NpgsqlDataSource dataSource, ILogger<DiffIdCache> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Gets a cached diffID for a compressed layer digest, consulting the
    /// in-memory cache before the database. Returns null on a miss (or when the
    /// cache table does not exist yet).
    /// </summary>
    public async Task<string?> GetAsync(string layerDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);

        // Check memory cache first
        if (_memoryCache.TryGetValue(layerDigest, out var cached))
        {
            return cached;
        }

        try
        {
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT diff_id FROM {TableName}
                WHERE layer_digest = @layerDigest
                """;
            cmd.Parameters.AddWithValue("layerDigest", layerDigest);

            var result = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            if (result is string diffId && !string.IsNullOrWhiteSpace(diffId))
            {
                AddToMemoryCache(layerDigest, diffId);
                return diffId;
            }
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("DiffID cache table does not exist. Run migrations to create it.");
        }

        return null;
    }

    /// <summary>
    /// Stores a diffID for a layer digest in both the memory cache and the
    /// database. Existing rows win (DO NOTHING) since diffIDs are immutable.
    /// </summary>
    public async Task SetAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);

        AddToMemoryCache(layerDigest, diffId);

        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            INSERT INTO {TableName} (layer_digest, diff_id, created_at)
            VALUES (@layerDigest, @diffId, @createdAt)
            ON CONFLICT (layer_digest) DO NOTHING
            """;
        cmd.Parameters.AddWithValue("layerDigest", layerDigest);
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("createdAt", DateTimeOffset.UtcNow);
        try
        {
            await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("DiffID cache table does not exist. Run migrations to create it.");
        }
    }

    /// <summary>
    /// Returns true when a diffID is cached (in memory or database) for the digest.
    /// </summary>
    public async Task<bool> ExistsAsync(string layerDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);

        if (_memoryCache.ContainsKey(layerDigest))
        {
            return true;
        }

        try
        {
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT 1 FROM {TableName}
                WHERE layer_digest = @layerDigest
                LIMIT 1
                """;
            cmd.Parameters.AddWithValue("layerDigest", layerDigest);

            var result = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            return result is not null;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("DiffID cache table does not exist. Run migrations to create it.");
            return false;
        }
    }

    /// <summary>
    /// Batch lookup: returns digest -> diffID for every entry found in memory or
    /// the database (missing digests are simply absent from the result).
    /// Database hits are promoted into the memory cache.
    /// </summary>
    public async Task<IReadOnlyDictionary<string, string>> GetManyAsync(
        IEnumerable<string> layerDigests,
        CancellationToken cancellationToken = default)
    {
        var digests = layerDigests.Where(d => !string.IsNullOrWhiteSpace(d)).ToList();
        if (digests.Count == 0)
        {
            return new Dictionary<string, string>();
        }

        var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        var notInMemory = new List<string>();

        // Check memory cache first
        foreach (var digest in digests)
        {
            if (_memoryCache.TryGetValue(digest, out var cached))
            {
                result[digest] = cached;
            }
            else
            {
                notInMemory.Add(digest);
            }
        }

        if (notInMemory.Count == 0)
        {
            return result;
        }

        try
        {
            // Query database for the remaining digests via a single array
            // parameter. (A previous version also built per-digest @p0..@pN
            // parameters that the SQL never referenced — dead code, removed.)
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT layer_digest, diff_id FROM {TableName}
                WHERE layer_digest = ANY(@digests)
                """;
            cmd.Parameters.AddWithValue("digests", notInMemory.ToArray());

            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                var layerDigest = reader.GetString(0);
                var diffId = reader.GetString(1);
                result[layerDigest] = diffId;
                AddToMemoryCache(layerDigest, diffId);
            }
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("DiffID cache table does not exist. Run migrations to create it.");
        }

        return result;
    }

    /// <summary>
    /// Adds an entry to the memory cache, evicting roughly 10% of (arbitrary)
    /// entries first when the cache is at capacity. Eviction is approximate and
    /// race-tolerant; correctness does not depend on it.
    /// </summary>
    private void AddToMemoryCache(string layerDigest, string diffId)
    {
        if (_memoryCache.Count >= MemoryCacheMaxSize)
        {
            var toRemove = _memoryCache.Keys.Take(MemoryCacheMaxSize / 10).ToList();
            foreach (var key in toRemove)
            {
                _memoryCache.TryRemove(key, out _);
            }
        }
        _memoryCache[layerDigest] = diffId;
    }
}

View File

@@ -0,0 +1,68 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Detects well-known base images from layer diffIDs.
/// Implementations maintain a registry of base-image fingerprints
/// (ordered layer diffIDs) that detection queries run against.
/// </summary>
public interface IBaseImageDetector
{
    /// <summary>
    /// Attempts to detect the base image that introduced a layer.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="layerIndex">Zero-based index of the layer in the image.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Base image reference (e.g., "alpine:3.19") or null if unknown.</returns>
    Task<string?> DetectBaseImageAsync(string diffId, int layerIndex, CancellationToken cancellationToken = default);
    /// <summary>
    /// Checks if a layer is from a known base image.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if the layer is recognized as a base image layer.</returns>
    Task<bool> IsKnownBaseLayerAsync(string diffId, CancellationToken cancellationToken = default);
    /// <summary>
    /// Registers a base image fingerprint. Re-registering an existing reference
    /// replaces its previous fingerprint.
    /// </summary>
    /// <param name="baseImageRef">Base image reference (e.g., "alpine:3.19").</param>
    /// <param name="layerDiffIds">Ordered diffIDs for the base image layers.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task RegisterBaseImageAsync(
        string baseImageRef,
        IEnumerable<string> layerDiffIds,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets all registered base images.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>List of registered base image references.</returns>
    Task<IReadOnlyList<string>> GetRegisteredBaseImagesAsync(CancellationToken cancellationToken = default);
}
/// <summary>
/// Information about a known base image fingerprint.
/// </summary>
public sealed record BaseImageInfo
{
    /// <summary>
    /// Base image reference (e.g., "alpine:3.19").
    /// </summary>
    public required string ImageReference { get; init; }
    /// <summary>
    /// Ordered diffIDs for this base image (index 0 = bottom layer).
    /// </summary>
    public required IReadOnlyList<string> LayerDiffIds { get; init; }
    /// <summary>
    /// When this base image was registered/fingerprinted.
    /// </summary>
    public DateTimeOffset RegisteredAt { get; init; }
    /// <summary>
    /// How many times this base image has been detected.
    /// </summary>
    public long DetectionCount { get; init; }
}

View File

@@ -0,0 +1,43 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Cache for layer diffIDs.
/// DiffIDs are immutable (same layer digest always produces same diffID),
/// so they can be cached indefinitely; implementations need no invalidation.
/// </summary>
public interface IDiffIdCache
{
    /// <summary>
    /// Gets a cached diffID for a layer digest.
    /// </summary>
    /// <param name="layerDigest">Compressed layer digest (sha256:...).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Cached diffID or null if not in cache.</returns>
    Task<string?> GetAsync(string layerDigest, CancellationToken cancellationToken = default);
    /// <summary>
    /// Stores a diffID for a layer digest.
    /// </summary>
    /// <param name="layerDigest">Compressed layer digest (sha256:...).</param>
    /// <param name="diffId">Computed diffID (sha256:...).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task SetAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default);
    /// <summary>
    /// Checks if a diffID is cached for a layer digest.
    /// </summary>
    /// <param name="layerDigest">Compressed layer digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if cached.</returns>
    Task<bool> ExistsAsync(string layerDigest, CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets multiple cached diffIDs at once. Digests with no cached entry are
    /// simply absent from the result.
    /// </summary>
    /// <param name="layerDigests">Layer digests to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Dictionary of layer digest to diffID for found entries.</returns>
    Task<IReadOnlyDictionary<string, string>> GetManyAsync(
        IEnumerable<string> layerDigests,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,109 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Resolves image references to layer digests and computes diffIDs
/// (uncompressed-content hashes) for individual layers.
/// </summary>
public interface ILayerDigestResolver
{
    /// <summary>
    /// Resolves an image reference to its ordered layer descriptors with provenance.
    /// </summary>
    /// <param name="imageReference">Full image reference (registry/repo:tag or registry/repo@sha256:...).</param>
    /// <param name="options">Resolution options; null uses implementation defaults.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Resolved layers with provenance information.</returns>
    Task<ResolvedImageLayers> ResolveLayersAsync(
        string imageReference,
        LayerResolutionOptions? options = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Computes the diffID for a layer by decompressing and hashing its content.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="layerDigest">Compressed layer digest.</param>
    /// <param name="mediaType">Layer media type (to determine decompression).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The computed diffID (sha256:...).</returns>
    Task<string> ResolveDiffIdAsync(
        string registry,
        string repository,
        string layerDigest,
        string mediaType,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Finds layers that are shared across multiple images.
    /// </summary>
    /// <param name="imageReferences">Image references to compare.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Information about shared layers.</returns>
    Task<IReadOnlyList<SharedLayerInfo>> FindSharedLayersAsync(
        IEnumerable<string> imageReferences,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets the cached diffID for a layer digest if available.
    /// </summary>
    /// <param name="layerDigest">Compressed layer digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Cached diffID or null if not in cache.</returns>
    Task<string?> GetCachedDiffIdAsync(
        string layerDigest,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Caches a diffID for a layer digest.
    /// </summary>
    /// <param name="layerDigest">Compressed layer digest.</param>
    /// <param name="diffId">Computed diffID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task CacheDiffIdAsync(
        string layerDigest,
        string diffId,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Options for layer resolution.
/// </summary>
public sealed record LayerResolutionOptions
{
/// <summary>
/// Whether to compute diffIDs during resolution.
/// Default is false (deferred computation).
/// </summary>
public bool ComputeDiffIds { get; init; } = false;
/// <summary>
/// Target platform for multi-arch images (e.g., "linux/amd64").
/// If null, uses the default platform.
/// </summary>
public string? Platform { get; init; }
/// <summary>
/// Maximum layer size (bytes) for diffID computation.
/// Layers larger than this are skipped to prevent timeouts.
/// Default is 1GB.
/// </summary>
public long MaxDiffIdLayerSize { get; init; } = 1024L * 1024 * 1024;
/// <summary>
/// Whether to use cached diffIDs if available.
/// Default is true.
/// </summary>
public bool UseDiffIdCache { get; init; } = true;
/// <summary>
/// Timeout for diffID computation per layer.
/// Default is 5 minutes.
/// </summary>
public TimeSpan DiffIdTimeout { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Whether to attempt base image detection.
/// Default is true.
/// </summary>
public bool DetectBaseImage { get; init; } = true;
}

View File

@@ -0,0 +1,427 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Registry;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Implementation of ILayerDigestResolver that resolves image references to layer digests
/// and computes diffIDs with caching support.
/// </summary>
public sealed class LayerDigestResolver : ILayerDigestResolver
{
    private readonly IRegistryClient _registryClient;
    private readonly IOciManifestSnapshotService _snapshotService;
    private readonly IDiffIdCache _diffIdCache;
    private readonly IBaseImageDetector _baseImageDetector;
    private readonly ILogger<LayerDigestResolver> _logger;

    // In-memory cache for recently resolved diffIDs. Layer digests are content-addressed
    // and immutable, so entries never need invalidation.
    // NOTE(review): unbounded; entries are short strings, but consider a cap for
    // very long-lived processes.
    private readonly ConcurrentDictionary<string, string> _memoryCache = new(StringComparer.OrdinalIgnoreCase);

    public LayerDigestResolver(
        IRegistryClient registryClient,
        IOciManifestSnapshotService snapshotService,
        IDiffIdCache diffIdCache,
        IBaseImageDetector baseImageDetector,
        ILogger<LayerDigestResolver> logger)
    {
        _registryClient = registryClient ?? throw new ArgumentNullException(nameof(registryClient));
        _snapshotService = snapshotService ?? throw new ArgumentNullException(nameof(snapshotService));
        _diffIdCache = diffIdCache ?? throw new ArgumentNullException(nameof(diffIdCache));
        _baseImageDetector = baseImageDetector ?? throw new ArgumentNullException(nameof(baseImageDetector));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Resolves an image reference to its ordered layers, optionally computing diffIDs
    /// and detecting base-image provenance per layer.
    /// </summary>
    /// <exception cref="InvalidOperationException">The manifest could not be captured.</exception>
    public async Task<ResolvedImageLayers> ResolveLayersAsync(
        string imageReference,
        LayerResolutionOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);
        options ??= new LayerResolutionOptions();
        var (registry, repository, reference) = ParseImageReference(imageReference);
        _logger.LogInformation("Resolving layers for {Image}", imageReference);

        // Capture manifest snapshot (resolves tag/digest -> manifest + config + layer list).
        var snapshot = await _snapshotService.CaptureAsync(
            registry,
            repository,
            reference,
            new ManifestCaptureOptions { Platform = options.Platform },
            cancellationToken).ConfigureAwait(false);

        if (snapshot is null)
        {
            throw new InvalidOperationException($"Failed to capture manifest for {imageReference}");
        }

        // Resolve layers with provenance. Config history entries are positionally
        // aligned with the (non-empty) layers, as are snapshot.DiffIds.
        var layers = new List<LayerProvenance>();
        var configHistory = await GetConfigHistoryAsync(registry, repository, snapshot.ConfigDigest, cancellationToken)
            .ConfigureAwait(false);

        for (var i = 0; i < snapshot.Layers.Length; i++)
        {
            var layer = snapshot.Layers[i];

            // Prefer the diffID already present in the snapshot.
            string? diffId = snapshot.DiffIds.Length > i ? snapshot.DiffIds[i] : null;

            // Try the persistent cache before doing any expensive work.
            if (string.IsNullOrWhiteSpace(diffId) && options.UseDiffIdCache)
            {
                diffId = await GetCachedDiffIdAsync(layer.Digest, cancellationToken).ConfigureAwait(false);
            }

            // Compute diffID only if requested, not cached, and the layer is small enough.
            if (string.IsNullOrWhiteSpace(diffId) && options.ComputeDiffIds && layer.Size <= options.MaxDiffIdLayerSize)
            {
                try
                {
                    using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                    cts.CancelAfter(options.DiffIdTimeout);
                    diffId = await ResolveDiffIdAsync(registry, repository, layer.Digest, layer.MediaType, cts.Token)
                        .ConfigureAwait(false);
                }
                catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
                {
                    // Per-layer timeout only; the caller's cancellation still propagates.
                    _logger.LogWarning("DiffID computation timed out for layer {Digest}", layer.Digest);
                }
                catch (NotSupportedException ex)
                {
                    // e.g. zstd-compressed layers: leave DiffId null rather than failing
                    // the whole resolution (or worse, caching an incorrect value).
                    _logger.LogWarning(ex, "DiffID computation not supported for layer {Digest}", layer.Digest);
                }
            }

            // Dockerfile command that created this layer, from the config history.
            var createdBy = configHistory.Length > i ? configHistory[i] : null;

            // Base-image detection requires a diffID to match against.
            string? introducedBy = null;
            if (options.DetectBaseImage && !string.IsNullOrWhiteSpace(diffId))
            {
                introducedBy = await _baseImageDetector.DetectBaseImageAsync(diffId, i, cancellationToken)
                    .ConfigureAwait(false);
            }

            layers.Add(new LayerProvenance
            {
                LayerDigest = layer.Digest,
                DiffId = diffId,
                SourceImage = imageReference,
                LayerIndex = i,
                IntroducedBy = introducedBy,
                Size = layer.Size,
                MediaType = layer.MediaType,
                CreatedByCommand = createdBy
            });
        }

        return new ResolvedImageLayers
        {
            ImageReference = imageReference,
            ManifestDigest = snapshot.ManifestDigest,
            ConfigDigest = snapshot.ConfigDigest,
            Platform = snapshot.Platform?.ToString(),
            Layers = [.. layers],
            ResolvedAt = DateTimeOffset.UtcNow
        };
    }

    /// <summary>
    /// Computes (or fetches from cache) the diffID for a layer by streaming,
    /// decompressing, and hashing its content.
    /// </summary>
    /// <exception cref="InvalidOperationException">The layer blob was not found.</exception>
    /// <exception cref="NotSupportedException">The layer uses an unsupported compression (zstd).</exception>
    public async Task<string> ResolveDiffIdAsync(
        string registry,
        string repository,
        string layerDigest,
        string mediaType,
        CancellationToken cancellationToken = default)
    {
        // Check memory cache first (cheapest path).
        if (_memoryCache.TryGetValue(layerDigest, out var cached))
        {
            return cached;
        }

        // Check persistent cache and promote into memory.
        var persistedDiffId = await _diffIdCache.GetAsync(layerDigest, cancellationToken).ConfigureAwait(false);
        if (!string.IsNullOrWhiteSpace(persistedDiffId))
        {
            _memoryCache[layerDigest] = persistedDiffId;
            return persistedDiffId;
        }

        _logger.LogDebug("Computing diffID for layer {Digest}", layerDigest);

        // Fetch the compressed blob and hash its decompressed content.
        using var blobStream = await _registryClient.GetBlobAsync(registry, repository, layerDigest, cancellationToken)
            .ConfigureAwait(false);
        if (blobStream is null)
        {
            throw new InvalidOperationException($"Layer blob not found: {layerDigest}");
        }

        var diffId = await ComputeDiffIdAsync(blobStream, mediaType, cancellationToken).ConfigureAwait(false);

        // Cache the result in both tiers (safe: diffIDs are immutable per digest).
        _memoryCache[layerDigest] = diffId;
        await CacheDiffIdAsync(layerDigest, diffId, cancellationToken).ConfigureAwait(false);

        _logger.LogDebug("Computed diffID {DiffId} for layer {Digest}", diffId, layerDigest);
        return diffId;
    }

    /// <summary>
    /// Finds layers (by diffID) present in two or more of the given images.
    /// Images that fail to resolve are skipped with a warning.
    /// </summary>
    public async Task<IReadOnlyList<SharedLayerInfo>> FindSharedLayersAsync(
        IEnumerable<string> imageReferences,
        CancellationToken cancellationToken = default)
    {
        var images = imageReferences.ToArray();
        if (images.Length < 2)
        {
            return [];
        }

        // Resolve all images (best-effort; a single bad reference should not abort the set).
        var resolvedImages = new List<ResolvedImageLayers>();
        foreach (var imageRef in images)
        {
            try
            {
                var resolved = await ResolveLayersAsync(imageRef, new LayerResolutionOptions
                {
                    ComputeDiffIds = true,
                    DetectBaseImage = true
                }, cancellationToken).ConfigureAwait(false);
                resolvedImages.Add(resolved);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to resolve {Image} for shared layer detection", imageRef);
            }
        }

        // Group layers by diffID; layers without a diffID cannot be compared.
        var layersByDiffId = new Dictionary<string, List<(ResolvedImageLayers Image, LayerProvenance Layer)>>(
            StringComparer.OrdinalIgnoreCase);
        foreach (var image in resolvedImages)
        {
            foreach (var layer in image.Layers)
            {
                if (string.IsNullOrWhiteSpace(layer.DiffId))
                {
                    continue;
                }
                if (!layersByDiffId.TryGetValue(layer.DiffId, out var list))
                {
                    list = [];
                    layersByDiffId[layer.DiffId] = list;
                }
                list.Add((image, layer));
            }
        }

        // Build shared layer info for layers present in multiple images.
        var sharedLayers = new List<SharedLayerInfo>();
        foreach (var (diffId, occurrences) in layersByDiffId)
        {
            if (occurrences.Count < 2)
            {
                continue;
            }

            var firstLayer = occurrences[0].Layer;
            var sampleImages = occurrences
                .Select(o => o.Image.ImageReference)
                .Distinct()
                .Take(5)
                .ToImmutableArray();
            var isKnownBase = await _baseImageDetector.IsKnownBaseLayerAsync(diffId, cancellationToken)
                .ConfigureAwait(false);

            sharedLayers.Add(new SharedLayerInfo
            {
                DiffId = diffId,
                ImageCount = occurrences.Select(o => o.Image.ImageReference).Distinct().Count(),
                SampleImages = sampleImages,
                LikelyBaseImage = firstLayer.IntroducedBy,
                Size = firstLayer.Size,
                IsKnownBaseLayer = isKnownBase
            });
        }

        return sharedLayers.OrderByDescending(s => s.ImageCount).ToList();
    }

    /// <summary>
    /// Looks up a cached diffID (memory tier first, then persistent tier).
    /// </summary>
    public async Task<string?> GetCachedDiffIdAsync(string layerDigest, CancellationToken cancellationToken = default)
    {
        if (_memoryCache.TryGetValue(layerDigest, out var cached))
        {
            return cached;
        }
        var persisted = await _diffIdCache.GetAsync(layerDigest, cancellationToken).ConfigureAwait(false);
        if (!string.IsNullOrWhiteSpace(persisted))
        {
            _memoryCache[layerDigest] = persisted;
        }
        return persisted;
    }

    /// <summary>
    /// Writes a diffID into both cache tiers.
    /// </summary>
    public async Task CacheDiffIdAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default)
    {
        _memoryCache[layerDigest] = diffId;
        await _diffIdCache.SetAsync(layerDigest, diffId, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Computes the diffID (SHA-256 of the UNCOMPRESSED tar) for a layer blob.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The media type indicates zstd compression, which is not yet supported. Hashing the
    /// compressed bytes would produce the layer digest, not the diffID, and that wrong
    /// value would be permanently poisoned into the persistent cache — so we fail loudly.
    /// TODO: add zstd via ZstdSharp or System.IO.Compression when available.
    /// </exception>
    private static async Task<string> ComputeDiffIdAsync(Stream compressedStream, string mediaType, CancellationToken cancellationToken)
    {
        Stream decompressedStream;
        if (mediaType.Contains("gzip", StringComparison.OrdinalIgnoreCase))
        {
            decompressedStream = new GZipStream(compressedStream, CompressionMode.Decompress, leaveOpen: true);
        }
        else if (mediaType.Contains("zstd", StringComparison.OrdinalIgnoreCase))
        {
            throw new NotSupportedException(
                $"Zstd-compressed layers are not supported for diffID computation (media type '{mediaType}').");
        }
        else
        {
            // Treat as uncompressed tar.
            decompressedStream = compressedStream;
        }

        try
        {
            // Streaming one-shot hash; avoids the manual TransformBlock loop.
            var hash = await SHA256.HashDataAsync(decompressedStream, cancellationToken).ConfigureAwait(false);
            return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
        }
        finally
        {
            if (!ReferenceEquals(decompressedStream, compressedStream))
            {
                await decompressedStream.DisposeAsync().ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Reads the image config blob and returns the created_by command for each
    /// non-empty layer, positionally aligned with the manifest's layer list.
    /// Returns empty on any parse/fetch failure (history is best-effort metadata).
    /// </summary>
    private async Task<ImmutableArray<string?>> GetConfigHistoryAsync(
        string registry,
        string repository,
        string configDigest,
        CancellationToken cancellationToken)
    {
        try
        {
            using var configStream = await _registryClient.GetBlobAsync(registry, repository, configDigest, cancellationToken)
                .ConfigureAwait(false);
            if (configStream is null)
            {
                return [];
            }

            using var doc = await JsonDocument.ParseAsync(configStream, cancellationToken: cancellationToken)
                .ConfigureAwait(false);
            if (doc.RootElement.TryGetProperty("history", out var historyElement) &&
                historyElement.ValueKind == JsonValueKind.Array)
            {
                var history = new List<string?>();
                foreach (var item in historyElement.EnumerateArray())
                {
                    // Skip empty-layer entries (e.g. ENV/LABEL) so indices line up with real layers.
                    if (item.TryGetProperty("empty_layer", out var emptyLayer) && emptyLayer.GetBoolean())
                    {
                        continue;
                    }
                    string? createdBy = null;
                    if (item.TryGetProperty("created_by", out var createdByElement))
                    {
                        createdBy = createdByElement.GetString();
                    }
                    history.Add(createdBy);
                }
                return [.. history];
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Failed to parse config history for {ConfigDigest}", configDigest);
        }
        return [];
    }

    /// <summary>
    /// Splits an image reference into (registry, repository, tag-or-digest) following
    /// the Docker reference grammar: the first path segment is a registry host only if
    /// it contains '.' or ':' or is "localhost"; otherwise the reference is a Docker Hub
    /// repository (single-segment names live under "library/").
    /// </summary>
    private static (string Registry, string Repository, string Reference) ParseImageReference(string imageReference)
    {
        // Digest reference (repo@sha256:...) takes precedence over tag parsing.
        var digestIdx = imageReference.IndexOf('@');
        string reference;
        string imagePath;
        if (digestIdx > 0)
        {
            reference = imageReference[(digestIdx + 1)..];
            imagePath = imageReference[..digestIdx];
        }
        else
        {
            // Tag reference: the colon must come after the last '/' so a registry
            // port (host:5000/repo) is not mistaken for a tag.
            var tagIdx = imageReference.LastIndexOf(':');
            var slashIdx = imageReference.LastIndexOf('/');
            if (tagIdx > slashIdx && tagIdx > 0)
            {
                reference = imageReference[(tagIdx + 1)..];
                imagePath = imageReference[..tagIdx];
            }
            else
            {
                reference = "latest";
                imagePath = imageReference;
            }
        }

        // Registry host detection per the reference grammar (includes "localhost",
        // which the previous heuristic missed and misrouted to Docker Hub).
        var firstSlash = imagePath.IndexOf('/');
        if (firstSlash > 0)
        {
            var firstSegment = imagePath[..firstSlash];
            if (firstSegment.Contains('.') ||
                firstSegment.Contains(':') ||
                string.Equals(firstSegment, "localhost", StringComparison.OrdinalIgnoreCase))
            {
                return (firstSegment, imagePath[(firstSlash + 1)..], reference);
            }
        }

        // Default to Docker Hub.
        if (!imagePath.Contains('/'))
        {
            return ("registry-1.docker.io", $"library/{imagePath}", reference);
        }
        return ("registry-1.docker.io", imagePath, reference);
    }
}

View File

@@ -0,0 +1,144 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Tracks the provenance (origin and lineage) of an image layer.
/// </summary>
public sealed record LayerProvenance
{
    /// <summary>
    /// The compressed layer digest (sha256:...).
    /// </summary>
    public required string LayerDigest { get; init; }

    /// <summary>
    /// The uncompressed layer content hash (diffID).
    /// May be null if not yet computed.
    /// </summary>
    public string? DiffId { get; init; }

    /// <summary>
    /// The image reference where this layer was observed.
    /// </summary>
    public required string SourceImage { get; init; }

    /// <summary>
    /// Zero-based index of this layer in the image.
    /// </summary>
    public int LayerIndex { get; init; }

    /// <summary>
    /// The base image that introduced this layer, if known.
    /// For example, "alpine:3.19" for the first few layers of an Alpine-based image.
    /// </summary>
    public string? IntroducedBy { get; init; }

    /// <summary>
    /// When this provenance record was captured. Defaults to the record's construction time.
    /// </summary>
    public DateTimeOffset CapturedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// Size of the compressed layer in bytes.
    /// </summary>
    public long Size { get; init; }

    /// <summary>
    /// Media type of the layer (e.g., application/vnd.oci.image.layer.v1.tar+gzip).
    /// </summary>
    public string? MediaType { get; init; }

    /// <summary>
    /// History command that created this layer (from image config), if available.
    /// </summary>
    public string? CreatedByCommand { get; init; }
}

/// <summary>
/// Resolved layers for an image with provenance information.
/// </summary>
public sealed record ResolvedImageLayers
{
    /// <summary>
    /// The image reference that was resolved.
    /// </summary>
    public required string ImageReference { get; init; }

    /// <summary>
    /// The manifest digest of the resolved image.
    /// </summary>
    public required string ManifestDigest { get; init; }

    /// <summary>
    /// The config digest of the resolved image.
    /// </summary>
    public required string ConfigDigest { get; init; }

    /// <summary>
    /// The platform of the resolved image (for multi-arch).
    /// </summary>
    public string? Platform { get; init; }

    /// <summary>
    /// Ordered list of layers with provenance information.
    /// Index 0 is the base layer.
    /// </summary>
    public ImmutableArray<LayerProvenance> Layers { get; init; } = [];

    /// <summary>
    /// Total number of layers.
    /// </summary>
    public int LayerCount => Layers.Length;

    /// <summary>
    /// Total size of all layers (compressed). Computed on each access.
    /// </summary>
    public long TotalSize => Layers.Sum(l => l.Size);

    /// <summary>
    /// Whether all diffIDs have been resolved.
    /// </summary>
    public bool AllDiffIdsResolved => Layers.All(l => !string.IsNullOrWhiteSpace(l.DiffId));

    /// <summary>
    /// When this resolution was performed. Defaults to the record's construction time.
    /// </summary>
    public DateTimeOffset ResolvedAt { get; init; } = DateTimeOffset.UtcNow;
}

/// <summary>
/// Information about layers shared across multiple images.
/// </summary>
public sealed record SharedLayerInfo
{
    /// <summary>
    /// The diffID (uncompressed content hash) that is shared.
    /// </summary>
    public required string DiffId { get; init; }

    /// <summary>
    /// Number of images that contain this layer.
    /// </summary>
    public int ImageCount { get; init; }

    /// <summary>
    /// Sample of image references containing this layer (a bounded subset,
    /// not the full list — see the producer for the cap).
    /// </summary>
    public ImmutableArray<string> SampleImages { get; init; } = [];

    /// <summary>
    /// Likely base image origin if detected.
    /// </summary>
    public string? LikelyBaseImage { get; init; }

    /// <summary>
    /// Size of the layer (compressed).
    /// </summary>
    public long Size { get; init; }

    /// <summary>
    /// Whether this layer is from a known base image.
    /// </summary>
    public bool IsKnownBaseLayer { get; init; }
}

View File

@@ -0,0 +1,245 @@
namespace StellaOps.Scanner.Manifest.Reuse;
/// <summary>
/// Detects layer reuse across images for scan deduplication.
/// </summary>
public interface ILayerReuseDetector
{
    /// <summary>
    /// Detects reuse information for layers in an image.
    /// </summary>
    /// <param name="imageReference">Image reference to analyze.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Reuse information for each layer.</returns>
    Task<LayerReuseReport> DetectReuseAsync(
        string imageReference,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the scan status for a layer by its diffID.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Scan status information, or null when the layer has never been
    /// seen (no scan history and no reuse count).</returns>
    Task<LayerScanStatus?> GetLayerScanStatusAsync(
        string diffId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Records that a layer has been scanned.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="scanResult">Result of the scan.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task RecordLayerScanAsync(
        string diffId,
        LayerScanResult scanResult,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets reuse statistics for monitoring.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Reuse statistics.</returns>
    Task<LayerReuseStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default);
}
/// <summary>
/// Report on layer reuse for an image.
/// </summary>
public sealed record LayerReuseReport
{
    /// <summary>
    /// The image that was analyzed.
    /// </summary>
    public required string ImageReference { get; init; }

    /// <summary>
    /// Total number of layers in the image.
    /// </summary>
    public int TotalLayers { get; init; }

    /// <summary>
    /// Number of layers that can be skipped (already scanned).
    /// </summary>
    public int SkippableLayers { get; init; }

    /// <summary>
    /// Number of layers that are from known base images.
    /// </summary>
    public int BaseImageLayers { get; init; }

    /// <summary>
    /// Reuse information for each layer.
    /// </summary>
    public required IReadOnlyList<LayerReuseInfo> Layers { get; init; }

    /// <summary>
    /// Reuse ratio (0.0 to 1.0). Zero when the image has no layers.
    /// </summary>
    public double ReuseRatio => TotalLayers > 0 ? (double)SkippableLayers / TotalLayers : 0;

    /// <summary>
    /// When this report was generated.
    /// </summary>
    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}

/// <summary>
/// Reuse information for a single layer.
/// </summary>
public sealed record LayerReuseInfo
{
    /// <summary>
    /// Compressed layer digest.
    /// </summary>
    public required string LayerDigest { get; init; }

    /// <summary>
    /// Uncompressed content hash. May be empty when the diffID could not be computed.
    /// </summary>
    public required string DiffId { get; init; }

    /// <summary>
    /// Layer index in the image.
    /// </summary>
    public int LayerIndex { get; init; }

    /// <summary>
    /// Number of times this layer appears across all scanned images.
    /// </summary>
    public int ReuseCount { get; init; }

    /// <summary>
    /// When this layer was last scanned, if at all.
    /// </summary>
    public DateTimeOffset? LastScannedAt { get; init; }

    /// <summary>
    /// Known base image that introduced this layer, if detected.
    /// </summary>
    public string? KnownBaseImage { get; init; }

    /// <summary>
    /// Whether this layer can be skipped in scanning.
    /// </summary>
    public bool CanSkip { get; init; }

    /// <summary>
    /// Reason for skip decision.
    /// </summary>
    public string? SkipReason { get; init; }

    /// <summary>
    /// Size of the layer (compressed).
    /// </summary>
    public long Size { get; init; }
}

/// <summary>
/// Scan status for a layer.
/// </summary>
public sealed record LayerScanStatus
{
    /// <summary>
    /// Layer diffID.
    /// </summary>
    public required string DiffId { get; init; }

    /// <summary>
    /// Whether the layer has been scanned.
    /// </summary>
    public bool HasBeenScanned { get; init; }

    /// <summary>
    /// When the layer was last scanned.
    /// </summary>
    public DateTimeOffset? LastScannedAt { get; init; }

    /// <summary>
    /// Number of findings from the last scan.
    /// </summary>
    public int? FindingCount { get; init; }

    /// <summary>
    /// Scanner that performed the last scan.
    /// </summary>
    public string? ScannedBy { get; init; }

    /// <summary>
    /// Version of the scanner used.
    /// </summary>
    public string? ScannerVersion { get; init; }

    /// <summary>
    /// Number of images containing this layer.
    /// </summary>
    public int ImageCount { get; init; }
}

/// <summary>
/// Result of scanning a layer.
/// </summary>
public sealed record LayerScanResult
{
    /// <summary>
    /// Number of findings (vulnerabilities) found.
    /// </summary>
    public int FindingCount { get; init; }

    /// <summary>
    /// Scanner that performed the scan.
    /// </summary>
    public required string ScannedBy { get; init; }

    /// <summary>
    /// Version of the scanner.
    /// </summary>
    public string? ScannerVersion { get; init; }

    /// <summary>
    /// When the scan was performed. Defaults to the record's construction time.
    /// </summary>
    public DateTimeOffset ScannedAt { get; init; } = DateTimeOffset.UtcNow;
}

/// <summary>
/// Statistics about layer reuse.
/// </summary>
public sealed record LayerReuseStatistics
{
    /// <summary>
    /// Total number of unique layers tracked.
    /// </summary>
    public long TotalUniqueLayers { get; init; }

    /// <summary>
    /// Total number of images tracked.
    /// </summary>
    public long TotalImages { get; init; }

    /// <summary>
    /// Number of layers from known base images.
    /// </summary>
    public long BaseImageLayers { get; init; }

    /// <summary>
    /// Average reuse ratio across all scanned images.
    /// </summary>
    public double AverageReuseRatio { get; init; }

    /// <summary>
    /// Number of scan operations saved due to reuse.
    /// </summary>
    public long ScansSaved { get; init; }

    /// <summary>
    /// Most reused layers (top 10).
    /// NOTE(review): value tuples in a public contract; a small named record would be
    /// friendlier to consumers, but changing it now would break callers.
    /// </summary>
    public IReadOnlyList<(string DiffId, int Count)> MostReusedLayers { get; init; } = [];

    /// <summary>
    /// When statistics were computed.
    /// </summary>
    public DateTimeOffset ComputedAt { get; init; } = DateTimeOffset.UtcNow;
}

View File

@@ -0,0 +1,324 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Scanner.Manifest.Resolution;
namespace StellaOps.Scanner.Manifest.Reuse;
/// <summary>
/// Detects layer reuse across images for scan deduplication.
/// Tracks layer scan history and base image detection.
/// </summary>
public sealed class LayerReuseDetector : ILayerReuseDetector
{
// Postgres identifiers for the scan-history and reuse-count tables.
// Kept as constants so every query in this class targets the same schema.
private const string SchemaName = "scanner";
private const string LayerScansTable = $"{SchemaName}.layer_scans";
private const string LayerReuseTable = $"{SchemaName}.layer_reuse_counts";

// Collaborators; all required and validated in the constructor.
private readonly NpgsqlDataSource _dataSource;
private readonly ILayerDigestResolver _layerResolver;
private readonly IBaseImageDetector _baseImageDetector;
private readonly ILogger<LayerReuseDetector> _logger;

/// <summary>
/// Creates a new detector.
/// </summary>
/// <exception cref="ArgumentNullException">Any dependency is null.</exception>
public LayerReuseDetector(
    NpgsqlDataSource dataSource,
    ILayerDigestResolver layerResolver,
    IBaseImageDetector baseImageDetector,
    ILogger<LayerReuseDetector> logger)
{
    _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
    _layerResolver = layerResolver ?? throw new ArgumentNullException(nameof(layerResolver));
    _baseImageDetector = baseImageDetector ?? throw new ArgumentNullException(nameof(baseImageDetector));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
/// <summary>
/// Resolves an image's layers (computing diffIDs) and builds a per-layer reuse report.
/// A layer is skippable when it has a previously recorded scan for its diffID.
/// Side effect: increments the persisted reuse count for every layer with a diffID
/// on each call — the count therefore tracks detection runs, not distinct images.
/// </summary>
/// <param name="imageReference">Image reference to analyze.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Reuse report for each layer of the image.</returns>
public async Task<LayerReuseReport> DetectReuseAsync(
    string imageReference,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);
    _logger.LogInformation("Detecting layer reuse for {Image}", imageReference);

    // Resolve layers for the image (diffID computation is required to match scan history).
    var resolved = await _layerResolver.ResolveLayersAsync(
        imageReference,
        new LayerResolutionOptions { ComputeDiffIds = true, DetectBaseImage = true },
        cancellationToken).ConfigureAwait(false);

    var layers = new List<LayerReuseInfo>();
    var skippable = 0;
    var baseImageCount = 0;

    foreach (var layer in resolved.Layers)
    {
        // Without a diffID we cannot correlate with scan history; never skip such layers.
        if (string.IsNullOrWhiteSpace(layer.DiffId))
        {
            layers.Add(new LayerReuseInfo
            {
                LayerDigest = layer.LayerDigest,
                DiffId = string.Empty,
                LayerIndex = layer.LayerIndex,
                ReuseCount = 0,
                CanSkip = false,
                SkipReason = "DiffID not computed",
                Size = layer.Size
            });
            continue;
        }

        // Get scan status for this layer.
        var scanStatus = await GetLayerScanStatusAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);
        var isKnownBase = await _baseImageDetector.IsKnownBaseLayerAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);

        // A layer is skippable only if it has actually been scanned before.
        var canSkip = scanStatus?.HasBeenScanned == true;
        string? skipReason = null;
        if (canSkip && isKnownBase)
        {
            // NOTE(review): BaseImageLayers counts only layers that are BOTH previously
            // scanned AND known-base; unscanned base layers are not counted here.
            skipReason = $"Known base layer (last scanned {scanStatus?.LastScannedAt:yyyy-MM-dd})";
            baseImageCount++;
        }
        else if (canSkip)
        {
            skipReason = $"Previously scanned on {scanStatus?.LastScannedAt:yyyy-MM-dd}";
        }

        if (canSkip)
        {
            skippable++;
        }

        layers.Add(new LayerReuseInfo
        {
            LayerDigest = layer.LayerDigest,
            DiffId = layer.DiffId,
            LayerIndex = layer.LayerIndex,
            ReuseCount = scanStatus?.ImageCount ?? 0,
            LastScannedAt = scanStatus?.LastScannedAt,
            KnownBaseImage = layer.IntroducedBy,
            CanSkip = canSkip,
            SkipReason = skipReason,
            Size = layer.Size
        });

        // Increment reuse count for this layer (after status was read, so the
        // reported ReuseCount reflects the state BEFORE this detection run).
        await IncrementReuseCountAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);
    }

    var report = new LayerReuseReport
    {
        ImageReference = imageReference,
        TotalLayers = resolved.LayerCount,
        SkippableLayers = skippable,
        BaseImageLayers = baseImageCount,
        Layers = layers
    };

    _logger.LogInformation(
        "Layer reuse for {Image}: {Skippable}/{Total} skippable ({Ratio:P1} reuse)",
        imageReference, skippable, resolved.LayerCount, report.ReuseRatio);
    return report;
}
/// <summary>
/// Looks up the scan status for a layer diffID: the most recent scan record joined with
/// its reuse count, falling back to a reuse-count-only status, or null when the layer is
/// entirely unknown. Returns null (with a debug log) if the tables do not exist yet.
/// </summary>
/// <param name="diffId">Layer diffID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task<LayerScanStatus?> GetLayerScanStatusAsync(
    string diffId,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(diffId);

    await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        // Scope the first command + reader so both are disposed before the fallback
        // query runs. Npgsql allows only one active command per connection (no MARS);
        // previously the fallback executed while the reader was still open, which
        // throws "A command is already in progress".
        await using (var cmd = conn.CreateCommand())
        {
            cmd.CommandText = $"""
                SELECT
                    ls.diff_id,
                    ls.scanned_at,
                    ls.finding_count,
                    ls.scanned_by,
                    ls.scanner_version,
                    COALESCE(lr.reuse_count, 0) as image_count
                FROM {LayerScansTable} ls
                LEFT JOIN {LayerReuseTable} lr ON ls.diff_id = lr.diff_id
                WHERE ls.diff_id = @diffId
                ORDER BY ls.scanned_at DESC
                LIMIT 1
                """;
            cmd.Parameters.AddWithValue("diffId", diffId);

            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                return new LayerScanStatus
                {
                    DiffId = reader.GetString(0),
                    HasBeenScanned = true,
                    LastScannedAt = reader.GetFieldValue<DateTimeOffset>(1),
                    FindingCount = reader.IsDBNull(2) ? null : reader.GetInt32(2),
                    ScannedBy = reader.IsDBNull(3) ? null : reader.GetString(3),
                    ScannerVersion = reader.IsDBNull(4) ? null : reader.GetString(4),
                    ImageCount = reader.GetInt32(5)
                };
            }
        }

        // No scan history: check whether we at least have a reuse count for the layer.
        await using var countCmd = conn.CreateCommand();
        countCmd.CommandText = $"SELECT reuse_count FROM {LayerReuseTable} WHERE diff_id = @diffId";
        countCmd.Parameters.AddWithValue("diffId", diffId);
        var countResult = await countCmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        if (countResult is int count)
        {
            return new LayerScanStatus
            {
                DiffId = diffId,
                HasBeenScanned = false,
                ImageCount = count
            };
        }
        return null;
    }
    catch (PostgresException ex) when (ex.SqlState == "42P01") // Table doesn't exist
    {
        _logger.LogDebug("Layer scan tables not yet created");
        return null;
    }
}
/// <summary>
/// Upserts the scan record for a layer diffID (one row per diffID; a newer scan
/// overwrites the previous one). Logs a warning instead of failing when the scan
/// table has not been migrated yet.
/// </summary>
/// <param name="diffId">Layer diffID.</param>
/// <param name="scanResult">Result of the scan to persist.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task RecordLayerScanAsync(
    string diffId,
    LayerScanResult scanResult,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
    ArgumentNullException.ThrowIfNull(scanResult);

    await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var upsert = connection.CreateCommand();
    upsert.CommandText = $"""
        INSERT INTO {LayerScansTable} (diff_id, scanned_at, finding_count, scanned_by, scanner_version)
        VALUES (@diffId, @scannedAt, @findingCount, @scannedBy, @scannerVersion)
        ON CONFLICT (diff_id) DO UPDATE SET
        scanned_at = EXCLUDED.scanned_at,
        finding_count = EXCLUDED.finding_count,
        scanned_by = EXCLUDED.scanned_by,
        scanner_version = EXCLUDED.scanner_version
        """;

    // Nullable column maps to DBNull when absent.
    object scannerVersion = scanResult.ScannerVersion ?? (object)DBNull.Value;
    upsert.Parameters.AddWithValue("diffId", diffId);
    upsert.Parameters.AddWithValue("scannedAt", scanResult.ScannedAt);
    upsert.Parameters.AddWithValue("findingCount", scanResult.FindingCount);
    upsert.Parameters.AddWithValue("scannedBy", scanResult.ScannedBy);
    upsert.Parameters.AddWithValue("scannerVersion", scannerVersion);

    try
    {
        await upsert.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        _logger.LogDebug("Recorded scan for layer {DiffId}: {FindingCount} findings", diffId, scanResult.FindingCount);
    }
    catch (PostgresException ex) when (ex.SqlState == "42P01")
    {
        _logger.LogWarning("Layer scan tables not yet created. Run migrations.");
    }
}
/// <summary>
/// Aggregates reuse statistics from the tracking tables: layer/image counts, base-layer
/// count, scans saved, the top-10 most reused layers, and a derived average reuse ratio.
/// Returns an empty statistics record when the tables have not been migrated yet.
/// </summary>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task<LayerReuseStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default)
{
    await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        // Get basic counts (single round trip via scalar sub-selects).
        await using var countsCmd = conn.CreateCommand();
        countsCmd.CommandText = $"""
            SELECT
                (SELECT COUNT(*) FROM {LayerReuseTable}) as total_layers,
                (SELECT COUNT(DISTINCT image_reference) FROM {SchemaName}.image_layers) as total_images,
                (SELECT COUNT(*) FROM {SchemaName}.scanner_base_image_layers) as base_layers,
                (SELECT SUM(reuse_count) FROM {LayerReuseTable} WHERE reuse_count > 1) as scans_saved
            """;

        long totalLayers = 0, totalImages = 0, baseLayers = 0, scansSaved = 0;
        await using (var reader = await countsCmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false))
        {
            if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                // SUM over an empty set is NULL, hence the IsDBNull guards.
                totalLayers = reader.IsDBNull(0) ? 0 : reader.GetInt64(0);
                totalImages = reader.IsDBNull(1) ? 0 : reader.GetInt64(1);
                baseLayers = reader.IsDBNull(2) ? 0 : reader.GetInt64(2);
                scansSaved = reader.IsDBNull(3) ? 0 : reader.GetInt64(3);
            }
        }

        // Get most reused layers (reader from the previous command is disposed by now).
        await using var topCmd = conn.CreateCommand();
        topCmd.CommandText = $"""
            SELECT diff_id, reuse_count
            FROM {LayerReuseTable}
            ORDER BY reuse_count DESC
            LIMIT 10
            """;

        var mostReused = new List<(string, int)>();
        await using (var reader = await topCmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false))
        {
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                mostReused.Add((reader.GetString(0), reader.GetInt32(1)));
            }
        }

        // Calculate average reuse ratio.
        // NOTE(review): (totalImages * totalLayers / Math.Max(1, totalImages)) reduces
        // algebraically to totalLayers (barring long overflow for extreme counts), so this
        // is effectively scansSaved / totalLayers — consider simplifying the expression.
        double avgReuseRatio = 0;
        if (totalImages > 0 && totalLayers > 0)
        {
            avgReuseRatio = (double)scansSaved / (totalImages * totalLayers / Math.Max(1, totalImages));
        }

        return new LayerReuseStatistics
        {
            TotalUniqueLayers = totalLayers,
            TotalImages = totalImages,
            BaseImageLayers = baseLayers,
            AverageReuseRatio = Math.Min(1.0, avgReuseRatio),
            ScansSaved = scansSaved,
            MostReusedLayers = mostReused
        };
    }
    catch (PostgresException ex) when (ex.SqlState == "42P01")
    {
        _logger.LogDebug("Statistics tables not yet created");
        return new LayerReuseStatistics();
    }
}
/// <summary>
/// Upserts the reuse counter for a layer: inserts a row with count 1 on first sight,
/// otherwise increments the existing counter. Best-effort: if the reuse table has not
/// been migrated yet (PostgreSQL 42P01), the increment is skipped.
/// </summary>
/// <param name="diffId">Layer diffID whose reuse counter to bump.</param>
/// <param name="cancellationToken">Cancellation token.</param>
private async Task IncrementReuseCountAsync(string diffId, CancellationToken cancellationToken)
{
    await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var cmd = conn.CreateCommand();
    cmd.CommandText = $"""
    INSERT INTO {LayerReuseTable} (diff_id, reuse_count, first_seen_at)
    VALUES (@diffId, 1, @now)
    ON CONFLICT (diff_id) DO UPDATE SET
    reuse_count = {LayerReuseTable}.reuse_count + 1
    """;
    cmd.Parameters.AddWithValue("diffId", diffId);
    cmd.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
    try
    {
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (PostgresException ex) when (ex.SqlState == "42P01")
    {
        // Table doesn't exist yet. FIX: the empty catch swallowed this silently while
        // sibling methods log the same condition; log at debug level so missing
        // migrations remain diagnosable.
        _logger.LogDebug("Layer reuse table not yet created; skipping reuse increment for {DiffId}", diffId);
    }
}
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Npgsql" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Contracts\StellaOps.Scanner.Contracts.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Registry\StellaOps.Scanner.Registry.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Storage\StellaOps.Scanner.Storage.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Storage.Oci\StellaOps.Scanner.Storage.Oci.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>

View File

@@ -12,7 +12,7 @@ public sealed class InMemorySliceCache : ISliceCache, IDisposable
private readonly ILogger<InMemorySliceCache> _logger;
private readonly TimeProvider _timeProvider;
private readonly CancellationTokenSource _evictionCts = new();
private readonly Timer _evictionTimer;
private readonly ITimer _evictionTimer;
private readonly SemaphoreSlim _evictionLock = new(1, 1);
private long _hitCount;
@@ -26,7 +26,7 @@ public sealed class InMemorySliceCache : ISliceCache, IDisposable
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
_evictionTimer = new Timer(
_evictionTimer = _timeProvider.CreateTimer(
_ => _ = EvictExpiredEntriesAsync(_evictionCts.Token),
null,
TimeSpan.FromSeconds(EvictionIntervalSeconds),

View File

@@ -32,7 +32,7 @@ public sealed class SliceCache : ISliceCache, IDisposable
private readonly SliceCacheOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ConcurrentDictionary<string, CacheItem> _cache = new(StringComparer.Ordinal);
private readonly Timer _evictionTimer;
private readonly ITimer _evictionTimer;
private long _hitCount;
private long _missCount;
private bool _disposed;
@@ -41,7 +41,7 @@ public sealed class SliceCache : ISliceCache, IDisposable
{
_options = options?.Value ?? new SliceCacheOptions();
_timeProvider = timeProvider ?? TimeProvider.System;
_evictionTimer = new Timer(EvictExpired, null, TimeSpan.FromMinutes(1), TimeSpan.FromMinutes(1));
_evictionTimer = _timeProvider.CreateTimer(EvictExpired, null, TimeSpan.FromMinutes(1), TimeSpan.FromMinutes(1));
}
public Task<CachedSliceResult?> TryGetAsync(string cacheKey, CancellationToken cancellationToken = default)

View File

@@ -0,0 +1,147 @@
// -----------------------------------------------------------------------------
// ClaimIdGenerator.cs
// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model
// Task: TASK-015-002 - Add claim_id field for static-runtime linkage
// Description: Generates deterministic claim IDs for linking witnesses
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Builds and parses deterministic claim IDs that tie runtime witnesses back to static
/// claims. A claim ID has the shape "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;"
/// with both components lower-cased and stripped of well-known prefixes.
/// </summary>
public static class ClaimIdGenerator
{
    /// <summary>
    /// Claim ID prefix.
    /// </summary>
    public const string Prefix = "claim";

    /// <summary>
    /// Builds a deterministic claim ID from an artifact digest and a path hash.
    /// </summary>
    /// <param name="artifactDigest">SHA-256 digest of the artifact (SBOM).</param>
    /// <param name="pathHash">Path hash from PathWitnessBuilder.</param>
    /// <returns>Claim ID in format "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;".</returns>
    public static string Generate(string artifactDigest, string pathHash)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(artifactDigest);
        ArgumentException.ThrowIfNullOrWhiteSpace(pathHash);
        return $"{Prefix}:{NormalizeDigest(artifactDigest)}:{NormalizeHash(pathHash)}";
    }

    /// <summary>
    /// Builds a claim ID from a witness artifact's SBOM digest and a path hash.
    /// </summary>
    /// <param name="artifact">Witness artifact containing the SBOM digest.</param>
    /// <param name="pathHash">Path hash from the witness.</param>
    /// <returns>Claim ID.</returns>
    public static string Generate(WitnessArtifact artifact, string pathHash)
    {
        ArgumentNullException.ThrowIfNull(artifact);
        return Generate(artifact.SbomDigest, pathHash);
    }

    /// <summary>
    /// Builds a claim ID directly from a path witness whose PathHash is populated.
    /// </summary>
    /// <param name="witness">Path witness with PathHash set.</param>
    /// <returns>Claim ID.</returns>
    /// <exception cref="InvalidOperationException">If PathHash is not set.</exception>
    public static string Generate(PathWitness witness)
    {
        ArgumentNullException.ThrowIfNull(witness);
        return string.IsNullOrWhiteSpace(witness.PathHash)
            ? throw new InvalidOperationException(
                "Cannot generate claim ID: PathHash is not set on witness.")
            : Generate(witness.Artifact.SbomDigest, witness.PathHash);
    }

    /// <summary>
    /// Splits a claim ID into its artifact-digest and path-hash components.
    /// </summary>
    /// <param name="claimId">The claim ID to parse.</param>
    /// <returns>Tuple of (artifactDigest, pathHash).</returns>
    /// <exception cref="FormatException">If claim ID format is invalid.</exception>
    public static (string ArtifactDigest, string PathHash) Parse(string claimId)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(claimId);
        // A well-formed ID is exactly three ':'-separated parts led by the prefix.
        if (claimId.Split(':') is [Prefix, var artifactDigest, var pathHash])
        {
            return (artifactDigest, pathHash);
        }
        throw new FormatException(
            $"Invalid claim ID format. Expected '{Prefix}:<artifact-digest>:<path-hash>', got '{claimId}'.");
    }

    /// <summary>
    /// Checks whether a claim ID is well-formed without throwing.
    /// </summary>
    /// <param name="claimId">The claim ID to validate.</param>
    /// <returns>True if valid, false otherwise.</returns>
    public static bool IsValid(string? claimId)
    {
        return !string.IsNullOrWhiteSpace(claimId)
            && claimId.Split(':') is [Prefix, var artifactDigest, var pathHash]
            && !string.IsNullOrWhiteSpace(artifactDigest)
            && !string.IsNullOrWhiteSpace(pathHash);
    }

    /// <summary>
    /// Hashes a set of claim IDs into a single link hash: SHA-256 over the
    /// ordinally-sorted IDs joined by newlines, rendered as lower-case hex.
    /// Returns an empty string for an empty input.
    /// </summary>
    /// <param name="claimIds">Collection of claim IDs to link.</param>
    /// <returns>SHA-256 hash of sorted, concatenated claim IDs.</returns>
    public static string ComputeLinkHash(IEnumerable<string> claimIds)
    {
        ArgumentNullException.ThrowIfNull(claimIds);
        // Ordinal sort makes the hash independent of input order.
        var sorted = claimIds.OrderBy(static c => c, StringComparer.Ordinal).ToList();
        if (sorted.Count == 0)
        {
            return string.Empty;
        }
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(string.Join("\n", sorted)));
        return Convert.ToHexString(digest).ToLowerInvariant();
    }

    // Strips well-known digest prefixes ("sha256:", then "sha512:") and lower-cases.
    private static string NormalizeDigest(string digest)
    {
        foreach (var prefix in new[] { "sha256:", "sha512:" })
        {
            if (digest.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                digest = digest[prefix.Length..];
            }
        }
        return digest.ToLowerInvariant();
    }

    // Strips an optional "0x" prefix and lower-cases.
    private static string NormalizeHash(string hash)
    {
        if (hash.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
        {
            hash = hash[2..];
        }
        return hash.ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,158 @@
// -----------------------------------------------------------------------------
// IRuntimeWitnessGenerator.cs
// Sprint: SPRINT_20260118_016_Scanner_runtime_witness_signing
// Task: TASK-016-003 - Extend SignedWitnessGenerator for runtime requests
// Description: Interface for runtime witness generation
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Generates signed runtime witnesses from observation data.
/// </summary>
public interface IRuntimeWitnessGenerator
{
    /// <summary>
    /// Generates a single signed runtime witness.
    /// </summary>
    /// <param name="request">The runtime witness request.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The signed witness result (success or failure is carried on
    /// <see cref="RuntimeWitnessResult.Success"/> — NOTE(review): whether implementations
    /// also throw on failure is not visible here; confirm against implementations).</returns>
    Task<RuntimeWitnessResult> GenerateAsync(
        RuntimeWitnessRequest request,
        CancellationToken ct = default);

    /// <summary>
    /// Generates multiple signed runtime witnesses.
    /// </summary>
    /// <param name="request">The batch request; carries parallelism and
    /// continue-on-error settings (see <see cref="BatchRuntimeWitnessRequest"/>).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Async enumerable of signed witness results.</returns>
    IAsyncEnumerable<RuntimeWitnessResult> GenerateBatchAsync(
        BatchRuntimeWitnessRequest request,
        CancellationToken ct = default);

    /// <summary>
    /// Generates runtime witnesses from a stream of observations.
    /// </summary>
    /// <param name="observations">Stream of runtime observations.</param>
    /// <param name="contextProvider">Provider for witness context (claim IDs, artifact
    /// digest, component PURLs, signing options).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Async enumerable of signed witness results.</returns>
    IAsyncEnumerable<RuntimeWitnessResult> GenerateFromStreamAsync(
        IAsyncEnumerable<RuntimeObservation> observations,
        IRuntimeWitnessContextProvider contextProvider,
        CancellationToken ct = default);
}
/// <summary>
/// Outcome of generating one runtime witness: either the signed witness with its
/// anchoring metadata (Rekor, CAS), or an error message tied to the originating claim.
/// </summary>
public sealed record RuntimeWitnessResult
{
    /// <summary>True when generation (and signing) completed.</summary>
    public required bool Success { get; init; }

    /// <summary>The generated witness; null when generation failed.</summary>
    public PathWitness? Witness { get; init; }

    /// <summary>The signed DSSE envelope bytes; null when generation failed.</summary>
    public byte[]? EnvelopeBytes { get; init; }

    /// <summary>Rekor log entry index, when published to the transparency log.</summary>
    public long? RekorLogIndex { get; init; }

    /// <summary>Rekor log ID, when published.</summary>
    public string? RekorLogId { get; init; }

    /// <summary>Content-addressed storage URI, when the envelope was stored.</summary>
    public string? CasUri { get; init; }

    /// <summary>Error message; null on success.</summary>
    public string? ErrorMessage { get; init; }

    /// <summary>The original claim ID this result relates to.</summary>
    public string? ClaimId { get; init; }

    /// <summary>
    /// Builds a success result; the claim ID is copied from the witness itself.
    /// </summary>
    public static RuntimeWitnessResult Successful(
        PathWitness witness,
        byte[] envelopeBytes,
        long? rekorLogIndex = null,
        string? rekorLogId = null,
        string? casUri = null) => new()
    {
        Success = true,
        Witness = witness,
        EnvelopeBytes = envelopeBytes,
        RekorLogIndex = rekorLogIndex,
        RekorLogId = rekorLogId,
        CasUri = casUri,
        ClaimId = witness.ClaimId
    };

    /// <summary>
    /// Builds a failure result carrying the claim ID and an error message.
    /// </summary>
    public static RuntimeWitnessResult Failed(string claimId, string errorMessage) => new()
    {
        Success = false,
        ErrorMessage = errorMessage,
        ClaimId = claimId
    };
}
/// <summary>
/// Provides context for runtime witness generation from observation streams.
/// Supplies the per-observation identifiers and the shared signing configuration
/// consumed by <see cref="IRuntimeWitnessGenerator.GenerateFromStreamAsync"/>.
/// </summary>
public interface IRuntimeWitnessContextProvider
{
    /// <summary>
    /// Gets the claim ID for an observation
    /// (format "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;").
    /// </summary>
    string GetClaimId(RuntimeObservation observation);

    /// <summary>
    /// Gets the artifact digest for context.
    /// </summary>
    string GetArtifactDigest();

    /// <summary>
    /// Gets the component PURL being observed.
    /// </summary>
    string GetComponentPurl(RuntimeObservation observation);

    /// <summary>
    /// Gets the vulnerability ID if applicable; null when the observation is not
    /// tied to a specific vulnerability.
    /// </summary>
    string? GetVulnerabilityId(RuntimeObservation observation);

    /// <summary>
    /// Gets signing options shared by all witnesses produced from the stream.
    /// </summary>
    RuntimeWitnessSigningOptions GetSigningOptions();
}

View File

@@ -0,0 +1,280 @@
// -----------------------------------------------------------------------------
// IWitnessVerifier.cs
// Sprint: SPRINT_20260118_017_Scanner_witness_verifier
// Task: TASK-017-001 - Create IWitnessVerifier interface and WitnessMatchResult
// Description: Interface for verifying and matching runtime witnesses to claims
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Verifies and matches runtime witnesses to static reachability claims.
/// </summary>
public interface IWitnessVerifier
{
    /// <summary>
    /// Find and verify all runtime witnesses for a claim.
    /// </summary>
    /// <param name="claimId">The claim ID to verify
    /// (format "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;").</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Verification result with matched witnesses.</returns>
    Task<WitnessVerificationResult> VerifyAsync(string claimId, CancellationToken ct = default);

    /// <summary>
    /// Verify a specific Rekor entry.
    /// </summary>
    /// <param name="logIndex">The Rekor log index.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Verification result for the entry.</returns>
    Task<WitnessVerificationResult> VerifyByLogIndexAsync(long logIndex, CancellationToken ct = default);

    /// <summary>
    /// Check if a witness matches a claim.
    /// </summary>
    /// <param name="witness">The path witness to check.</param>
    /// <param name="claimId">The claim ID to match against.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Match result with confidence score (see
    /// <see cref="WitnessMatchResult.Confidence"/>).</returns>
    Task<WitnessMatchResult> MatchWitnessToClaimAsync(
        PathWitness witness,
        string claimId,
        CancellationToken ct = default);

    /// <summary>
    /// Batch verify multiple claims.
    /// </summary>
    /// <param name="claimIds">The claim IDs to verify.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Async enumerable of verification results.</returns>
    IAsyncEnumerable<WitnessVerificationResult> VerifyBatchAsync(
        IEnumerable<string> claimIds,
        CancellationToken ct = default);
}
/// <summary>
/// Outcome of verifying a single claim against the available runtime witnesses.
/// </summary>
public sealed record WitnessVerificationResult
{
    /// <summary>The claim ID that was verified.</summary>
    public required string ClaimId { get; init; }

    /// <summary>Overall verification status.</summary>
    public required WitnessVerificationStatus Status { get; init; }

    /// <summary>All witness matches found for this claim.</summary>
    public IReadOnlyList<WitnessMatchResult> Matches { get; init; } = [];

    /// <summary>Highest-confidence match, if any.</summary>
    public WitnessMatchResult? BestMatch { get; init; }

    /// <summary>When the verification ran (UTC).</summary>
    public DateTimeOffset VerifiedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Failure details; null unless verification failed.</summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// Builds a result from the collected matches: Verified when at least one match
    /// exists, otherwise NoWitnessFound; BestMatch is the highest-confidence entry.
    /// </summary>
    public static WitnessVerificationResult Success(
        string claimId,
        IReadOnlyList<WitnessMatchResult> matches) => new()
    {
        ClaimId = claimId,
        Status = matches.Count > 0
            ? WitnessVerificationStatus.Verified
            : WitnessVerificationStatus.NoWitnessFound,
        Matches = matches,
        BestMatch = matches.MaxBy(static m => m.Confidence)
    };

    /// <summary>
    /// Builds a failed verification result with an error message.
    /// </summary>
    public static WitnessVerificationResult Failed(string claimId, string errorMessage) => new()
    {
        ClaimId = claimId,
        Status = WitnessVerificationStatus.Failed,
        ErrorMessage = errorMessage
    };
}
/// <summary>
/// Overall verification status.
/// </summary>
/// <remarks>
/// Not a [Flags] enum — exactly one status applies to a verification result.
/// </remarks>
public enum WitnessVerificationStatus
{
    /// <summary>Witness found and verified.</summary>
    Verified,
    /// <summary>Verification in progress.</summary>
    Pending,
    /// <summary>No witness found for claim.</summary>
    NoWitnessFound,
    /// <summary>Witness found but verification failed.</summary>
    Failed,
    /// <summary>Witness signature invalid.</summary>
    SignatureInvalid,
    /// <summary>Witness expired (outside time window).</summary>
    Expired
}
/// <summary>
/// Outcome of matching one witness against one claim: status, confidence, the frames
/// that lined up, any discrepancies, and the signature/Rekor verification evidence.
/// </summary>
public sealed record WitnessMatchResult
{
    /// <summary>Match status.</summary>
    public required WitnessMatchStatus MatchStatus { get; init; }

    /// <summary>The matched witness; null when nothing matched.</summary>
    public PathWitness? Witness { get; init; }

    /// <summary>
    /// Match confidence score (0.0-1.0).
    /// 1.0 = exact PathHash match
    /// 0.8+ = high node hash overlap
    /// </summary>
    public double Confidence { get; init; }

    /// <summary>Function/method IDs that matched between claim and witness.</summary>
    public IReadOnlyList<string> MatchedFrames { get; init; } = [];

    /// <summary>Discrepancies between claim and witness, if any.</summary>
    public IReadOnlyList<WitnessDiscrepancy> Discrepancies { get; init; } = [];

    /// <summary>Rekor log index, when the witness is anchored in the transparency log.</summary>
    public long? RekorLogIndex { get; init; }

    /// <summary>Whether Rekor verification passed.</summary>
    public bool RekorVerified { get; init; }

    /// <summary>Whether DSSE signature verification passed.</summary>
    public bool SignatureVerified { get; init; }

    /// <summary>Integrated timestamp from Rekor, when available.</summary>
    public DateTimeOffset? IntegratedTime { get; init; }

    /// <summary>
    /// Builds an exact-match result (confidence 1.0, signature verified; Rekor
    /// verified iff a log index is supplied).
    /// </summary>
    public static WitnessMatchResult ExactMatch(PathWitness witness, long? rekorLogIndex = null) => new()
    {
        MatchStatus = WitnessMatchStatus.Matched,
        Witness = witness,
        Confidence = 1.0,
        RekorLogIndex = rekorLogIndex,
        RekorVerified = rekorLogIndex.HasValue,
        SignatureVerified = true
    };

    /// <summary>
    /// Builds a no-match result; the reason is recorded as a "NoMatch" discrepancy.
    /// </summary>
    public static WitnessMatchResult NoMatch(string reason) => new()
    {
        MatchStatus = WitnessMatchStatus.NoMatch,
        Confidence = 0.0,
        Discrepancies = [new WitnessDiscrepancy { Type = "NoMatch", Description = reason }]
    };
}
/// <summary>
/// Match status between witness and claim.
/// </summary>
/// <remarks>
/// <see cref="Matched"/> is produced for exact PathHash matches (confidence 1.0,
/// see WitnessMatchResult.ExactMatch); <see cref="PartialMatch"/> covers NodeHashes overlap.
/// </remarks>
public enum WitnessMatchStatus
{
    /// <summary>Exact match on PathHash.</summary>
    Matched,
    /// <summary>Partial match on NodeHashes.</summary>
    PartialMatch,
    /// <summary>No match found.</summary>
    NoMatch,
    /// <summary>Match found but verification failed.</summary>
    VerificationFailed
}
/// <summary>
/// A discrepancy between claim and witness.
/// </summary>
public sealed record WitnessDiscrepancy
{
    /// <summary>Discrepancy type (e.g. "NoMatch" as emitted by WitnessMatchResult.NoMatch).</summary>
    public required string Type { get; init; }
    /// <summary>Description of the discrepancy.</summary>
    public required string Description { get; init; }
    /// <summary>Expected value (from claim); null when not applicable.</summary>
    public string? Expected { get; init; }
    /// <summary>Actual value (from witness); null when not applicable.</summary>
    public string? Actual { get; init; }
    /// <summary>Severity of the discrepancy. Defaults to <see cref="DiscrepancySeverity.Warning"/>.</summary>
    public DiscrepancySeverity Severity { get; init; } = DiscrepancySeverity.Warning;
}
/// <summary>
/// Severity of a discrepancy, in increasing order of impact on the match.
/// </summary>
public enum DiscrepancySeverity
{
    /// <summary>Informational difference.</summary>
    Info,
    /// <summary>Minor difference that may affect match confidence.</summary>
    Warning,
    /// <summary>Significant difference that prevents match.</summary>
    Error
}

View File

@@ -0,0 +1,33 @@
// -----------------------------------------------------------------------------
// ObservationType.cs
// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model
// Task: TASK-015-001 - Add ObservationType enum and field to PathWitness
// Description: Discriminator for static vs runtime vs confirmed paths
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Indicates how a path witness was observed.
/// Follows the discriminated union pattern used by <see cref="SuppressionType"/>.
/// </summary>
/// <remarks>
/// Serialized on <c>PathWitness</c> via its <c>observation_type</c> JSON property;
/// the <see cref="Static"/> default keeps legacy witnesses (which lack the field) valid.
/// </remarks>
public enum ObservationType
{
    /// <summary>
    /// Path discovered through static call graph analysis only.
    /// Default value for backward compatibility with legacy witnesses.
    /// </summary>
    Static = 0,
    /// <summary>
    /// Path observed at runtime only (e.g., via Tetragon, OTel, profiler).
    /// May not have corresponding static analysis evidence.
    /// </summary>
    Runtime = 1,
    /// <summary>
    /// Static path confirmed by runtime observation.
    /// Highest confidence level - both static analysis and runtime execution agree.
    /// </summary>
    Confirmed = 2
}

View File

@@ -127,6 +127,29 @@ public sealed record PathWitness
/// </summary>
[JsonPropertyName("predicate_type")]
public string PredicateType { get; init; } = WitnessPredicateTypes.PathWitnessCanonical;
/// <summary>
/// How this path was observed (static analysis, runtime, or confirmed by both).
/// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model (TASK-015-001)
/// </summary>
[JsonPropertyName("observation_type")]
public ObservationType ObservationType { get; init; } = ObservationType.Static;
/// <summary>
/// Claim ID for linking runtime witnesses to static claims.
/// Format: "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;".
/// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model (TASK-015-002)
/// </summary>
[JsonPropertyName("claim_id")]
public string? ClaimId { get; init; }
/// <summary>
/// Runtime observations that confirmed or discovered this path.
/// Empty for purely static witnesses.
/// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model (TASK-015-003)
/// </summary>
[JsonPropertyName("observations")]
public IReadOnlyList<RuntimeObservation>? Observations { get; init; }
}
/// <summary>

View File

@@ -0,0 +1,98 @@
// -----------------------------------------------------------------------------
// RuntimeObservation.cs
// Sprint: SPRINT_20260118_015_Scanner_runtime_witness_model
// Task: TASK-015-003 - Create RuntimeObservation record
// Description: Wraps runtime call events for witness evidence
// -----------------------------------------------------------------------------
using System.Text.Json.Serialization;
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// A runtime observation record that wraps/extends RuntimeCallEvent data.
/// Used to attach runtime evidence to path witnesses.
/// </summary>
public sealed record RuntimeObservation
{
    /// <summary>
    /// Timestamp when the observation was recorded (UTC).
    /// </summary>
    [JsonPropertyName("observed_at")]
    public required DateTimeOffset ObservedAt { get; init; }
    /// <summary>
    /// Number of times this path was observed (aggregated count). Defaults to 1.
    /// </summary>
    [JsonPropertyName("observation_count")]
    public int ObservationCount { get; init; } = 1;
    /// <summary>
    /// SHA-256 hash of stack frame signatures for verification.
    /// </summary>
    [JsonPropertyName("stack_sample_hash")]
    public string? StackSampleHash { get; init; }
    /// <summary>
    /// Process ID where the path was observed; null when unknown.
    /// </summary>
    [JsonPropertyName("process_id")]
    public int? ProcessId { get; init; }
    /// <summary>
    /// Container ID where the path was observed; null outside containers.
    /// </summary>
    [JsonPropertyName("container_id")]
    public string? ContainerId { get; init; }
    /// <summary>
    /// Kubernetes pod name if running in K8s.
    /// </summary>
    [JsonPropertyName("pod_name")]
    public string? PodName { get; init; }
    /// <summary>
    /// Kubernetes namespace if running in K8s.
    /// </summary>
    [JsonPropertyName("namespace")]
    public string? Namespace { get; init; }
    /// <summary>
    /// Source type of the observation (Tetragon, OTel, profiler, tracer, custom).
    /// </summary>
    [JsonPropertyName("source_type")]
    public required RuntimeObservationSourceType SourceType { get; init; }
    /// <summary>
    /// Unique observation ID for deduplication of repeated reports.
    /// </summary>
    [JsonPropertyName("observation_id")]
    public string? ObservationId { get; init; }
    /// <summary>
    /// Duration of the observed call in microseconds (if available).
    /// </summary>
    [JsonPropertyName("duration_us")]
    public long? DurationMicroseconds { get; init; }
}
/// <summary>
/// Source type for runtime observations (which collection mechanism produced the event).
/// </summary>
public enum RuntimeObservationSourceType
{
    /// <summary>Observation from Tetragon eBPF.</summary>
    Tetragon,
    /// <summary>Observation from OpenTelemetry tracing.</summary>
    OpenTelemetry,
    /// <summary>Observation from profiler sampling.</summary>
    Profiler,
    /// <summary>Observation from APM tracer.</summary>
    Tracer,
    /// <summary>Observation from custom instrumentation.</summary>
    Custom
}

View File

@@ -0,0 +1,66 @@
// -----------------------------------------------------------------------------
// RuntimeWitnessPredicateTypes.cs
// Sprint: SPRINT_20260118_016_Scanner_runtime_witness_signing
// Task: TASK-016-002 - Define runtime witness predicate type
// Description: Predicate types for runtime witness attestations
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Well-known predicate type URIs and short aliases for runtime witness attestations,
/// plus helpers to classify a predicate type and map an observation type to one.
/// </summary>
public static class RuntimeWitnessPredicateTypes
{
    /// <summary>Canonical runtime witness predicate type URI.</summary>
    public const string RuntimeWitnessCanonical = "https://stella.ops/predicates/runtime-witness/v1";

    /// <summary>Short alias for the runtime witness predicate type.</summary>
    public const string RuntimeWitnessAlias = "stella.ops/runtimeWitness@v1";

    /// <summary>Canonical confirmed witness (static + runtime match) predicate type URI.</summary>
    public const string ConfirmedWitnessCanonical = "https://stella.ops/predicates/confirmed-witness/v1";

    /// <summary>Short alias for the confirmed witness predicate type.</summary>
    public const string ConfirmedWitnessAlias = "stella.ops/confirmedWitness@v1";

    /// <summary>
    /// True when the predicate type is a recognized runtime witness type
    /// (canonical URI or alias).
    /// </summary>
    public static bool IsRuntimeWitnessType(string predicateType) =>
        predicateType is RuntimeWitnessCanonical or RuntimeWitnessAlias;

    /// <summary>
    /// True when the predicate type is a recognized confirmed witness type
    /// (canonical URI or alias).
    /// </summary>
    public static bool IsConfirmedWitnessType(string predicateType) =>
        predicateType is ConfirmedWitnessCanonical or ConfirmedWitnessAlias;

    /// <summary>
    /// Maps an observation type to its canonical predicate type; unknown values fall
    /// back to the static path-witness predicate.
    /// </summary>
    public static string GetPredicateType(ObservationType observationType) => observationType switch
    {
        ObservationType.Runtime => RuntimeWitnessCanonical,
        ObservationType.Confirmed => ConfirmedWitnessCanonical,
        // ObservationType.Static and any unrecognized value use the static predicate.
        _ => WitnessPredicateTypes.PathWitnessCanonical
    };
}

View File

@@ -0,0 +1,135 @@
// -----------------------------------------------------------------------------
// RuntimeWitnessRequest.cs
// Sprint: SPRINT_20260118_016_Scanner_runtime_witness_signing
// Task: TASK-016-001 - Create RuntimeWitnessRequest model
// Description: Request model for runtime witness generation
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Request to generate a signed runtime witness for one claim.
/// Follows the existing PathWitnessRequest pattern.
/// </summary>
public sealed record RuntimeWitnessRequest
{
    /// <summary>
    /// Claim ID linking to the static claim being witnessed.
    /// Format: "claim:&lt;artifact-digest&gt;:&lt;path-hash&gt;".
    /// </summary>
    public required string ClaimId { get; init; }

    /// <summary>SHA-256 digest of the artifact (SBOM/image) for context.</summary>
    public required string ArtifactDigest { get; init; }

    /// <summary>Component PURL being observed.</summary>
    public required string ComponentPurl { get; init; }

    /// <summary>
    /// Runtime observations to include in the witness; at least one is required.
    /// </summary>
    public required IReadOnlyList<RuntimeObservation> Observations { get; init; }

    /// <summary>Vulnerability ID if observing a vulnerable path.</summary>
    public string? VulnerabilityId { get; init; }

    /// <summary>Whether to publish to the Rekor transparency log. Defaults to true.</summary>
    public bool PublishToRekor { get; init; } = true;

    /// <summary>Signing options.</summary>
    public RuntimeWitnessSigningOptions SigningOptions { get; init; } = new();

    /// <summary>
    /// Validates the request: non-empty, well-formed ClaimId; non-empty digest and
    /// PURL; at least one observation.
    /// </summary>
    /// <exception cref="ArgumentException">If validation fails.</exception>
    public void Validate()
    {
        if (string.IsNullOrWhiteSpace(ClaimId))
        {
            throw new ArgumentException("ClaimId is required.", nameof(ClaimId));
        }

        if (!ClaimIdGenerator.IsValid(ClaimId))
        {
            throw new ArgumentException($"Invalid ClaimId format: {ClaimId}", nameof(ClaimId));
        }

        if (string.IsNullOrWhiteSpace(ArtifactDigest))
        {
            throw new ArgumentException("ArtifactDigest is required.", nameof(ArtifactDigest));
        }

        if (string.IsNullOrWhiteSpace(ComponentPurl))
        {
            throw new ArgumentException("ComponentPurl is required.", nameof(ComponentPurl));
        }

        if (Observations is not { Count: > 0 })
        {
            throw new ArgumentException("At least one observation is required.", nameof(Observations));
        }
    }
}
/// <summary>
/// Signing options for runtime witnesses.
/// Defaults: keyless (Fulcio) signing, ECDSA P-256/SHA-256, 30-second timeout.
/// </summary>
public sealed record RuntimeWitnessSigningOptions
{
    /// <summary>
    /// Key ID for signing (null for keyless/ephemeral).
    /// </summary>
    public string? KeyId { get; init; }
    /// <summary>
    /// Whether to use keyless (Fulcio) signing. Defaults to true.
    /// </summary>
    public bool UseKeyless { get; init; } = true;
    /// <summary>
    /// Signature algorithm preference. Defaults to "ECDSA_P256_SHA256".
    /// </summary>
    public string Algorithm { get; init; } = "ECDSA_P256_SHA256";
    /// <summary>
    /// Timeout for signing operations. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
}
/// <summary>
/// Request to generate multiple runtime witnesses in batch.
/// </summary>
public sealed record BatchRuntimeWitnessRequest
{
    /// <summary>
    /// Individual runtime witness requests. Must contain at least one entry.
    /// </summary>
    public required IReadOnlyList<RuntimeWitnessRequest> Requests { get; init; }

    /// <summary>
    /// Maximum parallelism for batch processing. Must be at least 1.
    /// </summary>
    public int MaxParallelism { get; init; } = 4;

    /// <summary>
    /// Whether to continue on individual failures.
    /// </summary>
    public bool ContinueOnError { get; init; } = true;

    /// <summary>
    /// Validates the batch and every contained request.
    /// </summary>
    /// <exception cref="ArgumentException">If the batch is empty or any request is invalid.</exception>
    /// <exception cref="ArgumentOutOfRangeException">If <see cref="MaxParallelism"/> is less than 1.</exception>
    public void Validate()
    {
        if (Requests == null || Requests.Count == 0)
            throw new ArgumentException("At least one request is required.", nameof(Requests));

        // A degree of parallelism below 1 would stall or fail batch execution;
        // reject it here instead of deep inside the processing pipeline.
        if (MaxParallelism < 1)
            throw new ArgumentOutOfRangeException(nameof(MaxParallelism), MaxParallelism, "MaxParallelism must be at least 1.");

        foreach (var request in Requests)
        {
            request.Validate();
        }
    }
}

View File

@@ -0,0 +1,199 @@
// -----------------------------------------------------------------------------
// WitnessMatcher.cs
// Sprint: SPRINT_20260118_017_Scanner_witness_verifier
// Task: TASK-017-002 - Implement claim-to-witness matching using PathHash
// Description: Matches runtime witnesses to claims using path hashes
// -----------------------------------------------------------------------------
using System.Globalization;
namespace StellaOps.Scanner.Reachability.Witnesses;
/// <summary>
/// Matches runtime witnesses to static claims using PathHash and NodeHashes.
/// </summary>
public sealed class WitnessMatcher
{
    /// <summary>
    /// Threshold for high-confidence partial match.
    /// </summary>
    public const double HighConfidenceThreshold = 0.8;

    /// <summary>
    /// Threshold for accepting a partial match.
    /// </summary>
    public const double PartialMatchThreshold = 0.5;

    /// <summary>
    /// Number of leading characters of the expected path hash probed against
    /// node hashes when looking for a partial match.
    /// </summary>
    private const int NodeHashProbePrefixLength = 8;

    /// <summary>
    /// Matches a witness to a claim ID.
    /// </summary>
    /// <param name="witness">The path witness to match.</param>
    /// <param name="claimId">The claim ID to match against.</param>
    /// <returns>Match result with confidence score.</returns>
    public WitnessMatchResult Match(PathWitness witness, string claimId)
    {
        ArgumentNullException.ThrowIfNull(witness);
        ArgumentException.ThrowIfNullOrWhiteSpace(claimId);

        // Parse claim ID to extract artifact digest and path hash.
        if (!ClaimIdGenerator.IsValid(claimId))
        {
            return WitnessMatchResult.NoMatch($"Invalid claim ID format: {claimId}");
        }

        var (artifactDigest, expectedPathHash) = ClaimIdGenerator.Parse(claimId);

        // The witness must refer to the same artifact as the claim.
        if (!ArtifactDigestMatches(witness, artifactDigest))
        {
            return CreateNoMatch("Artifact digest mismatch", artifactDigest, witness.Artifact.SbomDigest);
        }

        // An exact PathHash match wins outright.
        if (!string.IsNullOrEmpty(witness.PathHash))
        {
            var normalizedWitnessHash = NormalizeHash(witness.PathHash);
            var normalizedExpectedHash = NormalizeHash(expectedPathHash);
            if (normalizedWitnessHash == normalizedExpectedHash)
            {
                return WitnessMatchResult.ExactMatch(witness);
            }
        }

        // Otherwise attempt a partial match using the per-node hashes.
        if (witness.NodeHashes != null && witness.NodeHashes.Count > 0)
        {
            var confidence = ComputeNodeHashConfidence(witness.NodeHashes, expectedPathHash);
            if (confidence >= PartialMatchThreshold)
            {
                return new WitnessMatchResult
                {
                    MatchStatus = confidence >= HighConfidenceThreshold
                        ? WitnessMatchStatus.Matched
                        : WitnessMatchStatus.PartialMatch,
                    Witness = witness,
                    Confidence = confidence,
                    MatchedFrames = witness.NodeHashes.ToList(),
                    Discrepancies = confidence < 1.0
                        ? [new WitnessDiscrepancy
                        {
                            Type = "PartialPathMatch",
                            // FIX: format with the invariant culture so the description is
                            // deterministic across host locales (the file already imports
                            // System.Globalization for this purpose).
                            Description = $"Node hash overlap: {confidence.ToString("P0", CultureInfo.InvariantCulture)}",
                            Severity = DiscrepancySeverity.Info
                        }]
                        : []
                };
            }
        }

        return CreateNoMatch(
            "PathHash does not match",
            expectedPathHash,
            witness.PathHash ?? "(no hash)");
    }

    /// <summary>
    /// Finds the best matching witness from a collection.
    /// </summary>
    /// <param name="witnesses">Candidate witnesses to evaluate.</param>
    /// <param name="claimId">The claim ID to match against.</param>
    /// <returns>The highest-confidence match, or a no-match result if the collection is empty.</returns>
    public WitnessMatchResult FindBestMatch(IEnumerable<PathWitness> witnesses, string claimId)
    {
        ArgumentNullException.ThrowIfNull(witnesses);

        WitnessMatchResult? bestMatch = null;
        foreach (var witness in witnesses)
        {
            var match = Match(witness, claimId);
            if (bestMatch == null || match.Confidence > bestMatch.Confidence)
            {
                bestMatch = match;
            }

            // Early exit on exact match - nothing can score higher.
            if (match.Confidence >= 1.0)
            {
                return match;
            }
        }

        return bestMatch ?? WitnessMatchResult.NoMatch("No witnesses provided");
    }

    /// <summary>
    /// Checks if a witness could potentially match a claim (fast pre-filter).
    /// Only the artifact digest is compared; path hashes are not inspected.
    /// </summary>
    public bool CouldMatch(PathWitness witness, string claimId)
    {
        if (!ClaimIdGenerator.IsValid(claimId))
            return false;

        var (artifactDigest, _) = ClaimIdGenerator.Parse(claimId);
        return ArtifactDigestMatches(witness, artifactDigest);
    }

    // Compares the witness's SBOM digest with the claim's artifact digest after normalization.
    private static bool ArtifactDigestMatches(PathWitness witness, string artifactDigest)
    {
        var witnessDigest = NormalizeHash(witness.Artifact.SbomDigest);
        var expectedDigest = NormalizeHash(artifactDigest);
        return witnessDigest == expectedDigest;
    }

    // Heuristic confidence score in [0, 1] for a partial path match via node hashes.
    private static double ComputeNodeHashConfidence(IReadOnlyList<string> nodeHashes, string expectedPathHash)
    {
        // Simple heuristic: check if a prefix of the path hash appears in a node hash.
        // In a full implementation, this would compute actual overlap.
        var normalizedExpected = NormalizeHash(expectedPathHash);

        // FIX: guard against an empty expected hash. string.Contains("") is always
        // true, so without this guard any node hash would score 0.9.
        if (normalizedExpected.Length == 0)
        {
            return 0.0;
        }

        var probe = normalizedExpected[..Math.Min(NodeHashProbePrefixLength, normalizedExpected.Length)];
        foreach (var nodeHash in nodeHashes)
        {
            var normalized = NormalizeHash(nodeHash);
            if (normalized.Contains(probe, StringComparison.Ordinal))
            {
                return 0.9; // High confidence partial match.
            }
        }

        // Default partial confidence based on how many node hashes are available.
        return nodeHashes.Count switch
        {
            >= 10 => 0.6,
            >= 5 => 0.5,
            >= 1 => 0.3,
            _ => 0.0
        };
    }

    // Lower-cases a hash and strips "sha256:" / "0x" prefixes so formats compare equal.
    private static string NormalizeHash(string hash)
    {
        if (hash.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase))
            hash = hash[7..];
        if (hash.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
            hash = hash[2..];
        return hash.ToLowerInvariant();
    }

    // Builds a zero-confidence result carrying a single error-severity discrepancy.
    private static WitnessMatchResult CreateNoMatch(string reason, string expected, string actual)
    {
        return new WitnessMatchResult
        {
            MatchStatus = WitnessMatchStatus.NoMatch,
            Confidence = 0.0,
            Discrepancies =
            [
                new WitnessDiscrepancy
                {
                    Type = "Mismatch",
                    Description = reason,
                    Expected = expected,
                    Actual = actual,
                    Severity = DiscrepancySeverity.Error
                }
            ]
        };
    }
}

View File

@@ -0,0 +1,121 @@
namespace StellaOps.Scanner.Registry;
/// <summary>
/// Client interface for interacting with OCI container registries.
/// Supports Docker Registry v2 API and OCI Distribution Spec 1.1.
/// </summary>
public interface IRegistryClient
{
    /// <summary>
    /// Gets a manifest from the registry.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="reference">Tag or digest reference.</param>
    /// <param name="acceptMediaTypes">Acceptable media types for content negotiation; null uses the implementation's defaults.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The manifest response with content and metadata, or null if the manifest does not exist (404).</returns>
    Task<RegistryManifestResponse?> GetManifestAsync(
        string registry,
        string repository,
        string reference,
        IEnumerable<string>? acceptMediaTypes = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Downloads a blob from the registry.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="digest">Blob digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Stream containing the blob content, or null if the blob does not exist.
    /// The caller owns the returned stream and is responsible for disposing it.</returns>
    Task<Stream?> GetBlobAsync(
        string registry,
        string repository,
        string digest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks if a blob exists and returns its size (HEAD request; no content is downloaded).
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="digest">Blob digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The blob metadata if the blob exists, null otherwise.</returns>
    Task<RegistryBlobInfo?> HeadBlobAsync(
        string registry,
        string repository,
        string digest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists tags for a repository.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="repository">Repository path.</param>
    /// <param name="limit">Maximum number of tags to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>List of tags; empty when the repository is unknown or has no tags.</returns>
    Task<IReadOnlyList<string>> GetTagsAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks if the registry is reachable and supports the v2 API.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if the registry is accessible (an auth challenge still counts as reachable).</returns>
    Task<bool> PingAsync(string registry, CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of fetching a manifest from a registry.
/// </summary>
public sealed record RegistryManifestResponse
{
    /// <summary>
    /// Raw manifest body as returned by the registry.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Media type reported for the manifest.
    /// </summary>
    public required string MediaType { get; init; }

    /// <summary>
    /// Manifest digest (taken from the Docker-Content-Digest header).
    /// </summary>
    public required string Digest { get; init; }

    /// <summary>
    /// Manifest size in bytes.
    /// </summary>
    public long Size { get; init; }
}
/// <summary>
/// Metadata describing a blob stored in a registry.
/// </summary>
public sealed record RegistryBlobInfo
{
    /// <summary>
    /// Digest identifying the blob.
    /// </summary>
    public required string Digest { get; init; }

    /// <summary>
    /// Blob size in bytes.
    /// </summary>
    public long Size { get; init; }

    /// <summary>
    /// Media type of the blob, when the registry reports one.
    /// </summary>
    public string? MediaType { get; init; }
}

View File

@@ -0,0 +1,86 @@
namespace StellaOps.Scanner.Registry;
/// <summary>
/// Provider for registry authentication credentials.
/// </summary>
public interface IRegistryCredentialProvider
{
    /// <summary>
    /// Gets credentials for a registry.
    /// </summary>
    /// <param name="registry">Registry hostname.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Credentials if available; null (or <see cref="RegistryCredential.Anonymous"/>) for anonymous access.</returns>
    Task<RegistryCredential?> GetCredentialAsync(string registry, CancellationToken cancellationToken = default);
}
/// <summary>
/// Credentials for registry authentication.
/// </summary>
public sealed record RegistryCredential
{
    /// <summary>
    /// The authentication type.
    /// </summary>
    public required RegistryAuthType AuthType { get; init; }

    /// <summary>
    /// Username for basic auth.
    /// </summary>
    public string? Username { get; init; }

    /// <summary>
    /// Password or token for authentication.
    /// </summary>
    public string? Password { get; init; }

    /// <summary>
    /// Bearer token for token-based auth.
    /// </summary>
    public string? BearerToken { get; init; }

    /// <summary>
    /// Shared anonymous credential instance. Records use value equality, so
    /// reusing a single instance is observationally equivalent to allocating
    /// one per access, and avoids needless allocations.
    /// </summary>
    public static RegistryCredential Anonymous { get; } = new() { AuthType = RegistryAuthType.Anonymous };

    /// <summary>
    /// Creates basic auth credentials.
    /// </summary>
    /// <param name="username">Username; must be non-empty.</param>
    /// <param name="password">Password; may be empty for registries that accept it.</param>
    /// <exception cref="ArgumentException">If <paramref name="username"/> is null or whitespace.</exception>
    public static RegistryCredential Basic(string username, string password)
    {
        // Fail fast: a blank username would otherwise be silently ignored by
        // consumers that pattern-match on a non-empty Username.
        ArgumentException.ThrowIfNullOrWhiteSpace(username);
        return new()
        {
            AuthType = RegistryAuthType.Basic,
            Username = username,
            Password = password
        };
    }

    /// <summary>
    /// Creates bearer token credentials.
    /// </summary>
    /// <param name="token">Bearer token; must be non-empty.</param>
    /// <exception cref="ArgumentException">If <paramref name="token"/> is null or whitespace.</exception>
    public static RegistryCredential Bearer(string token)
    {
        // Fail fast: a blank token would otherwise produce a credential that
        // consumers silently treat as anonymous.
        ArgumentException.ThrowIfNullOrWhiteSpace(token);
        return new()
        {
            AuthType = RegistryAuthType.Bearer,
            BearerToken = token
        };
    }
}

/// <summary>
/// Registry authentication types.
/// </summary>
public enum RegistryAuthType
{
    /// <summary>
    /// No authentication (anonymous access).
    /// </summary>
    Anonymous = 0,

    /// <summary>
    /// HTTP Basic authentication.
    /// </summary>
    Basic = 1,

    /// <summary>
    /// Bearer token authentication.
    /// </summary>
    Bearer = 2
}

View File

@@ -0,0 +1,427 @@
using System.Collections.Concurrent;
using System.Net;
using System.Net.Http.Headers;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scanner.Registry;
/// <summary>
/// Implementation of <see cref="IRegistryClient"/> with authentication, rate limiting, and retry support.
/// Retries are performed internally (see <see cref="RegistryClientOptions.RetryDelays"/>); bearer tokens
/// obtained via WWW-Authenticate challenges are cached until shortly before expiry.
/// </summary>
public sealed class RegistryClient : IRegistryClient, IDisposable
{
    /// <summary>
    /// Name of the HttpClient registration used for all registry traffic.
    /// </summary>
    public const string HttpClientName = "stellaops-registry-client";

    // Content-negotiation preference: index/list manifests first, then single-image manifests.
    private static readonly string[] DefaultManifestAccept =
    [
        "application/vnd.oci.image.index.v1+json",
        "application/vnd.docker.distribution.manifest.list.v2+json",
        "application/vnd.oci.image.manifest.v1+json",
        "application/vnd.docker.distribution.manifest.v2+json"
    ];

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IRegistryCredentialProvider _credentialProvider;
    private readonly RegistryClientOptions _options;
    private readonly ILogger<RegistryClient> _logger;

    // Bearer tokens cached per "realm|service|scope" until shortly before expiry.
    private readonly ConcurrentDictionary<string, TokenCacheEntry> _tokenCache = new(StringComparer.OrdinalIgnoreCase);

    // Bounds the number of in-flight registry requests (MaxConcurrentRequests).
    private readonly SemaphoreSlim _rateLimitSemaphore;

    public RegistryClient(
        IHttpClientFactory httpClientFactory,
        IRegistryCredentialProvider credentialProvider,
        IOptions<RegistryClientOptions> options,
        ILogger<RegistryClient> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _credentialProvider = credentialProvider ?? throw new ArgumentNullException(nameof(credentialProvider));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _rateLimitSemaphore = new SemaphoreSlim(_options.MaxConcurrentRequests);
    }

    /// <inheritdoc />
    public async Task<RegistryManifestResponse?> GetManifestAsync(
        string registry,
        string repository,
        string reference,
        IEnumerable<string>? acceptMediaTypes = null,
        CancellationToken cancellationToken = default)
    {
        var uri = BuildUri(registry, repository, $"manifests/{reference}");
        var accept = acceptMediaTypes?.ToArray() ?? DefaultManifestAccept;
        return await ExecuteWithRateLimitAsync(async () =>
        {
            using var request = new HttpRequestMessage(HttpMethod.Get, uri);
            foreach (var mediaType in accept)
            {
                request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(mediaType));
            }

            using var response = await SendWithAuthAsync(registry, repository, request, cancellationToken)
                .ConfigureAwait(false);
            if (response.StatusCode == HttpStatusCode.NotFound)
            {
                return null;
            }

            response.EnsureSuccessStatusCode();
            var content = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
            var mediaType = response.Content.Headers.ContentType?.MediaType ?? string.Empty;
            var digest = response.Headers.TryGetValues("Docker-Content-Digest", out var digestValues)
                ? digestValues.FirstOrDefault() ?? string.Empty
                : string.Empty;
            return new RegistryManifestResponse
            {
                Content = content,
                MediaType = mediaType,
                Digest = digest,
                // FIX: string.Length counts UTF-16 chars, not bytes. Prefer the
                // transport-reported byte length; fall back to the UTF-8 byte count.
                Size = response.Content.Headers.ContentLength
                    ?? System.Text.Encoding.UTF8.GetByteCount(content)
            };
        }, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<Stream?> GetBlobAsync(
        string registry,
        string repository,
        string digest,
        CancellationToken cancellationToken = default)
    {
        var uri = BuildUri(registry, repository, $"blobs/{digest}");
        return await ExecuteWithRateLimitAsync(async () =>
        {
            using var request = new HttpRequestMessage(HttpMethod.Get, uri);
            // Intentionally not disposed on success: disposing the response would
            // close the content stream handed back to the caller.
            var response = await SendWithAuthAsync(registry, repository, request, cancellationToken)
                .ConfigureAwait(false);
            if (response.StatusCode == HttpStatusCode.NotFound)
            {
                response.Dispose();
                return null;
            }

            response.EnsureSuccessStatusCode();
            // Return the stream - caller is responsible for disposing.
            return await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
        }, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<RegistryBlobInfo?> HeadBlobAsync(
        string registry,
        string repository,
        string digest,
        CancellationToken cancellationToken = default)
    {
        var uri = BuildUri(registry, repository, $"blobs/{digest}");
        return await ExecuteWithRateLimitAsync(async () =>
        {
            using var request = new HttpRequestMessage(HttpMethod.Head, uri);
            using var response = await SendWithAuthAsync(registry, repository, request, cancellationToken)
                .ConfigureAwait(false);
            if (response.StatusCode == HttpStatusCode.NotFound)
            {
                return null;
            }

            response.EnsureSuccessStatusCode();
            var size = response.Content.Headers.ContentLength ?? 0;
            var mediaType = response.Content.Headers.ContentType?.MediaType;
            return new RegistryBlobInfo
            {
                Digest = digest,
                Size = size,
                MediaType = mediaType
            };
        }, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<string>> GetTagsAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        var uri = BuildUri(registry, repository, $"tags/list?n={limit}");
        return await ExecuteWithRateLimitAsync(async () =>
        {
            using var request = new HttpRequestMessage(HttpMethod.Get, uri);
            using var response = await SendWithAuthAsync(registry, repository, request, cancellationToken)
                .ConfigureAwait(false);
            if (response.StatusCode == HttpStatusCode.NotFound)
            {
                return Array.Empty<string>();
            }

            response.EnsureSuccessStatusCode();
            var json = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
            using var doc = JsonDocument.Parse(json);
            if (doc.RootElement.TryGetProperty("tags", out var tagsElement) &&
                tagsElement.ValueKind == JsonValueKind.Array)
            {
                return tagsElement.EnumerateArray()
                    .Select(t => t.GetString())
                    .Where(t => !string.IsNullOrWhiteSpace(t))
                    .Cast<string>()
                    .ToArray();
            }

            return Array.Empty<string>();
        }, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> PingAsync(string registry, CancellationToken cancellationToken = default)
    {
        var uri = new Uri($"https://{registry}/v2/");
        return await ExecuteWithRateLimitAsync(async () =>
        {
            try
            {
                var client = _httpClientFactory.CreateClient(HttpClientName);
                using var response = await client.GetAsync(uri, cancellationToken).ConfigureAwait(false);
                // 401 still proves the registry speaks the v2 protocol - it just wants auth.
                return response.StatusCode == HttpStatusCode.OK ||
                       response.StatusCode == HttpStatusCode.Unauthorized;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Failed to ping registry {Registry}", registry);
                return false;
            }
        }, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Releases the rate-limiting semaphore.
    /// </summary>
    public void Dispose() => _rateLimitSemaphore.Dispose();

    // Sends the request with the provider's credential applied; on a 401 with a
    // Bearer challenge, fetches a token and retries the request once.
    private async Task<HttpResponseMessage> SendWithAuthAsync(
        string registry,
        string repository,
        HttpRequestMessage request,
        CancellationToken cancellationToken)
    {
        var credential = await _credentialProvider.GetCredentialAsync(registry, cancellationToken)
            .ConfigureAwait(false);
        ApplyCredential(request, credential);
        var client = _httpClientFactory.CreateClient(HttpClientName);
        var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        if (response.StatusCode != HttpStatusCode.Unauthorized)
        {
            return response;
        }

        // Try bearer token authentication.
        var challenge = response.Headers.WwwAuthenticate.FirstOrDefault(h =>
            h.Scheme.Equals("Bearer", StringComparison.OrdinalIgnoreCase));
        if (challenge is not null)
        {
            var token = await GetBearerTokenAsync(registry, repository, challenge, credential, cancellationToken)
                .ConfigureAwait(false);
            if (!string.IsNullOrWhiteSpace(token))
            {
                response.Dispose();
                var retry = CloneRequest(request);
                retry.Headers.Authorization = new AuthenticationHeaderValue("Bearer", token);
                return await client.SendAsync(retry, cancellationToken).ConfigureAwait(false);
            }
        }

        return response;
    }

    // Sets the Authorization header from the credential; anonymous leaves it unset.
    private static void ApplyCredential(HttpRequestMessage request, RegistryCredential? credential)
    {
        if (credential is null || credential.AuthType == RegistryAuthType.Anonymous)
        {
            return;
        }

        switch (credential.AuthType)
        {
            case RegistryAuthType.Basic when !string.IsNullOrWhiteSpace(credential.Username):
                var basicToken = Convert.ToBase64String(
                    System.Text.Encoding.UTF8.GetBytes($"{credential.Username}:{credential.Password}"));
                request.Headers.Authorization = new AuthenticationHeaderValue("Basic", basicToken);
                break;
            case RegistryAuthType.Bearer when !string.IsNullOrWhiteSpace(credential.BearerToken):
                request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", credential.BearerToken);
                break;
        }
    }

    // Resolves a bearer token for the challenge's realm/service/scope, using the
    // cache when the cached entry has not yet reached its buffered expiry.
    private async Task<string?> GetBearerTokenAsync(
        string registry,
        string repository,
        AuthenticationHeaderValue challenge,
        RegistryCredential? credential,
        CancellationToken cancellationToken)
    {
        var parameters = ParseChallengeParameters(challenge.Parameter);
        if (!parameters.TryGetValue("realm", out var realm) || string.IsNullOrWhiteSpace(realm))
        {
            return null;
        }

        var service = parameters.GetValueOrDefault("service");
        var scope = parameters.GetValueOrDefault("scope") ?? $"repository:{repository}:pull";
        var cacheKey = $"{realm}|{service}|{scope}";

        // Check cache.
        if (_tokenCache.TryGetValue(cacheKey, out var cached) && cached.ExpiresAt > DateTimeOffset.UtcNow)
        {
            return cached.Token;
        }

        // Fetch new token.
        var tokenUri = BuildTokenUri(realm, service, scope);
        using var request = new HttpRequestMessage(HttpMethod.Get, tokenUri);
        if (credential is { AuthType: RegistryAuthType.Basic, Username: not null })
        {
            var basicToken = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes($"{credential.Username}:{credential.Password}"));
            request.Headers.Authorization = new AuthenticationHeaderValue("Basic", basicToken);
        }

        var client = _httpClientFactory.CreateClient(HttpClientName);
        using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Bearer token request failed for {Registry}: {StatusCode}", registry, response.StatusCode);
            return null;
        }

        var json = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
        using var doc = JsonDocument.Parse(json);
        string? token = null;
        if (doc.RootElement.TryGetProperty("token", out var tokenElement) ||
            doc.RootElement.TryGetProperty("access_token", out tokenElement))
        {
            token = tokenElement.GetString();
        }

        if (!string.IsNullOrWhiteSpace(token))
        {
            var expiresIn = doc.RootElement.TryGetProperty("expires_in", out var expiresElement)
                ? expiresElement.GetInt32()
                : 300; // Default 5 minutes when the token endpoint omits expires_in.

            // FIX: honor the configured expiration buffer instead of a hard-coded
            // 30 seconds (RegistryClientOptions.TokenExpirationBufferSeconds existed
            // but was never read). Clamp so a large buffer cannot go negative.
            var buffer = Math.Max(0, _options.TokenExpirationBufferSeconds);
            var lifetimeSeconds = Math.Max(0, expiresIn - buffer);
            _tokenCache[cacheKey] = new TokenCacheEntry(token, DateTimeOffset.UtcNow.AddSeconds(lifetimeSeconds));
        }

        return token;
    }

    // Serializes access through the semaphore, then runs the action with retries.
    private async Task<T> ExecuteWithRateLimitAsync<T>(Func<Task<T>> action, CancellationToken cancellationToken)
    {
        await _rateLimitSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            return await ExecuteWithRetryAsync(action, cancellationToken).ConfigureAwait(false);
        }
        finally
        {
            _rateLimitSemaphore.Release();
        }
    }

    // Retries transient failures using the configured delay schedule.
    private async Task<T> ExecuteWithRetryAsync<T>(Func<Task<T>> action, CancellationToken cancellationToken)
    {
        var attempt = 0;
        var delays = _options.RetryDelays;
        while (true)
        {
            try
            {
                return await action().ConfigureAwait(false);
            }
            catch (HttpRequestException ex) when (attempt < delays.Length && !cancellationToken.IsCancellationRequested)
            {
                _logger.LogWarning(ex, "Registry request failed, attempt {Attempt}/{MaxAttempts}", attempt + 1, delays.Length + 1);
                await Task.Delay(delays[attempt], cancellationToken).ConfigureAwait(false);
                attempt++;
            }
            catch (TaskCanceledException ex) when (attempt < delays.Length && !cancellationToken.IsCancellationRequested)
            {
                // FIX: HttpClient timeouts surface as TaskCanceledException; retry them
                // like other transient failures. The filter on the caller's token ensures
                // genuine cancellation still propagates immediately.
                _logger.LogWarning(ex, "Registry request timed out, attempt {Attempt}/{MaxAttempts}", attempt + 1, delays.Length + 1);
                await Task.Delay(delays[attempt], cancellationToken).ConfigureAwait(false);
                attempt++;
            }
        }
    }

    // Builds a v2 API URI; registries are assumed to be served over HTTPS.
    private static Uri BuildUri(string registry, string repository, string path)
    {
        return new Uri($"https://{registry}/v2/{repository}/{path}");
    }

    // Builds the token endpoint URI with escaped service/scope query parameters.
    private static Uri BuildTokenUri(string realm, string? service, string? scope)
    {
        var builder = new UriBuilder(realm);
        var query = new List<string>();
        if (!string.IsNullOrWhiteSpace(service))
        {
            query.Add($"service={Uri.EscapeDataString(service)}");
        }
        if (!string.IsNullOrWhiteSpace(scope))
        {
            query.Add($"scope={Uri.EscapeDataString(scope)}");
        }
        builder.Query = string.Join("&", query);
        return builder.Uri;
    }

    // Parses the comma-separated key="value" pairs of a WWW-Authenticate challenge.
    private static Dictionary<string, string> ParseChallengeParameters(string? parameter)
    {
        var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        if (string.IsNullOrWhiteSpace(parameter))
        {
            return result;
        }

        foreach (var part in parameter.Split(',', StringSplitOptions.RemoveEmptyEntries))
        {
            var tokens = part.Split('=', 2, StringSplitOptions.RemoveEmptyEntries);
            if (tokens.Length == 2)
            {
                var key = tokens[0].Trim();
                var value = tokens[1].Trim().Trim('"');
                if (!string.IsNullOrWhiteSpace(key))
                {
                    result[key] = value;
                }
            }
        }

        return result;
    }

    // Clones method, URI, and headers; sufficient here because only GET/HEAD
    // requests (no bodies) are retried after an auth challenge.
    private static HttpRequestMessage CloneRequest(HttpRequestMessage request)
    {
        var clone = new HttpRequestMessage(request.Method, request.RequestUri);
        foreach (var header in request.Headers)
        {
            clone.Headers.TryAddWithoutValidation(header.Key, header.Value);
        }
        return clone;
    }

    private sealed record TokenCacheEntry(string Token, DateTimeOffset ExpiresAt);
}

View File

@@ -0,0 +1,55 @@
namespace StellaOps.Scanner.Registry;
/// <summary>
/// Configuration options for the registry client.
/// </summary>
public sealed class RegistryClientOptions
{
    /// <summary>
    /// Configuration section name for binding.
    /// </summary>
    public const string SectionName = "Registry:Client";

    /// <summary>
    /// Upper bound on simultaneous requests to registries. Defaults to 10.
    /// </summary>
    public int MaxConcurrentRequests { get; set; } = 10;

    /// <summary>
    /// Wait times (milliseconds) between successive retry attempts.
    /// Defaults to [500, 1000, 2000, 5000]: four retries with exponential backoff.
    /// </summary>
    public int[] RetryDelays { get; set; } = [500, 1000, 2000, 5000];

    /// <summary>
    /// Per-request HTTP timeout in seconds. Defaults to 30.
    /// </summary>
    public int RequestTimeoutSeconds { get; set; } = 30;

    /// <summary>
    /// Timeout for blob downloads in seconds. Defaults to 300 (5 minutes).
    /// </summary>
    public int BlobDownloadTimeoutSeconds { get; set; } = 300;

    /// <summary>
    /// Disables TLS certificate validation when true.
    /// WARNING: only for development/testing with self-signed certificates.
    /// Defaults to false.
    /// </summary>
    public bool SkipTlsVerify { get; set; } = false;

    /// <summary>
    /// User agent string sent with every HTTP request.
    /// </summary>
    public string UserAgent { get; set; } = "StellaOps-Scanner/1.0";

    /// <summary>
    /// Number of seconds before actual token expiry at which cached bearer
    /// tokens are considered stale and refreshed. Defaults to 30.
    /// </summary>
    public int TokenExpirationBufferSeconds { get; set; } = 30;
}

View File

@@ -0,0 +1,91 @@
using System.Net.Security;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using Polly;
namespace StellaOps.Scanner.Registry;
/// <summary>
/// Extension methods for registering Scanner.Registry services.
/// </summary>
public static class RegistryServiceCollectionExtensions
{
    /// <summary>
    /// Adds registry client services to the service collection: the named
    /// HttpClient, its TLS handler, and the <see cref="IRegistryClient"/> singleton.
    /// Note: callers must also register an <see cref="IRegistryCredentialProvider"/>
    /// (see <see cref="AddRegistryCredentialProvider{TProvider}"/> or
    /// <see cref="AddAnonymousRegistryCredentials"/>) for RegistryClient to resolve.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configureOptions">Optional configuration delegate.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddRegistryClient(
        this IServiceCollection services,
        Action<RegistryClientOptions>? configureOptions = null)
    {
        // Configure options
        if (configureOptions is not null)
        {
            services.Configure(configureOptions);
        }
        // Register the named HTTP client. No resilience pipeline is attached here:
        // retry and rate limiting are implemented inside RegistryClient itself,
        // so adding a handler-level retry would multiply attempts.
        services.AddHttpClient(RegistryClient.HttpClientName, (sp, client) =>
        {
            var options = sp.GetRequiredService<IOptions<RegistryClientOptions>>().Value;
            client.Timeout = TimeSpan.FromSeconds(options.RequestTimeoutSeconds);
            client.DefaultRequestHeaders.UserAgent.ParseAdd(options.UserAgent);
        })
        .ConfigurePrimaryHttpMessageHandler(sp =>
        {
            var options = sp.GetRequiredService<IOptions<RegistryClientOptions>>().Value;
            var handler = new HttpClientHandler();
            // Development/testing escape hatch: accept ANY server certificate.
            if (options.SkipTlsVerify)
            {
                handler.ServerCertificateCustomValidationCallback =
                    (message, cert, chain, errors) => true;
            }
            return handler;
        });
        // Register the client
        services.AddSingleton<IRegistryClient, RegistryClient>();
        return services;
    }

    /// <summary>
    /// Adds a credential provider for registry authentication.
    /// </summary>
    /// <typeparam name="TProvider">The credential provider implementation type.</typeparam>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddRegistryCredentialProvider<TProvider>(
        this IServiceCollection services)
        where TProvider : class, IRegistryCredentialProvider
    {
        services.AddSingleton<IRegistryCredentialProvider, TProvider>();
        return services;
    }

    /// <summary>
    /// Adds a simple anonymous credential provider for registries that don't require authentication.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddAnonymousRegistryCredentials(this IServiceCollection services)
    {
        services.AddSingleton<IRegistryCredentialProvider, AnonymousCredentialProvider>();
        return services;
    }
}
/// <summary>
/// A credential provider that always returns anonymous credentials.
/// </summary>
internal sealed class AnonymousCredentialProvider : IRegistryCredentialProvider
{
    /// <summary>
    /// Yields <see cref="RegistryCredential.Anonymous"/> for every registry.
    /// </summary>
    public Task<RegistryCredential?> GetCredentialAsync(string registry, CancellationToken cancellationToken = default)
        => Task.FromResult<RegistryCredential?>(RegistryCredential.Anonymous);
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Http.Resilience" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Polly" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Contracts\StellaOps.Scanner.Contracts.csproj" />
</ItemGroup>
</Project>

View File

@@ -4,6 +4,13 @@ public sealed class LayerDocument
{
public string LayerDigest { get; set; } = string.Empty;
/// <summary>
/// The diffID (uncompressed content hash) of this layer.
/// Format: sha256:64hex (71 chars total).
/// Null if not yet computed.
/// </summary>
public string? DiffId { get; set; }
public string MediaType { get; set; } = string.Empty;
public long SizeBytes { get; set; }
@@ -14,4 +21,9 @@ public sealed class LayerDocument
public DateTime LastSeenAtUtc { get; set; }
= DateTime.UtcNow;
/// <summary>
/// When the diffID was computed, if at all.
/// </summary>
public DateTime? DiffIdComputedAtUtc { get; set; }
}

View File

@@ -10,6 +10,7 @@ public sealed class LayerRepository : RepositoryBase<ScannerDataSource>
{
private const string Tenant = "";
private string Table => $"{SchemaName}.layers";
private string ImageLayersTable => $"{SchemaName}.image_layers";
private string SchemaName => DataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
private readonly TimeProvider _timeProvider;
@@ -28,12 +29,14 @@ public sealed class LayerRepository : RepositoryBase<ScannerDataSource>
document.CreatedAtUtc = document.CreatedAtUtc == default ? now : document.CreatedAtUtc;
var sql = $"""
INSERT INTO {Table} (layer_digest, media_type, size_bytes, created_at_utc, last_seen_at_utc)
VALUES (@layer_digest, @media_type, @size_bytes, @created_at_utc, @last_seen_at_utc)
INSERT INTO {Table} (layer_digest, diff_id, media_type, size_bytes, created_at_utc, last_seen_at_utc, diff_id_computed_at_utc)
VALUES (@layer_digest, @diff_id, @media_type, @size_bytes, @created_at_utc, @last_seen_at_utc, @diff_id_computed_at_utc)
ON CONFLICT (layer_digest) DO UPDATE SET
diff_id = COALESCE(EXCLUDED.diff_id, {Table}.diff_id),
media_type = EXCLUDED.media_type,
size_bytes = EXCLUDED.size_bytes,
last_seen_at_utc = EXCLUDED.last_seen_at_utc
last_seen_at_utc = EXCLUDED.last_seen_at_utc,
diff_id_computed_at_utc = COALESCE(EXCLUDED.diff_id_computed_at_utc, {Table}.diff_id_computed_at_utc)
""";
return ExecuteAsync(
@@ -42,10 +45,40 @@ public sealed class LayerRepository : RepositoryBase<ScannerDataSource>
cmd =>
{
AddParameter(cmd, "layer_digest", document.LayerDigest);
AddParameter(cmd, "diff_id", document.DiffId ?? (object)DBNull.Value);
AddParameter(cmd, "media_type", document.MediaType);
AddParameter(cmd, "size_bytes", document.SizeBytes);
AddParameter(cmd, "created_at_utc", document.CreatedAtUtc);
AddParameter(cmd, "last_seen_at_utc", document.LastSeenAtUtc);
AddParameter(cmd, "diff_id_computed_at_utc", document.DiffIdComputedAtUtc ?? (object)DBNull.Value);
},
cancellationToken);
}
/// <summary>
/// Updates only the diffID for a layer, recording when it was computed.
/// </summary>
/// <param name="layerDigest">Compressed layer digest (primary key of the layers table).</param>
/// <param name="diffId">Uncompressed content hash (sha256:...) to persist.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public Task SetDiffIdAsync(string layerDigest, string diffId, CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);
ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
// Clock comes from the injected TimeProvider so tests can control the timestamp.
var now = _timeProvider.GetUtcNow().UtcDateTime;
// "AND diff_id IS NULL" makes this write-once: an already-computed diffID is never overwritten,
// and a second call for the same layer is a silent no-op.
var sql = $"""
UPDATE {Table}
SET diff_id = @diff_id, diff_id_computed_at_utc = @computed_at
WHERE layer_digest = @layer_digest AND diff_id IS NULL
""";
return ExecuteAsync(
Tenant,
sql,
cmd =>
{
AddParameter(cmd, "layer_digest", layerDigest);
AddParameter(cmd, "diff_id", diffId);
AddParameter(cmd, "computed_at", now);
},
cancellationToken);
}
@@ -55,7 +88,7 @@ public sealed class LayerRepository : RepositoryBase<ScannerDataSource>
ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);
var sql = $"""
SELECT layer_digest, media_type, size_bytes, created_at_utc, last_seen_at_utc
SELECT layer_digest, diff_id, media_type, size_bytes, created_at_utc, last_seen_at_utc, diff_id_computed_at_utc
FROM {Table}
WHERE layer_digest = @layer_digest
""";
@@ -68,12 +101,112 @@ public sealed class LayerRepository : RepositoryBase<ScannerDataSource>
cancellationToken);
}
private static LayerDocument MapLayer(NpgsqlDataReader reader) => new()
/// <summary>
/// Gets a layer by its diffID (uncompressed content hash).
/// </summary>
public Task<LayerDocument?> GetByDiffIdAsync(string diffId, CancellationToken cancellationToken)
{
LayerDigest = reader.GetString(reader.GetOrdinal("layer_digest")),
MediaType = reader.GetString(reader.GetOrdinal("media_type")),
SizeBytes = reader.GetInt64(reader.GetOrdinal("size_bytes")),
CreatedAtUtc = reader.GetFieldValue<DateTime>(reader.GetOrdinal("created_at_utc")),
LastSeenAtUtc = reader.GetFieldValue<DateTime>(reader.GetOrdinal("last_seen_at_utc")),
};
ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
var sql = $"""
SELECT layer_digest, diff_id, media_type, size_bytes, created_at_utc, last_seen_at_utc, diff_id_computed_at_utc
FROM {Table}
WHERE diff_id = @diff_id
LIMIT 1
""";
return QuerySingleOrDefaultAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "diff_id", diffId),
MapLayer,
cancellationToken);
}
/// <summary>
/// Finds all images that contain a layer with the given diffID.
/// </summary>
/// <param name="diffId">Uncompressed layer content hash (sha256:...).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Distinct image references in deterministic order, capped at 1000 rows to bound result size.</returns>
public Task<IReadOnlyList<string>> FindImagesWithLayerAsync(string diffId, CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
// Join the image->layer link table against layers to translate diffID into image references.
var sql = $"""
SELECT DISTINCT il.image_reference
FROM {ImageLayersTable} il
INNER JOIN {Table} l ON il.layer_digest = l.layer_digest
WHERE l.diff_id = @diff_id
ORDER BY il.image_reference
LIMIT 1000
""";
return QueryAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "diff_id", diffId),
reader => reader.GetString(0),
cancellationToken);
}
/// <summary>
/// Gets all layers that do not have a computed diffID.
/// Newest layers come first so backfill work prioritises recent ingests.
/// </summary>
/// <param name="limit">Maximum number of rows to return.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public Task<IReadOnlyList<LayerDocument>> GetLayersWithoutDiffIdAsync(int limit, CancellationToken cancellationToken)
{
var sql = $"""
SELECT layer_digest, diff_id, media_type, size_bytes, created_at_utc, last_seen_at_utc, diff_id_computed_at_utc
FROM {Table}
WHERE diff_id IS NULL
ORDER BY created_at_utc DESC
LIMIT @limit
""";
return QueryAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "limit", limit),
MapLayer,
cancellationToken);
}
/// <summary>
/// Gets layers by multiple diffIDs (uncompressed content hashes).
/// </summary>
/// <param name="diffIds">Candidate diffIDs; null/whitespace entries are ignored and duplicates are collapsed.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Matching layers; an empty list when no usable diffIDs are supplied.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="diffIds"/> is null.</exception>
public Task<IReadOnlyList<LayerDocument>> GetByDiffIdsAsync(IEnumerable<string> diffIds, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(diffIds);

    // Drop blank entries and duplicates so the ANY(...) array parameter stays minimal;
    // diffIDs are exact hashes, so ordinal comparison is correct.
    var ids = diffIds
        .Where(static d => !string.IsNullOrWhiteSpace(d))
        .Distinct(StringComparer.Ordinal)
        .ToArray();
    if (ids.Length == 0)
    {
        return Task.FromResult<IReadOnlyList<LayerDocument>>([]);
    }

    var sql = $"""
        SELECT layer_digest, diff_id, media_type, size_bytes, created_at_utc, last_seen_at_utc, diff_id_computed_at_utc
        FROM {Table}
        WHERE diff_id = ANY(@diff_ids)
        """;
    return QueryAsync(
        Tenant,
        sql,
        cmd => AddParameter(cmd, "diff_ids", ids),
        MapLayer,
        cancellationToken);
}
/// <summary>
/// Materialises a <see cref="LayerDocument"/> from the current row of <paramref name="reader"/>.
/// Nullable columns (diff_id, diff_id_computed_at_utc) are mapped to null when the row holds DBNull.
/// </summary>
private static LayerDocument MapLayer(NpgsqlDataReader reader)
{
    // Resolve every column ordinal exactly once up front.
    var digestOrdinal = reader.GetOrdinal("layer_digest");
    var diffIdOrdinal = reader.GetOrdinal("diff_id");
    var mediaTypeOrdinal = reader.GetOrdinal("media_type");
    var sizeOrdinal = reader.GetOrdinal("size_bytes");
    var createdOrdinal = reader.GetOrdinal("created_at_utc");
    var lastSeenOrdinal = reader.GetOrdinal("last_seen_at_utc");
    var computedOrdinal = reader.GetOrdinal("diff_id_computed_at_utc");

    var document = new LayerDocument
    {
        LayerDigest = reader.GetString(digestOrdinal),
        MediaType = reader.GetString(mediaTypeOrdinal),
        SizeBytes = reader.GetInt64(sizeOrdinal),
        CreatedAtUtc = reader.GetFieldValue<DateTime>(createdOrdinal),
        LastSeenAtUtc = reader.GetFieldValue<DateTime>(lastSeenOrdinal),
    };

    if (!reader.IsDBNull(diffIdOrdinal))
    {
        document.DiffId = reader.GetString(diffIdOrdinal);
    }
    if (!reader.IsDBNull(computedOrdinal))
    {
        document.DiffIdComputedAtUtc = reader.GetFieldValue<DateTime>(computedOrdinal);
    }
    return document;
}
}

View File

@@ -0,0 +1,176 @@
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Scanner.Cache.Abstractions;
using StellaOps.Scanner.Cache.LayerCache;
using StellaOps.TestKit;
using Xunit;
namespace StellaOps.Scanner.Cache.Tests.LayerCache;
/// <summary>
/// Exercises <see cref="LayerCacheStore"/> TTL behaviour deterministically by driving a
/// <see cref="FakeTimeProvider"/> instead of the wall clock.
/// </summary>
[Trait("Category", TestCategories.Unit)]
public sealed class LayerCacheStoreTimeProviderTests : IDisposable
{
    private readonly FakeTimeProvider _clock;
    private readonly LayerCacheStore _sut;
    private readonly string _cacheRoot;

    public LayerCacheStoreTimeProviderTests()
    {
        // Pin the clock to a fixed instant so TTL math depends only on explicit Advance() calls.
        _clock = new FakeTimeProvider(new DateTimeOffset(2026, 1, 18, 12, 0, 0, TimeSpan.Zero));

        // Each test instance gets its own throw-away cache directory under the OS temp path.
        _cacheRoot = Path.Combine(Path.GetTempPath(), $"layer-cache-test-{Guid.NewGuid():N}");
        Directory.CreateDirectory(_cacheRoot);

        var options = Options.Create(new ScannerCacheOptions
        {
            RootPath = _cacheRoot,
            LayerTtl = TimeSpan.FromHours(24)
        });
        _sut = new LayerCacheStore(options, NullLogger<LayerCacheStore>.Instance, _clock);
    }

    public void Dispose()
    {
        if (!Directory.Exists(_cacheRoot))
        {
            return;
        }
        try
        {
            Directory.Delete(_cacheRoot, recursive: true);
        }
        catch
        {
            // Best-effort cleanup; a locked temp directory must not fail the test run.
        }
    }

    /// <summary>Builds a minimal put request carrying a single five-byte JSON artifact.</summary>
    private static LayerCachePutRequest CreatePutRequest(string layerDigest)
    {
        var payload = new MemoryStream(new byte[] { 1, 2, 3, 4, 5 });
        var artifacts = new List<LayerCacheArtifactContent>
        {
            new("sbom.json", payload, "application/json")
        };
        return new LayerCachePutRequest(
            layerDigest: layerDigest,
            architecture: "amd64",
            mediaType: "application/vnd.oci.image.layer.v1.tar+gzip",
            metadata: new Dictionary<string, string>(),
            artifacts: artifacts);
    }

    /// <summary>Produces a unique digest-shaped key so tests never collide on disk.</summary>
    private static string NewDigest() => "sha256:" + Guid.NewGuid().ToString("N");

    [Fact]
    public async Task TryGetAsync_NewEntry_ReturnsEntry()
    {
        var digest = NewDigest();
        await _sut.PutAsync(CreatePutRequest(digest));

        var hit = await _sut.TryGetAsync(digest);

        hit.Should().NotBeNull();
        hit!.LayerDigest.Should().Be(digest);
    }

    [Fact]
    public async Task TryGetAsync_AfterTtlExpires_ReturnsNull()
    {
        var digest = NewDigest();
        await _sut.PutAsync(CreatePutRequest(digest));
        (await _sut.TryGetAsync(digest)).Should().NotBeNull();

        // 25 h is past the configured 24 h TTL, so the entry must be evicted.
        _clock.Advance(TimeSpan.FromHours(25));

        (await _sut.TryGetAsync(digest)).Should().BeNull();
    }

    [Fact]
    public async Task TryGetAsync_BeforeTtlExpires_ReturnsEntry()
    {
        var digest = NewDigest();
        await _sut.PutAsync(CreatePutRequest(digest));

        // Halfway through the TTL window the entry must still be served.
        _clock.Advance(TimeSpan.FromHours(12));
        var hit = await _sut.TryGetAsync(digest);

        hit.Should().NotBeNull();
        hit!.LayerDigest.Should().Be(digest);
    }

    [Fact]
    public async Task TryGetAsync_AtExactTtl_ReturnsNull()
    {
        var digest = NewDigest();
        await _sut.PutAsync(CreatePutRequest(digest));

        // One second past the 24 h boundary: the entry must be treated as expired.
        _clock.Advance(TimeSpan.FromHours(24).Add(TimeSpan.FromSeconds(1)));

        (await _sut.TryGetAsync(digest)).Should().BeNull();
    }

    [Fact]
    public async Task TryGetAsync_UpdatesLastAccessed()
    {
        var digest = NewDigest();
        await _sut.PutAsync(CreatePutRequest(digest));

        await _sut.TryGetAsync(digest);
        var firstAccess = _clock.GetUtcNow();

        _clock.Advance(TimeSpan.FromHours(1));
        var hit = await _sut.TryGetAsync(digest);

        hit.Should().NotBeNull();
        hit!.LastAccessed.Should().BeAfter(firstAccess);
    }

    [Fact]
    public async Task PutAsync_SetsCorrectCachedAt()
    {
        var digest = NewDigest();
        var expected = _clock.GetUtcNow();

        var entry = await _sut.PutAsync(CreatePutRequest(digest));

        entry.CachedAt.Should().Be(expected);
    }
}

View File

@@ -9,6 +9,8 @@
<ItemGroup>
<PackageReference Include="FluentAssertions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.TimeProvider.Testing" />
<PackageReference Include="Moq" />
</ItemGroup>
<ItemGroup>
<Compile Remove="..\StellaOps.Concelier.Tests.Shared\AssemblyInfo.cs" />

View File

@@ -0,0 +1,361 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright (c) StellaOps
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using StellaOps.Scanner.Core.Normalization;
using StellaOps.TestKit;
namespace StellaOps.Scanner.Core.Tests.Normalization;
/// <summary>
/// Unit tests for <c>PackageNameNormalizer</c>: PURL parsing, per-ecosystem name normalization,
/// alias lookup, equivalence checks, fallbacks, false-positive prevention, caching, and determinism.
/// </summary>
public class PackageNameNormalizerTests
{
private readonly PackageNameNormalizer _normalizer;
// Constructs a normalizer with caching and file-hash fallback enabled for all tests.
public PackageNameNormalizerTests()
{
var options = Options.Create(new PackageNameNormalizerOptions
{
EnableCaching = true,
EnableFileHashFallback = true
});
_normalizer = new PackageNameNormalizer(
options,
NullLogger<PackageNameNormalizer>.Instance);
}
#region PURL Parsing Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_ValidPurl_ReturnsParsedResult()
{
var result = await _normalizer.NormalizeAsync("pkg:npm/lodash@4.17.21");
result.CanonicalPurl.Should().Be("pkg:npm/lodash@4.17.21");
result.Ecosystem.Should().Be("npm");
result.Name.Should().Be("lodash");
result.Version.Should().Be("4.17.21");
result.Confidence.Should().BeGreaterThan(0.9);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_ScopedNpmPackage_NormalizesCorrectly()
{
// npm scope (@types) maps to the PURL namespace component.
var result = await _normalizer.NormalizeAsync("pkg:npm/@types/node@18.0.0");
result.Ecosystem.Should().Be("npm");
result.Namespace.Should().Be("types");
result.Name.Should().Be("node");
result.Version.Should().Be("18.0.0");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_PurlWithoutVersion_ParsesCorrectly()
{
var result = await _normalizer.NormalizeAsync("pkg:pypi/requests");
result.CanonicalPurl.Should().Be("pkg:pypi/requests");
result.Ecosystem.Should().Be("pypi");
result.Name.Should().Be("requests");
result.Version.Should().BeNull();
}
#endregion
#region Ecosystem Normalization Tests
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData("pkg:npm/Lodash@4.17.21", "pkg:npm/lodash@4.17.21")]
[InlineData("pkg:npm/REACT@18.0.0", "pkg:npm/react@18.0.0")]
public async Task NormalizeAsync_NpmPackage_LowercasesName(string input, string expected)
{
var result = await _normalizer.NormalizeAsync(input);
result.CanonicalPurl.Should().Be(expected);
}
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData("pkg:pypi/Flask_RESTful@0.3.9", "pkg:pypi/flask-restful@0.3.9")]
[InlineData("pkg:pypi/Django.REST.Framework@3.14.0", "pkg:pypi/django-rest-framework@3.14.0")]
[InlineData("pkg:pypi/Scikit_Learn@1.0.0", "pkg:pypi/scikit-learn@1.0.0")]
public async Task NormalizeAsync_PypiPackage_NormalizesNameCorrectly(string input, string expected)
{
// PyPI names are case-insensitive with '_'/'.' treated as '-' (PEP 503-style normalization).
var result = await _normalizer.NormalizeAsync(input);
result.CanonicalPurl.Should().Be(expected);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_DebianPackage_SetsDefaultNamespace()
{
var result = await _normalizer.NormalizeAsync("pkg:deb/libc6");
result.Namespace.Should().Be("debian");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_GolangPackage_NormalizesModulePath()
{
var result = await _normalizer.NormalizeAsync("pkg:golang/github.com/Sirupsen/Logrus@1.9.0");
result.Name.Should().Be("github.com/sirupsen/logrus");
}
#endregion
#region Alias Lookup Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_KnownAlias_ReturnsCanonical()
{
// PIL is an alias for pillow
var result = await _normalizer.NormalizeAsync("PIL");
result.CanonicalPurl.Should().Contain("pillow");
result.Method.Should().Be(NormalizationMethod.AliasLookup);
result.Confidence.Should().Be(1.0);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_SklearnAlias_NormalizesToScikitLearn()
{
var result = await _normalizer.NormalizeAsync("sklearn");
result.CanonicalPurl.Should().Contain("scikit-learn");
result.Method.Should().Be(NormalizationMethod.AliasLookup);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetAliasesAsync_KnownPackage_ReturnsAliases()
{
var aliases = await _normalizer.GetAliasesAsync("pkg:pypi/pillow");
aliases.Should().NotBeEmpty();
aliases.Should().Contain("PIL");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetAliasesAsync_UnknownPackage_ReturnsEmpty()
{
var aliases = await _normalizer.GetAliasesAsync("pkg:npm/unknown-package-xyz");
aliases.Should().BeEmpty();
}
#endregion
#region Equivalence Tests
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData("pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.21", true)]
[InlineData("pkg:npm/Lodash@4.17.20", "pkg:npm/lodash@4.17.21", true)]
[InlineData("pkg:npm/lodash@4.17.20", "pkg:npm/underscore@1.13.6", false)]
public async Task AreEquivalentAsync_PackagesWithDifferentVersions_ComparesIdentity(
string pkg1, string pkg2, bool expected)
{
// Equivalence compares package identity only; versions are intentionally ignored.
var result = await _normalizer.AreEquivalentAsync(pkg1, pkg2);
result.Should().Be(expected);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task AreEquivalentAsync_AliasAndCanonical_ReturnsTrue()
{
// PIL and pillow should be equivalent
var result = await _normalizer.AreEquivalentAsync("PIL", "pkg:pypi/pillow@9.0.0");
result.Should().BeTrue();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task AreEquivalentAsync_DifferentPackages_ReturnsFalse()
{
var result = await _normalizer.AreEquivalentAsync("pkg:npm/react@18.0.0", "pkg:npm/vue@3.0.0");
result.Should().BeFalse();
}
#endregion
#region Non-PURL Parsing Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_NpmStyleReference_ParsesCorrectly()
{
var result = await _normalizer.NormalizeAsync("@types/node@18.0.0");
result.Ecosystem.Should().Be("npm");
result.Namespace.Should().Be("types");
result.Name.Should().Be("node");
result.Version.Should().Be("18.0.0");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_PipStyleReference_ParsesCorrectly()
{
// pip requirement syntax: name==version.
var result = await _normalizer.NormalizeAsync("requests==2.28.0");
result.Ecosystem.Should().Be("pypi");
result.Name.Should().Be("requests");
result.Version.Should().Be("2.28.0");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_DebianStyleReference_ParsesCorrectly()
{
// dpkg syntax: name=version.
var result = await _normalizer.NormalizeAsync("libssl3=3.0.8-1");
result.Ecosystem.Should().Be("deb");
result.Name.Should().Be("libssl3");
result.Version.Should().Be("3.0.8-1");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_GoModulePath_ParsesCorrectly()
{
var result = await _normalizer.NormalizeAsync("github.com/gin-gonic/gin@v1.9.0");
result.Ecosystem.Should().Be("golang");
result.Name.Should().Contain("gin-gonic/gin");
result.Version.Should().Be("v1.9.0");
}
#endregion
#region Fallback Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_UnknownFormat_ReturnsPassthrough()
{
// Unrecognized references are passed through with low confidence rather than rejected.
var result = await _normalizer.NormalizeAsync("some-unknown-reference");
result.Method.Should().Be(NormalizationMethod.Passthrough);
result.Confidence.Should().BeLessThan(0.5);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_FilePathReference_UsesFileHashFallback()
{
var result = await _normalizer.NormalizeAsync("/usr/lib/libcustom.so");
result.Method.Should().Be(NormalizationMethod.FileHashFallback);
result.FileHash.Should().NotBeNullOrEmpty();
}
#endregion
#region False Positive Prevention Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task AreEquivalentAsync_SimilarButDifferentPackages_ReturnsFalse()
{
// react and react-dom are different packages, even though related
var result = await _normalizer.AreEquivalentAsync(
"pkg:npm/react@18.0.0",
"pkg:npm/react-dom@18.0.0");
result.Should().BeFalse();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task AreEquivalentAsync_SimilarNamesAcrossEcosystems_ReturnsFalse()
{
// requests in npm vs pypi are different packages
var result = await _normalizer.AreEquivalentAsync(
"pkg:npm/requests@1.0.0",
"pkg:pypi/requests@2.28.0");
result.Should().BeFalse();
}
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData("lodash", "underscore")]
[InlineData("express", "fastify")]
[InlineData("react", "vue")]
public async Task AreEquivalentAsync_SimilarLibraries_ReturnsFalse(string pkg1, string pkg2)
{
var result = await _normalizer.AreEquivalentAsync(
$"pkg:npm/{pkg1}@1.0.0",
$"pkg:npm/{pkg2}@1.0.0");
result.Should().BeFalse();
}
#endregion
#region Caching Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_SameInput_ReturnsCachedResult()
{
var result1 = await _normalizer.NormalizeAsync("pkg:npm/lodash@4.17.21");
var result2 = await _normalizer.NormalizeAsync("pkg:npm/lodash@4.17.21");
// Results should be identical (potentially same reference if cached)
result1.CanonicalPurl.Should().Be(result2.CanonicalPurl);
result1.Confidence.Should().Be(result2.Confidence);
}
#endregion
#region Determinism Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task NormalizeAsync_SameInput_ProducesDeterministicOutput()
{
// Two full passes over the same inputs must yield identical purl/method/confidence triples.
var inputs = new[]
{
"pkg:npm/lodash@4.17.21",
"pkg:pypi/requests@2.28.0",
"pkg:deb/debian/libc6@2.31-13",
"PIL",
"sklearn"
};
var results1 = new List<NormalizedPackageIdentity>();
var results2 = new List<NormalizedPackageIdentity>();
foreach (var input in inputs)
{
results1.Add(await _normalizer.NormalizeAsync(input));
}
foreach (var input in inputs)
{
results2.Add(await _normalizer.NormalizeAsync(input));
}
for (var i = 0; i < inputs.Length; i++)
{
results1[i].CanonicalPurl.Should().Be(results2[i].CanonicalPurl);
results1[i].Method.Should().Be(results2[i].Method);
results1[i].Confidence.Should().Be(results2[i].Confidence);
}
}
#endregion
}

View File

@@ -16,6 +16,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="FluentAssertions" />
<PackageReference Include="Microsoft.Extensions.TimeProvider.Testing" />
<PackageReference Include="Moq" />
</ItemGroup>
<ItemGroup>

View File

@@ -0,0 +1,207 @@
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Scanner.Core.Configuration;
using StellaOps.Scanner.Core.TrustAnchors;
using StellaOps.TestKit;
using Xunit;
namespace StellaOps.Scanner.Core.Tests;
/// <summary>
/// Unit tests for <see cref="TrustAnchorRegistry"/> with FakeTimeProvider for deterministic expiration testing.
/// Verifies that expired anchors are skipped, future-dated anchors are honoured, and resolution
/// re-evaluates expiry as the injected clock advances.
/// </summary>
[Trait("Category", TestCategories.Unit)]
public sealed class TrustAnchorRegistryTimeProviderTests
{
// NOTE(review): StaticOptionsMonitor and StubKeyLoader appear to be test doubles defined
// elsewhere in this test project — confirm before moving this file.
private readonly FakeTimeProvider _timeProvider;
private readonly DateTimeOffset _fixedNow;
// Pins "now" to a fixed instant; all ExpiresAt values below are relative to it.
public TrustAnchorRegistryTimeProviderTests()
{
_fixedNow = new DateTimeOffset(2026, 1, 18, 12, 0, 0, TimeSpan.Zero);
_timeProvider = new FakeTimeProvider(_fixedNow);
}
[Fact]
public void ResolveForPurl_WithExpiredAnchor_SkipsIt()
{
// Arrange
var options = new OfflineKitOptions
{
Enabled = true,
TrustAnchors = new List<TrustAnchorConfig>
{
new()
{
AnchorId = "expired",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:aaaa" },
ExpiresAt = _fixedNow.AddDays(-1) // Already expired
},
new()
{
AnchorId = "active",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:bbbb" },
}
}
};
var keys = new Dictionary<string, byte[]>(StringComparer.OrdinalIgnoreCase)
{
["aaaa"] = new byte[] { 0x01, 0x02 },
["bbbb"] = new byte[] { 0x03, 0x04 },
};
var registry = new TrustAnchorRegistry(
new StaticOptionsMonitor<OfflineKitOptions>(options),
new StubKeyLoader(keys),
NullLogger<TrustAnchorRegistry>.Instance,
_timeProvider);
// Act
var resolution = registry.ResolveForPurl("pkg:npm/foo@1.0.0");
// Assert - the expired anchor must be skipped in favour of the unexpired one.
resolution.Should().NotBeNull();
resolution!.AnchorId.Should().Be("active");
}
[Fact]
public void ResolveForPurl_WithFutureExpirationAnchor_UsesIt()
{
// Arrange
var options = new OfflineKitOptions
{
Enabled = true,
TrustAnchors = new List<TrustAnchorConfig>
{
new()
{
AnchorId = "not-yet-expired",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:aaaa" },
ExpiresAt = _fixedNow.AddDays(1) // Expires tomorrow
}
}
};
var keys = new Dictionary<string, byte[]>(StringComparer.OrdinalIgnoreCase)
{
["aaaa"] = new byte[] { 0x01, 0x02 },
};
var registry = new TrustAnchorRegistry(
new StaticOptionsMonitor<OfflineKitOptions>(options),
new StubKeyLoader(keys),
NullLogger<TrustAnchorRegistry>.Instance,
_timeProvider);
// Act
var resolution = registry.ResolveForPurl("pkg:npm/foo@1.0.0");
// Assert
resolution.Should().NotBeNull();
resolution!.AnchorId.Should().Be("not-yet-expired");
}
[Fact]
public void ResolveForPurl_AfterTimeAdvances_PreviouslyValidAnchorExpires()
{
// Arrange - a short-lived npm-specific anchor plus a catch-all fallback.
var options = new OfflineKitOptions
{
Enabled = true,
TrustAnchors = new List<TrustAnchorConfig>
{
new()
{
AnchorId = "short-lived",
PurlPattern = "pkg:npm/*",
AllowedKeyIds = new List<string> { "sha256:aaaa" },
ExpiresAt = _fixedNow.AddHours(1) // Expires in 1 hour
},
new()
{
AnchorId = "fallback",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:bbbb" },
}
}
};
var keys = new Dictionary<string, byte[]>(StringComparer.OrdinalIgnoreCase)
{
["aaaa"] = new byte[] { 0x01, 0x02 },
["bbbb"] = new byte[] { 0x03, 0x04 },
};
var registry = new TrustAnchorRegistry(
new StaticOptionsMonitor<OfflineKitOptions>(options),
new StubKeyLoader(keys),
NullLogger<TrustAnchorRegistry>.Instance,
_timeProvider);
// Act - before expiration
var resolutionBefore = registry.ResolveForPurl("pkg:npm/foo@1.0.0");
resolutionBefore.Should().NotBeNull();
resolutionBefore!.AnchorId.Should().Be("short-lived");
// Advance time past expiration
_timeProvider.Advance(TimeSpan.FromHours(2));
// Act - after expiration
var resolutionAfter = registry.ResolveForPurl("pkg:npm/foo@1.0.0");
// Assert - expiry is re-evaluated per resolution, so the fallback now wins.
resolutionAfter.Should().NotBeNull();
resolutionAfter!.AnchorId.Should().Be("fallback");
}
[Fact]
public void ResolveForPurl_ExactExpirationTime_TreatsAsExpired()
{
// Arrange
var options = new OfflineKitOptions
{
Enabled = true,
TrustAnchors = new List<TrustAnchorConfig>
{
new()
{
AnchorId = "expires-now",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:aaaa" },
ExpiresAt = _fixedNow // Expires exactly now
},
new()
{
AnchorId = "fallback",
PurlPattern = "*",
AllowedKeyIds = new List<string> { "sha256:bbbb" },
}
}
};
var keys = new Dictionary<string, byte[]>(StringComparer.OrdinalIgnoreCase)
{
["aaaa"] = new byte[] { 0x01, 0x02 },
["bbbb"] = new byte[] { 0x03, 0x04 },
};
var registry = new TrustAnchorRegistry(
new StaticOptionsMonitor<OfflineKitOptions>(options),
new StubKeyLoader(keys),
NullLogger<TrustAnchorRegistry>.Instance,
_timeProvider);
// Act
var resolution = registry.ResolveForPurl("pkg:npm/foo@1.0.0");
// Assert - should use fallback since exactly-now is treated as expired
resolution.Should().NotBeNull();
resolution!.AnchorId.Should().Be("fallback");
}
}

View File

@@ -349,4 +349,222 @@ public class SbomDiffEngineTests
}
#endregion
#region Layer Attribution Tests
// Builds a per-layer SBOM input with the given diffID, layer index, and component set.
private static LayerSbomInput CreateLayerInput(string diffId, int layerIndex, params ComponentRef[] components)
{
return new LayerSbomInput
{
DiffId = diffId,
LayerIndex = layerIndex,
Components = components.ToList()
};
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_AddedComponent_AttributesToCorrectLayer()
{
// A component present only in the new top layer must be attributed to that layer's diffID and index.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0")),
CreateLayerInput("sha256:app", 1, CreateComponent("app-pkg", "2.0.0"))
};
var diff = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff.BaseDiff.Summary.Added.Should().Be(1);
diff.ChangesByLayer.Should().HaveCount(1);
diff.ChangesByLayer[0].DiffId.Should().Be("sha256:app")
;diff.ChangesByLayer[0].LayerIndex.Should().Be(1);
diff.ChangesByLayer[0].Changes.Should().HaveCount(1);
diff.ChangesByLayer[0].Changes[0].Type.Should().Be(ComponentDeltaType.Added);
diff.ChangesByLayer[0].Changes[0].SourceLayerDiffId.Should().Be("sha256:app");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_RemovedComponent_AttributesToOldLayer()
{
// A removed component is attributed to the layer it lived in on the FROM side.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0")),
CreateLayerInput("sha256:app", 1, CreateComponent("app-pkg", "2.0.0"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0"))
};
var diff = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff.BaseDiff.Summary.Removed.Should().Be(1);
var removedDelta = diff.BaseDiff.Deltas.First(d => d.Type == ComponentDeltaType.Removed);
removedDelta.SourceLayerDiffId.Should().Be("sha256:app");
removedDelta.SourceLayerIndex.Should().Be(1);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_VersionChange_AttributesToNewLayer()
{
// A version bump is attributed to the NEW layer that carries the new version.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base-old", 0, CreateComponent("lodash", "4.17.20"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base-new", 0, CreateComponent("lodash", "4.17.21"))
};
var diff = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff.BaseDiff.Summary.VersionChanged.Should().Be(1);
var versionDelta = diff.BaseDiff.Deltas.First(d => d.Type == ComponentDeltaType.VersionChanged);
versionDelta.SourceLayerDiffId.Should().Be("sha256:base-new");
versionDelta.SourceLayerIndex.Should().Be(0);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_MultipleLayersWithChanges_GroupsCorrectly()
{
// Changes originating from different layers must be grouped under their own diffIDs.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base", 0,
CreateComponent("base-pkg", "1.0.0"),
CreateComponent("removed-from-base", "1.0.0"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base-new", 0,
CreateComponent("base-pkg", "1.0.0"),
CreateComponent("new-in-base", "1.0.0")),
CreateLayerInput("sha256:app", 1,
CreateComponent("app-pkg", "2.0.0"))
};
var diff = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff.ChangesByLayer.Should().HaveCount(2);
var baseChanges = diff.ChangesByLayer.First(g => g.DiffId == "sha256:base-new");
baseChanges.Changes.Should().Contain(c => c.After!.Name == "new-in-base");
var appChanges = diff.ChangesByLayer.First(g => g.DiffId == "sha256:app");
appChanges.Changes.Should().Contain(c => c.After!.Name == "app-pkg");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_LayerSummaries_CalculatedCorrectly()
{
// Per-layer summaries report totals and added counts independently for each TO-side layer.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0")),
CreateLayerInput("sha256:app", 1,
CreateComponent("app-pkg1", "1.0.0"),
CreateComponent("app-pkg2", "2.0.0"))
};
var diff = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff.LayerSummaries.Should().HaveCount(2);
var baseSummary = diff.LayerSummaries.First(s => s.DiffId == "sha256:base");
baseSummary.TotalComponents.Should().Be(1);
baseSummary.Added.Should().Be(0);
var appSummary = diff.LayerSummaries.First(s => s.DiffId == "sha256:app");
appSummary.TotalComponents.Should().Be(2);
appSummary.Added.Should().Be(2);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void FindComponentLayer_ComponentExists_ReturnsCorrectLayer()
{
// Lookup by canonical purl returns the diffID of the layer containing the component.
var layers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0")),
CreateLayerInput("sha256:app", 1, CreateComponent("target-pkg", "2.0.0"))
};
var result = SbomDiffEngine.FindComponentLayer(layers, "pkg:npm/target-pkg@2.0.0");
result.Should().Be("sha256:app");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void FindComponentLayer_ComponentNotFound_ReturnsNull()
{
// A purl absent from every layer yields null rather than throwing.
var layers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("base-pkg", "1.0.0"))
};
var result = SbomDiffEngine.FindComponentLayer(layers, "pkg:npm/nonexistent@1.0.0");
result.Should().BeNull();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeLayerAttributedDiff_Determinism_SameInputsProduceSameOutput()
{
// Running the attribution twice over identical inputs must produce identical summaries and group counts.
var fromId = SbomId.New();
var toId = SbomId.New();
var fromLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("pkg1", "1.0.0")),
CreateLayerInput("sha256:mid", 1, CreateComponent("pkg2", "1.0.0"))
};
var toLayers = new[]
{
CreateLayerInput("sha256:base", 0, CreateComponent("pkg1", "1.0.0")),
CreateLayerInput("sha256:mid-new", 1, CreateComponent("pkg2", "2.0.0")),
CreateLayerInput("sha256:app", 2, CreateComponent("pkg3", "1.0.0"))
};
var diff1 = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
var diff2 = _engine.ComputeLayerAttributedDiff(fromId, fromLayers, toId, toLayers);
diff1.BaseDiff.Summary.Should().BeEquivalentTo(diff2.BaseDiff.Summary);
diff1.ChangesByLayer.Length.Should().Be(diff2.ChangesByLayer.Length);
diff1.LayerSummaries.Length.Should().Be(diff2.LayerSummaries.Length);
}
#endregion
}

View File

@@ -0,0 +1,155 @@
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Scanner.Reachability.Slices;
using Xunit;
namespace StellaOps.Scanner.Reachability.Tests.Slices;
/// <summary>
/// Unit tests for <see cref="InMemorySliceCache"/> with proper timer testing using FakeTimeProvider.
/// All time in the fixture flows from <see cref="TestEpoch"/> so runs are fully deterministic.
/// </summary>
[Trait("Category", "Unit")]
public sealed class InMemorySliceCacheTests : IDisposable
{
    // Fixed baseline clock. Also used for CachedAt in test payloads (instead of
    // DateTimeOffset.UtcNow) so the fixture never reads the real wall clock.
    private static readonly DateTimeOffset TestEpoch = new(2026, 1, 18, 12, 0, 0, TimeSpan.Zero);

    private readonly FakeTimeProvider _timeProvider;
    private readonly InMemorySliceCache _cache;

    public InMemorySliceCacheTests()
    {
        _timeProvider = new FakeTimeProvider(TestEpoch);
        _cache = new InMemorySliceCache(
            NullLogger<InMemorySliceCache>.Instance,
            _timeProvider);
    }

    public void Dispose()
    {
        _cache.Dispose();
    }

    [Fact]
    public async Task TryGetAsync_WithValidEntry_ReturnsValue()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().NotBeNull();
        retrieved!.SliceDigest.Should().Be("digest1");
    }

    [Fact]
    public async Task TryGetAsync_WithExpiredEntry_ReturnsNull()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromMinutes(30));

        // Advance time past expiration
        _timeProvider.Advance(TimeSpan.FromMinutes(31));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().BeNull();
    }

    [Fact]
    public async Task TryGetAsync_BeforeExpiration_ReturnsValue()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Advance time but not past expiration (59 < 60 minutes)
        _timeProvider.Advance(TimeSpan.FromMinutes(59));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().NotBeNull();
    }

    [Fact]
    public async Task SetAsync_OverwritesExistingEntry()
    {
        // Arrange
        var result1 = CreateTestResult("digest1");
        var result2 = CreateTestResult("digest2");
        await _cache.SetAsync("key1", result1, TimeSpan.FromHours(1));

        // Act - setting the same key again replaces the previous value
        await _cache.SetAsync("key1", result2, TimeSpan.FromHours(1));
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().NotBeNull();
        retrieved!.SliceDigest.Should().Be("digest2");
    }

    [Fact]
    public async Task TryGetAsync_NonExistentKey_ReturnsNull()
    {
        // Act
        var retrieved = await _cache.TryGetAsync("nonexistent");

        // Assert
        retrieved.Should().BeNull();
    }

    [Fact]
    public async Task GetStatistics_ReturnsAccurateStats()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Act - one hit, one miss
        await _cache.TryGetAsync("key1");
        await _cache.TryGetAsync("nonexistent");
        var stats = _cache.GetStatistics();

        // Assert
        stats.HitCount.Should().Be(1);
        stats.MissCount.Should().Be(1);
        stats.EntryCount.Should().Be(1);
    }

    [Fact]
    public async Task EvictionTimer_AdvanceTime_EvictsExpiredEntries()
    {
        // Arrange - add an entry with a TTL shorter than the eviction timer interval
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromSeconds(30));

        // Advance fake time past the 60-second eviction timer interval
        _timeProvider.Advance(TimeSpan.FromSeconds(61));

        // NOTE(review): this short real delay gives the timer callback a chance to
        // finish any async work; FakeTimeProvider fires timers synchronously during
        // Advance, so confirm against InMemorySliceCache whether it is still needed.
        await Task.Delay(50);

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert - should be evicted
        retrieved.Should().BeNull();
    }

    /// <summary>
    /// Builds a minimal cached result for the given digest. Uses the fixed
    /// <see cref="TestEpoch"/> for <c>CachedAt</c> so payloads are deterministic.
    /// </summary>
    private static CachedSliceResult CreateTestResult(string digest) => new()
    {
        SliceDigest = digest,
        Verdict = "safe",
        Confidence = 0.95,
        PathWitnesses = new List<string> { "path1", "path2" },
        CachedAt = TestEpoch
    };
}

View File

@@ -0,0 +1,174 @@
using FluentAssertions;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Scanner.Reachability.Slices;
using Xunit;
namespace StellaOps.Scanner.Reachability.Tests.Slices;
/// <summary>
/// Unit tests for <see cref="SliceCache"/> with proper timer testing using FakeTimeProvider.
/// All time in the fixture flows from <see cref="TestEpoch"/> so runs are fully deterministic.
/// </summary>
[Trait("Category", "Unit")]
public sealed class SliceCacheTests : IDisposable
{
    // Fixed baseline clock. Also used for CachedAt in test payloads (instead of
    // DateTimeOffset.UtcNow) so the fixture never reads the real wall clock.
    private static readonly DateTimeOffset TestEpoch = new(2026, 1, 18, 12, 0, 0, TimeSpan.Zero);

    private readonly FakeTimeProvider _timeProvider;
    private readonly SliceCache _cache;

    public SliceCacheTests()
    {
        _timeProvider = new FakeTimeProvider(TestEpoch);
        var options = Options.Create(new SliceCacheOptions
        {
            Enabled = true,
            Ttl = TimeSpan.FromHours(1),
            MaxItems = 100
        });
        _cache = new SliceCache(options, _timeProvider);
    }

    public void Dispose()
    {
        _cache.Dispose();
    }

    [Fact]
    public async Task TryGetAsync_WithValidEntry_ReturnsValue()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().NotBeNull();
        retrieved!.SliceDigest.Should().Be("digest1");
    }

    [Fact]
    public async Task TryGetAsync_WhenDisabled_ReturnsNull()
    {
        // Arrange - a cache constructed with Enabled = false ignores stores/lookups
        var options = Options.Create(new SliceCacheOptions { Enabled = false });
        using var cache = new SliceCache(options, _timeProvider);
        var result = CreateTestResult("digest1");
        await cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Act
        var retrieved = await cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().BeNull();
    }

    [Fact]
    public async Task TryGetAsync_WithExpiredEntry_ReturnsNull()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromMinutes(30));

        // Advance time past expiration
        _timeProvider.Advance(TimeSpan.FromMinutes(31));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().BeNull();
    }

    [Fact]
    public async Task TryGetAsync_BeforeExpiration_ReturnsValue()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Advance time but not past expiration (59 < 60 minutes)
        _timeProvider.Advance(TimeSpan.FromMinutes(59));

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert
        retrieved.Should().NotBeNull();
    }

    [Fact]
    public async Task SetAsync_ExceedsMaxItems_EvictsOldest()
    {
        // Arrange - create cache with a small capacity so eviction triggers quickly
        var options = Options.Create(new SliceCacheOptions
        {
            Enabled = true,
            MaxItems = 10,
            Ttl = TimeSpan.FromHours(1)
        });
        using var cache = new SliceCache(options, _timeProvider);

        // Fill to capacity, advancing the fake clock so each entry has a distinct access time
        for (int i = 0; i < 10; i++)
        {
            await cache.SetAsync($"key{i}", CreateTestResult($"digest{i}"), TimeSpan.FromHours(1));
            _timeProvider.Advance(TimeSpan.FromSeconds(1));
        }

        // Act - one past capacity should trigger eviction of the oldest entry
        await cache.SetAsync("key_new", CreateTestResult("digest_new"), TimeSpan.FromHours(1));

        // Assert - the newly added item must be present
        var retrieved = await cache.TryGetAsync("key_new");
        retrieved.Should().NotBeNull();
    }

    [Fact]
    public async Task EvictionTimer_AdvanceTime_EvictsExpiredEntries()
    {
        // Arrange - add an entry with a TTL shorter than the eviction timer interval
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromSeconds(30));

        // Advance fake time past the 1-minute eviction timer interval
        _timeProvider.Advance(TimeSpan.FromMinutes(1.5));

        // NOTE(review): this short real delay gives the timer callback a chance to
        // finish any async work; FakeTimeProvider fires timers synchronously during
        // Advance, so confirm against SliceCache whether it is still needed.
        await Task.Delay(50);

        // Act
        var retrieved = await _cache.TryGetAsync("key1");

        // Assert - should be evicted
        retrieved.Should().BeNull();
    }

    [Fact]
    public async Task GetStatistics_ReturnsAccurateHitMissCount()
    {
        // Arrange
        var result = CreateTestResult("digest1");
        await _cache.SetAsync("key1", result, TimeSpan.FromHours(1));

        // Act - two hits, one miss
        await _cache.TryGetAsync("key1"); // hit
        await _cache.TryGetAsync("key1"); // hit
        await _cache.TryGetAsync("nonexistent"); // miss
        var stats = _cache.GetStatistics();

        // Assert
        stats.HitCount.Should().Be(2);
        stats.MissCount.Should().Be(1);
    }

    /// <summary>
    /// Builds a minimal cached result for the given digest. Uses the fixed
    /// <see cref="TestEpoch"/> for <c>CachedAt</c> so payloads are deterministic.
    /// </summary>
    private static CachedSliceResult CreateTestResult(string digest) => new()
    {
        SliceDigest = digest,
        Verdict = "safe",
        Confidence = 0.95,
        PathWitnesses = new List<string> { "path1", "path2" },
        CachedAt = TestEpoch
    };
}

View File

@@ -11,8 +11,9 @@
<PackageReference Include="FsCheck" />
<PackageReference Include="FsCheck.Xunit.v3" />
<PackageReference Include="JsonSchema.Net" />
<PackageReference Include="Moq" />
</ItemGroup>
<PackageReference Include="Microsoft.Extensions.TimeProvider.Testing" />
<PackageReference Include="Moq" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\__Libraries\StellaOps.Scanner.Reachability\StellaOps.Scanner.Reachability.csproj" />