Doctor enhancements, setup enhancements, UI functionality and design consolidation, test project fixes, and product advisory attestation/Rekor and delta verification enhancements

This commit is contained in:
master
2026-01-19 09:02:59 +02:00
parent 8c4bf54aed
commit 17419ba7c4
809 changed files with 170738 additions and 12244 deletions

View File

@@ -0,0 +1,133 @@
using StellaOps.Scanner.Manifest.Models;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Service for capturing, storing, and comparing OCI image manifest snapshots.
/// Provides the foundation for delta-based CVE scanning by tracking layer diffIDs.
/// </summary>
/// <remarks>
/// Lookup-style members report a missing snapshot by completing with <c>null</c>;
/// every asynchronous member accepts a <see cref="CancellationToken"/> as its last parameter.
/// </remarks>
public interface IOciManifestSnapshotService
{
/// <summary>
/// Captures a snapshot of an OCI image manifest from a registry.
/// </summary>
/// <param name="imageReference">The image reference (e.g., docker.io/library/alpine:3.19).</param>
/// <param name="options">
/// Optional capture options (platform filter, timeout, persistence, diffID settings).
/// Pass <c>null</c> to use the defaults of <see cref="ManifestCaptureOptions"/>.
/// </param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The captured manifest snapshot, or null if the image could not be found.</returns>
Task<OciManifestSnapshot?> CaptureAsync(
string imageReference,
ManifestCaptureOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a previously captured manifest snapshot by manifest digest.
/// </summary>
/// <param name="manifestDigest">The manifest digest to look up (e.g., sha256:...).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The snapshot if found, null otherwise.</returns>
Task<OciManifestSnapshot?> GetByDigestAsync(
string manifestDigest,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a previously captured manifest snapshot by image reference.
/// Returns the most recent snapshot for the reference.
/// </summary>
/// <param name="imageReference">The image reference to look up, as it was supplied at capture time.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The most recent snapshot if found, null otherwise.</returns>
Task<OciManifestSnapshot?> GetByReferenceAsync(
string imageReference,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a manifest snapshot by its unique ID.
/// </summary>
/// <param name="snapshotId">The snapshot ID (see <see cref="OciManifestSnapshot.Id"/>).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The snapshot if found, null otherwise.</returns>
Task<OciManifestSnapshot?> GetByIdAsync(
Guid snapshotId,
CancellationToken cancellationToken = default);
/// <summary>
/// Compares two previously stored manifest snapshots, identified by digest, to find layer changes.
/// </summary>
/// <param name="oldManifestDigest">The digest of the older manifest.</param>
/// <param name="newManifestDigest">The digest of the newer manifest.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The comparison result showing layer changes, or null if either manifest not found.</returns>
Task<ManifestComparisonResult?> CompareAsync(
string oldManifestDigest,
string newManifestDigest,
CancellationToken cancellationToken = default);
/// <summary>
/// Compares two manifest snapshots directly, without any storage lookup.
/// </summary>
/// <param name="oldSnapshot">The older manifest snapshot.</param>
/// <param name="newSnapshot">The newer manifest snapshot.</param>
/// <returns>
/// The comparison result showing layer changes, grouped into unchanged, added, removed and modified layers.
/// </returns>
ManifestComparisonResult Compare(OciManifestSnapshot oldSnapshot, OciManifestSnapshot newSnapshot);
/// <summary>
/// Lists all snapshots for a given repository.
/// </summary>
/// <param name="registry">The registry hostname.</param>
/// <param name="repository">The repository path.</param>
/// <param name="limit">Maximum number of snapshots to return (defaults to 100).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The list of snapshots ordered by capture time descending.</returns>
Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
string registry,
string repository,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes old snapshots to free storage space.
/// </summary>
/// <param name="olderThan">Delete snapshots captured before this time.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The number of snapshots deleted.</returns>
Task<int> PruneAsync(
DateTimeOffset olderThan,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Settings that control how a single manifest snapshot capture is performed.
/// A default-constructed instance persists the snapshot and does not request diffID computation.
/// </summary>
public sealed record ManifestCaptureOptions
{
    /// <summary>
    /// When <c>true</c>, requests that diffIDs be computed for every layer during capture.
    /// When <c>false</c> (the default, chosen for a faster capture), diffIDs stay null and
    /// have to be computed separately.
    /// </summary>
    public bool ComputeDiffIds { get; init; }

    /// <summary>
    /// Platform selector for multi-arch images, for example "linux/amd64".
    /// <c>null</c> means the system's default platform is used.
    /// </summary>
    public string? PlatformFilter { get; init; }

    /// <summary>
    /// Optional time limit for the capture operation; <c>null</c> leaves it unset.
    /// </summary>
    public TimeSpan? Timeout { get; init; }

    /// <summary>
    /// When <c>true</c> (the default), the captured snapshot is stored in the database.
    /// When <c>false</c>, the snapshot is only returned to the caller.
    /// </summary>
    public bool Persist { get; init; } = true;

    /// <summary>
    /// Size ceiling, in bytes, above which a layer is skipped when computing diffIDs
    /// (to avoid timeouts). Defaults to 1 GB (2^30 bytes).
    /// </summary>
    public long MaxLayerSizeForDiffId { get; init; } = 1L << 30;
}

View File

@@ -0,0 +1,53 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scanner.Manifest.Persistence;
using StellaOps.Scanner.Manifest.Resolution;
using StellaOps.Scanner.Manifest.Reuse;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Dependency-injection helpers that register the Scanner.Manifest services.
/// Every service is registered with singleton lifetime, and each method returns the
/// same <see cref="IServiceCollection"/> so calls can be chained.
/// </summary>
public static class ManifestServiceCollectionExtensions
{
    /// <summary>
    /// Registers the manifest snapshot repository and the OCI manifest snapshot service.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddOciManifestSnapshotServices(this IServiceCollection services) =>
        services
            .AddSingleton<IManifestSnapshotRepository, ManifestSnapshotRepository>()
            .AddSingleton<IOciManifestSnapshotService, OciManifestSnapshotService>();

    /// <summary>
    /// Registers the diffID cache, the base image detector and the layer digest resolver.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddLayerDigestResolutionServices(this IServiceCollection services) =>
        services
            .AddSingleton<IDiffIdCache, DiffIdCache>()
            .AddSingleton<IBaseImageDetector, BaseImageDetector>()
            .AddSingleton<ILayerDigestResolver, LayerDigestResolver>();

    /// <summary>
    /// Registers the layer reuse detector.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddLayerReuseDetectionServices(this IServiceCollection services) =>
        services.AddSingleton<ILayerReuseDetector, LayerReuseDetector>();

    /// <summary>
    /// Registers everything in Scanner.Manifest: snapshot services, layer digest resolution
    /// services and layer reuse detection services, in that order.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddScannerManifestServices(this IServiceCollection services) =>
        services
            .AddOciManifestSnapshotServices()
            .AddLayerDigestResolutionServices()
            .AddLayerReuseDetectionServices();
}

View File

@@ -0,0 +1,123 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Outcome of diffing two OCI manifest snapshots: the two snapshots themselves plus the
/// layer changes grouped by kind, and a few figures derived from those groups.
/// </summary>
public sealed record ManifestComparisonResult
{
    /// <summary>
    /// The older snapshot (the "from" side of the comparison).
    /// </summary>
    public required OciManifestSnapshot OldSnapshot { get; init; }

    /// <summary>
    /// The newer snapshot (the "to" side of the comparison).
    /// </summary>
    public required OciManifestSnapshot NewSnapshot { get; init; }

    /// <summary>
    /// Layers present in both manifests with the same diffID, i.e. identical content.
    /// Empty unless set.
    /// </summary>
    public ImmutableArray<LayerChange> UnchangedLayers { get; init; } = ImmutableArray<LayerChange>.Empty;

    /// <summary>
    /// Layers found only in the new manifest. Empty unless set.
    /// </summary>
    public ImmutableArray<LayerChange> AddedLayers { get; init; } = ImmutableArray<LayerChange>.Empty;

    /// <summary>
    /// Layers found only in the old manifest. Empty unless set.
    /// </summary>
    public ImmutableArray<LayerChange> RemovedLayers { get; init; } = ImmutableArray<LayerChange>.Empty;

    /// <summary>
    /// Layers that occupy the same index in both manifests but carry different diffIDs.
    /// Empty unless set.
    /// </summary>
    public ImmutableArray<LayerChange> ModifiedLayers { get; init; } = ImmutableArray<LayerChange>.Empty;

    /// <summary>
    /// <c>true</c> when both snapshots have the same manifest digest (compared case-insensitively).
    /// </summary>
    public bool IsIdentical =>
        string.Equals(OldSnapshot.ManifestDigest, NewSnapshot.ManifestDigest, StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// <c>true</c> when at least one layer is shared between the manifests (by diffID).
    /// </summary>
    public bool HasSharedLayers => !UnchangedLayers.IsEmpty;

    /// <summary>
    /// Share of the new snapshot's layers that are unchanged, expressed as a percentage.
    /// A new snapshot without layers counts as 100.
    /// </summary>
    public double ReusePercentage => NewSnapshot.LayerCount switch
    {
        0 => 100.0,
        var layerCount => UnchangedLayers.Length * 100.0 / layerCount,
    };

    /// <summary>
    /// Number of layers that need scanning: added layers plus modified layers.
    /// </summary>
    public int LayersToScan => AddedLayers.Length + ModifiedLayers.Length;
}
/// <summary>
/// Describes how one layer differs between an old and a new manifest version.
/// </summary>
public sealed record LayerChange
{
    /// <summary>
    /// The kind of change this entry represents.
    /// </summary>
    public required LayerChangeType ChangeType { get; init; }

    /// <summary>
    /// Descriptor of the layer in the old manifest; null when the layer was added.
    /// </summary>
    public OciLayerDescriptor? OldLayer { get; init; }

    /// <summary>
    /// Descriptor of the layer in the new manifest; null when the layer was removed.
    /// </summary>
    public OciLayerDescriptor? NewLayer { get; init; }

    /// <summary>
    /// The diffID for this change: the new layer's diffID when it has one, otherwise the
    /// old layer's diffID. Null when neither side provides a diffID.
    /// </summary>
    public string? DiffId
    {
        get
        {
            if (NewLayer?.DiffId is { } fromNewLayer)
            {
                return fromNewLayer;
            }

            return OldLayer?.DiffId;
        }
    }

    /// <summary>
    /// The layer's index in the new manifest, or in the old manifest when there is no new layer.
    /// Returns -1 when neither descriptor is set.
    /// </summary>
    public int LayerIndex => (NewLayer ?? OldLayer)?.LayerIndex ?? -1;
}
/// <summary>
/// The type of change for a layer between manifest versions.
/// </summary>
/// <remarks>
/// Each member has an explicitly assigned numeric value (0-3).
/// NOTE(review): the explicit values suggest they may be persisted or serialized — confirm before renumbering.
/// </remarks>
public enum LayerChangeType
{
/// <summary>
/// Layer exists in both manifests with the same diffID.
/// </summary>
Unchanged = 0,
/// <summary>
/// Layer exists only in the new manifest.
/// </summary>
Added = 1,
/// <summary>
/// Layer exists only in the old manifest.
/// </summary>
Removed = 2,
/// <summary>
/// Layer at same index has different diffID.
/// </summary>
Modified = 3
}

View File

@@ -0,0 +1,47 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Describes one layer of an OCI image, pairing the compressed blob digest with the
/// uncompressed diffID when that is known.
/// </summary>
public sealed record OciLayerDescriptor
{
    /// <summary>
    /// Digest of the compressed layer blob (sha256:...) as stored in the registry;
    /// the content-addressable identifier of that blob.
    /// </summary>
    public required string Digest { get; init; }

    /// <summary>
    /// Digest of the decompressed layer content (sha256:...). It identifies the layer
    /// content independently of compression. Null until it has been computed.
    /// </summary>
    public string? DiffId { get; init; }

    /// <summary>
    /// Size of the compressed layer blob, in bytes.
    /// </summary>
    public long Size { get; init; }

    /// <summary>
    /// Media type of the layer, e.g. application/vnd.oci.image.layer.v1.tar+gzip.
    /// </summary>
    public required string MediaType { get; init; }

    /// <summary>
    /// Zero-based position of the layer within the image; 0 is the base layer.
    /// </summary>
    public int LayerIndex { get; init; }

    /// <summary>
    /// Annotations attached to the layer descriptor, if any.
    /// </summary>
    public ImmutableDictionary<string, string>? Annotations { get; init; }

    /// <summary>
    /// <c>true</c> when the layer is considered empty (used for scratch base images):
    /// either its size is zero or its media type contains "empty" (case-insensitive).
    /// </summary>
    public bool IsEmpty
    {
        get
        {
            // The size check comes first so the media type is only inspected for non-zero sizes.
            if (Size is 0L)
            {
                return true;
            }

            return MediaType.IndexOf("empty", StringComparison.OrdinalIgnoreCase) >= 0;
        }
    }
}

View File

@@ -0,0 +1,122 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Models;
/// <summary>
/// Represents a point-in-time snapshot of an OCI image manifest including layer diffIDs.
/// Used for delta scanning to identify changed layers between image versions.
/// </summary>
public sealed record OciManifestSnapshot
{
    /// <summary>
    /// Unique identifier for this snapshot. A new GUID is generated when not set explicitly.
    /// </summary>
    public Guid Id { get; init; } = Guid.NewGuid();

    /// <summary>
    /// The original image reference used to fetch this manifest (e.g., registry/repo:tag).
    /// </summary>
    public required string ImageReference { get; init; }

    /// <summary>
    /// The registry hostname (e.g., docker.io, ghcr.io).
    /// </summary>
    public required string Registry { get; init; }

    /// <summary>
    /// The repository path (e.g., library/alpine, myorg/myapp).
    /// </summary>
    public required string Repository { get; init; }

    /// <summary>
    /// The tag if specified in the original reference, null for digest-only references.
    /// </summary>
    public string? Tag { get; init; }

    /// <summary>
    /// The manifest digest (sha256:...) - the content-addressable identifier for the manifest.
    /// </summary>
    public required string ManifestDigest { get; init; }

    /// <summary>
    /// The config blob digest (sha256:...) - identifies the image configuration.
    /// </summary>
    public required string ConfigDigest { get; init; }

    /// <summary>
    /// The media type of the manifest (e.g., application/vnd.oci.image.manifest.v1+json).
    /// </summary>
    public required string MediaType { get; init; }

    /// <summary>
    /// The ordered list of layer descriptors from base to top. Empty unless set.
    /// </summary>
    public ImmutableArray<OciLayerDescriptor> Layers { get; init; } = ImmutableArray<OciLayerDescriptor>.Empty;

    /// <summary>
    /// The ordered list of diffIDs corresponding to each layer.
    /// May contain nulls for layers where diffID has not been computed. Empty unless set.
    /// </summary>
    public ImmutableArray<string?> DiffIds { get; init; } = ImmutableArray<string?>.Empty;

    /// <summary>
    /// Platform information (OS/architecture) for this manifest.
    /// </summary>
    public OciPlatformInfo? Platform { get; init; }

    /// <summary>
    /// Total compressed size of all layers in bytes.
    /// </summary>
    public long TotalSize { get; init; }

    /// <summary>
    /// The timestamp when this snapshot was captured.
    /// </summary>
    public DateTimeOffset CapturedAt { get; init; }

    /// <summary>
    /// Version of the snapshot service that created this record.
    /// </summary>
    public string? SnapshotVersion { get; init; }

    /// <summary>
    /// Whether all diffIDs have been computed for this snapshot: there must be exactly one
    /// diffID per layer and none of them may be null, empty or whitespace.
    /// </summary>
    /// <remarks>
    /// The length check matters because <c>All</c> is vacuously true for an empty sequence;
    /// without it a snapshot that has layers but no (or too few) diffIDs would be reported
    /// as complete. A snapshot with no layers and no diffIDs is still considered complete.
    /// </remarks>
    public bool DiffIdsComplete =>
        DiffIds.Length == Layers.Length && DiffIds.All(id => !string.IsNullOrWhiteSpace(id));

    /// <summary>
    /// The number of layers in this image.
    /// </summary>
    public int LayerCount => Layers.Length;
}
/// <summary>
/// Identifies the platform (operating system and CPU architecture) an OCI image targets.
/// </summary>
public sealed record OciPlatformInfo
{
    /// <summary>
    /// Operating system name, e.g. linux or windows.
    /// </summary>
    public required string Os { get; init; }

    /// <summary>
    /// CPU architecture, e.g. amd64 or arm64.
    /// </summary>
    public required string Architecture { get; init; }

    /// <summary>
    /// Architecture variant, if any (e.g. v8 for arm64).
    /// </summary>
    public string? Variant { get; init; }

    /// <summary>
    /// OS version, if any (primarily used for Windows images).
    /// </summary>
    public string? OsVersion { get; init; }

    /// <summary>
    /// Formats the platform as "os/architecture", with "/variant" appended when a
    /// non-blank variant is set. The OS version is not part of the output.
    /// </summary>
    public override string ToString()
    {
        var osAndArchitecture = $"{Os}/{Architecture}";
        if (string.IsNullOrWhiteSpace(Variant))
        {
            return osAndArchitecture;
        }

        return $"{osAndArchitecture}/{Variant}";
    }
}

View File

@@ -0,0 +1,314 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Contracts;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Manifest.Persistence;
namespace StellaOps.Scanner.Manifest;
/// <summary>
/// Service for capturing, storing, and comparing OCI image manifest snapshots.
/// Captures go through an <see cref="IOciImageInspector"/>, storage goes through an
/// <see cref="IManifestSnapshotRepository"/>, and comparisons match layers by diffID.
/// </summary>
public sealed class OciManifestSnapshotService : IOciManifestSnapshotService
{
    private readonly IOciImageInspector _imageInspector;
    private readonly IManifestSnapshotRepository _repository;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<OciManifestSnapshotService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="imageInspector">Inspector used to read manifests from a registry.</param>
    /// <param name="repository">Store used to persist and look up snapshots.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <param name="timeProvider">
    /// Clock used to stamp <see cref="OciManifestSnapshot.CapturedAt"/>;
    /// <see cref="TimeProvider.System"/> is used when null.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="imageInspector"/>, <paramref name="repository"/> or <paramref name="logger"/> is null.
    /// </exception>
    public OciManifestSnapshotService(
        IOciImageInspector imageInspector,
        IManifestSnapshotRepository repository,
        ILogger<OciManifestSnapshotService> logger,
        TimeProvider? timeProvider = null)
    {
        _imageInspector = imageInspector ?? throw new ArgumentNullException(nameof(imageInspector));
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns null when <paramref name="imageReference"/> is null or whitespace, when the inspector
    /// returns no result, or when the inspection contains no platforms. The first platform of the
    /// inspection result is the one that is captured. The snapshot is written to the repository only
    /// when <see cref="ManifestCaptureOptions.Persist"/> is true.
    /// </remarks>
    public async Task<OciManifestSnapshot?> CaptureAsync(
        string imageReference,
        ManifestCaptureOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(imageReference))
        {
            return null;
        }

        options ??= new ManifestCaptureOptions();
        _logger.LogDebug("Capturing manifest snapshot for {ImageReference}", imageReference);

        var inspectionOptions = new ImageInspectionOptions
        {
            IncludeLayers = true,
            ResolveIndex = true,
            PlatformFilter = options.PlatformFilter,
            Timeout = options.Timeout
        };

        var inspection = await _imageInspector.InspectAsync(imageReference, inspectionOptions, cancellationToken)
            .ConfigureAwait(false);
        if (inspection is null)
        {
            _logger.LogWarning("Failed to inspect image {ImageReference}", imageReference);
            return null;
        }

        var platform = inspection.Platforms.FirstOrDefault();
        if (platform is null)
        {
            _logger.LogWarning("No platforms found for image {ImageReference}", imageReference);
            return null;
        }

        var layers = BuildLayerDescriptors(platform.Layers);
        var snapshot = new OciManifestSnapshot
        {
            Id = Guid.NewGuid(),
            ImageReference = imageReference,
            Registry = inspection.Registry,
            Repository = inspection.Repository,
            Tag = ExtractTag(imageReference),
            ManifestDigest = inspection.ResolvedDigest,
            ConfigDigest = platform.ConfigDigest,
            MediaType = platform.ManifestMediaType,
            Layers = layers,
            // Kept parallel to Layers: one entry per layer, null where no diffID is known.
            DiffIds = layers.Select(l => l.DiffId).ToImmutableArray(),
            Platform = new OciPlatformInfo
            {
                Os = platform.Os,
                Architecture = platform.Architecture,
                Variant = platform.Variant,
                OsVersion = platform.OsVersion
            },
            TotalSize = platform.TotalSize,
            CapturedAt = _timeProvider.GetUtcNow(),
            SnapshotVersion = GetSnapshotVersion()
        };

        if (options.Persist)
        {
            await _repository.UpsertAsync(snapshot, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Captured manifest snapshot for {ImageReference} with {LayerCount} layers",
                imageReference,
                snapshot.LayerCount);
        }

        return snapshot;
    }

    /// <inheritdoc />
    /// <remarks>A null or whitespace digest yields null without querying the repository.</remarks>
    public Task<OciManifestSnapshot?> GetByDigestAsync(
        string manifestDigest,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(manifestDigest))
        {
            return Task.FromResult<OciManifestSnapshot?>(null);
        }

        return _repository.GetByDigestAsync(manifestDigest, cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>A null or whitespace reference yields null without querying the repository.</remarks>
    public Task<OciManifestSnapshot?> GetByReferenceAsync(
        string imageReference,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(imageReference))
        {
            return Task.FromResult<OciManifestSnapshot?>(null);
        }

        return _repository.GetByReferenceAsync(imageReference, cancellationToken);
    }

    /// <inheritdoc />
    public Task<OciManifestSnapshot?> GetByIdAsync(
        Guid snapshotId,
        CancellationToken cancellationToken = default)
    {
        return _repository.GetByIdAsync(snapshotId, cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>
    /// The old snapshot is looked up first; if it is missing the new one is not queried.
    /// A warning is logged for whichever snapshot could not be found.
    /// </remarks>
    public async Task<ManifestComparisonResult?> CompareAsync(
        string oldManifestDigest,
        string newManifestDigest,
        CancellationToken cancellationToken = default)
    {
        var oldSnapshot = await GetByDigestAsync(oldManifestDigest, cancellationToken).ConfigureAwait(false);
        if (oldSnapshot is null)
        {
            _logger.LogWarning("Old manifest {Digest} not found for comparison", oldManifestDigest);
            return null;
        }

        var newSnapshot = await GetByDigestAsync(newManifestDigest, cancellationToken).ConfigureAwait(false);
        if (newSnapshot is null)
        {
            _logger.LogWarning("New manifest {Digest} not found for comparison", newManifestDigest);
            return null;
        }

        return Compare(oldSnapshot, newSnapshot);
    }

    /// <inheritdoc />
    /// <remarks>
    /// <para>Layers are matched by diffID (case-insensitive). Classification rules:</para>
    /// <list type="bullet">
    /// <item><description>Unchanged: the old layer's diffID also occurs in the new snapshot.</description></item>
    /// <item><description>Modified: the old layer's diffID is gone and the new layer at the same index
    /// carries a diffID that does not occur anywhere in the old snapshot.</description></item>
    /// <item><description>Removed: the old layer's diffID is gone and there is no such replacement.</description></item>
    /// <item><description>Added: a new layer whose diffID is not in the old snapshot and that was not
    /// already reported as the replacement in a Modified entry.</description></item>
    /// </list>
    /// <para>
    /// Layers whose diffID is null or whitespace are ignored on both sides. Each result list is
    /// ordered by <see cref="LayerChange.LayerIndex"/>.
    /// </para>
    /// </remarks>
    /// <exception cref="ArgumentNullException">Either snapshot is null.</exception>
    public ManifestComparisonResult Compare(OciManifestSnapshot oldSnapshot, OciManifestSnapshot newSnapshot)
    {
        ArgumentNullException.ThrowIfNull(oldSnapshot);
        ArgumentNullException.ThrowIfNull(newSnapshot);

        var oldByDiffId = BuildDiffIdSet(oldSnapshot);
        var newByDiffId = BuildDiffIdSet(newSnapshot);

        // Index lookup for the new snapshot; TryAdd keeps the first layer per index.
        var newByIndex = new Dictionary<int, OciLayerDescriptor>();
        foreach (var layer in newSnapshot.Layers)
        {
            newByIndex.TryAdd(layer.LayerIndex, layer);
        }

        var unchanged = new List<LayerChange>();
        var added = new List<LayerChange>();
        var removed = new List<LayerChange>();
        var modified = new List<LayerChange>();

        // Indexes of new layers already reported as the "new" side of a Modified entry,
        // so the second pass does not report them again as Added.
        var modifiedNewIndexes = new HashSet<int>();

        foreach (var oldLayer in oldSnapshot.Layers)
        {
            var diffId = oldLayer.DiffId;
            if (string.IsNullOrWhiteSpace(diffId))
            {
                continue;
            }

            if (newByDiffId.TryGetValue(diffId, out var sameContentLayer))
            {
                unchanged.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Unchanged,
                    OldLayer = oldLayer,
                    NewLayer = sameContentLayer
                });
                continue;
            }

            // The layer at the same index only counts as a modification when its content is
            // genuinely new. If its diffID already exists in the old snapshot it is an unchanged
            // layer that merely moved, and pairing it here would report it twice (as Unchanged
            // and as Modified) and inflate the number of layers to scan.
            if (newByIndex.TryGetValue(oldLayer.LayerIndex, out var replacement) &&
                !string.IsNullOrWhiteSpace(replacement.DiffId) &&
                !oldByDiffId.ContainsKey(replacement.DiffId))
            {
                modified.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Modified,
                    OldLayer = oldLayer,
                    NewLayer = replacement
                });
                modifiedNewIndexes.Add(replacement.LayerIndex);
            }
            else
            {
                removed.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Removed,
                    OldLayer = oldLayer,
                    NewLayer = null
                });
            }
        }

        foreach (var newLayer in newSnapshot.Layers)
        {
            var diffId = newLayer.DiffId;
            if (string.IsNullOrWhiteSpace(diffId))
            {
                continue;
            }

            if (!oldByDiffId.ContainsKey(diffId) && !modifiedNewIndexes.Contains(newLayer.LayerIndex))
            {
                added.Add(new LayerChange
                {
                    ChangeType = LayerChangeType.Added,
                    OldLayer = null,
                    NewLayer = newLayer
                });
            }
        }

        return new ManifestComparisonResult
        {
            OldSnapshot = oldSnapshot,
            NewSnapshot = newSnapshot,
            UnchangedLayers = unchanged.OrderBy(l => l.LayerIndex).ToImmutableArray(),
            AddedLayers = added.OrderBy(l => l.LayerIndex).ToImmutableArray(),
            RemovedLayers = removed.OrderBy(l => l.LayerIndex).ToImmutableArray(),
            ModifiedLayers = modified.OrderBy(l => l.LayerIndex).ToImmutableArray()
        };
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
        string registry,
        string repository,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        return _repository.ListByRepositoryAsync(registry, repository, limit, cancellationToken);
    }

    /// <inheritdoc />
    public Task<int> PruneAsync(
        DateTimeOffset olderThan,
        CancellationToken cancellationToken = default)
    {
        return _repository.PruneAsync(olderThan, cancellationToken);
    }

    /// <summary>
    /// Converts inspected layers into descriptors, assigning each its zero-based position.
    /// </summary>
    /// <remarks>
    /// DiffIDs are always left null here; nothing in this class computes them, so
    /// <see cref="ManifestCaptureOptions.ComputeDiffIds"/> and
    /// <see cref="ManifestCaptureOptions.MaxLayerSizeForDiffId"/> are not consulted by this capture path.
    /// NOTE(review): confirm that diffIDs are populated elsewhere before snapshots are compared,
    /// because <see cref="Compare"/> skips every layer that has no diffID.
    /// </remarks>
    private static ImmutableArray<OciLayerDescriptor> BuildLayerDescriptors(ImmutableArray<LayerInfo> layers)
    {
        return layers.Select((layer, index) => new OciLayerDescriptor
        {
            Digest = layer.Digest,
            DiffId = null,
            Size = layer.Size,
            MediaType = layer.MediaType,
            LayerIndex = index,
            Annotations = layer.Annotations
        }).ToImmutableArray();
    }

    /// <summary>
    /// Builds a case-insensitive diffID-to-layer lookup for a snapshot. Layers without a diffID
    /// are left out; when several layers share a diffID the first one wins.
    /// </summary>
    private static Dictionary<string, OciLayerDescriptor> BuildDiffIdSet(OciManifestSnapshot snapshot)
    {
        var result = new Dictionary<string, OciLayerDescriptor>(StringComparer.OrdinalIgnoreCase);
        foreach (var layer in snapshot.Layers)
        {
            if (!string.IsNullOrWhiteSpace(layer.DiffId))
            {
                result.TryAdd(layer.DiffId, layer);
            }
        }

        return result;
    }

    /// <summary>
    /// Extracts the tag from an image reference, or returns null when there is none.
    /// </summary>
    /// <remarks>
    /// Anything from the first '@' onwards (the digest part) is ignored. The tag is the text after
    /// the last ':' of the remaining name, provided that text contains no '/'; this keeps a registry
    /// port such as "localhost:5000/repo" from being mistaken for a tag.
    /// </remarks>
    private static string? ExtractTag(string imageReference)
    {
        var atIndex = imageReference.IndexOf('@');
        var name = atIndex >= 0 ? imageReference[..atIndex] : imageReference;

        var lastColon = name.LastIndexOf(':');
        if (lastColon <= 0)
        {
            return null;
        }

        var candidate = name[(lastColon + 1)..];
        return candidate.Contains('/') ? null : candidate;
    }

    /// <summary>
    /// Returns the version of the assembly containing this service, or "1.0.0" when the
    /// assembly has no version.
    /// </summary>
    private static string GetSnapshotVersion()
    {
        return typeof(OciManifestSnapshotService).Assembly.GetName().Version?.ToString() ?? "1.0.0";
    }
}

View File

@@ -0,0 +1,48 @@
using StellaOps.Scanner.Manifest.Models;
namespace StellaOps.Scanner.Manifest.Persistence;
/// <summary>
/// Repository interface for persisting OCI manifest snapshots.
/// </summary>
public interface IManifestSnapshotRepository
{
/// <summary>
/// Inserts or updates a manifest snapshot.
/// </summary>
/// <param name="snapshot">The snapshot to store.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task UpsertAsync(OciManifestSnapshot snapshot, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a snapshot by manifest digest.
/// </summary>
/// <param name="manifestDigest">The manifest digest to look up.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The matching snapshot, or null when none exists.</returns>
Task<OciManifestSnapshot?> GetByDigestAsync(string manifestDigest, CancellationToken cancellationToken = default);
/// <summary>
/// Gets the most recent snapshot for an image reference.
/// </summary>
/// <param name="imageReference">The image reference to look up.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The most recent matching snapshot, or null when none exists.</returns>
Task<OciManifestSnapshot?> GetByReferenceAsync(string imageReference, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a snapshot by its unique ID.
/// </summary>
/// <param name="snapshotId">The snapshot ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The matching snapshot, or null when none exists.</returns>
Task<OciManifestSnapshot?> GetByIdAsync(Guid snapshotId, CancellationToken cancellationToken = default);
/// <summary>
/// Lists snapshots for a repository.
/// </summary>
/// <param name="registry">The registry hostname.</param>
/// <param name="repository">The repository path.</param>
/// <param name="limit">Maximum number of snapshots to return (defaults to 100).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The snapshots found for the registry/repository pair.</returns>
Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
string registry,
string repository,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes snapshots older than the specified time.
/// </summary>
/// <param name="olderThan">Snapshots captured before this time are deleted.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The number of snapshots deleted.</returns>
Task<int> PruneAsync(DateTimeOffset olderThan, CancellationToken cancellationToken = default);
/// <summary>
/// Checks if a snapshot exists for the given manifest digest.
/// </summary>
/// <param name="manifestDigest">The manifest digest to check.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True when a snapshot with that digest exists, false otherwise.</returns>
Task<bool> ExistsAsync(string manifestDigest, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,249 @@
using System.Collections.Immutable;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Storage.Postgres;
namespace StellaOps.Scanner.Manifest.Persistence;
/// <summary>
/// PostgreSQL repository for OCI manifest snapshots.
/// </summary>
public sealed class ManifestSnapshotRepository : RepositoryBase<ScannerDataSource>, IManifestSnapshotRepository
{
private const string Tenant = "";
private string Table => $"{SchemaName}.manifest_snapshots";
private string SchemaName => DataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false
};
public ManifestSnapshotRepository(ScannerDataSource dataSource, ILogger<ManifestSnapshotRepository> logger)
: base(dataSource, logger)
{
}
public Task UpsertAsync(OciManifestSnapshot snapshot, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(snapshot);
var layersJson = JsonSerializer.Serialize(snapshot.Layers, JsonOptions);
var diffIdsJson = JsonSerializer.Serialize(snapshot.DiffIds, JsonOptions);
var platformJson = snapshot.Platform is not null
? JsonSerializer.Serialize(snapshot.Platform, JsonOptions)
: null;
var sql = $"""
INSERT INTO {Table} (
id, image_reference, registry, repository, tag,
manifest_digest, config_digest, media_type,
layers_json, diff_ids_json, platform_json,
total_size, captured_at, snapshot_version
)
VALUES (
@id, @image_reference, @registry, @repository, @tag,
@manifest_digest, @config_digest, @media_type,
@layers_json::jsonb, @diff_ids_json::jsonb, @platform_json::jsonb,
@total_size, @captured_at, @snapshot_version
)
ON CONFLICT (manifest_digest) DO UPDATE SET
image_reference = EXCLUDED.image_reference,
tag = EXCLUDED.tag,
layers_json = EXCLUDED.layers_json,
diff_ids_json = EXCLUDED.diff_ids_json,
platform_json = EXCLUDED.platform_json,
total_size = EXCLUDED.total_size,
snapshot_version = EXCLUDED.snapshot_version
""";
return ExecuteAsync(
Tenant,
sql,
cmd =>
{
AddParameter(cmd, "id", snapshot.Id);
AddParameter(cmd, "image_reference", snapshot.ImageReference);
AddParameter(cmd, "registry", snapshot.Registry);
AddParameter(cmd, "repository", snapshot.Repository);
AddParameter(cmd, "tag", snapshot.Tag ?? (object)DBNull.Value);
AddParameter(cmd, "manifest_digest", snapshot.ManifestDigest);
AddParameter(cmd, "config_digest", snapshot.ConfigDigest);
AddParameter(cmd, "media_type", snapshot.MediaType);
AddParameter(cmd, "layers_json", layersJson);
AddParameter(cmd, "diff_ids_json", diffIdsJson);
AddParameter(cmd, "platform_json", platformJson ?? (object)DBNull.Value);
AddParameter(cmd, "total_size", snapshot.TotalSize);
AddParameter(cmd, "captured_at", snapshot.CapturedAt.UtcDateTime);
AddParameter(cmd, "snapshot_version", snapshot.SnapshotVersion ?? (object)DBNull.Value);
},
cancellationToken);
}
public Task<OciManifestSnapshot?> GetByDigestAsync(string manifestDigest, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(manifestDigest);
var sql = $"""
SELECT id, image_reference, registry, repository, tag,
manifest_digest, config_digest, media_type,
layers_json, diff_ids_json, platform_json,
total_size, captured_at, snapshot_version
FROM {Table}
WHERE manifest_digest = @manifest_digest
""";
return QuerySingleOrDefaultAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "manifest_digest", manifestDigest),
MapSnapshot,
cancellationToken);
}
public Task<OciManifestSnapshot?> GetByReferenceAsync(string imageReference, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);
var sql = $"""
SELECT id, image_reference, registry, repository, tag,
manifest_digest, config_digest, media_type,
layers_json, diff_ids_json, platform_json,
total_size, captured_at, snapshot_version
FROM {Table}
WHERE image_reference = @image_reference
ORDER BY captured_at DESC
LIMIT 1
""";
return QuerySingleOrDefaultAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "image_reference", imageReference),
MapSnapshot,
cancellationToken);
}
public Task<OciManifestSnapshot?> GetByIdAsync(Guid snapshotId, CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT id, image_reference, registry, repository, tag,
manifest_digest, config_digest, media_type,
layers_json, diff_ids_json, platform_json,
total_size, captured_at, snapshot_version
FROM {Table}
WHERE id = @id
""";
return QuerySingleOrDefaultAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "id", snapshotId),
MapSnapshot,
cancellationToken);
}
public async Task<IReadOnlyList<OciManifestSnapshot>> ListByRepositoryAsync(
string registry,
string repository,
int limit = 100,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(registry);
ArgumentException.ThrowIfNullOrWhiteSpace(repository);
var sql = $"""
SELECT id, image_reference, registry, repository, tag,
manifest_digest, config_digest, media_type,
layers_json, diff_ids_json, platform_json,
total_size, captured_at, snapshot_version
FROM {Table}
WHERE registry = @registry AND repository = @repository
ORDER BY captured_at DESC
LIMIT @limit
""";
var results = await QueryAsync(
Tenant,
sql,
cmd =>
{
AddParameter(cmd, "registry", registry);
AddParameter(cmd, "repository", repository);
AddParameter(cmd, "limit", limit);
},
MapSnapshot,
cancellationToken).ConfigureAwait(false);
return results;
}
public async Task<int> PruneAsync(DateTimeOffset olderThan, CancellationToken cancellationToken = default)
{
var sql = $"""
DELETE FROM {Table}
WHERE captured_at < @older_than
""";
return await ExecuteWithRowCountAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "older_than", olderThan.UtcDateTime),
cancellationToken).ConfigureAwait(false);
}
public async Task<bool> ExistsAsync(string manifestDigest, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(manifestDigest);
var sql = $"""
SELECT EXISTS(SELECT 1 FROM {Table} WHERE manifest_digest = @manifest_digest)
""";
return await QueryScalarAsync(
Tenant,
sql,
cmd => AddParameter(cmd, "manifest_digest", manifestDigest),
reader => reader.GetBoolean(0),
cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Materializes an <see cref="OciManifestSnapshot"/> from the current row of <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader positioned on a row that exposes the snapshot columns by name.</param>
/// <returns>The snapshot built from the row; nullable columns map to <c>null</c>.</returns>
private static OciManifestSnapshot MapSnapshot(NpgsqlDataReader reader)
{
    // Column accessors: one for mandatory text columns, one for columns that may be NULL.
    string Text(string column) => reader.GetString(reader.GetOrdinal(column));

    string? OptionalText(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetString(ordinal);
    }

    // JSON payload columns are decoded before the record is assembled.
    var layers = JsonSerializer.Deserialize<ImmutableArray<OciLayerDescriptor>>(Text("layers_json"), JsonOptions);
    var diffIds = JsonSerializer.Deserialize<ImmutableArray<string?>>(Text("diff_ids_json"), JsonOptions);

    var platformJson = OptionalText("platform_json");
    var platform = string.IsNullOrWhiteSpace(platformJson)
        ? null
        : JsonSerializer.Deserialize<OciPlatformInfo>(platformJson, JsonOptions);

    // The stored timestamp is stamped with a zero offset when converted to DateTimeOffset.
    var capturedAt = new DateTimeOffset(reader.GetDateTime(reader.GetOrdinal("captured_at")), TimeSpan.Zero);

    return new OciManifestSnapshot
    {
        Id = reader.GetGuid(reader.GetOrdinal("id")),
        ImageReference = Text("image_reference"),
        Registry = Text("registry"),
        Repository = Text("repository"),
        Tag = OptionalText("tag"),
        ManifestDigest = Text("manifest_digest"),
        ConfigDigest = Text("config_digest"),
        MediaType = Text("media_type"),
        Layers = layers,
        DiffIds = diffIds,
        Platform = platform,
        TotalSize = reader.GetInt64(reader.GetOrdinal("total_size")),
        CapturedAt = capturedAt,
        SnapshotVersion = OptionalText("snapshot_version")
    };
}
}

View File

@@ -0,0 +1,257 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Detects well-known base images from layer diffIDs using fingerprinting.
/// </summary>
/// <remarks>
/// Fingerprints are persisted in PostgreSQL and mirrored in an in-memory index that is loaded lazily on
/// first use. Index values are immutable snapshots, so lookups never observe a collection that is being
/// modified; every index mutation is serialized through a private gate.
/// </remarks>
public sealed class BaseImageDetector : IBaseImageDetector
{
    private const string TableName = "scanner_base_image_fingerprints";
    private const string LayerTableName = "scanner_base_image_layers";

    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<BaseImageDetector> _logger;

    // In-memory index: diffId -> (baseImage, layerIndex). Values are replaced, never mutated in place.
    private readonly ConcurrentDictionary<string, ImmutableArray<(string BaseImage, int LayerIndex)>> _diffIdIndex =
        new(StringComparer.OrdinalIgnoreCase);

    // Known base images: image reference -> ordered layer diffIDs.
    private readonly ConcurrentDictionary<string, ImmutableArray<string>> _knownBaseImages =
        new(StringComparer.OrdinalIgnoreCase);

    // Serializes read-modify-write updates of the two dictionaries above.
    private readonly object _indexGate = new();

    // Volatile so the unlocked fast-path check in EnsureIndexLoadedAsync sees the published value.
    private volatile bool _indexLoaded;
    private readonly SemaphoreSlim _loadLock = new(1, 1);

    /// <summary>
    /// Creates a detector backed by the given data source.
    /// </summary>
    /// <param name="dataSource">Data source used to open PostgreSQL connections.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public BaseImageDetector(NpgsqlDataSource dataSource, ILogger<BaseImageDetector> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Returns the base image associated with a layer diffID, preferring an entry recorded at the same
    /// layer index and otherwise falling back to the first image that contains the diffID.
    /// </summary>
    /// <param name="diffId">Layer diffID; must not be null or whitespace.</param>
    /// <param name="layerIndex">Zero-based index of the layer in the image being inspected.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The base image reference, or null when the diffID is not indexed.</returns>
    /// <exception cref="ArgumentException"><paramref name="diffId"/> is null or whitespace.</exception>
    public async Task<string?> DetectBaseImageAsync(string diffId, int layerIndex, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);

        if (!_diffIdIndex.TryGetValue(diffId, out var matches) || matches.IsDefaultOrEmpty)
        {
            return null;
        }

        // Prefer exact layer index match
        foreach (var match in matches)
        {
            if (match.LayerIndex == layerIndex && !string.IsNullOrEmpty(match.BaseImage))
            {
                return match.BaseImage;
            }
        }

        // Return any matching base image
        return matches[0].BaseImage;
    }

    /// <summary>
    /// Checks whether the diffID belongs to any registered base image.
    /// </summary>
    /// <param name="diffId">Layer diffID; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True when the in-memory index contains the diffID.</returns>
    /// <exception cref="ArgumentException"><paramref name="diffId"/> is null or whitespace.</exception>
    public async Task<bool> IsKnownBaseLayerAsync(string diffId, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);
        return _diffIdIndex.TryGetValue(diffId, out var matches) && !matches.IsDefaultOrEmpty;
    }

    /// <summary>
    /// Persists a base image fingerprint and mirrors it into the in-memory index, replacing any layers
    /// previously registered for the same image reference.
    /// </summary>
    /// <param name="baseImageRef">Base image reference; must not be null or whitespace.</param>
    /// <param name="layerDiffIds">Ordered diffIDs for the image's layers. An empty sequence is a no-op.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="ArgumentNullException"><paramref name="layerDiffIds"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="baseImageRef"/> is null or whitespace, or a diffID is null or whitespace.
    /// </exception>
    public async Task RegisterBaseImageAsync(
        string baseImageRef,
        IEnumerable<string> layerDiffIds,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(baseImageRef);
        ArgumentNullException.ThrowIfNull(layerDiffIds);

        var diffIds = layerDiffIds.ToArray();
        if (diffIds.Length == 0)
        {
            return;
        }

        // Reject unusable diffIDs before touching the database so the stored fingerprint and the
        // in-memory index cannot diverge because of a key the index cannot hold.
        if (diffIds.Any(d => string.IsNullOrWhiteSpace(d)))
        {
            throw new ArgumentException("Layer diffIDs must not be null or whitespace.", nameof(layerDiffIds));
        }

        // Load the persisted index first; otherwise a later lazy load would re-add the rows written
        // below and leave duplicate entries in memory.
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);

        _logger.LogInformation("Registering base image {BaseImage} with {LayerCount} layers", baseImageRef, diffIds.Length);

        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var transaction = await conn.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            // Upsert base image fingerprint
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"""
                    INSERT INTO {TableName} (image_reference, layer_count, registered_at)
                    VALUES (@imageRef, @layerCount, @registeredAt)
                    ON CONFLICT (image_reference) DO UPDATE SET
                    layer_count = @layerCount,
                    registered_at = @registeredAt
                    """;
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                cmd.Parameters.AddWithValue("layerCount", diffIds.Length);
                cmd.Parameters.AddWithValue("registeredAt", DateTimeOffset.UtcNow);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            // Delete existing layers
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"DELETE FROM {LayerTableName} WHERE image_reference = @imageRef";
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            // Insert layer diffIds
            await using (var cmd = conn.CreateCommand())
            {
                cmd.Transaction = transaction;
                cmd.CommandText = $"""
                    INSERT INTO {LayerTableName} (image_reference, layer_index, diff_id)
                    SELECT @imageRef, unnest(@indices::int[]), unnest(@diffIds::varchar[])
                    """;
                cmd.Parameters.AddWithValue("imageRef", baseImageRef);
                cmd.Parameters.AddWithValue("indices", Enumerable.Range(0, diffIds.Length).ToArray());
                cmd.Parameters.AddWithValue("diffIds", diffIds);
                await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to register base image {BaseImage}", baseImageRef);
            try
            {
                // The caller's token may already be cancelled (that may be why we are here), so the
                // rollback must not observe it.
                await transaction.RollbackAsync(CancellationToken.None).ConfigureAwait(false);
            }
            catch (Exception rollbackEx)
            {
                // Never let a rollback failure mask the original exception.
                _logger.LogWarning(rollbackEx, "Rollback failed while registering base image {BaseImage}", baseImageRef);
            }

            throw;
        }

        // Update in-memory index. This runs only after a successful commit and outside the try block,
        // so a failure here can no longer trigger a rollback of an already-committed transaction.
        lock (_indexGate)
        {
            // Drop entries for layers the image no longer contains, not just for the new diffIDs.
            if (_knownBaseImages.TryGetValue(baseImageRef, out var previousDiffIds))
            {
                foreach (var staleDiffId in previousDiffIds)
                {
                    RemoveIndexEntries(staleDiffId, baseImageRef);
                }
            }

            foreach (var diffId in diffIds)
            {
                RemoveIndexEntries(diffId, baseImageRef);
            }

            _knownBaseImages[baseImageRef] = [.. diffIds];
            for (var i = 0; i < diffIds.Length; i++)
            {
                // One entry per layer position, matching what a load from the database produces.
                AddIndexEntry(diffIds[i], baseImageRef, i);
            }
        }

        _logger.LogInformation("Registered base image {BaseImage}", baseImageRef);
    }

    /// <summary>
    /// Lists the references of all base images currently present in the in-memory index.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A snapshot of the registered base image references.</returns>
    public async Task<IReadOnlyList<string>> GetRegisteredBaseImagesAsync(CancellationToken cancellationToken = default)
    {
        await EnsureIndexLoadedAsync(cancellationToken).ConfigureAwait(false);
        return [.. _knownBaseImages.Keys];
    }

    /// <summary>
    /// Loads the index once; concurrent callers wait on the semaphore and re-check the flag.
    /// </summary>
    private async Task EnsureIndexLoadedAsync(CancellationToken cancellationToken)
    {
        if (_indexLoaded)
        {
            return;
        }

        await _loadLock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (_indexLoaded)
            {
                return;
            }

            await LoadIndexFromDatabaseAsync(cancellationToken).ConfigureAwait(false);
            LoadBuiltInBaseImages();
            _indexLoaded = true;
        }
        finally
        {
            _loadLock.Release();
        }
    }

    /// <summary>
    /// Reads all persisted fingerprints and publishes them to the in-memory index. A missing table is
    /// logged and treated as an empty index.
    /// </summary>
    private async Task LoadIndexFromDatabaseAsync(CancellationToken cancellationToken)
    {
        try
        {
            // Stage every row first: if the read fails part-way nothing has been published, so a
            // retry cannot duplicate entries.
            var rows = new List<(string ImageRef, int LayerIndex, string DiffId)>();

            await using (var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false))
            await using (var cmd = conn.CreateCommand())
            {
                cmd.CommandText = $"""
                    SELECT l.image_reference, l.layer_index, l.diff_id
                    FROM {LayerTableName} l
                    INNER JOIN {TableName} f ON l.image_reference = f.image_reference
                    ORDER BY l.image_reference, l.layer_index
                    """;

                await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
                while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    rows.Add((reader.GetString(0), reader.GetInt32(1), reader.GetString(2)));
                }
            }

            lock (_indexGate)
            {
                var currentImage = "";
                var currentLayers = new List<string>();

                foreach (var (imageRef, layerIndex, diffId) in rows)
                {
                    // Rows arrive grouped by image; flush the previous group when the image changes.
                    if (!imageRef.Equals(currentImage, StringComparison.OrdinalIgnoreCase))
                    {
                        if (!string.IsNullOrEmpty(currentImage) && currentLayers.Count > 0)
                        {
                            _knownBaseImages[currentImage] = [.. currentLayers];
                        }

                        currentImage = imageRef;
                        currentLayers.Clear();
                    }

                    currentLayers.Add(diffId);
                    AddIndexEntry(diffId, imageRef, layerIndex);
                }

                // Don't forget the last image
                if (!string.IsNullOrEmpty(currentImage) && currentLayers.Count > 0)
                {
                    _knownBaseImages[currentImage] = [.. currentLayers];
                }
            }

            _logger.LogInformation("Loaded {Count} base image fingerprints from database", _knownBaseImages.Count);
        }
        catch (PostgresException ex) when (ex.SqlState == "42P01") // Table doesn't exist
        {
            _logger.LogWarning("Base image fingerprint tables do not exist. Run migrations to create them.");
        }
    }

    private void LoadBuiltInBaseImages()
    {
        // These are well-known base image layer patterns.
        // In a real implementation, these would be periodically updated from a registry scan.
        // For now, we'll rely on the database to store actual fingerprints.
        // Note: Real diffIDs would be computed from actual base images.
        // This is a placeholder to show the pattern.
        _logger.LogDebug("Built-in base image index ready for population");
    }

    /// <summary>
    /// Appends one (image, layer index) entry for a diffID. Callers must hold <see cref="_indexGate"/>.
    /// </summary>
    private void AddIndexEntry(string diffId, string baseImage, int layerIndex)
    {
        var existing = _diffIdIndex.TryGetValue(diffId, out var current)
            ? current
            : ImmutableArray<(string BaseImage, int LayerIndex)>.Empty;

        _diffIdIndex[diffId] = existing.Add((baseImage, layerIndex));
    }

    /// <summary>
    /// Removes every entry of <paramref name="baseImage"/> for a diffID and drops the key when nothing
    /// remains. Callers must hold <see cref="_indexGate"/>.
    /// </summary>
    private void RemoveIndexEntries(string diffId, string baseImage)
    {
        if (!_diffIdIndex.TryGetValue(diffId, out var current))
        {
            return;
        }

        var remaining = current.RemoveAll(e => e.BaseImage.Equals(baseImage, StringComparison.OrdinalIgnoreCase));
        if (remaining.IsEmpty)
        {
            _diffIdIndex.TryRemove(diffId, out _);
        }
        else
        {
            _diffIdIndex[diffId] = remaining;
        }
    }
}

View File

@@ -0,0 +1,182 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
using Npgsql;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// PostgreSQL-backed cache for layer diffIDs with a bounded in-memory front cache.
/// </summary>
/// <remarks>
/// The in-memory cache is not an LRU: once it holds <c>MemoryCacheMaxSize</c> entries, roughly a tenth
/// of the entries are dropped in enumeration order before a new key is added. A missing cache table is
/// logged and treated as a cache miss by every operation.
/// </remarks>
public sealed class DiffIdCache : IDiffIdCache
{
    private const int MemoryCacheMaxSize = 10000;
    private const string TableName = "scanner_diffid_cache";

    // PostgreSQL SQLSTATE for "undefined_table".
    private const string UndefinedTableSqlState = "42P01";

    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<DiffIdCache> _logger;
    private readonly ConcurrentDictionary<string, string> _memoryCache = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Creates a cache backed by the given data source.
    /// </summary>
    /// <param name="dataSource">Data source used to open PostgreSQL connections.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public DiffIdCache(NpgsqlDataSource dataSource, ILogger<DiffIdCache> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Gets the diffID stored for a layer digest, consulting memory before the database.
    /// </summary>
    /// <param name="layerDigest">Layer digest; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The diffID, or null when none is stored (or the cache table does not exist).</returns>
    /// <exception cref="ArgumentException"><paramref name="layerDigest"/> is null or whitespace.</exception>
    public async Task<string?> GetAsync(string layerDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);

        // Check memory cache first
        if (_memoryCache.TryGetValue(layerDigest, out var cached))
        {
            return cached;
        }

        try
        {
            // Query database
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT diff_id FROM {TableName}
                WHERE layer_digest = @layerDigest
                """;
            cmd.Parameters.AddWithValue("layerDigest", layerDigest);

            var result = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            if (result is string diffId && !string.IsNullOrWhiteSpace(diffId))
            {
                AddToMemoryCache(layerDigest, diffId);
                return diffId;
            }
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            // Same tolerance as SetAsync: without the table the cache is simply empty.
            LogMissingTable();
        }

        return null;
    }

    /// <summary>
    /// Stores a diffID for a layer digest in memory and in the database. An existing database row for
    /// the digest is left untouched.
    /// </summary>
    /// <param name="layerDigest">Layer digest; must not be null or whitespace.</param>
    /// <param name="diffId">DiffID to store; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="ArgumentException">Either string argument is null or whitespace.</exception>
    public async Task SetAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);

        AddToMemoryCache(layerDigest, diffId);

        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            INSERT INTO {TableName} (layer_digest, diff_id, created_at)
            VALUES (@layerDigest, @diffId, @createdAt)
            ON CONFLICT (layer_digest) DO NOTHING
            """;
        cmd.Parameters.AddWithValue("layerDigest", layerDigest);
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("createdAt", DateTimeOffset.UtcNow);

        try
        {
            await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            LogMissingTable();
        }
    }

    /// <summary>
    /// Checks whether a diffID is stored for a layer digest.
    /// </summary>
    /// <param name="layerDigest">Layer digest; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True when the digest is in memory or in the database.</returns>
    /// <exception cref="ArgumentException"><paramref name="layerDigest"/> is null or whitespace.</exception>
    public async Task<bool> ExistsAsync(string layerDigest, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);

        if (_memoryCache.ContainsKey(layerDigest))
        {
            return true;
        }

        try
        {
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT 1 FROM {TableName}
                WHERE layer_digest = @layerDigest
                LIMIT 1
                """;
            cmd.Parameters.AddWithValue("layerDigest", layerDigest);

            var result = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            return result is not null;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            LogMissingTable();
            return false;
        }
    }

    /// <summary>
    /// Gets the diffIDs stored for several layer digests with at most one database round trip.
    /// </summary>
    /// <param name="layerDigests">Digests to look up; null or whitespace entries are ignored.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A case-insensitive map from layer digest to diffID containing only the entries found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="layerDigests"/> is null.</exception>
    public async Task<IReadOnlyDictionary<string, string>> GetManyAsync(
        IEnumerable<string> layerDigests,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(layerDigests);

        var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        var notInMemory = new List<string>();

        // Check memory cache first; duplicates are collapsed so each digest is queried once.
        foreach (var digest in layerDigests
            .Where(d => !string.IsNullOrWhiteSpace(d))
            .Distinct(StringComparer.OrdinalIgnoreCase))
        {
            if (_memoryCache.TryGetValue(digest, out var cached))
            {
                result[digest] = cached;
            }
            else
            {
                notInMemory.Add(digest);
            }
        }

        if (notInMemory.Count == 0)
        {
            return result;
        }

        try
        {
            // Query database for remaining. The digests travel as one array parameter; no
            // per-digest parameters are needed for "= ANY(@digests)".
            await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = $"""
                SELECT layer_digest, diff_id FROM {TableName}
                WHERE layer_digest = ANY(@digests)
                """;
            cmd.Parameters.AddWithValue("digests", notInMemory.ToArray());

            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                var layerDigest = reader.GetString(0);
                var diffId = reader.GetString(1);
                result[layerDigest] = diffId;
                AddToMemoryCache(layerDigest, diffId);
            }
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            // Fall through with whatever the memory cache supplied.
            LogMissingTable();
        }

        return result;
    }

    /// <summary>
    /// Stores an entry in the memory cache, trimming the cache first when a new key would exceed the cap.
    /// </summary>
    private void AddToMemoryCache(string layerDigest, string diffId)
    {
        // Simple eviction when cache gets too large. Overwriting an existing key does not grow the
        // cache, so it never triggers eviction.
        if (_memoryCache.Count >= MemoryCacheMaxSize && !_memoryCache.ContainsKey(layerDigest))
        {
            // Remove approximately 10% of entries in enumeration order (arbitrary, not LRU).
            // Enumerating the dictionary directly avoids copying the full key set.
            var toRemove = _memoryCache
                .Take(MemoryCacheMaxSize / 10)
                .Select(entry => entry.Key)
                .ToList();

            foreach (var key in toRemove)
            {
                _memoryCache.TryRemove(key, out _);
            }
        }

        _memoryCache[layerDigest] = diffId;
    }

    private void LogMissingTable() =>
        _logger.LogWarning("DiffID cache table does not exist. Run migrations to create it.");
}

View File

@@ -0,0 +1,68 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Detects well-known base images from layer diffIDs.
/// </summary>
/// <remarks>
/// Lookups are keyed by layer diffID. Fingerprints can be supplied at runtime through
/// <see cref="RegisterBaseImageAsync"/>.
/// </remarks>
public interface IBaseImageDetector
{
/// <summary>
/// Attempts to detect the base image that introduced a layer.
/// </summary>
/// <remarks>
/// The <c>BaseImageDetector</c> implementation prefers a fingerprint recorded at the same
/// <paramref name="layerIndex"/> and otherwise returns any base image that contains the diffID.
/// </remarks>
/// <param name="diffId">Layer diffID.</param>
/// <param name="layerIndex">Zero-based index of the layer in the image.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Base image reference (e.g., "alpine:3.19") or null if unknown.</returns>
Task<string?> DetectBaseImageAsync(string diffId, int layerIndex, CancellationToken cancellationToken = default);
/// <summary>
/// Checks if a layer is from a known base image.
/// </summary>
/// <param name="diffId">Layer diffID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if the layer is recognized as a base image layer.</returns>
Task<bool> IsKnownBaseLayerAsync(string diffId, CancellationToken cancellationToken = default);
/// <summary>
/// Registers a base image fingerprint.
/// </summary>
/// <remarks>
/// The position of each diffID in <paramref name="layerDiffIds"/> is its layer index, so the
/// sequence must be supplied in layer order.
/// </remarks>
/// <param name="baseImageRef">Base image reference (e.g., "alpine:3.19").</param>
/// <param name="layerDiffIds">Ordered diffIDs for the base image layers.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task RegisterBaseImageAsync(
string baseImageRef,
IEnumerable<string> layerDiffIds,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets all registered base images.
/// </summary>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>List of registered base image references.</returns>
Task<IReadOnlyList<string>> GetRegisteredBaseImagesAsync(CancellationToken cancellationToken = default);
}
/// <summary>
/// Information about a known base image.
/// </summary>
/// <remarks>
/// Immutable record: every property is init-only, and <see cref="ImageReference"/> and
/// <see cref="LayerDiffIds"/> must be set at construction.
/// </remarks>
public sealed record BaseImageInfo
{
/// <summary>
/// Base image reference (e.g., "alpine:3.19").
/// </summary>
public required string ImageReference { get; init; }
/// <summary>
/// Ordered diffIDs for this base image.
/// </summary>
public required IReadOnlyList<string> LayerDiffIds { get; init; }
/// <summary>
/// When this base image was registered/fingerprinted.
/// </summary>
public DateTimeOffset RegisteredAt { get; init; }
/// <summary>
/// How many times this base image has been detected.
/// </summary>
// NOTE(review): nothing in the visible detector code populates this counter; confirm where it is maintained.
public long DetectionCount { get; init; }
}

View File

@@ -0,0 +1,43 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Cache for layer diffIDs.
/// DiffIDs are immutable (same layer digest always produces same diffID),
/// so they can be cached indefinitely.
/// </summary>
/// <remarks>
/// Entries are keyed by the compressed layer digest and hold the corresponding diffID.
/// </remarks>
public interface IDiffIdCache
{
/// <summary>
/// Gets a cached diffID for a layer digest.
/// </summary>
/// <param name="layerDigest">Compressed layer digest (sha256:...).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Cached diffID or null if not in cache.</returns>
Task<string?> GetAsync(string layerDigest, CancellationToken cancellationToken = default);
/// <summary>
/// Stores a diffID for a layer digest.
/// </summary>
/// <remarks>
/// The <c>DiffIdCache</c> implementation inserts with <c>ON CONFLICT DO NOTHING</c>, so a value
/// already persisted for the digest is not overwritten.
/// </remarks>
/// <param name="layerDigest">Compressed layer digest (sha256:...).</param>
/// <param name="diffId">Computed diffID (sha256:...).</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task SetAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default);
/// <summary>
/// Checks if a diffID is cached for a layer digest.
/// </summary>
/// <param name="layerDigest">Compressed layer digest.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if cached.</returns>
Task<bool> ExistsAsync(string layerDigest, CancellationToken cancellationToken = default);
/// <summary>
/// Gets multiple cached diffIDs at once.
/// </summary>
/// <remarks>
/// Digests without a cached diffID are simply absent from the returned dictionary.
/// </remarks>
/// <param name="layerDigests">Layer digests to look up.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Dictionary of layer digest to diffID for found entries.</returns>
Task<IReadOnlyDictionary<string, string>> GetManyAsync(
IEnumerable<string> layerDigests,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,109 @@
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Resolves image references to layer digests and computes diffIDs.
/// </summary>
public interface ILayerDigestResolver
{
/// <summary>
/// Resolves an image reference to its ordered layer descriptors with provenance.
/// </summary>
/// <remarks>
/// In the <c>LayerDigestResolver</c> implementation a null <paramref name="options"/> is replaced by a
/// default <see cref="LayerResolutionOptions"/>, and an <see cref="InvalidOperationException"/> is
/// thrown when no manifest snapshot can be captured for the image.
/// </remarks>
/// <param name="imageReference">Full image reference (registry/repo:tag or registry/repo@sha256:...).</param>
/// <param name="options">Resolution options.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Resolved layers with provenance information.</returns>
Task<ResolvedImageLayers> ResolveLayersAsync(
string imageReference,
LayerResolutionOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Computes the diffID for a layer by decompressing and hashing its content.
/// </summary>
/// <remarks>
/// The <c>LayerDigestResolver</c> implementation returns a cached value when one exists, caches
/// newly computed values, and throws <see cref="InvalidOperationException"/> when the layer blob
/// cannot be found.
/// </remarks>
/// <param name="registry">Registry hostname.</param>
/// <param name="repository">Repository path.</param>
/// <param name="layerDigest">Compressed layer digest.</param>
/// <param name="mediaType">Layer media type (to determine decompression).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The computed diffID (sha256:...).</returns>
Task<string> ResolveDiffIdAsync(
string registry,
string repository,
string layerDigest,
string mediaType,
CancellationToken cancellationToken = default);
/// <summary>
/// Finds layers that are shared across multiple images.
/// </summary>
/// <remarks>
/// Layers are matched by diffID. The <c>LayerDigestResolver</c> implementation returns an empty list
/// when fewer than two references are supplied and skips images that fail to resolve.
/// </remarks>
/// <param name="imageReferences">Image references to compare.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Information about shared layers.</returns>
Task<IReadOnlyList<SharedLayerInfo>> FindSharedLayersAsync(
IEnumerable<string> imageReferences,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the cached diffID for a layer digest if available.
/// </summary>
/// <param name="layerDigest">Compressed layer digest.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Cached diffID or null if not in cache.</returns>
Task<string?> GetCachedDiffIdAsync(
string layerDigest,
CancellationToken cancellationToken = default);
/// <summary>
/// Caches a diffID for a layer digest.
/// </summary>
/// <param name="layerDigest">Compressed layer digest.</param>
/// <param name="diffId">Computed diffID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task CacheDiffIdAsync(
string layerDigest,
string diffId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Tunable settings that control how image layers are resolved.
/// </summary>
public sealed record LayerResolutionOptions
{
    // 2^30 bytes, i.e. the 1GB default for MaxDiffIdLayerSize.
    private const long OneGigabyteInBytes = 1L << 30;

    /// <summary>
    /// When true, diffIDs are computed while resolving.
    /// Defaults to false, leaving computation deferred.
    /// </summary>
    public bool ComputeDiffIds { get; init; }

    /// <summary>
    /// Platform to select for multi-arch images (e.g., "linux/amd64").
    /// Null means the default platform is used.
    /// </summary>
    public string? Platform { get; init; }

    /// <summary>
    /// Largest layer size, in bytes, for which a diffID is computed.
    /// Bigger layers are skipped to prevent timeouts. Defaults to 1GB.
    /// </summary>
    public long MaxDiffIdLayerSize { get; init; } = OneGigabyteInBytes;

    /// <summary>
    /// When true, cached diffIDs are used if available. Defaults to true.
    /// </summary>
    public bool UseDiffIdCache { get; init; } = true;

    /// <summary>
    /// Per-layer timeout for diffID computation. Defaults to 5 minutes.
    /// </summary>
    public TimeSpan DiffIdTimeout { get; init; } = new TimeSpan(hours: 0, minutes: 5, seconds: 0);

    /// <summary>
    /// When true, base image detection is attempted. Defaults to true.
    /// </summary>
    public bool DetectBaseImage { get; init; } = true;
}

View File

@@ -0,0 +1,427 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Manifest.Models;
using StellaOps.Scanner.Registry;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Implementation of ILayerDigestResolver that resolves image references to layer digests
/// and computes diffIDs with caching support.
/// </summary>
public sealed class LayerDigestResolver : ILayerDigestResolver
{
private readonly IRegistryClient _registryClient;
private readonly IOciManifestSnapshotService _snapshotService;
private readonly IDiffIdCache _diffIdCache;
private readonly IBaseImageDetector _baseImageDetector;
private readonly ILogger<LayerDigestResolver> _logger;
// In-memory cache for recently resolved diffIDs (layer digests are immutable)
private readonly ConcurrentDictionary<string, string> _memoryCache = new(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// Wires up the resolver's collaborators.
/// </summary>
/// <param name="registryClient">Client used to fetch blobs from the registry.</param>
/// <param name="snapshotService">Service used to capture manifest snapshots.</param>
/// <param name="diffIdCache">Persistent diffID cache.</param>
/// <param name="baseImageDetector">Detector used to attribute layers to base images.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public LayerDigestResolver(
    IRegistryClient registryClient,
    IOciManifestSnapshotService snapshotService,
    IDiffIdCache diffIdCache,
    IBaseImageDetector baseImageDetector,
    ILogger<LayerDigestResolver> logger)
{
    // Validate in declaration order so the first null argument is the one reported.
    ArgumentNullException.ThrowIfNull(registryClient);
    ArgumentNullException.ThrowIfNull(snapshotService);
    ArgumentNullException.ThrowIfNull(diffIdCache);
    ArgumentNullException.ThrowIfNull(baseImageDetector);
    ArgumentNullException.ThrowIfNull(logger);

    _registryClient = registryClient;
    _snapshotService = snapshotService;
    _diffIdCache = diffIdCache;
    _baseImageDetector = baseImageDetector;
    _logger = logger;
}
/// <summary>
/// Resolves an image reference to its ordered layers, attaching diffID, history command and base
/// image provenance to each layer where available.
/// </summary>
/// <param name="imageReference">Image reference to resolve; must not be null or whitespace.</param>
/// <param name="options">Resolution options; defaults are used when null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The resolved layers together with the manifest and config digests of the snapshot.</returns>
/// <exception cref="ArgumentException"><paramref name="imageReference"/> is null or whitespace.</exception>
/// <exception cref="InvalidOperationException">No manifest snapshot could be captured for the image.</exception>
public async Task<ResolvedImageLayers> ResolveLayersAsync(
    string imageReference,
    LayerResolutionOptions? options = null,
    CancellationToken cancellationToken = default)
{
    // Fail with a parameter-specific exception instead of a NullReferenceException from parsing.
    ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);

    options ??= new LayerResolutionOptions();
    var (registry, repository, reference) = ParseImageReference(imageReference);
    _logger.LogInformation("Resolving layers for {Image}", imageReference);

    // Capture manifest snapshot
    var snapshot = await _snapshotService.CaptureAsync(
        registry,
        repository,
        reference,
        new ManifestCaptureOptions { Platform = options.Platform },
        cancellationToken).ConfigureAwait(false);
    if (snapshot is null)
    {
        throw new InvalidOperationException($"Failed to capture manifest for {imageReference}");
    }

    // Resolve layers with provenance
    var layers = new List<LayerProvenance>();
    var configHistory = await GetConfigHistoryAsync(registry, repository, snapshot.ConfigDigest, cancellationToken)
        .ConfigureAwait(false);

    for (var i = 0; i < snapshot.Layers.Length; i++)
    {
        var layer = snapshot.Layers[i];
        string? diffId = snapshot.DiffIds.Length > i ? snapshot.DiffIds[i] : null;

        // Try to get cached diffID if not in snapshot
        if (string.IsNullOrWhiteSpace(diffId) && options.UseDiffIdCache)
        {
            diffId = await GetCachedDiffIdAsync(layer.Digest, cancellationToken).ConfigureAwait(false);
        }

        // Compute diffID if requested and not cached
        if (string.IsNullOrWhiteSpace(diffId) && options.ComputeDiffIds && layer.Size <= options.MaxDiffIdLayerSize)
        {
            try
            {
                // A linked source enforces the per-layer timeout while still honouring the caller's token.
                using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                cts.CancelAfter(options.DiffIdTimeout);
                diffId = await ResolveDiffIdAsync(registry, repository, layer.Digest, layer.MediaType, cts.Token)
                    .ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
            {
                // Only the per-layer timeout lands here; caller cancellation propagates.
                _logger.LogWarning("DiffID computation timed out for layer {Digest}", layer.Digest);
            }
        }

        // Get history command for this layer
        var createdBy = configHistory.Length > i ? configHistory[i] : null;

        // Detect base image for this layer
        string? introducedBy = null;
        if (options.DetectBaseImage && !string.IsNullOrWhiteSpace(diffId))
        {
            introducedBy = await _baseImageDetector.DetectBaseImageAsync(diffId, i, cancellationToken)
                .ConfigureAwait(false);
        }

        layers.Add(new LayerProvenance
        {
            LayerDigest = layer.Digest,
            DiffId = diffId,
            SourceImage = imageReference,
            LayerIndex = i,
            IntroducedBy = introducedBy,
            Size = layer.Size,
            MediaType = layer.MediaType,
            CreatedByCommand = createdBy
        });
    }

    return new ResolvedImageLayers
    {
        ImageReference = imageReference,
        ManifestDigest = snapshot.ManifestDigest,
        ConfigDigest = snapshot.ConfigDigest,
        Platform = snapshot.Platform?.ToString(),
        Layers = [.. layers],
        ResolvedAt = DateTimeOffset.UtcNow
    };
}
/// <summary>
/// Returns the diffID for a layer, using the memory and persistent caches before downloading the
/// blob and hashing its decompressed content. Newly computed values are cached.
/// </summary>
/// <param name="registry">Registry passed to the registry client.</param>
/// <param name="repository">Repository passed to the registry client.</param>
/// <param name="layerDigest">Compressed layer digest; must not be null or whitespace.</param>
/// <param name="mediaType">Layer media type used to choose decompression; must not be null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The diffID in <c>sha256:&lt;hex&gt;</c> form.</returns>
/// <exception cref="ArgumentException"><paramref name="layerDigest"/> is null or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="mediaType"/> is null.</exception>
/// <exception cref="InvalidOperationException">The layer blob could not be found.</exception>
public async Task<string> ResolveDiffIdAsync(
    string registry,
    string repository,
    string layerDigest,
    string mediaType,
    CancellationToken cancellationToken = default)
{
    // Validate up front: a null media type would otherwise fail only after the blob was fetched.
    ArgumentException.ThrowIfNullOrWhiteSpace(layerDigest);
    ArgumentNullException.ThrowIfNull(mediaType);

    // Check memory cache first
    if (_memoryCache.TryGetValue(layerDigest, out var cached))
    {
        return cached;
    }

    // Check persistent cache
    var persistedDiffId = await _diffIdCache.GetAsync(layerDigest, cancellationToken).ConfigureAwait(false);
    if (!string.IsNullOrWhiteSpace(persistedDiffId))
    {
        _memoryCache[layerDigest] = persistedDiffId;
        return persistedDiffId;
    }

    _logger.LogDebug("Computing diffID for layer {Digest}", layerDigest);

    // Fetch and decompress layer to compute hash
    using var blobStream = await _registryClient.GetBlobAsync(registry, repository, layerDigest, cancellationToken)
        .ConfigureAwait(false);
    if (blobStream is null)
    {
        throw new InvalidOperationException($"Layer blob not found: {layerDigest}");
    }

    // Decompress based on media type and compute SHA256
    var diffId = await ComputeDiffIdAsync(blobStream, mediaType, cancellationToken).ConfigureAwait(false);

    // Cache the result
    _memoryCache[layerDigest] = diffId;
    await CacheDiffIdAsync(layerDigest, diffId, cancellationToken).ConfigureAwait(false);

    _logger.LogDebug("Computed diffID {DiffId} for layer {Digest}", diffId, layerDigest);
    return diffId;
}
/// <summary>
/// Finds layers (matched by diffID) that occur in at least two distinct images.
/// </summary>
/// <param name="imageReferences">Image references to compare; duplicates are ignored.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// Shared layers ordered by the number of distinct images containing them, highest first. Empty when
/// fewer than two distinct references are supplied. Images that fail to resolve are logged and skipped.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="imageReferences"/> is null.</exception>
/// <exception cref="OperationCanceledException">The caller cancelled the operation.</exception>
public async Task<IReadOnlyList<SharedLayerInfo>> FindSharedLayersAsync(
    IEnumerable<string> imageReferences,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(imageReferences);

    // Collapse duplicate references: comparing an image with itself would make every one of its
    // layers look "shared".
    var images = imageReferences.Distinct(StringComparer.Ordinal).ToArray();
    if (images.Length < 2)
    {
        return [];
    }

    // Resolve all images
    var resolvedImages = new List<ResolvedImageLayers>();
    foreach (var imageRef in images)
    {
        try
        {
            var resolved = await ResolveLayersAsync(imageRef, new LayerResolutionOptions
            {
                ComputeDiffIds = true,
                DetectBaseImage = true
            }, cancellationToken).ConfigureAwait(false);
            resolvedImages.Add(resolved);
        }
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            // Per-image failures are best-effort, but caller cancellation must propagate instead of
            // being logged once per remaining image.
            _logger.LogWarning(ex, "Failed to resolve {Image} for shared layer detection", imageRef);
        }
    }

    // Group layers by diffID
    var layersByDiffId = new Dictionary<string, List<(ResolvedImageLayers Image, LayerProvenance Layer)>>(
        StringComparer.OrdinalIgnoreCase);
    foreach (var image in resolvedImages)
    {
        foreach (var layer in image.Layers)
        {
            if (string.IsNullOrWhiteSpace(layer.DiffId))
            {
                continue;
            }

            if (!layersByDiffId.TryGetValue(layer.DiffId, out var list))
            {
                list = [];
                layersByDiffId[layer.DiffId] = list;
            }

            list.Add((image, layer));
        }
    }

    // Build shared layer info for layers present in multiple images
    var sharedLayers = new List<SharedLayerInfo>();
    foreach (var (diffId, occurrences) in layersByDiffId)
    {
        // Count distinct images, not occurrences: the same diffID can appear more than once inside
        // a single image, which does not make the layer shared.
        var containingImages = occurrences
            .Select(o => o.Image.ImageReference)
            .Distinct()
            .ToArray();
        if (containingImages.Length < 2)
        {
            continue;
        }

        var firstLayer = occurrences[0].Layer;
        var sampleImages = containingImages
            .Take(5)
            .ToImmutableArray();

        var isKnownBase = await _baseImageDetector.IsKnownBaseLayerAsync(diffId, cancellationToken)
            .ConfigureAwait(false);

        sharedLayers.Add(new SharedLayerInfo
        {
            DiffId = diffId,
            ImageCount = containingImages.Length,
            SampleImages = sampleImages,
            LikelyBaseImage = firstLayer.IntroducedBy,
            Size = firstLayer.Size,
            IsKnownBaseLayer = isKnownBase
        });
    }

    return sharedLayers.OrderByDescending(s => s.ImageCount).ToList();
}
/// <summary>
/// Looks up a diffID in the memory cache and, on a miss, in the persistent cache.
/// </summary>
/// <param name="layerDigest">Layer digest used as the cache key.</param>
/// <param name="cancellationToken">Token forwarded to the persistent cache.</param>
/// <returns>The cached diffID, or whatever the persistent cache returned when memory had no entry.</returns>
public async Task<string?> GetCachedDiffIdAsync(string layerDigest, CancellationToken cancellationToken = default)
{
    if (!_memoryCache.TryGetValue(layerDigest, out var diffId))
    {
        diffId = await _diffIdCache.GetAsync(layerDigest, cancellationToken).ConfigureAwait(false);

        // Promote usable persistent hits so the next lookup is served from memory.
        if (!string.IsNullOrWhiteSpace(diffId))
        {
            _memoryCache[layerDigest] = diffId;
        }
    }

    return diffId;
}
/// <summary>
/// Records a diffID in the memory cache and then in the persistent cache.
/// </summary>
/// <param name="layerDigest">Layer digest used as the cache key.</param>
/// <param name="diffId">DiffID to store.</param>
/// <param name="cancellationToken">Token forwarded to the persistent cache.</param>
public async Task CacheDiffIdAsync(string layerDigest, string diffId, CancellationToken cancellationToken = default)
{
    // Insert or overwrite the in-memory entry before writing through to the persistent cache.
    _memoryCache.AddOrUpdate(layerDigest, diffId, (_, _) => diffId);

    var persistTask = _diffIdCache.SetAsync(layerDigest, diffId, cancellationToken);
    await persistTask.ConfigureAwait(false);
}
/// <summary>
/// Hashes the (optionally gunzipped) content of a layer stream with SHA-256.
/// </summary>
/// <param name="compressedStream">Layer blob stream; it is left open.</param>
/// <param name="mediaType">Layer media type; a value containing "gzip" selects gzip decompression.</param>
/// <param name="cancellationToken">Token observed while reading.</param>
/// <returns>The digest formatted as <c>sha256:</c> followed by lowercase hex.</returns>
private async Task<string> ComputeDiffIdAsync(Stream compressedStream, string mediaType, CancellationToken cancellationToken)
{
    var isGzip = mediaType.Contains("gzip", StringComparison.OrdinalIgnoreCase);
    if (!isGzip && mediaType.Contains("zstd", StringComparison.OrdinalIgnoreCase))
    {
        // ZStd requires external library - for now, treat as uncompressed
        // TODO: Add ZStd support via ZstdSharp or System.IO.Compression.ZstdStream when available
        // NOTE(review): the hash produced on this path covers the compressed bytes, so it is not a
        // real diffID, and the caller caches it; confirm this is acceptable until zstd is supported.
        _logger.LogWarning("Zstd compression not yet supported, treating as uncompressed");
    }

    // Anything that is not gzip (including zstd, see above) is read as-is.
    var payload = isGzip
        ? new GZipStream(compressedStream, CompressionMode.Decompress, leaveOpen: true)
        : compressedStream;

    try
    {
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
        var chunk = new byte[81920]; // 80KB buffer

        while (true)
        {
            var read = await payload.ReadAsync(chunk.AsMemory(), cancellationToken).ConfigureAwait(false);
            if (read <= 0)
            {
                break;
            }

            hasher.AppendData(chunk, 0, read);
        }

        var digest = hasher.GetHashAndReset();
        return $"sha256:{Convert.ToHexString(digest).ToLowerInvariant()}";
    }
    finally
    {
        // Only the wrapper created here is disposed; the caller owns the source stream.
        if (!ReferenceEquals(payload, compressedStream))
        {
            await payload.DisposeAsync().ConfigureAwait(false);
        }
    }
}
/// <summary>
/// Reads the image config blob and returns the <c>created_by</c> command of every history entry
/// that is not marked <c>empty_layer</c>.
/// </summary>
/// <param name="registry">Registry host passed to the registry client.</param>
/// <param name="repository">Repository name passed to the registry client.</param>
/// <param name="configDigest">Digest of the image config blob.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// One entry per non-empty history item, in document order (null when the item has no
/// <c>created_by</c>). Empty when the blob is missing, has no <c>history</c> array, or cannot be parsed.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Propagated when <paramref name="cancellationToken"/> is cancelled. Cancellation is not reported
/// as "no history".
/// </exception>
private async Task<ImmutableArray<string?>> GetConfigHistoryAsync(
    string registry,
    string repository,
    string configDigest,
    CancellationToken cancellationToken)
{
    try
    {
        using var configStream = await _registryClient.GetBlobAsync(registry, repository, configDigest, cancellationToken)
            .ConfigureAwait(false);
        if (configStream is null)
        {
            return [];
        }

        using var doc = await JsonDocument.ParseAsync(configStream, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        if (doc.RootElement.TryGetProperty("history", out var historyElement) &&
            historyElement.ValueKind == JsonValueKind.Array)
        {
            var history = new List<string?>();
            foreach (var item in historyElement.EnumerateArray())
            {
                // Skip empty layers (created_by for empty layer configs)
                if (item.TryGetProperty("empty_layer", out var emptyLayer) && emptyLayer.GetBoolean())
                {
                    continue;
                }

                string? createdBy = null;
                if (item.TryGetProperty("created_by", out var createdByElement))
                {
                    createdBy = createdByElement.GetString();
                }

                history.Add(createdBy);
            }

            return [.. history];
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // History is best-effort metadata: fetch and parse failures fall back to an empty result.
        // Cancellation is excluded so a cancelled request propagates to the caller.
        _logger.LogDebug(ex, "Failed to parse config history for {ConfigDigest}", configDigest);
    }

    return [];
}
/// <summary>
/// Splits an image reference into registry host, repository path and reference (tag or digest).
/// </summary>
/// <param name="imageReference">
/// Reference such as <c>alpine</c>, <c>org/app:1.0</c>, <c>ghcr.io/org/app@sha256:...</c>
/// or <c>alpine:3.19@sha256:...</c>.
/// </param>
/// <returns>
/// The registry (defaulting to <c>registry-1.docker.io</c>), the repository (prefixed with
/// <c>library/</c> for single-segment Docker Hub names) and the reference. A digest takes
/// precedence over a tag; with neither present the reference is <c>latest</c>.
/// </returns>
private static (string Registry, string Repository, string Reference) ParseImageReference(string imageReference)
{
    const string DockerHubRegistry = "registry-1.docker.io";

    var imagePath = imageReference;

    // Digest part: everything after '@'.
    string? digest = null;
    var digestIdx = imagePath.IndexOf('@');
    if (digestIdx > 0)
    {
        digest = imagePath[(digestIdx + 1)..];
        imagePath = imagePath[..digestIdx];
    }

    // Tag part: a ':' after the last '/' (a ':' before it belongs to a registry port).
    // This is also stripped when a digest is present, so "name:tag@digest" does not leave
    // ":tag" inside the repository name.
    string? tag = null;
    var tagIdx = imagePath.LastIndexOf(':');
    if (tagIdx > 0 && tagIdx > imagePath.LastIndexOf('/'))
    {
        tag = imagePath[(tagIdx + 1)..];
        imagePath = imagePath[..tagIdx];
    }

    var reference = digest ?? tag ?? "latest";

    // The first path segment is a registry host when it looks like one: it contains a dot
    // or a port separator, or it is exactly "localhost".
    var firstSlash = imagePath.IndexOf('/');
    if (firstSlash > 0)
    {
        var host = imagePath[..firstSlash];
        if (host.Contains('.') || host.Contains(':') || string.Equals(host, "localhost", StringComparison.Ordinal))
        {
            return (host, imagePath[(firstSlash + 1)..], reference);
        }
    }

    // Default to Docker Hub; official images live under the implicit "library/" namespace.
    var repository = imagePath.Contains('/') ? imagePath : $"library/{imagePath}";
    return (DockerHubRegistry, repository, reference);
}
}

View File

@@ -0,0 +1,144 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Manifest.Resolution;
/// <summary>
/// Tracks the provenance (origin and lineage) of a single image layer as observed in one image.
/// Immutable: all properties are init-only.
/// </summary>
public sealed record LayerProvenance
{
/// <summary>
/// The compressed layer digest (sha256:...). Required.
/// </summary>
public required string LayerDigest { get; init; }
/// <summary>
/// The uncompressed layer content hash (diffID).
/// Null when it has not been computed yet.
/// </summary>
public string? DiffId { get; init; }
/// <summary>
/// The image reference where this layer was observed. Required.
/// </summary>
public required string SourceImage { get; init; }
/// <summary>
/// Zero-based index of this layer in the image.
/// </summary>
public int LayerIndex { get; init; }
/// <summary>
/// The base image that introduced this layer, if known; otherwise null.
/// For example, "alpine:3.19" for the first few layers of an Alpine-based image.
/// </summary>
public string? IntroducedBy { get; init; }
/// <summary>
/// When this provenance record was captured.
/// Defaults to the UTC time at which the record instance is created.
/// </summary>
public DateTimeOffset CapturedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// Size of the compressed layer in bytes.
/// </summary>
public long Size { get; init; }
/// <summary>
/// Media type of the layer (e.g., application/vnd.oci.image.layer.v1.tar+gzip), if known.
/// </summary>
public string? MediaType { get; init; }
/// <summary>
/// History command that created this layer (from image config), if available.
/// </summary>
public string? CreatedByCommand { get; init; }
}
/// <summary>
/// The outcome of resolving an image: its identifying digests plus the ordered layer list
/// with per-layer provenance.
/// </summary>
public sealed record ResolvedImageLayers
{
    /// <summary>
    /// Image reference that was resolved.
    /// </summary>
    public required string ImageReference { get; init; }

    /// <summary>
    /// Manifest digest of the resolved image.
    /// </summary>
    public required string ManifestDigest { get; init; }

    /// <summary>
    /// Config digest of the resolved image.
    /// </summary>
    public required string ConfigDigest { get; init; }

    /// <summary>
    /// Platform of the resolved image (relevant for multi-arch), or null.
    /// </summary>
    public string? Platform { get; init; }

    /// <summary>
    /// Layers in order, each with provenance information; index 0 is the base layer.
    /// Defaults to an empty array.
    /// </summary>
    public ImmutableArray<LayerProvenance> Layers { get; init; } = [];

    /// <summary>
    /// Number of entries in <see cref="Layers"/>.
    /// </summary>
    public int LayerCount => Layers.Length;

    /// <summary>
    /// Sum of the compressed sizes of all layers.
    /// </summary>
    public long TotalSize => Layers.Sum(static layer => layer.Size);

    /// <summary>
    /// True when no layer has a null, empty or whitespace diffID (also true for zero layers).
    /// </summary>
    public bool AllDiffIdsResolved => !Layers.Any(static layer => string.IsNullOrWhiteSpace(layer.DiffId));

    /// <summary>
    /// When the resolution was performed; defaults to the UTC time the record instance is created.
    /// </summary>
    public DateTimeOffset ResolvedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Describes one layer (identified by diffID) that is shared across multiple images.
/// Immutable: all properties are init-only.
/// </summary>
public sealed record SharedLayerInfo
{
/// <summary>
/// The diffID (uncompressed content hash) that is shared. Required.
/// </summary>
public required string DiffId { get; init; }
/// <summary>
/// Number of images that contain this layer.
/// </summary>
public int ImageCount { get; init; }
/// <summary>
/// Sample of image references containing this layer.
/// Defaults to an empty array.
/// </summary>
public ImmutableArray<string> SampleImages { get; init; } = [];
/// <summary>
/// Likely base image origin if detected; otherwise null.
/// </summary>
public string? LikelyBaseImage { get; init; }
/// <summary>
/// Size of the layer (compressed).
/// </summary>
public long Size { get; init; }
/// <summary>
/// Whether this layer is from a known base image.
/// </summary>
public bool IsKnownBaseLayer { get; init; }
}

View File

@@ -0,0 +1,245 @@
namespace StellaOps.Scanner.Manifest.Reuse;
/// <summary>
/// Detects layer reuse across images for scan deduplication.
/// Layers are identified by their diffID throughout this contract.
/// </summary>
public interface ILayerReuseDetector
{
/// <summary>
/// Detects reuse information for layers in an image.
/// </summary>
/// <param name="imageReference">Image reference to analyze.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A report containing reuse information for each layer of the image.</returns>
Task<LayerReuseReport> DetectReuseAsync(
string imageReference,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the scan status for a layer by its diffID.
/// </summary>
/// <param name="diffId">Layer diffID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Scan status information, or null when no status is available for the layer.</returns>
Task<LayerScanStatus?> GetLayerScanStatusAsync(
string diffId,
CancellationToken cancellationToken = default);
/// <summary>
/// Records that a layer has been scanned.
/// </summary>
/// <param name="diffId">Layer diffID.</param>
/// <param name="scanResult">Result of the scan.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A task that completes when the scan has been recorded.</returns>
Task RecordLayerScanAsync(
string diffId,
LayerScanResult scanResult,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets reuse statistics for monitoring.
/// </summary>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Aggregate reuse statistics.</returns>
Task<LayerReuseStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default);
}
/// <summary>
/// Per-image summary of how many layers can be reused from earlier scans.
/// </summary>
public sealed record LayerReuseReport
{
    /// <summary>
    /// Image reference the report was produced for.
    /// </summary>
    public required string ImageReference { get; init; }

    /// <summary>
    /// Total number of layers in the image.
    /// </summary>
    public int TotalLayers { get; init; }

    /// <summary>
    /// Number of layers that can be skipped (already scanned).
    /// </summary>
    public int SkippableLayers { get; init; }

    /// <summary>
    /// Number of layers that are from known base images.
    /// </summary>
    public int BaseImageLayers { get; init; }

    /// <summary>
    /// Per-layer reuse details.
    /// </summary>
    public required IReadOnlyList<LayerReuseInfo> Layers { get; init; }

    /// <summary>
    /// <see cref="SkippableLayers"/> divided by <see cref="TotalLayers"/>;
    /// 0 when <see cref="TotalLayers"/> is not positive.
    /// </summary>
    public double ReuseRatio => TotalLayers <= 0 ? 0d : SkippableLayers / (double)TotalLayers;

    /// <summary>
    /// When the report was generated; defaults to the UTC time the record instance is created.
    /// </summary>
    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Reuse information for a single layer of an analyzed image.
/// Immutable: all properties are init-only.
/// </summary>
public sealed record LayerReuseInfo
{
/// <summary>
/// Compressed layer digest. Required.
/// </summary>
public required string LayerDigest { get; init; }
/// <summary>
/// Uncompressed content hash (diffID). Required.
/// </summary>
public required string DiffId { get; init; }
/// <summary>
/// Layer index in the image.
/// </summary>
public int LayerIndex { get; init; }
/// <summary>
/// Number of times this layer appears across all scanned images.
/// </summary>
public int ReuseCount { get; init; }
/// <summary>
/// When this layer was last scanned; null if it has not been scanned.
/// </summary>
public DateTimeOffset? LastScannedAt { get; init; }
/// <summary>
/// Known base image that introduced this layer, if detected; otherwise null.
/// </summary>
public string? KnownBaseImage { get; init; }
/// <summary>
/// Whether this layer can be skipped in scanning.
/// </summary>
public bool CanSkip { get; init; }
/// <summary>
/// Human-readable reason for the skip decision, if one was recorded.
/// </summary>
public string? SkipReason { get; init; }
/// <summary>
/// Size of the layer (compressed).
/// </summary>
public long Size { get; init; }
}
/// <summary>
/// Scan status for a layer, keyed by diffID.
/// Immutable: all properties are init-only.
/// </summary>
public sealed record LayerScanStatus
{
/// <summary>
/// Layer diffID. Required.
/// </summary>
public required string DiffId { get; init; }
/// <summary>
/// Whether the layer has been scanned.
/// </summary>
public bool HasBeenScanned { get; init; }
/// <summary>
/// When the layer was last scanned; null when no scan time is available.
/// </summary>
public DateTimeOffset? LastScannedAt { get; init; }
/// <summary>
/// Number of findings from the last scan; null when not available.
/// </summary>
public int? FindingCount { get; init; }
/// <summary>
/// Scanner that performed the last scan; null when not available.
/// </summary>
public string? ScannedBy { get; init; }
/// <summary>
/// Version of the scanner used; null when not available.
/// </summary>
public string? ScannerVersion { get; init; }
/// <summary>
/// Number of images containing this layer.
/// </summary>
public int ImageCount { get; init; }
}
/// <summary>
/// Result of scanning a layer, as supplied when recording a completed scan.
/// Immutable: all properties are init-only.
/// </summary>
public sealed record LayerScanResult
{
/// <summary>
/// Number of findings (vulnerabilities) found.
/// </summary>
public int FindingCount { get; init; }
/// <summary>
/// Scanner that performed the scan. Required.
/// </summary>
public required string ScannedBy { get; init; }
/// <summary>
/// Version of the scanner, if known.
/// </summary>
public string? ScannerVersion { get; init; }
/// <summary>
/// When the scan was performed.
/// Defaults to the UTC time at which the record instance is created.
/// </summary>
public DateTimeOffset ScannedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Aggregate statistics about layer reuse, intended for monitoring.
/// Immutable: all properties are init-only; a default instance has zero counts and an empty list.
/// </summary>
public sealed record LayerReuseStatistics
{
/// <summary>
/// Total number of unique layers tracked.
/// </summary>
public long TotalUniqueLayers { get; init; }
/// <summary>
/// Total number of images tracked.
/// </summary>
public long TotalImages { get; init; }
/// <summary>
/// Number of layers from known base images.
/// </summary>
public long BaseImageLayers { get; init; }
/// <summary>
/// Average reuse ratio across all scanned images.
/// </summary>
public double AverageReuseRatio { get; init; }
/// <summary>
/// Number of scan operations saved due to reuse.
/// </summary>
public long ScansSaved { get; init; }
/// <summary>
/// Most reused layers (top 10), as (diffID, count) pairs.
/// Defaults to an empty list.
/// </summary>
public IReadOnlyList<(string DiffId, int Count)> MostReusedLayers { get; init; } = [];
/// <summary>
/// When statistics were computed.
/// Defaults to the UTC time at which the record instance is created.
/// </summary>
public DateTimeOffset ComputedAt { get; init; } = DateTimeOffset.UtcNow;
}

View File

@@ -0,0 +1,324 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Scanner.Manifest.Resolution;
namespace StellaOps.Scanner.Manifest.Reuse;
/// <summary>
/// Detects layer reuse across images for scan deduplication.
/// Tracks layer scan history and base image detection.
/// </summary>
/// <remarks>
/// State lives in PostgreSQL (<c>scanner.layer_scans</c> and <c>scanner.layer_reuse_counts</c>).
/// Every database method tolerates the tables not existing yet (SQLSTATE 42P01) and degrades to
/// "nothing known" instead of failing.
/// </remarks>
public sealed class LayerReuseDetector : ILayerReuseDetector
{
    private const string SchemaName = "scanner";
    private const string LayerScansTable = $"{SchemaName}.layer_scans";
    private const string LayerReuseTable = $"{SchemaName}.layer_reuse_counts";

    /// <summary>PostgreSQL SQLSTATE for "undefined_table".</summary>
    private const string UndefinedTableSqlState = "42P01";

    private readonly NpgsqlDataSource _dataSource;
    private readonly ILayerDigestResolver _layerResolver;
    private readonly IBaseImageDetector _baseImageDetector;
    private readonly ILogger<LayerReuseDetector> _logger;

    /// <summary>
    /// Creates the detector.
    /// </summary>
    /// <param name="dataSource">Data source used to open PostgreSQL connections.</param>
    /// <param name="layerResolver">Resolves the layers (and diffIDs) of an image.</param>
    /// <param name="baseImageDetector">Answers whether a diffID belongs to a known base image.</param>
    /// <param name="logger">Logger.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public LayerReuseDetector(
        NpgsqlDataSource dataSource,
        ILayerDigestResolver layerResolver,
        IBaseImageDetector baseImageDetector,
        ILogger<LayerReuseDetector> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _layerResolver = layerResolver ?? throw new ArgumentNullException(nameof(layerResolver));
        _baseImageDetector = baseImageDetector ?? throw new ArgumentNullException(nameof(baseImageDetector));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Resolves the layers of <paramref name="imageReference"/> and reports, per layer, whether
    /// it was scanned before and can be skipped. It also increments the reuse counter of every
    /// layer that has a diffID.
    /// </summary>
    /// <param name="imageReference">Image reference to analyze.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The reuse report for the image.</returns>
    /// <exception cref="ArgumentException"><paramref name="imageReference"/> is null or whitespace.</exception>
    public async Task<LayerReuseReport> DetectReuseAsync(
        string imageReference,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(imageReference);

        _logger.LogInformation("Detecting layer reuse for {Image}", imageReference);

        // Resolve layers for the image
        var resolved = await _layerResolver.ResolveLayersAsync(
            imageReference,
            new LayerResolutionOptions { ComputeDiffIds = true, DetectBaseImage = true },
            cancellationToken).ConfigureAwait(false);

        var layers = new List<LayerReuseInfo>();
        var skippable = 0;
        var baseImageCount = 0;

        foreach (var layer in resolved.Layers)
        {
            if (string.IsNullOrWhiteSpace(layer.DiffId))
            {
                // Without a diffID the layer cannot be matched against scan history.
                layers.Add(new LayerReuseInfo
                {
                    LayerDigest = layer.LayerDigest,
                    DiffId = string.Empty,
                    LayerIndex = layer.LayerIndex,
                    ReuseCount = 0,
                    CanSkip = false,
                    SkipReason = "DiffID not computed",
                    Size = layer.Size
                });
                continue;
            }

            // Get scan status for this layer
            var scanStatus = await GetLayerScanStatusAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);
            var isKnownBase = await _baseImageDetector.IsKnownBaseLayerAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);

            // Base-image membership is counted independently of scan history, so that
            // BaseImageLayers matches its documented meaning.
            if (isKnownBase)
            {
                baseImageCount++;
            }

            var canSkip = scanStatus?.HasBeenScanned == true;
            string? skipReason = null;
            if (canSkip)
            {
                skippable++;
                skipReason = isKnownBase
                    ? $"Known base layer (last scanned {scanStatus?.LastScannedAt:yyyy-MM-dd})"
                    : $"Previously scanned on {scanStatus?.LastScannedAt:yyyy-MM-dd}";
            }

            layers.Add(new LayerReuseInfo
            {
                LayerDigest = layer.LayerDigest,
                DiffId = layer.DiffId,
                LayerIndex = layer.LayerIndex,
                ReuseCount = scanStatus?.ImageCount ?? 0,
                LastScannedAt = scanStatus?.LastScannedAt,
                KnownBaseImage = layer.IntroducedBy,
                CanSkip = canSkip,
                SkipReason = skipReason,
                Size = layer.Size
            });

            // Increment reuse count for this layer (after reading it, so the report shows the prior count)
            await IncrementReuseCountAsync(layer.DiffId, cancellationToken).ConfigureAwait(false);
        }

        var report = new LayerReuseReport
        {
            ImageReference = imageReference,
            TotalLayers = resolved.LayerCount,
            SkippableLayers = skippable,
            BaseImageLayers = baseImageCount,
            Layers = layers
        };

        _logger.LogInformation(
            "Layer reuse for {Image}: {Skippable}/{Total} skippable ({Ratio:P1} reuse)",
            imageReference, skippable, resolved.LayerCount, report.ReuseRatio);

        return report;
    }

    /// <summary>
    /// Returns the latest scan record for a layer, or a reuse count only when the layer has
    /// been seen but never scanned.
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A status with <c>HasBeenScanned = true</c> when a scan row exists; a status with
    /// <c>HasBeenScanned = false</c> when only a reuse count exists; null when nothing is known
    /// or the tables do not exist.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="diffId"/> is null or whitespace.</exception>
    public async Task<LayerScanStatus?> GetLayerScanStatusAsync(
        string diffId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);

        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            SELECT
            ls.diff_id,
            ls.scanned_at,
            ls.finding_count,
            ls.scanned_by,
            ls.scanner_version,
            COALESCE(lr.reuse_count, 0) as image_count
            FROM {LayerScansTable} ls
            LEFT JOIN {LayerReuseTable} lr ON ls.diff_id = lr.diff_id
            WHERE ls.diff_id = @diffId
            ORDER BY ls.scanned_at DESC
            LIMIT 1
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);

        try
        {
            // The reader is scoped explicitly: a connection allows only one active command,
            // so it must be disposed before the follow-up query below runs.
            await using (var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false))
            {
                if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    return new LayerScanStatus
                    {
                        DiffId = reader.GetString(0),
                        HasBeenScanned = true,
                        LastScannedAt = reader.GetFieldValue<DateTimeOffset>(1),
                        FindingCount = reader.IsDBNull(2) ? null : reader.GetInt32(2),
                        ScannedBy = reader.IsDBNull(3) ? null : reader.GetString(3),
                        ScannerVersion = reader.IsDBNull(4) ? null : reader.GetString(4),
                        ImageCount = reader.GetInt32(5)
                    };
                }
            }

            // Check if we have reuse count without scan history
            await using var countCmd = conn.CreateCommand();
            countCmd.CommandText = $"SELECT reuse_count FROM {LayerReuseTable} WHERE diff_id = @diffId";
            countCmd.Parameters.AddWithValue("diffId", diffId);

            var countResult = await countCmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
            if (countResult is int count)
            {
                return new LayerScanStatus
                {
                    DiffId = diffId,
                    HasBeenScanned = false,
                    ImageCount = count
                };
            }

            return null;
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState) // Table doesn't exist
        {
            _logger.LogDebug("Layer scan tables not yet created");
            return null;
        }
    }

    /// <summary>
    /// Inserts or overwrites the scan record for a layer (one row per diffID).
    /// </summary>
    /// <param name="diffId">Layer diffID.</param>
    /// <param name="scanResult">Result of the scan.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="ArgumentException"><paramref name="diffId"/> is null or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="scanResult"/> is null.</exception>
    public async Task RecordLayerScanAsync(
        string diffId,
        LayerScanResult scanResult,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(diffId);
        ArgumentNullException.ThrowIfNull(scanResult);

        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            INSERT INTO {LayerScansTable} (diff_id, scanned_at, finding_count, scanned_by, scanner_version)
            VALUES (@diffId, @scannedAt, @findingCount, @scannedBy, @scannerVersion)
            ON CONFLICT (diff_id) DO UPDATE SET
            scanned_at = EXCLUDED.scanned_at,
            finding_count = EXCLUDED.finding_count,
            scanned_by = EXCLUDED.scanned_by,
            scanner_version = EXCLUDED.scanner_version
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("scannedAt", scanResult.ScannedAt);
        cmd.Parameters.AddWithValue("findingCount", scanResult.FindingCount);
        cmd.Parameters.AddWithValue("scannedBy", scanResult.ScannedBy);
        cmd.Parameters.AddWithValue("scannerVersion", scanResult.ScannerVersion ?? (object)DBNull.Value);

        try
        {
            await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogDebug("Recorded scan for layer {DiffId}: {FindingCount} findings", diffId, scanResult.FindingCount);
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogWarning("Layer scan tables not yet created. Run migrations.");
        }
    }

    /// <summary>
    /// Computes aggregate reuse statistics from the tracking tables.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// The statistics, or a default (all-zero) instance when the tables do not exist yet.
    /// </returns>
    public async Task<LayerReuseStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default)
    {
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            // Get basic counts
            await using var countsCmd = conn.CreateCommand();
            countsCmd.CommandText = $"""
                SELECT
                (SELECT COUNT(*) FROM {LayerReuseTable}) as total_layers,
                (SELECT COUNT(DISTINCT image_reference) FROM {SchemaName}.image_layers) as total_images,
                (SELECT COUNT(*) FROM {SchemaName}.scanner_base_image_layers) as base_layers,
                (SELECT SUM(reuse_count) FROM {LayerReuseTable} WHERE reuse_count > 1) as scans_saved
                """;

            long totalLayers = 0, totalImages = 0, baseLayers = 0, scansSaved = 0;
            await using (var reader = await countsCmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false))
            {
                if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    totalLayers = reader.IsDBNull(0) ? 0 : reader.GetInt64(0);
                    totalImages = reader.IsDBNull(1) ? 0 : reader.GetInt64(1);
                    baseLayers = reader.IsDBNull(2) ? 0 : reader.GetInt64(2);
                    scansSaved = reader.IsDBNull(3) ? 0 : reader.GetInt64(3);
                }
            }

            // Get most reused layers
            await using var topCmd = conn.CreateCommand();
            topCmd.CommandText = $"""
                SELECT diff_id, reuse_count
                FROM {LayerReuseTable}
                ORDER BY reuse_count DESC
                LIMIT 10
                """;

            var mostReused = new List<(string, int)>();
            await using (var reader = await topCmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false))
            {
                while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    mostReused.Add((reader.GetString(0), reader.GetInt32(1)));
                }
            }

            // Calculate average reuse ratio: scans saved per unique layer, clamped to 1.0 below.
            // Same value as the earlier (totalImages * totalLayers / totalImages) form, without
            // the redundant multiply/divide.
            // NOTE(review): scans_saved sums full reuse counts (not count - 1); confirm that is intended.
            double avgReuseRatio = 0;
            if (totalImages > 0 && totalLayers > 0)
            {
                avgReuseRatio = (double)scansSaved / totalLayers;
            }

            return new LayerReuseStatistics
            {
                TotalUniqueLayers = totalLayers,
                TotalImages = totalImages,
                BaseImageLayers = baseLayers,
                AverageReuseRatio = Math.Min(1.0, avgReuseRatio),
                ScansSaved = scansSaved,
                MostReusedLayers = mostReused
            };
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            _logger.LogDebug("Statistics tables not yet created");
            return new LayerReuseStatistics();
        }
    }

    /// <summary>
    /// Upserts the reuse counter for a layer: inserts it with a count of 1 or increments the
    /// existing count. A missing table is tolerated.
    /// </summary>
    private async Task IncrementReuseCountAsync(string diffId, CancellationToken cancellationToken)
    {
        await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = $"""
            INSERT INTO {LayerReuseTable} (diff_id, reuse_count, first_seen_at)
            VALUES (@diffId, 1, @now)
            ON CONFLICT (diff_id) DO UPDATE SET
            reuse_count = {LayerReuseTable}.reuse_count + 1
            """;
        cmd.Parameters.AddWithValue("diffId", diffId);
        cmd.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);

        try
        {
            await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (PostgresException ex) when (ex.SqlState == UndefinedTableSqlState)
        {
            // Table doesn't exist yet: reuse counting is best-effort, but leave a trace like the other methods do.
            _logger.LogDebug("Layer reuse table not yet created; reuse count for {DiffId} not recorded", diffId);
        }
    }
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: .NET 10 target, preview C# language version, implicit usings,
     nullable reference types enabled, and all warnings treated as errors. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>
<!-- NuGet dependencies. No Version attributes are given here; presumably versions are
     managed centrally (e.g. Directory.Packages.props) - TODO confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Npgsql" />
</ItemGroup>
<!-- Project references: sibling Scanner libraries plus the shared Postgres infrastructure library. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Contracts\StellaOps.Scanner.Contracts.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Registry\StellaOps.Scanner.Registry.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Storage\StellaOps.Scanner.Storage.csproj" />
<ProjectReference Include="..\StellaOps.Scanner.Storage.Oci\StellaOps.Scanner.Storage.Oci.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>