sprints and audit work
This commit is contained in:
379
src/__Libraries/StellaOps.Facet/GlobFacetExtractor.cs
Normal file
379
src/__Libraries/StellaOps.Facet/GlobFacetExtractor.cs
Normal file
@@ -0,0 +1,379 @@
|
||||
// <copyright file="GlobFacetExtractor.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics;
|
||||
using System.Formats.Tar;
|
||||
using System.IO.Compression;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace StellaOps.Facet;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts facets from container images using glob pattern matching.
|
||||
/// </summary>
|
||||
public sealed class GlobFacetExtractor : IFacetExtractor
|
||||
{
|
||||
private readonly FacetSealer _sealer;
|
||||
private readonly ICryptoHash _cryptoHash;
|
||||
private readonly ILogger<GlobFacetExtractor> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="GlobFacetExtractor"/> class.
|
||||
/// </summary>
|
||||
/// <param name="timeProvider">Time provider for timestamps.</param>
|
||||
/// <param name="cryptoHash">Hash implementation.</param>
|
||||
/// <param name="logger">Logger instance.</param>
|
||||
public GlobFacetExtractor(
|
||||
TimeProvider? timeProvider = null,
|
||||
ICryptoHash? cryptoHash = null,
|
||||
ILogger<GlobFacetExtractor>? logger = null)
|
||||
{
|
||||
_cryptoHash = cryptoHash ?? new DefaultCryptoHash();
|
||||
_sealer = new FacetSealer(timeProvider, cryptoHash);
|
||||
_logger = logger ?? NullLogger<GlobFacetExtractor>.Instance;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public async Task<FacetExtractionResult> ExtractFromDirectoryAsync(
|
||||
string rootPath,
|
||||
FacetExtractionOptions? options = null,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);
|
||||
|
||||
if (!Directory.Exists(rootPath))
|
||||
{
|
||||
throw new DirectoryNotFoundException($"Directory not found: {rootPath}");
|
||||
}
|
||||
|
||||
options ??= FacetExtractionOptions.Default;
|
||||
var sw = Stopwatch.StartNew();
|
||||
|
||||
var facets = options.Facets.IsDefault || options.Facets.IsEmpty
|
||||
? BuiltInFacets.All.ToList()
|
||||
: options.Facets.ToList();
|
||||
|
||||
var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
|
||||
var excludeMatcher = options.ExcludePatterns.Length > 0
|
||||
? new GlobMatcher(options.ExcludePatterns)
|
||||
: null;
|
||||
|
||||
var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
|
||||
var unmatchedFiles = new List<FacetFileEntry>();
|
||||
var skippedFiles = new List<SkippedFile>();
|
||||
var warnings = new List<string>();
|
||||
|
||||
int totalFilesProcessed = 0;
|
||||
long totalBytes = 0;
|
||||
|
||||
foreach (var filePath in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories))
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
var relativePath = GetRelativePath(rootPath, filePath);
|
||||
|
||||
// Check exclusion patterns
|
||||
if (excludeMatcher?.IsMatch(relativePath) == true)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(relativePath, "Matched exclusion pattern"));
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var fileInfo = new FileInfo(filePath);
|
||||
|
||||
// Skip symlinks if not following
|
||||
if (!options.FollowSymlinks && fileInfo.LinkTarget is not null)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(relativePath, "Symlink"));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip files too large
|
||||
if (fileInfo.Length > options.MaxFileSizeBytes)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(relativePath, $"Exceeds max size ({fileInfo.Length} > {options.MaxFileSizeBytes})"));
|
||||
continue;
|
||||
}
|
||||
|
||||
totalFilesProcessed++;
|
||||
totalBytes += fileInfo.Length;
|
||||
|
||||
var entry = await CreateFileEntryAsync(filePath, relativePath, fileInfo, options.HashAlgorithm, ct)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
bool matched = false;
|
||||
foreach (var facet in facets)
|
||||
{
|
||||
if (matchers[facet.FacetId].IsMatch(relativePath))
|
||||
{
|
||||
facetFiles[facet.FacetId].Add(entry);
|
||||
matched = true;
|
||||
// Don't break - a file can match multiple facets
|
||||
}
|
||||
}
|
||||
|
||||
if (!matched)
|
||||
{
|
||||
unmatchedFiles.Add(entry);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to process file: {Path}", relativePath);
|
||||
skippedFiles.Add(new SkippedFile(relativePath, ex.Message));
|
||||
}
|
||||
}
|
||||
|
||||
sw.Stop();
|
||||
|
||||
return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public async Task<FacetExtractionResult> ExtractFromTarAsync(
|
||||
Stream tarStream,
|
||||
FacetExtractionOptions? options = null,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(tarStream);
|
||||
|
||||
options ??= FacetExtractionOptions.Default;
|
||||
var sw = Stopwatch.StartNew();
|
||||
|
||||
var facets = options.Facets.IsDefault || options.Facets.IsEmpty
|
||||
? BuiltInFacets.All.ToList()
|
||||
: options.Facets.ToList();
|
||||
|
||||
var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
|
||||
var excludeMatcher = options.ExcludePatterns.Length > 0
|
||||
? new GlobMatcher(options.ExcludePatterns)
|
||||
: null;
|
||||
|
||||
var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
|
||||
var unmatchedFiles = new List<FacetFileEntry>();
|
||||
var skippedFiles = new List<SkippedFile>();
|
||||
var warnings = new List<string>();
|
||||
|
||||
int totalFilesProcessed = 0;
|
||||
long totalBytes = 0;
|
||||
|
||||
using var tarReader = new TarReader(tarStream, leaveOpen: true);
|
||||
|
||||
while (await tarReader.GetNextEntryAsync(copyData: false, ct).ConfigureAwait(false) is { } tarEntry)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
// Skip non-regular files
|
||||
if (tarEntry.EntryType != TarEntryType.RegularFile &&
|
||||
tarEntry.EntryType != TarEntryType.V7RegularFile)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var path = NormalizeTarPath(tarEntry.Name);
|
||||
|
||||
if (excludeMatcher?.IsMatch(path) == true)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(path, "Matched exclusion pattern"));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tarEntry.Length > options.MaxFileSizeBytes)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(path, $"Exceeds max size ({tarEntry.Length} > {options.MaxFileSizeBytes})"));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip symlinks if not following
|
||||
if (!options.FollowSymlinks && tarEntry.EntryType == TarEntryType.SymbolicLink)
|
||||
{
|
||||
skippedFiles.Add(new SkippedFile(path, "Symlink"));
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
totalFilesProcessed++;
|
||||
totalBytes += tarEntry.Length;
|
||||
|
||||
var entry = await CreateFileEntryFromTarAsync(tarEntry, path, options.HashAlgorithm, ct)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
bool matched = false;
|
||||
foreach (var facet in facets)
|
||||
{
|
||||
if (matchers[facet.FacetId].IsMatch(path))
|
||||
{
|
||||
facetFiles[facet.FacetId].Add(entry);
|
||||
matched = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matched)
|
||||
{
|
||||
unmatchedFiles.Add(entry);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or InvalidDataException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to process tar entry: {Path}", path);
|
||||
skippedFiles.Add(new SkippedFile(path, ex.Message));
|
||||
}
|
||||
}
|
||||
|
||||
sw.Stop();
|
||||
|
||||
return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
public async Task<FacetExtractionResult> ExtractFromOciLayerAsync(
    Stream layerStream,
    FacetExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(layerStream);

    // An OCI layer is a gzip-compressed tar archive: unwrap the gzip
    // envelope (leaving the caller's stream open) and reuse the tar path.
    await using var decompressed = new GZipStream(layerStream, CompressionMode.Decompress, leaveOpen: true);
    return await ExtractFromTarAsync(decompressed, options, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Creates a <see cref="FacetFileEntry"/> for a file on disk by hashing its
/// contents with the configured hash implementation.
/// </summary>
/// <param name="fullPath">Absolute path used to open the file.</param>
/// <param name="relativePath">Normalized path recorded in the entry.</param>
/// <param name="fileInfo">Metadata source for size and mtime.</param>
/// <param name="algorithm">Hash algorithm name (becomes the digest prefix).</param>
/// <param name="ct">Cancellation token.</param>
private async Task<FacetFileEntry> CreateFileEntryAsync(
    string fullPath,
    string relativePath,
    FileInfo fileInfo,
    string algorithm,
    CancellationToken ct)
{
    byte[] hash;
    await using (var stream = File.OpenRead(fullPath))
    {
        hash = await _cryptoHash.ComputeHashAsync(stream, algorithm, ct).ConfigureAwait(false);
    }

    return new FacetFileEntry(
        relativePath,
        FormatDigest(hash, algorithm),
        fileInfo.Length,
        fileInfo.LastWriteTimeUtc);
}
|
||||
|
||||
/// <summary>
/// Creates a <see cref="FacetFileEntry"/> for a tar entry by hashing its data
/// stream; an entry without a data stream is treated as an empty file.
/// </summary>
/// <param name="entry">Tar entry whose content is hashed.</param>
/// <param name="path">Normalized path recorded in the entry.</param>
/// <param name="algorithm">Hash algorithm name (becomes the digest prefix).</param>
/// <param name="ct">Cancellation token.</param>
private async Task<FacetFileEntry> CreateFileEntryFromTarAsync(
    TarEntry entry,
    string path,
    string algorithm,
    CancellationToken ct)
{
    if (entry.DataStream is not { } dataStream)
    {
        // No data stream: hash the empty input so empty files still get a
        // well-defined digest.
        var emptyHash = await _cryptoHash.ComputeHashAsync(Stream.Null, algorithm, ct).ConfigureAwait(false);
        return new FacetFileEntry(path, FormatDigest(emptyHash, algorithm), 0, entry.ModificationTime);
    }

    var contentHash = await _cryptoHash.ComputeHashAsync(dataStream, algorithm, ct).ConfigureAwait(false);
    return new FacetFileEntry(path, FormatDigest(contentHash, algorithm), entry.Length, entry.ModificationTime);
}
|
||||
|
||||
/// <summary>
/// Renders a raw hash as a digest string of the form
/// "&lt;algorithm&gt;:&lt;lowercase-hex&gt;" (e.g. "sha256:ab12...").
/// </summary>
private static string FormatDigest(byte[] hashBytes, string algorithm)
    => $"{algorithm.ToLowerInvariant()}:{Convert.ToHexString(hashBytes).ToLowerInvariant()}";
|
||||
|
||||
/// <summary>
/// Assembles the final extraction result: seals each non-empty facet,
/// orders everything deterministically, computes the combined Merkle root,
/// and packages statistics.
/// </summary>
/// <param name="facets">Facets that were matched against.</param>
/// <param name="facetFiles">Collected file entries keyed by facet id.</param>
/// <param name="unmatchedFiles">Files that matched no facet.</param>
/// <param name="skippedFiles">Files skipped with a reason.</param>
/// <param name="warnings">Accumulated warnings.</param>
/// <param name="totalFilesProcessed">Count of files that were hashed.</param>
/// <param name="totalBytes">Total size of processed files.</param>
/// <param name="duration">Wall-clock extraction duration.</param>
/// <param name="options">Effective extraction options.</param>
private FacetExtractionResult BuildResult(
    List<IFacet> facets,
    Dictionary<string, List<FacetFileEntry>> facetFiles,
    List<FacetFileEntry> unmatchedFiles,
    List<SkippedFile> skippedFiles,
    List<string> warnings,
    int totalFilesProcessed,
    long totalBytes,
    TimeSpan duration,
    FacetExtractionOptions options)
{
    var facetEntries = new List<FacetEntry>();
    int filesMatched = 0;

    foreach (var facet in facets)
    {
        var files = facetFiles[facet.FacetId];
        if (files.Count == 0)
        {
            // Facets with no matches are omitted from the result entirely.
            continue;
        }

        // NOTE: a file matching N facets is counted N times here.
        filesMatched += files.Count;

        // Sort files deterministically for consistent Merkle root
        var sortedFiles = files.OrderBy(f => f.Path, StringComparer.Ordinal).ToList();

        var entry = _sealer.CreateFacetEntry(facet, sortedFiles, options.IncludeFileDetails);
        facetEntries.Add(entry);
    }

    // Sort facet entries deterministically
    var sortedFacets = facetEntries.OrderBy(f => f.FacetId, StringComparer.Ordinal).ToImmutableArray();

    // Combined root covers all sealed facets, in their sorted order.
    var merkleTree = new FacetMerkleTree(_cryptoHash);
    var combinedRoot = merkleTree.ComputeCombinedRoot(sortedFacets);

    var stats = new FacetExtractionStats
    {
        TotalFilesProcessed = totalFilesProcessed,
        TotalBytes = totalBytes,
        FilesMatched = filesMatched,
        FilesUnmatched = unmatchedFiles.Count,
        FilesSkipped = skippedFiles.Count,
        Duration = duration
    };

    return new FacetExtractionResult
    {
        Facets = sortedFacets,
        // Unmatched file details are only reported when explicitly requested.
        UnmatchedFiles = options.IncludeFileDetails
            ? [.. unmatchedFiles.OrderBy(f => f.Path, StringComparer.Ordinal)]
            : [],
        SkippedFiles = [.. skippedFiles],
        CombinedMerkleRoot = combinedRoot,
        Stats = stats,
        Warnings = [.. warnings]
    };
}
|
||||
|
||||
/// <summary>
/// Converts an absolute file path into a rooted, Unix-style path relative to
/// <paramref name="rootPath"/> (e.g. "/etc/passwd").
/// </summary>
private static string GetRelativePath(string rootPath, string fullPath)
{
    // Path.GetRelativePath yields platform separators; normalize to '/'
    // before prefixing the root slash.
    var normalized = Path.GetRelativePath(rootPath, fullPath).Replace('\\', '/');
    return string.Concat("/", normalized);
}
|
||||
|
||||
/// <summary>
/// Normalizes a tar entry name to a rooted Unix-style path: strips a single
/// leading "./" and guarantees a leading '/'.
/// </summary>
private static string NormalizeTarPath(string path)
{
    // Tar entries are commonly stored as "./usr/bin/x" or "usr/bin/x".
    var trimmed = path.StartsWith("./", StringComparison.Ordinal)
        ? path[2..]
        : path;

    return trimmed.StartsWith('/') ? trimmed : "/" + trimmed;
}
|
||||
}
|
||||
Reference in New Issue
Block a user