Files
git.stella-ops.org/src/__Libraries/StellaOps.Facet/GlobFacetExtractor.cs
2026-01-07 09:43:12 +02:00

380 lines
13 KiB
C#

// <copyright file="GlobFacetExtractor.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Collections.Immutable;
using System.Diagnostics;
using System.Formats.Tar;
using System.IO.Compression;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace StellaOps.Facet;
/// <summary>
/// Extracts facets from container images using glob pattern matching.
/// </summary>
/// <remarks>
/// Each candidate file is hashed once and then tested against the glob matcher of every facet.
/// A file may be assigned to several facets; files that match none are collected as unmatched.
/// Files that are excluded, are symlinks (when not following), exceed the size limit, or fail to
/// be read are reported as skipped rather than aborting the extraction.
/// </remarks>
public sealed class GlobFacetExtractor : IFacetExtractor
{
    private const string ExclusionReason = "Matched exclusion pattern";
    private const string SymlinkReason = "Symlink";

    private readonly FacetSealer _sealer;
    private readonly ICryptoHash _cryptoHash;
    private readonly ILogger<GlobFacetExtractor> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="GlobFacetExtractor"/> class.
    /// </summary>
    /// <param name="timeProvider">Time provider for timestamps; forwarded to the <see cref="FacetSealer"/>.</param>
    /// <param name="cryptoHash">Hash implementation; a <see cref="DefaultCryptoHash"/> is used when omitted.</param>
    /// <param name="logger">Logger instance; a no-op logger is used when omitted.</param>
    public GlobFacetExtractor(
        TimeProvider? timeProvider = null,
        ICryptoHash? cryptoHash = null,
        ILogger<GlobFacetExtractor>? logger = null)
    {
        _cryptoHash = cryptoHash ?? new DefaultCryptoHash();

        // Hand the sealer the resolved instance (not the possibly-null argument) so that file
        // hashing and facet sealing can never end up on two different hash implementations.
        _sealer = new FacetSealer(timeProvider, _cryptoHash);
        _logger = logger ?? NullLogger<GlobFacetExtractor>.Instance;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Walks every file below <paramref name="rootPath"/> recursively. Paths handed to the matchers
    /// are relative to the root, use '/' as separator and start with a leading '/'.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException"><paramref name="rootPath"/> does not exist.</exception>
    public async Task<FacetExtractionResult> ExtractFromDirectoryAsync(
        string rootPath,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);
        if (!Directory.Exists(rootPath))
        {
            throw new DirectoryNotFoundException($"Directory not found: {rootPath}");
        }

        options ??= FacetExtractionOptions.Default;
        var sw = Stopwatch.StartNew();

        var facets = ResolveFacets(options);
        var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
        var excludeMatcher = options.ExcludePatterns.Length > 0
            ? new GlobMatcher(options.ExcludePatterns)
            : null;

        var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
        var unmatchedFiles = new List<FacetFileEntry>();
        var skippedFiles = new List<SkippedFile>();

        // Nothing in this method adds warnings at present; the list is passed through to the result.
        var warnings = new List<string>();
        int totalFilesProcessed = 0;
        long totalBytes = 0;

        foreach (var filePath in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories))
        {
            ct.ThrowIfCancellationRequested();
            var relativePath = GetRelativePath(rootPath, filePath);

            // Check exclusion patterns
            if (excludeMatcher?.IsMatch(relativePath) == true)
            {
                skippedFiles.Add(new SkippedFile(relativePath, ExclusionReason));
                continue;
            }

            try
            {
                var fileInfo = new FileInfo(filePath);

                // Skip symlinks if not following
                if (!options.FollowSymlinks && fileInfo.LinkTarget is not null)
                {
                    skippedFiles.Add(new SkippedFile(relativePath, SymlinkReason));
                    continue;
                }

                // Skip files too large
                if (fileInfo.Length > options.MaxFileSizeBytes)
                {
                    skippedFiles.Add(new SkippedFile(relativePath, $"Exceeds max size ({fileInfo.Length} > {options.MaxFileSizeBytes})"));
                    continue;
                }

                var entry = await CreateFileEntryAsync(filePath, relativePath, fileInfo, options.HashAlgorithm, ct)
                    .ConfigureAwait(false);

                // Count the file only once hashing succeeded; a file that fails to read is
                // reported as skipped below and must not also show up as processed.
                totalFilesProcessed++;
                totalBytes += fileInfo.Length;

                bool matched = false;
                foreach (var facet in facets)
                {
                    if (matchers[facet.FacetId].IsMatch(relativePath))
                    {
                        facetFiles[facet.FacetId].Add(entry);
                        matched = true;
                        // Don't break - a file can match multiple facets
                    }
                }

                if (!matched)
                {
                    unmatchedFiles.Add(entry);
                }
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                // Best effort: an unreadable file is recorded and the walk continues.
                _logger.LogWarning(ex, "Failed to process file: {Path}", relativePath);
                skippedFiles.Add(new SkippedFile(relativePath, ex.Message));
            }
        }

        sw.Stop();
        return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Reads the archive sequentially and leaves <paramref name="tarStream"/> open. Only regular
    /// file entries are hashed. Symbolic-link entries are reported as skipped when symlinks are not
    /// followed; all other entry types (directories, hard links, devices, ...) are ignored.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="tarStream"/> is null.</exception>
    public async Task<FacetExtractionResult> ExtractFromTarAsync(
        Stream tarStream,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(tarStream);

        options ??= FacetExtractionOptions.Default;
        var sw = Stopwatch.StartNew();

        var facets = ResolveFacets(options);
        var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
        var excludeMatcher = options.ExcludePatterns.Length > 0
            ? new GlobMatcher(options.ExcludePatterns)
            : null;

        var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
        var unmatchedFiles = new List<FacetFileEntry>();
        var skippedFiles = new List<SkippedFile>();

        // Nothing in this method adds warnings at present; the list is passed through to the result.
        var warnings = new List<string>();
        int totalFilesProcessed = 0;
        long totalBytes = 0;

        using var tarReader = new TarReader(tarStream, leaveOpen: true);
        while (await tarReader.GetNextEntryAsync(copyData: false, ct).ConfigureAwait(false) is { } tarEntry)
        {
            ct.ThrowIfCancellationRequested();

            var isRegularFile = tarEntry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile;

            // Symlinks must be recognised before the "regular files only" filter, otherwise they
            // are dropped silently and never reach the skipped list. When symlinks are to be
            // followed there is nothing to hash in the link entry itself, so it is ignored.
            var isUnfollowedSymlink = !options.FollowSymlinks && tarEntry.EntryType == TarEntryType.SymbolicLink;

            if (!isRegularFile && !isUnfollowedSymlink)
            {
                continue;
            }

            var path = NormalizeTarPath(tarEntry.Name);

            // Check exclusion patterns
            if (excludeMatcher?.IsMatch(path) == true)
            {
                skippedFiles.Add(new SkippedFile(path, ExclusionReason));
                continue;
            }

            // Skip symlinks if not following
            if (isUnfollowedSymlink)
            {
                skippedFiles.Add(new SkippedFile(path, SymlinkReason));
                continue;
            }

            // Skip files too large
            if (tarEntry.Length > options.MaxFileSizeBytes)
            {
                skippedFiles.Add(new SkippedFile(path, $"Exceeds max size ({tarEntry.Length} > {options.MaxFileSizeBytes})"));
                continue;
            }

            try
            {
                // The entry data must be consumed here: with copyData=false it is no longer
                // available once the reader advances to the next entry.
                var entry = await CreateFileEntryFromTarAsync(tarEntry, path, options.HashAlgorithm, ct)
                    .ConfigureAwait(false);

                // Count the entry only once hashing succeeded; a failed entry is reported as
                // skipped below and must not also show up as processed.
                totalFilesProcessed++;
                totalBytes += tarEntry.Length;

                bool matched = false;
                foreach (var facet in facets)
                {
                    if (matchers[facet.FacetId].IsMatch(path))
                    {
                        facetFiles[facet.FacetId].Add(entry);
                        matched = true;
                        // Don't break - a file can match multiple facets
                    }
                }

                if (!matched)
                {
                    unmatchedFiles.Add(entry);
                }
            }
            catch (Exception ex) when (ex is IOException or InvalidDataException)
            {
                // Best effort: a broken entry is recorded and reading continues.
                _logger.LogWarning(ex, "Failed to process tar entry: {Path}", path);
                skippedFiles.Add(new SkippedFile(path, ex.Message));
            }
        }

        sw.Stop();
        return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Treats <paramref name="layerStream"/> as a gzip-compressed tar archive and delegates to
    /// <see cref="ExtractFromTarAsync"/>. The supplied stream is left open.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="layerStream"/> is null.</exception>
    public async Task<FacetExtractionResult> ExtractFromOciLayerAsync(
        Stream layerStream,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(layerStream);

        // OCI layers are gzipped tars - decompress then delegate
        await using var gzipStream = new GZipStream(layerStream, CompressionMode.Decompress, leaveOpen: true);
        return await ExtractFromTarAsync(gzipStream, options, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Selects the facets to extract: the ones configured in <paramref name="options"/>, or all
    /// built-in facets when none are configured.
    /// </summary>
    /// <param name="options">Extraction options to read the facet list from.</param>
    /// <returns>A new mutable list of the facets to evaluate.</returns>
    private static List<IFacet> ResolveFacets(FacetExtractionOptions options)
    {
        return options.Facets.IsDefault || options.Facets.IsEmpty
            ? BuiltInFacets.All.ToList()
            : options.Facets.ToList();
    }

    /// <summary>
    /// Hashes a file on disk and builds its <see cref="FacetFileEntry"/>.
    /// </summary>
    /// <param name="fullPath">Path used to open the file.</param>
    /// <param name="relativePath">Normalized path stored in the entry.</param>
    /// <param name="fileInfo">File metadata supplying size and last-write time (UTC).</param>
    /// <param name="algorithm">Hash algorithm name passed to the hash implementation.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The entry describing the file.</returns>
    private async Task<FacetFileEntry> CreateFileEntryAsync(
        string fullPath,
        string relativePath,
        FileInfo fileInfo,
        string algorithm,
        CancellationToken ct)
    {
        await using var stream = File.OpenRead(fullPath);
        var hashBytes = await _cryptoHash.ComputeHashAsync(stream, algorithm, ct).ConfigureAwait(false);
        var digest = FormatDigest(hashBytes, algorithm);

        return new FacetFileEntry(
            relativePath,
            digest,
            fileInfo.Length,
            fileInfo.LastWriteTimeUtc);
    }

    /// <summary>
    /// Hashes the data of a tar entry and builds its <see cref="FacetFileEntry"/>.
    /// </summary>
    /// <param name="entry">Tar entry whose data stream is read.</param>
    /// <param name="path">Normalized path stored in the entry.</param>
    /// <param name="algorithm">Hash algorithm name passed to the hash implementation.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The entry describing the archived file.</returns>
    private async Task<FacetFileEntry> CreateFileEntryFromTarAsync(
        TarEntry entry,
        string path,
        string algorithm,
        CancellationToken ct)
    {
        // An entry without a data stream is an empty file: hash zero bytes and report size 0.
        var dataStream = entry.DataStream;
        var hashBytes = await _cryptoHash.ComputeHashAsync(dataStream ?? Stream.Null, algorithm, ct).ConfigureAwait(false);
        var digest = FormatDigest(hashBytes, algorithm);

        return new FacetFileEntry(
            path,
            digest,
            dataStream is null ? 0 : entry.Length,
            entry.ModificationTime);
    }

    /// <summary>
    /// Formats a hash as <c>algorithm:hex</c>, both parts lower-cased.
    /// </summary>
    /// <param name="hashBytes">Raw hash bytes.</param>
    /// <param name="algorithm">Hash algorithm name used as the prefix.</param>
    /// <returns>The digest string.</returns>
    private static string FormatDigest(byte[] hashBytes, string algorithm)
    {
        var hex = Convert.ToHexString(hashBytes).ToLowerInvariant();
        return $"{algorithm.ToLowerInvariant()}:{hex}";
    }

    /// <summary>
    /// Seals the collected files into facet entries, computes the combined Merkle root and
    /// assembles the final <see cref="FacetExtractionResult"/>.
    /// </summary>
    /// <param name="facets">Facets that were evaluated.</param>
    /// <param name="facetFiles">Files collected per facet id.</param>
    /// <param name="unmatchedFiles">Files that matched no facet.</param>
    /// <param name="skippedFiles">Files that were skipped, in the order they were encountered.</param>
    /// <param name="warnings">Warnings gathered during extraction.</param>
    /// <param name="totalFilesProcessed">Number of files that were hashed.</param>
    /// <param name="totalBytes">Total size of the hashed files.</param>
    /// <param name="duration">Elapsed extraction time.</param>
    /// <param name="options">Extraction options; controls whether file details are included.</param>
    /// <returns>The extraction result.</returns>
    private FacetExtractionResult BuildResult(
        List<IFacet> facets,
        Dictionary<string, List<FacetFileEntry>> facetFiles,
        List<FacetFileEntry> unmatchedFiles,
        List<SkippedFile> skippedFiles,
        List<string> warnings,
        int totalFilesProcessed,
        long totalBytes,
        TimeSpan duration,
        FacetExtractionOptions options)
    {
        var facetEntries = new List<FacetEntry>();
        int filesMatched = 0;

        foreach (var facet in facets)
        {
            // Facets without any file are left out of the result entirely.
            var files = facetFiles[facet.FacetId];
            if (files.Count == 0)
            {
                continue;
            }

            // NOTE(review): a file that matches several facets is counted once per facet here,
            // so FilesMatched can exceed TotalFilesProcessed - confirm this is the intended meaning.
            filesMatched += files.Count;

            // Sort files deterministically for consistent Merkle root
            var sortedFiles = files.OrderBy(f => f.Path, StringComparer.Ordinal).ToList();
            var entry = _sealer.CreateFacetEntry(facet, sortedFiles, options.IncludeFileDetails);
            facetEntries.Add(entry);
        }

        // Sort facet entries deterministically
        var sortedFacets = facetEntries.OrderBy(f => f.FacetId, StringComparer.Ordinal).ToImmutableArray();
        var merkleTree = new FacetMerkleTree(_cryptoHash);
        var combinedRoot = merkleTree.ComputeCombinedRoot(sortedFacets);

        var stats = new FacetExtractionStats
        {
            TotalFilesProcessed = totalFilesProcessed,
            TotalBytes = totalBytes,
            FilesMatched = filesMatched,
            FilesUnmatched = unmatchedFiles.Count,
            FilesSkipped = skippedFiles.Count,
            Duration = duration
        };

        return new FacetExtractionResult
        {
            Facets = sortedFacets,
            UnmatchedFiles = options.IncludeFileDetails
                ? [.. unmatchedFiles.OrderBy(f => f.Path, StringComparer.Ordinal)]
                : [],
            SkippedFiles = [.. skippedFiles],
            CombinedMerkleRoot = combinedRoot,
            Stats = stats,
            Warnings = [.. warnings]
        };
    }

    /// <summary>
    /// Converts an absolute file path into a root-relative, Unix-style path with a leading slash.
    /// </summary>
    /// <param name="rootPath">Directory the path is made relative to.</param>
    /// <param name="fullPath">Path of a file below <paramref name="rootPath"/>.</param>
    /// <returns>The normalized path, e.g. <c>/etc/passwd</c>.</returns>
    private static string GetRelativePath(string rootPath, string fullPath)
    {
        var relative = Path.GetRelativePath(rootPath, fullPath);

        // Rewrite only the platform's own separator. On Unix '\' is an ordinary file-name
        // character; replacing it would alter the name and make it differ from the same file
        // read out of a tar archive.
        return "/" + relative.Replace(Path.DirectorySeparatorChar, '/');
    }

    /// <summary>
    /// Normalizes a tar entry name to the same shape as directory paths: no leading <c>./</c>
    /// and exactly one leading slash added when missing.
    /// </summary>
    /// <param name="path">Entry name as stored in the archive.</param>
    /// <returns>The normalized path.</returns>
    private static string NormalizeTarPath(string path)
    {
        // Remove leading ./ if present
        if (path.StartsWith("./", StringComparison.Ordinal))
        {
            path = path[2..];
        }

        // Ensure leading slash
        if (!path.StartsWith('/'))
        {
            path = "/" + path;
        }

        return path;
    }
}