//
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
//

using System.Collections.Immutable;
using System.Diagnostics;
using System.Formats.Tar;
using System.IO.Compression;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace StellaOps.Facet;

/// <summary>
/// Extracts facets from container images using glob pattern matching.
/// </summary>
public sealed class GlobFacetExtractor : IFacetExtractor
{
    private readonly FacetSealer _sealer;
    private readonly ICryptoHash _cryptoHash;
    private readonly ILogger _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="GlobFacetExtractor"/> class.
    /// </summary>
    /// <param name="timeProvider">Time provider for timestamps; forwarded to the <see cref="FacetSealer"/>.</param>
    /// <param name="cryptoHash">Hash implementation. A <see cref="DefaultCryptoHash"/> is created when <c>null</c>.</param>
    /// <param name="logger">Logger instance. <see cref="NullLogger.Instance"/> is used when <c>null</c>.</param>
    public GlobFacetExtractor(
        TimeProvider? timeProvider = null,
        ICryptoHash? cryptoHash = null,
        ILogger? logger = null)
    {
        _cryptoHash = cryptoHash ?? new DefaultCryptoHash();

        // Hand the sealer the resolved hash so file digests and sealed facets
        // are always produced by the same implementation.
        _sealer = new FacetSealer(timeProvider, _cryptoHash);
        _logger = logger ?? NullLogger.Instance;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Walks every file below <paramref name="rootPath"/>. Files matching an exclusion pattern,
    /// symlinks (when <c>FollowSymlinks</c> is off), files larger than <c>MaxFileSizeBytes</c> and
    /// files that fail with an <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/>
    /// are recorded as skipped instead of aborting the extraction.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException"><paramref name="rootPath"/> does not exist.</exception>
    public async Task<FacetExtractionResult> ExtractFromDirectoryAsync(
        string rootPath,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

        if (!Directory.Exists(rootPath))
        {
            throw new DirectoryNotFoundException($"Directory not found: {rootPath}");
        }

        var effectiveOptions = options ?? FacetExtractionOptions.Default;

        return await RunExtractionAsync(
                effectiveOptions,
                sink => ScanDirectoryAsync(rootPath, effectiveOptions, sink, ct))
            .ConfigureAwait(false);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Reads the archive sequentially without copying entry data. Only regular-file entries are
    /// hashed. Symbolic-link entries are recorded as skipped when <c>FollowSymlinks</c> is off;
    /// every other entry type is ignored. <paramref name="tarStream"/> is left open.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="tarStream"/> is <c>null</c>.</exception>
    public async Task<FacetExtractionResult> ExtractFromTarAsync(
        Stream tarStream,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(tarStream);

        var effectiveOptions = options ?? FacetExtractionOptions.Default;

        return await RunExtractionAsync(
                effectiveOptions,
                sink => ScanTarAsync(tarStream, effectiveOptions, sink, ct))
            .ConfigureAwait(false);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Wraps <paramref name="layerStream"/> in a gzip decompressor and delegates to
    /// <see cref="ExtractFromTarAsync"/>. <paramref name="layerStream"/> is left open.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="layerStream"/> is <c>null</c>.</exception>
    public async Task<FacetExtractionResult> ExtractFromOciLayerAsync(
        Stream layerStream,
        FacetExtractionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(layerStream);

        // OCI layers are gzipped tars - decompress then delegate
        await using var gzipStream = new GZipStream(layerStream, CompressionMode.Decompress, leaveOpen: true);
        return await ExtractFromTarAsync(gzipStream, options, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Shared extraction pipeline: prepares the facet matchers, lets <paramref name="scanAsync"/>
    /// feed files into an <see cref="ExtractionSink"/>, then seals the facets and assembles the result.
    /// </summary>
    /// <param name="options">Effective (non-null) extraction options.</param>
    /// <param name="scanAsync">Source-specific walker that reports files and skips to the sink.</param>
    /// <returns>The extraction result with deterministically ordered facets and files.</returns>
    private async Task<FacetExtractionResult> RunExtractionAsync(
        FacetExtractionOptions options,
        Func<ExtractionSink, Task> scanAsync)
    {
        var stopwatch = Stopwatch.StartNew();

        // Fall back to the built-in facet set when the caller supplied none.
        var facets = options.Facets.IsDefaultOrEmpty
            ? BuiltInFacets.All.ToList()
            : options.Facets.ToList();

        var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
        var excludeMatcher = options.ExcludePatterns.Length > 0
            ? new GlobMatcher(options.ExcludePatterns)
            : null;
        var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());

        var sink = new ExtractionSink(
            path => excludeMatcher?.IsMatch(path) == true,
            (path, entry) =>
            {
                var matched = false;
                foreach (var facet in facets)
                {
                    if (matchers[facet.FacetId].IsMatch(path))
                    {
                        // No early exit: a file can belong to several facets.
                        facetFiles[facet.FacetId].Add(entry);
                        matched = true;
                    }
                }

                return matched;
            });

        await scanAsync(sink).ConfigureAwait(false);

        stopwatch.Stop();

        // Seal only facets that received files. Files are ordered by path and facets by id
        // (both ordinal) so the combined Merkle root does not depend on enumeration order.
        var sealedFacets = facets
            .Where(f => facetFiles[f.FacetId].Count > 0)
            .Select(f => _sealer.CreateFacetEntry(
                f,
                facetFiles[f.FacetId].OrderBy(e => e.Path, StringComparer.Ordinal).ToList(),
                options.IncludeFileDetails))
            .OrderBy(e => e.FacetId, StringComparer.Ordinal)
            .ToImmutableArray();

        // A file matching several facets is counted once per facet.
        var filesMatched = facetFiles.Values.Sum(files => files.Count);

        var merkleTree = new FacetMerkleTree(_cryptoHash);
        var combinedRoot = merkleTree.ComputeCombinedRoot(sealedFacets);

        var stats = new FacetExtractionStats
        {
            TotalFilesProcessed = sink.FilesProcessed,
            TotalBytes = sink.BytesProcessed,
            FilesMatched = filesMatched,
            FilesUnmatched = sink.Unmatched.Count,
            FilesSkipped = sink.Skipped.Count,
            Duration = stopwatch.Elapsed
        };

        return new FacetExtractionResult
        {
            Facets = sealedFacets,
            UnmatchedFiles = options.IncludeFileDetails
                ? [.. sink.Unmatched.OrderBy(f => f.Path, StringComparer.Ordinal)]
                : [],
            SkippedFiles = [.. sink.Skipped],
            CombinedMerkleRoot = combinedRoot,
            Stats = stats,

            // Nothing in this extractor emits warnings yet.
            Warnings = []
        };
    }

    /// <summary>
    /// Enumerates all files under <paramref name="rootPath"/> and reports each one to <paramref name="sink"/>.
    /// </summary>
    private async Task ScanDirectoryAsync(
        string rootPath,
        FacetExtractionOptions options,
        ExtractionSink sink,
        CancellationToken ct)
    {
        foreach (var filePath in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories))
        {
            ct.ThrowIfCancellationRequested();

            var relativePath = GetRelativePath(rootPath, filePath);

            if (sink.IsExcluded(relativePath))
            {
                sink.Skip(relativePath, "Matched exclusion pattern");
                continue;
            }

            try
            {
                var fileInfo = new FileInfo(filePath);

                if (!options.FollowSymlinks && fileInfo.LinkTarget is not null)
                {
                    sink.Skip(relativePath, "Symlink");
                    continue;
                }

                if (fileInfo.Length > options.MaxFileSizeBytes)
                {
                    sink.Skip(relativePath, $"Exceeds max size ({fileInfo.Length} > {options.MaxFileSizeBytes})");
                    continue;
                }

                var entry = await CreateFileEntryAsync(filePath, relativePath, fileInfo, options.HashAlgorithm, ct)
                    .ConfigureAwait(false);

                // Counted only after hashing succeeded, so a file that ends up in the
                // skipped list is never also reported as processed.
                sink.Accept(relativePath, entry, fileInfo.Length);
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                _logger.LogWarning(ex, "Failed to process file: {Path}", relativePath);
                sink.Skip(relativePath, ex.Message);
            }
        }
    }

    /// <summary>
    /// Reads <paramref name="tarStream"/> entry by entry and reports each file to <paramref name="sink"/>.
    /// </summary>
    private async Task ScanTarAsync(
        Stream tarStream,
        FacetExtractionOptions options,
        ExtractionSink sink,
        CancellationToken ct)
    {
        using var tarReader = new TarReader(tarStream, leaveOpen: true);

        while (await tarReader.GetNextEntryAsync(copyData: false, ct).ConfigureAwait(false) is { } tarEntry)
        {
            ct.ThrowIfCancellationRequested();

            var isRegularFile = tarEntry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile;

            // Symlinks carry no data to hash. They are reported as skipped when symlinks are
            // not followed, mirroring the directory scan. This test must run before the
            // regular-file filter below, otherwise it can never be reached.
            var isReportedSymlink = !options.FollowSymlinks
                && tarEntry.EntryType == TarEntryType.SymbolicLink;

            if (!isRegularFile && !isReportedSymlink)
            {
                // Directories, devices, hard links, metadata entries, ...
                continue;
            }

            var path = NormalizeTarPath(tarEntry.Name);

            if (sink.IsExcluded(path))
            {
                sink.Skip(path, "Matched exclusion pattern");
                continue;
            }

            if (isReportedSymlink)
            {
                sink.Skip(path, "Symlink");
                continue;
            }

            if (tarEntry.Length > options.MaxFileSizeBytes)
            {
                sink.Skip(path, $"Exceeds max size ({tarEntry.Length} > {options.MaxFileSizeBytes})");
                continue;
            }

            try
            {
                var entry = await CreateFileEntryFromTarAsync(tarEntry, path, options.HashAlgorithm, ct)
                    .ConfigureAwait(false);

                // Counted only after hashing succeeded (see ScanDirectoryAsync).
                sink.Accept(path, entry, tarEntry.Length);
            }
            catch (Exception ex) when (ex is IOException or InvalidDataException)
            {
                _logger.LogWarning(ex, "Failed to process tar entry: {Path}", path);
                sink.Skip(path, ex.Message);
            }
        }
    }

    /// <summary>
    /// Hashes the file at <paramref name="fullPath"/> and builds its entry from the digest,
    /// the file length and the last-write time (UTC).
    /// </summary>
    private async Task<FacetFileEntry> CreateFileEntryAsync(
        string fullPath,
        string relativePath,
        FileInfo fileInfo,
        string algorithm,
        CancellationToken ct)
    {
        await using var stream = File.OpenRead(fullPath);
        var hashBytes = await _cryptoHash.ComputeHashAsync(stream, algorithm, ct).ConfigureAwait(false);

        return new FacetFileEntry(
            relativePath,
            FormatDigest(hashBytes, algorithm),
            fileInfo.Length,
            fileInfo.LastWriteTimeUtc);
    }

    /// <summary>
    /// Hashes the data of a tar entry and builds its entry. An entry without a data stream
    /// is treated as an empty file: the digest of zero bytes and a size of 0.
    /// </summary>
    private async Task<FacetFileEntry> CreateFileEntryFromTarAsync(
        TarEntry entry,
        string path,
        string algorithm,
        CancellationToken ct)
    {
        var dataStream = entry.DataStream;
        var size = dataStream is null ? 0 : entry.Length;

        var hashBytes = await _cryptoHash
            .ComputeHashAsync(dataStream ?? Stream.Null, algorithm, ct)
            .ConfigureAwait(false);

        return new FacetFileEntry(
            path,
            FormatDigest(hashBytes, algorithm),
            size,
            entry.ModificationTime);
    }

    /// <summary>
    /// Formats a digest as <c>algorithm:hex</c>, both parts lower-cased.
    /// </summary>
    private static string FormatDigest(byte[] hashBytes, string algorithm)
    {
        var hex = Convert.ToHexString(hashBytes).ToLowerInvariant();
        return $"{algorithm.ToLowerInvariant()}:{hex}";
    }

    /// <summary>
    /// Returns <paramref name="fullPath"/> relative to <paramref name="rootPath"/> as a
    /// '/'-separated path with a leading slash.
    /// </summary>
    private static string GetRelativePath(string rootPath, string fullPath)
    {
        var relative = Path.GetRelativePath(rootPath, fullPath);

        // Convert only the platform separator. Where the separator is already '/', a
        // backslash is an ordinary file-name character and must be preserved.
        return "/" + relative.Replace(Path.DirectorySeparatorChar, '/');
    }

    /// <summary>
    /// Normalizes a tar entry name: drops one leading <c>./</c> and ensures a leading slash.
    /// </summary>
    private static string NormalizeTarPath(string path)
    {
        if (path.StartsWith("./", StringComparison.Ordinal))
        {
            path = path[2..];
        }

        if (!path.StartsWith('/'))
        {
            path = "/" + path;
        }

        return path;
    }

    /// <summary>
    /// Collects the outcome of a scan: accepted files (routed to facets or to the unmatched
    /// list), skipped files, and the processed file/byte counters.
    /// </summary>
    private sealed class ExtractionSink
    {
        private readonly Func<string, bool> _isExcluded;
        private readonly Func<string, FacetFileEntry, bool> _assignToFacets;

        /// <summary>
        /// Creates a sink.
        /// </summary>
        /// <param name="isExcluded">Returns <c>true</c> when a path matches an exclusion pattern.</param>
        /// <param name="assignToFacets">
        /// Adds the entry to every facet whose patterns match the path; returns <c>true</c>
        /// when at least one facet matched.
        /// </param>
        public ExtractionSink(
            Func<string, bool> isExcluded,
            Func<string, FacetFileEntry, bool> assignToFacets)
        {
            _isExcluded = isExcluded;
            _assignToFacets = assignToFacets;
        }

        /// <summary>Gets the accepted files that matched no facet.</summary>
        public List<FacetFileEntry> Unmatched { get; } = new();

        /// <summary>Gets the files that were skipped, in the order they were reported.</summary>
        public List<SkippedFile> Skipped { get; } = new();

        /// <summary>Gets the number of files that were hashed successfully.</summary>
        public int FilesProcessed { get; private set; }

        /// <summary>Gets the total size in bytes of the files that were hashed successfully.</summary>
        public long BytesProcessed { get; private set; }

        /// <summary>Returns <c>true</c> when <paramref name="path"/> matches an exclusion pattern.</summary>
        public bool IsExcluded(string path) => _isExcluded(path);

        /// <summary>Records <paramref name="path"/> as skipped for the given <paramref name="reason"/>.</summary>
        public void Skip(string path, string reason) => Skipped.Add(new SkippedFile(path, reason));

        /// <summary>
        /// Records a successfully hashed file and routes it to its facets, or to the
        /// unmatched list when no facet claims it.
        /// </summary>
        public void Accept(string path, FacetFileEntry entry, long length)
        {
            FilesProcessed++;
            BytesProcessed += length;

            if (!_assignToFacets(path, entry))
            {
                Unmatched.Add(entry);
            }
        }
    }
}