// <copyright file="GlobFacetExtractor.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
|
|
|
|
using System.Collections.Immutable;
|
|
using System.Diagnostics;
|
|
using System.Formats.Tar;
|
|
using System.IO.Compression;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
|
|
namespace StellaOps.Facet;
|
|
|
|
/// <summary>
|
|
/// Extracts facets from container images using glob pattern matching.
|
|
/// </summary>
|
|
public sealed class GlobFacetExtractor : IFacetExtractor
|
|
{
|
|
private readonly FacetSealer _sealer;
|
|
private readonly ICryptoHash _cryptoHash;
|
|
private readonly ILogger<GlobFacetExtractor> _logger;
|
|
|
|
/// <summary>
/// Initializes a new instance of the <see cref="GlobFacetExtractor"/> class.
/// </summary>
/// <param name="timeProvider">Time provider forwarded to the <see cref="FacetSealer"/>; may be <c>null</c>.</param>
/// <param name="cryptoHash">Hash implementation; a <see cref="DefaultCryptoHash"/> is created when <c>null</c>.</param>
/// <param name="logger">Logger instance; a no-op logger is used when <c>null</c>.</param>
public GlobFacetExtractor(
    TimeProvider? timeProvider = null,
    ICryptoHash? cryptoHash = null,
    ILogger<GlobFacetExtractor>? logger = null)
{
    _cryptoHash = cryptoHash ?? new DefaultCryptoHash();

    // Pass the resolved hash (not the possibly-null argument) so that facet sealing
    // and the combined Merkle root are always computed with the same implementation.
    _sealer = new FacetSealer(timeProvider, _cryptoHash);

    _logger = logger ?? NullLogger<GlobFacetExtractor>.Instance;
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Walks every file below <paramref name="rootPath"/>, hashes it, and assigns it to each
/// facet whose glob matcher accepts its normalized path. Files that match an exclusion
/// pattern, are symlinks (when <c>FollowSymlinks</c> is off), exceed <c>MaxFileSizeBytes</c>,
/// or fail with an I/O or access error are recorded as skipped instead.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
/// <exception cref="DirectoryNotFoundException"><paramref name="rootPath"/> does not exist.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<FacetExtractionResult> ExtractFromDirectoryAsync(
    string rootPath,
    FacetExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

    if (!Directory.Exists(rootPath))
    {
        throw new DirectoryNotFoundException($"Directory not found: {rootPath}");
    }

    options ??= FacetExtractionOptions.Default;
    var sw = Stopwatch.StartNew();

    // Fall back to the built-in facet set when the caller supplied none.
    var facets = options.Facets.IsDefault || options.Facets.IsEmpty
        ? BuiltInFacets.All.ToList()
        : options.Facets.ToList();

    var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
    var excludeMatcher = options.ExcludePatterns.Length > 0
        ? new GlobMatcher(options.ExcludePatterns)
        : null;

    var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
    var unmatchedFiles = new List<FacetFileEntry>();
    var skippedFiles = new List<SkippedFile>();
    var warnings = new List<string>();

    int totalFilesProcessed = 0;
    long totalBytes = 0;

    foreach (var filePath in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories))
    {
        ct.ThrowIfCancellationRequested();

        var relativePath = GetRelativePath(rootPath, filePath);

        // Check exclusion patterns
        if (excludeMatcher?.IsMatch(relativePath) == true)
        {
            skippedFiles.Add(new SkippedFile(relativePath, "Matched exclusion pattern"));
            continue;
        }

        try
        {
            var fileInfo = new FileInfo(filePath);

            // Skip symlinks if not following
            if (!options.FollowSymlinks && fileInfo.LinkTarget is not null)
            {
                skippedFiles.Add(new SkippedFile(relativePath, "Symlink"));
                continue;
            }

            // Skip files too large
            if (fileInfo.Length > options.MaxFileSizeBytes)
            {
                skippedFiles.Add(new SkippedFile(relativePath, $"Exceeds max size ({fileInfo.Length} > {options.MaxFileSizeBytes})"));
                continue;
            }

            var entry = await CreateFileEntryAsync(filePath, relativePath, fileInfo, options.HashAlgorithm, ct)
                .ConfigureAwait(false);

            // Count the file only once it has been hashed successfully; a file that fails
            // with an I/O error lands in skippedFiles and must not also count as processed.
            totalFilesProcessed++;
            totalBytes += fileInfo.Length;

            bool matched = false;
            foreach (var facet in facets)
            {
                if (matchers[facet.FacetId].IsMatch(relativePath))
                {
                    facetFiles[facet.FacetId].Add(entry);
                    matched = true;
                    // Don't break - a file can match multiple facets
                }
            }

            if (!matched)
            {
                unmatchedFiles.Add(entry);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best effort: one unreadable file must not abort the whole extraction.
            _logger.LogWarning(ex, "Failed to process file: {Path}", relativePath);
            skippedFiles.Add(new SkippedFile(relativePath, ex.Message));
        }
    }

    sw.Stop();

    return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Reads the tar stream entry by entry (without buffering entry data), hashes regular
/// files, and assigns each one to every facet whose glob matcher accepts its normalized
/// path. Entries that match an exclusion pattern, are symbolic links (when
/// <c>FollowSymlinks</c> is off), exceed <c>MaxFileSizeBytes</c>, or fail with an I/O or
/// data error are recorded as skipped. Other entry types are ignored.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="tarStream"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<FacetExtractionResult> ExtractFromTarAsync(
    Stream tarStream,
    FacetExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(tarStream);

    options ??= FacetExtractionOptions.Default;
    var sw = Stopwatch.StartNew();

    // Fall back to the built-in facet set when the caller supplied none.
    var facets = options.Facets.IsDefault || options.Facets.IsEmpty
        ? BuiltInFacets.All.ToList()
        : options.Facets.ToList();

    var matchers = facets.ToDictionary(f => f.FacetId, GlobMatcher.ForFacet);
    var excludeMatcher = options.ExcludePatterns.Length > 0
        ? new GlobMatcher(options.ExcludePatterns)
        : null;

    var facetFiles = facets.ToDictionary(f => f.FacetId, _ => new List<FacetFileEntry>());
    var unmatchedFiles = new List<FacetFileEntry>();
    var skippedFiles = new List<SkippedFile>();
    var warnings = new List<string>();

    int totalFilesProcessed = 0;
    long totalBytes = 0;

    using var tarReader = new TarReader(tarStream, leaveOpen: true);

    while (await tarReader.GetNextEntryAsync(copyData: false, ct).ConfigureAwait(false) is { } tarEntry)
    {
        ct.ThrowIfCancellationRequested();

        var isSymlink = tarEntry.EntryType == TarEntryType.SymbolicLink;
        var isRegularFile = tarEntry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile;

        // Anything that is neither a regular file nor a symlink is ignored silently.
        if (!isRegularFile && !isSymlink)
        {
            continue;
        }

        var path = NormalizeTarPath(tarEntry.Name);

        if (excludeMatcher?.IsMatch(path) == true)
        {
            skippedFiles.Add(new SkippedFile(path, "Matched exclusion pattern"));
            continue;
        }

        if (isSymlink)
        {
            // Previously this check sat behind the regular-file filter and could never run,
            // so symlinks were dropped without a trace. Record them when not following.
            // A link target cannot be resolved from a forward-only tar stream, so the entry
            // is never hashed either way.
            if (!options.FollowSymlinks)
            {
                skippedFiles.Add(new SkippedFile(path, "Symlink"));
            }

            continue;
        }

        if (tarEntry.Length > options.MaxFileSizeBytes)
        {
            skippedFiles.Add(new SkippedFile(path, $"Exceeds max size ({tarEntry.Length} > {options.MaxFileSizeBytes})"));
            continue;
        }

        try
        {
            var entry = await CreateFileEntryFromTarAsync(tarEntry, path, options.HashAlgorithm, ct)
                .ConfigureAwait(false);

            // Count the entry only once it has been hashed successfully; an entry that
            // fails lands in skippedFiles and must not also count as processed.
            totalFilesProcessed++;
            totalBytes += tarEntry.Length;

            bool matched = false;
            foreach (var facet in facets)
            {
                if (matchers[facet.FacetId].IsMatch(path))
                {
                    facetFiles[facet.FacetId].Add(entry);
                    matched = true;
                    // Don't break - a file can match multiple facets
                }
            }

            if (!matched)
            {
                unmatchedFiles.Add(entry);
            }
        }
        catch (Exception ex) when (ex is IOException or InvalidDataException)
        {
            // Best effort: one unreadable entry must not abort the whole extraction.
            _logger.LogWarning(ex, "Failed to process tar entry: {Path}", path);
            skippedFiles.Add(new SkippedFile(path, ex.Message));
        }
    }

    sw.Stop();

    return BuildResult(facets, facetFiles, unmatchedFiles, skippedFiles, warnings, totalFilesProcessed, totalBytes, sw.Elapsed, options);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Wraps <paramref name="layerStream"/> in a gzip decompressor and hands the result to
/// <see cref="ExtractFromTarAsync"/>. The caller's stream is left open.
/// </remarks>
public async Task<FacetExtractionResult> ExtractFromOciLayerAsync(
    Stream layerStream,
    FacetExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(layerStream);

    // The layer is treated as a gzip-compressed tar: inflate on the fly, then delegate.
    await using (var inflated = new GZipStream(layerStream, CompressionMode.Decompress, leaveOpen: true))
    {
        var result = await ExtractFromTarAsync(inflated, options, ct).ConfigureAwait(false);
        return result;
    }
}
|
|
|
|
/// <summary>
/// Hashes the file at <paramref name="fullPath"/> and builds its <see cref="FacetFileEntry"/>.
/// </summary>
/// <param name="fullPath">Path used to open the file for reading.</param>
/// <param name="relativePath">Normalized path stored on the entry.</param>
/// <param name="fileInfo">Supplies the size and last-write time (UTC) stored on the entry.</param>
/// <param name="algorithm">Hash algorithm name passed to the hash implementation and used as the digest prefix.</param>
/// <param name="ct">Cancellation token forwarded to the hash computation.</param>
/// <returns>The entry describing the hashed file.</returns>
private async Task<FacetFileEntry> CreateFileEntryAsync(
    string fullPath,
    string relativePath,
    FileInfo fileInfo,
    string algorithm,
    CancellationToken ct)
{
    await using var source = File.OpenRead(fullPath);

    var contentDigest = FormatDigest(
        await _cryptoHash.ComputeHashAsync(source, algorithm, ct).ConfigureAwait(false),
        algorithm);

    var sizeInBytes = fileInfo.Length;
    var modifiedUtc = fileInfo.LastWriteTimeUtc;

    return new FacetFileEntry(relativePath, contentDigest, sizeInBytes, modifiedUtc);
}
|
|
|
|
/// <summary>
/// Hashes the content of a tar entry and builds its <see cref="FacetFileEntry"/>.
/// </summary>
/// <param name="entry">Tar entry whose data stream, length and modification time are read.</param>
/// <param name="path">Normalized path stored on the entry.</param>
/// <param name="algorithm">Hash algorithm name passed to the hash implementation and used as the digest prefix.</param>
/// <param name="ct">Cancellation token forwarded to the hash computation.</param>
/// <returns>The entry describing the hashed tar member.</returns>
private async Task<FacetFileEntry> CreateFileEntryFromTarAsync(
    TarEntry entry,
    string path,
    string algorithm,
    CancellationToken ct)
{
    var content = entry.DataStream;

    if (content is not null)
    {
        var contentHash = await _cryptoHash.ComputeHashAsync(content, algorithm, ct).ConfigureAwait(false);

        return new FacetFileEntry(
            path,
            FormatDigest(contentHash, algorithm),
            entry.Length,
            entry.ModificationTime);
    }

    // No data stream: treat the entry as an empty file and hash zero bytes.
    var hashOfNothing = await _cryptoHash.ComputeHashAsync(Stream.Null, algorithm, ct).ConfigureAwait(false);

    return new FacetFileEntry(path, FormatDigest(hashOfNothing, algorithm), 0, entry.ModificationTime);
}
|
|
|
|
/// <summary>
/// Formats a hash as <c>algorithm:hex</c>, with both parts lower-cased.
/// </summary>
/// <param name="hashBytes">Raw hash bytes.</param>
/// <param name="algorithm">Algorithm name used as the prefix.</param>
/// <returns>The digest string, e.g. <c>sha256:ab01</c>.</returns>
private static string FormatDigest(byte[] hashBytes, string algorithm)
{
    var lowerHex = Convert.ToHexString(hashBytes).ToLowerInvariant();
    var prefix = algorithm.ToLowerInvariant();

    return string.Concat(prefix, ":", lowerHex);
}
|
|
|
|
/// <summary>
/// Assembles the final <see cref="FacetExtractionResult"/> from the collected per-facet files.
/// </summary>
/// <param name="facets">Facets that were evaluated.</param>
/// <param name="facetFiles">Files collected per facet id.</param>
/// <param name="unmatchedFiles">Files that matched no facet.</param>
/// <param name="skippedFiles">Files that were skipped, with the reason.</param>
/// <param name="warnings">Warnings gathered during extraction.</param>
/// <param name="totalFilesProcessed">Number of files processed.</param>
/// <param name="totalBytes">Total size of the processed files.</param>
/// <param name="duration">Elapsed extraction time.</param>
/// <param name="options">Extraction options; <c>IncludeFileDetails</c> controls whether file lists are included.</param>
/// <returns>The extraction result with facets sorted by id and the combined Merkle root.</returns>
private FacetExtractionResult BuildResult(
    List<IFacet> facets,
    Dictionary<string, List<FacetFileEntry>> facetFiles,
    List<FacetFileEntry> unmatchedFiles,
    List<SkippedFile> skippedFiles,
    List<string> warnings,
    int totalFilesProcessed,
    long totalBytes,
    TimeSpan duration,
    FacetExtractionOptions options)
{
    var sealedFacets = new List<FacetEntry>();
    int matchedCount = 0;

    foreach (var facet in facets)
    {
        var members = facetFiles[facet.FacetId];

        // Facets without any file produce no entry.
        if (members.Count > 0)
        {
            // A file that belongs to several facets is counted once per facet.
            matchedCount += members.Count;

            // Ordinal path order keeps the sealed entry independent of enumeration order.
            var orderedMembers = members.OrderBy(f => f.Path, StringComparer.Ordinal).ToList();

            sealedFacets.Add(_sealer.CreateFacetEntry(facet, orderedMembers, options.IncludeFileDetails));
        }
    }

    // Facets are ordered by id (ordinal) before the combined root is computed.
    var orderedFacets = sealedFacets.OrderBy(f => f.FacetId, StringComparer.Ordinal).ToImmutableArray();
    var combinedRoot = new FacetMerkleTree(_cryptoHash).ComputeCombinedRoot(orderedFacets);

    return new FacetExtractionResult
    {
        Facets = orderedFacets,
        UnmatchedFiles = options.IncludeFileDetails
            ? [.. unmatchedFiles.OrderBy(f => f.Path, StringComparer.Ordinal)]
            : [],
        SkippedFiles = [.. skippedFiles],
        CombinedMerkleRoot = combinedRoot,
        Stats = new FacetExtractionStats
        {
            TotalFilesProcessed = totalFilesProcessed,
            TotalBytes = totalBytes,
            FilesMatched = matchedCount,
            FilesUnmatched = unmatchedFiles.Count,
            FilesSkipped = skippedFiles.Count,
            Duration = duration
        },
        Warnings = [.. warnings]
    };
}
|
|
|
|
/// <summary>
/// Converts <paramref name="fullPath"/> into a path relative to <paramref name="rootPath"/>,
/// using forward slashes and a leading slash.
/// </summary>
/// <param name="rootPath">Directory the result is relative to.</param>
/// <param name="fullPath">Path of a file below <paramref name="rootPath"/>.</param>
/// <returns>The normalized path, e.g. <c>/usr/bin/ls</c>.</returns>
private static string GetRelativePath(string rootPath, string fullPath)
{
    var relative = Path.GetRelativePath(rootPath, fullPath);

    // Convert only the platform's directory separator. On Windows that is '\\'; on Unix it
    // is already '/', so a backslash that is part of a file name is kept intact, matching
    // how tar entry names are normalized.
    return "/" + relative.Replace(Path.DirectorySeparatorChar, '/');
}
|
|
|
|
/// <summary>
/// Normalizes a tar entry name: drops a leading <c>./</c> and ensures a leading slash.
/// </summary>
/// <param name="path">Entry name as stored in the archive.</param>
/// <returns>The normalized path, e.g. <c>/usr/bin/ls</c>.</returns>
private static string NormalizeTarPath(string path)
{
    // Strip a single leading "./" when present.
    var withoutDotPrefix = path.StartsWith("./", StringComparison.Ordinal)
        ? path[2..]
        : path;

    // Ensure the result starts with a slash.
    return withoutDotPrefix.StartsWith('/')
        ? withoutDotPrefix
        : "/" + withoutDotPrefix;
}
|
|
}
|