using System.Buffers.Binary;

namespace StellaOps.Scanner.Analyzers.OS.Pkgutil;

/// <summary>
/// Parses macOS BOM (Bill of Materials) files to enumerate installed files.
/// BOM files are used by pkgutil to track which files were installed by a package.
/// </summary>
/// <remarks>
/// This is a heuristic parser: it does not walk the BOM tree structure. It verifies the
/// magic header and then scans the raw bytes for NUL-terminated printable-ASCII strings
/// that look like Unix paths.
/// </remarks>
internal sealed class BomParser
{
    /// <summary>Number of leading bytes that must be readable before a stream is treated as a BOM file.</summary>
    private const int HeaderLength = 512;

    /// <summary>Upper bound on the number of bytes scanned per BOM file (10 MB).</summary>
    private const int MaxContentLength = 10 * 1024 * 1024;

    /// <summary>Candidate strings of this length or longer are discarded.</summary>
    private const int MaxPathLength = 4096;

    /// <summary>Size of the scratch buffer used while draining the stream.</summary>
    private const int ReadChunkLength = 81920;

    /// <summary>The cancellation token is polled whenever <c>(index &amp; mask) == 0</c>, i.e. every 4096 bytes.</summary>
    private const int CancellationPollMask = 0xFFF;

    /// <summary>
    /// BOM file magic header: "BOMStore"
    /// </summary>
    private static ReadOnlySpan<byte> BomMagic => "BOMStore"u8;

    /// <summary>
    /// Extracts the list of installed file paths from a BOM file.
    /// </summary>
    /// <remarks>
    /// BOM files have a complex binary format. This implementation performs a simplified
    /// extraction by scanning the file for strings that look like paths.
    /// </remarks>
    /// <param name="bomPath">Path of the BOM file on disk.</param>
    /// <param name="cancellationToken">Token observed while reading and scanning the file.</param>
    /// <returns>
    /// The entries found, or an empty list when the file does not exist or cannot be read
    /// (<see cref="IOException"/> / <see cref="UnauthorizedAccessException"/>).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="bomPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public IReadOnlyList<BomFileEntry> Parse(string bomPath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(bomPath);

        if (!File.Exists(bomPath))
        {
            return Array.Empty<BomFileEntry>();
        }

        try
        {
            using var stream = File.OpenRead(bomPath);
            return Parse(stream, cancellationToken);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best effort: an unreadable BOM simply contributes no entries.
            return Array.Empty<BomFileEntry>();
        }
    }

    /// <summary>
    /// Extracts file paths from a BOM stream.
    /// </summary>
    /// <remarks>
    /// The stream is read forward from its current position, so non-seekable streams are
    /// supported. The stream is left open; the caller keeps ownership of it. At most
    /// <see cref="MaxContentLength"/> bytes are examined.
    /// </remarks>
    /// <param name="stream">Readable stream positioned at the start of the BOM data.</param>
    /// <param name="cancellationToken">Token observed while reading and scanning the data.</param>
    /// <returns>
    /// The entries found, sorted by path (ordinal). The list is empty when fewer than
    /// <see cref="HeaderLength"/> bytes are available, when the magic header does not match,
    /// or when an <see cref="IOException"/> occurs while reading.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public IReadOnlyList<BomFileEntry> Parse(Stream stream, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        var results = new List<BomFileEntry>();

        try
        {
            // A single Read call may legitimately return fewer bytes than requested,
            // so keep reading until the header is complete or the stream ends.
            var header = new byte[HeaderLength];
            if (stream.ReadAtLeast(header, HeaderLength, throwOnEndOfStream: false) < HeaderLength)
            {
                return results;
            }

            // Verify magic before paying for the (potentially large) body read.
            if (!header.AsSpan(0, BomMagic.Length).SequenceEqual(BomMagic))
            {
                return results;
            }

            var content = ReadContent(stream, header, cancellationToken);

            foreach (var path in ExtractPaths(content, cancellationToken))
            {
                results.Add(new BomFileEntry(path, IsDirectory: path.EndsWith('/')));
            }
        }
        catch (IOException)
        {
            // Best effort: a read failure yields whatever was collected so far.
            // EndOfStreamException derives from IOException and is covered here.
        }

        return results;
    }

    /// <summary>
    /// Finds the corresponding BOM file for a receipt plist.
    /// </summary>
    /// <param name="plistPath">Path of the receipt plist.</param>
    /// <returns>
    /// The path of the sibling <c>.bom</c> file when it exists; otherwise <c>null</c>.
    /// Also <c>null</c> when <paramref name="plistPath"/> is blank or has no directory or base name.
    /// </returns>
    public string? FindBomForReceipt(string plistPath)
    {
        if (string.IsNullOrWhiteSpace(plistPath))
        {
            return null;
        }

        // BOM files are named with same base name as plist
        // e.g., com.apple.pkg.Safari.plist -> com.apple.pkg.Safari.bom
        var directory = Path.GetDirectoryName(plistPath);
        var baseName = Path.GetFileNameWithoutExtension(plistPath);

        if (string.IsNullOrEmpty(directory) || string.IsNullOrEmpty(baseName))
        {
            return null;
        }

        var bomPath = Path.Combine(directory, baseName + ".bom");
        return File.Exists(bomPath) ? bomPath : null;
    }

    /// <summary>
    /// Returns the already-read header followed by the rest of the stream, capped at
    /// <see cref="MaxContentLength"/> bytes in total.
    /// </summary>
    private static byte[] ReadContent(Stream stream, byte[] header, CancellationToken cancellationToken)
    {
        using var buffer = new MemoryStream();
        buffer.Write(header, 0, header.Length);

        var chunk = new byte[ReadChunkLength];
        int remaining = MaxContentLength - header.Length;

        while (remaining > 0)
        {
            cancellationToken.ThrowIfCancellationRequested();

            int read = stream.Read(chunk, 0, Math.Min(chunk.Length, remaining));
            if (read == 0)
            {
                break; // End of stream.
            }

            buffer.Write(chunk, 0, read);
            remaining -= read;
        }

        return buffer.ToArray();
    }

    /// <summary>
    /// Scans <paramref name="content"/> for NUL-terminated runs of printable ASCII that begin
    /// with '/' or '.' and pass <see cref="IsValidPath"/>.
    /// </summary>
    /// <returns>The distinct candidates, sorted with ordinal comparison.</returns>
    private static IEnumerable<string> ExtractPaths(byte[] content, CancellationToken cancellationToken)
    {
        var paths = new HashSet<string>(StringComparer.Ordinal);

        // Index of the first byte of the candidate currently being tracked, or -1 when idle.
        int start = -1;

        for (int i = 0; i < content.Length; i++)
        {
            // Polling on every byte is needlessly expensive for multi-megabyte inputs.
            if ((i & CancellationPollMask) == 0)
            {
                cancellationToken.ThrowIfCancellationRequested();
            }

            byte b = content[i];

            if (start == -1)
            {
                // Look for path start indicators
                if (b == (byte)'/' || b == (byte)'.')
                {
                    start = i;
                }

                continue;
            }

            if (b == 0) // Null terminator
            {
                int length = i - start;
                if (length > 1 && length < MaxPathLength)
                {
                    var candidate = System.Text.Encoding.UTF8.GetString(content, start, length);
                    if (IsValidPath(candidate))
                    {
                        paths.Add(candidate);
                    }
                }

                start = -1;
            }
            else if (!IsValidPathChar(b))
            {
                // A non-printable byte means this run was not a string after all.
                start = -1;
            }
        }

        // Materialize so callers get a stable, already-sorted sequence.
        var ordered = new List<string>(paths);
        ordered.Sort(StringComparer.Ordinal);
        return ordered;
    }

    /// <summary>
    /// Applies the plausibility filters that separate real paths from random string data.
    /// </summary>
    private static bool IsValidPath(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return false;
        }

        // Must start with / or .
        if (!path.StartsWith('/') && !path.StartsWith('.'))
        {
            return false;
        }

        // Must not contain control characters (tab is tolerated)
        foreach (char c in path)
        {
            if (c < 32 && c != '\t')
            {
                return false;
            }
        }

        // Filter out common false positives. Ordinal comparison: these are byte patterns,
        // not linguistic text, so culture-sensitive matching must not be used.
        if (path.Contains("//", StringComparison.Ordinal)
            || path.EndsWith("/.", StringComparison.Ordinal)
            || path.Contains("/../", StringComparison.Ordinal))
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Returns <c>true</c> for printable ASCII bytes (32 through 126 inclusive).
    /// </summary>
    private static bool IsValidPathChar(byte b)
    {
        return b >= 32 && b < 127;
    }
}

/// <summary>
/// Represents a file entry from a BOM file.
/// </summary>
/// <param name="Path">The path string as it was found in the BOM data.</param>
/// <param name="IsDirectory"><c>true</c> when <paramref name="Path"/> ends with '/'.</param>
internal sealed record BomFileEntry(string Path, bool IsDirectory);