using System.Buffers.Binary;
namespace StellaOps.Scanner.Analyzers.OS.Pkgutil;
/// <summary>
/// Parses macOS BOM (Bill of Materials) files to enumerate installed files.
/// BOM files are used by pkgutil to track which files were installed by a package.
/// </summary>
internal sealed class BomParser
{
    /// <summary>
    /// Number of leading bytes that must be present before the input is treated as a BOM.
    /// </summary>
    private const int HeaderLength = 512;

    /// <summary>
    /// Upper bound (10 MiB) on how many bytes of a BOM are loaded and scanned.
    /// </summary>
    private const int MaxScanBytes = 10 * 1024 * 1024;

    /// <summary>
    /// Candidate strings of this length or longer are discarded.
    /// </summary>
    private const int MaxPathLength = 4096;

    /// <summary>
    /// Size of the scratch buffer used while copying the stream into memory.
    /// </summary>
    private const int ReadChunkSize = 80 * 1024;

    /// <summary>
    /// Cancellation is polled whenever <c>(index &amp; mask) == 0</c>, i.e. every 4096 bytes.
    /// </summary>
    private const int CancellationPollMask = 0xFFF;

    /// <summary>
    /// BOM file magic header: "BOMStore"
    /// </summary>
    private static ReadOnlySpan<byte> BomMagic => "BOMStore"u8;

    /// <summary>
    /// Extracts the list of installed file paths from a BOM file.
    /// </summary>
    /// <remarks>
    /// BOM files have a complex binary format. This implementation does not walk the
    /// BOM tree; it delegates to <see cref="Parse(Stream, CancellationToken)"/>, which
    /// scans the raw bytes for null-terminated strings that look like paths.
    /// </remarks>
    /// <param name="bomPath">Path of the BOM file on disk.</param>
    /// <param name="cancellationToken">Token observed while reading and scanning.</param>
    /// <returns>
    /// The entries found, or an empty list when the file does not exist or cannot be
    /// opened/read (<see cref="IOException"/> or <see cref="UnauthorizedAccessException"/>).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="bomPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public IReadOnlyList<BomFileEntry> Parse(string bomPath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(bomPath);

        if (!File.Exists(bomPath))
        {
            return Array.Empty<BomFileEntry>();
        }

        try
        {
            using var stream = File.OpenRead(bomPath);
            return Parse(stream, cancellationToken);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // The file vanished between the existence check and the open, or is unreadable.
            return Array.Empty<BomFileEntry>();
        }
    }

    /// <summary>
    /// Extracts file paths from a BOM stream.
    /// </summary>
    /// <remarks>
    /// The stream is consumed forward from its current position, so it does not need to
    /// be seekable, and it is left open for the caller to dispose. At most
    /// <see cref="MaxScanBytes"/> bytes are examined.
    /// </remarks>
    /// <param name="stream">Readable stream positioned at the start of the BOM data.</param>
    /// <param name="cancellationToken">Token observed while reading and scanning.</param>
    /// <returns>
    /// The entries found, sorted by ordinal path order. Empty when fewer than
    /// <see cref="HeaderLength"/> bytes are available, when the magic does not match,
    /// or when reading fails with an <see cref="IOException"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public IReadOnlyList<BomFileEntry> Parse(Stream stream, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        byte[] content;
        try
        {
            content = ReadBomContent(stream, cancellationToken);
        }
        catch (IOException)
        {
            // Nothing has been extracted yet when a read fails, so there is nothing
            // partial to hand back; report "no entries" instead of propagating.
            return Array.Empty<BomFileEntry>();
        }

        if (content.Length == 0)
        {
            return Array.Empty<BomFileEntry>();
        }

        // BOM format is complex - we do a simplified extraction by scanning
        // for null-terminated strings that look like paths.
        var paths = ExtractPaths(content, cancellationToken);
        var results = new List<BomFileEntry>(paths.Count);
        foreach (var path in paths)
        {
            results.Add(new BomFileEntry(path, IsDirectory: path.EndsWith('/')));
        }

        return results;
    }

    /// <summary>
    /// Finds the corresponding BOM file for a receipt plist.
    /// </summary>
    /// <param name="plistPath">Path of the receipt plist.</param>
    /// <returns>
    /// The path of the sibling <c>.bom</c> file when it exists; otherwise <c>null</c>
    /// (also for a blank input or one without a directory or base name).
    /// </returns>
    public string? FindBomForReceipt(string plistPath)
    {
        if (string.IsNullOrWhiteSpace(plistPath))
        {
            return null;
        }

        // BOM files are named with same base name as plist
        // e.g., com.apple.pkg.Safari.plist -> com.apple.pkg.Safari.bom
        var directory = Path.GetDirectoryName(plistPath);
        var baseName = Path.GetFileNameWithoutExtension(plistPath);
        if (string.IsNullOrEmpty(directory) || string.IsNullOrEmpty(baseName))
        {
            return null;
        }

        var bomPath = Path.Combine(directory, baseName + ".bom");
        return File.Exists(bomPath) ? bomPath : null;
    }

    /// <summary>
    /// Validates the BOM header and loads up to <see cref="MaxScanBytes"/> bytes
    /// (header included) into memory.
    /// </summary>
    /// <returns>The loaded bytes, or an empty array when the input is not a BOM.</returns>
    private static byte[] ReadBomContent(Stream stream, CancellationToken cancellationToken)
    {
        // A single Stream.Read call may return fewer bytes than requested even when
        // more data follows, so keep reading until the header is complete or EOF.
        var header = new byte[HeaderLength];
        var headerBytes = stream.ReadAtLeast(header, HeaderLength, throwOnEndOfStream: false);
        if (headerBytes < HeaderLength)
        {
            return Array.Empty<byte>();
        }

        if (!header.AsSpan(0, BomMagic.Length).SequenceEqual(BomMagic))
        {
            return Array.Empty<byte>();
        }

        // Keep the header in the scanned content and append the remainder, so no
        // rewind (and therefore no seek support) is required from the stream.
        using var buffer = new MemoryStream();
        buffer.Write(header, 0, header.Length);

        var chunk = new byte[ReadChunkSize];
        while (buffer.Length < MaxScanBytes)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var wanted = (int)Math.Min(chunk.Length, MaxScanBytes - buffer.Length);
            var read = stream.Read(chunk, 0, wanted);
            if (read == 0)
            {
                break;
            }

            buffer.Write(chunk, 0, read);
        }

        return buffer.ToArray();
    }

    /// <summary>
    /// Scans <paramref name="content"/> for null-terminated runs of printable ASCII that
    /// start with '/' or '.' and pass <see cref="IsValidPath"/>.
    /// </summary>
    /// <returns>The distinct matches, sorted with ordinal comparison.</returns>
    private static List<string> ExtractPaths(byte[] content, CancellationToken cancellationToken)
    {
        var unique = new HashSet<string>(StringComparer.Ordinal);

        // Index of the first byte of the current candidate, or -1 when not inside one.
        var start = -1;
        for (var i = 0; i < content.Length; i++)
        {
            // Poll periodically rather than per byte; index 0 is always polled.
            if ((i & CancellationPollMask) == 0)
            {
                cancellationToken.ThrowIfCancellationRequested();
            }

            var b = content[i];
            if (start < 0)
            {
                // Look for path start indicators.
                if (b == (byte)'/' || b == (byte)'.')
                {
                    start = i;
                }

                continue;
            }

            if (b == 0)
            {
                // Null terminator: the candidate spans [start, i).
                var length = i - start;
                if (length > 1 && length < MaxPathLength)
                {
                    var candidate = System.Text.Encoding.UTF8.GetString(content, start, length);
                    if (IsValidPath(candidate))
                    {
                        unique.Add(candidate);
                    }
                }

                start = -1;
            }
            else if (!IsValidPathChar(b))
            {
                // A non-printable byte means this run was not a string after all.
                start = -1;
            }
        }

        var ordered = new List<string>(unique);
        ordered.Sort(StringComparer.Ordinal);
        return ordered;
    }

    /// <summary>
    /// Decides whether an extracted string is plausible enough to report as a path.
    /// </summary>
    private static bool IsValidPath(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return false;
        }

        // Must start with / or .
        if (!path.StartsWith('/') && !path.StartsWith('.'))
        {
            return false;
        }

        // Must not contain control characters (tab is tolerated).
        foreach (var c in path)
        {
            if (c < 32 && c != '\t')
            {
                return false;
            }
        }

        // Filter out common false positives. Comparisons are explicitly ordinal:
        // these are byte-level path patterns, not linguistic text.
        if (path.Contains("//", StringComparison.Ordinal)
            || path.EndsWith("/.", StringComparison.Ordinal)
            || path.Contains("/../", StringComparison.Ordinal))
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Returns true for printable ASCII bytes (32 through 126 inclusive).
    /// </summary>
    private static bool IsValidPathChar(byte b)
    {
        return b >= 32 && b < 127;
    }
}
/// <summary>
/// Represents a file entry from a BOM file.
/// </summary>
/// <param name="Path">The path string extracted for this entry.</param>
/// <param name="IsDirectory">
/// Whether the entry is treated as a directory; the parser in this file sets it
/// when the extracted path ends with '/'.
/// </param>
internal sealed record BomFileEntry(string Path, bool IsDirectory);