feat: add Attestation Chain and Triage Evidence API clients and models

- Implemented Attestation Chain API client with methods for verifying, fetching, and managing attestation chains.
- Created models for Attestation Chain, including DSSE envelope structures and verification results.
- Developed Triage Evidence API client for fetching finding evidence, including methods for evidence retrieval by CVE and component.
- Added models for Triage Evidence, encapsulating evidence responses, entry points, boundary proofs, and VEX evidence.
- Introduced mock implementations for both API clients to facilitate testing and development.
This commit is contained in:
master
2025-12-18 13:15:13 +02:00
parent 7d5250238c
commit 00d2c99af9
118 changed files with 13463 additions and 151 deletions

View File

@@ -0,0 +1,65 @@
using System.Text.Json.Serialization;
namespace StellaOps.Scanner.Analyzers.Native.Index;
/// <summary>
/// A single record of the NDJSON Build-ID index.
/// The index file holds one JSON object of this shape per line.
/// </summary>
public sealed class BuildIdIndexEntry
{
    /// <summary>
    /// Prefixed Build-ID, e.g. "gnu-build-id:abc123", "pe-cv:guid-age" or "macho-uuid:xyz".
    /// </summary>
    [JsonPropertyName("build_id")]
    public required string BuildId { get; init; }

    /// <summary>
    /// Package URL identifying the package that ships the binary.
    /// </summary>
    [JsonPropertyName("purl")]
    public required string Purl { get; init; }

    /// <summary>
    /// Package version when the index supplies one; passed through unchanged by
    /// <see cref="ToLookupResult"/> (no derivation from the PURL happens in this type).
    /// </summary>
    [JsonPropertyName("version")]
    public string? Version { get; init; }

    /// <summary>
    /// Distribution the mapping came from (debian, ubuntu, alpine, fedora, ...).
    /// </summary>
    [JsonPropertyName("distro")]
    public string? Distro { get; init; }

    /// <summary>
    /// Textual confidence level: "exact", "inferred" or "heuristic". Defaults to "exact"
    /// when the JSON object omits the property.
    /// </summary>
    [JsonPropertyName("confidence")]
    public string Confidence { get; init; } = "exact";

    /// <summary>
    /// Timestamp at which the entry was indexed (ISO-8601 in the file), if present.
    /// </summary>
    [JsonPropertyName("indexed_at")]
    public DateTimeOffset? IndexedAt { get; init; }

    /// <summary>
    /// Projects this entry onto the immutable lookup result type.
    /// </summary>
    /// <returns>
    /// A result carrying the same identifiers, the parsed confidence, and
    /// <see cref="DateTimeOffset.MinValue"/> as the timestamp when <see cref="IndexedAt"/> is absent.
    /// </returns>
    public BuildIdLookupResult ToLookupResult()
    {
        var confidence = ParseConfidence(Confidence);
        var indexedAt = IndexedAt ?? DateTimeOffset.MinValue;

        return new BuildIdLookupResult(BuildId, Purl, Version, Distro, confidence, indexedAt);
    }

    /// <summary>
    /// Maps the textual confidence (case-insensitive) to <see cref="BuildIdConfidence"/>.
    /// Anything unrecognised, including <c>null</c>, is treated as the weakest level.
    /// </summary>
    private static BuildIdConfidence ParseConfidence(string? value)
    {
        switch (value?.ToLowerInvariant())
        {
            case "exact":
                return BuildIdConfidence.Exact;
            case "inferred":
                return BuildIdConfidence.Inferred;
            default:
                // Covers "heuristic" as well as null and unknown strings.
                return BuildIdConfidence.Heuristic;
        }
    }
}

View File

@@ -0,0 +1,38 @@
namespace StellaOps.Scanner.Analyzers.Native.Index;
/// <summary>
/// Settings that control how the Build-ID index is located, trusted and held in memory.
/// </summary>
public sealed class BuildIdIndexOptions
{
    /// <summary>
    /// Location of the offline NDJSON index file. When unset, the offline index loads empty.
    /// </summary>
    public string? IndexPath { get; set; }

    /// <summary>
    /// Location of the DSSE signature file that accompanies the index.
    /// </summary>
    public string? SignaturePath { get; set; }

    /// <summary>
    /// Whether DSSE signature verification of the index is required. Defaults to <c>true</c>.
    /// </summary>
    public bool RequireSignature { get; set; } = true;

    /// <summary>
    /// Age beyond which the index is reported as possibly stale. Defaults to 30 days;
    /// a non-positive value disables the freshness check in the offline index.
    /// </summary>
    public TimeSpan MaxIndexAge { get; set; } = new TimeSpan(30, 0, 0, 0);

    /// <summary>
    /// Whether index entries may be cached in memory. Defaults to <c>true</c>.
    /// </summary>
    public bool EnableCache { get; set; } = true;

    /// <summary>
    /// Upper bound on the number of entries kept in the in-memory cache. Defaults to 100,000.
    /// </summary>
    public int MaxCacheEntries { get; set; } = 100000;
}

View File

@@ -0,0 +1,39 @@
namespace StellaOps.Scanner.Analyzers.Native.Index;
/// <summary>
/// How trustworthy a Build-ID to PURL mapping is, ordered from strongest to weakest.
/// </summary>
public enum BuildIdConfidence
{
    /// <summary>
    /// The mapping comes from official distro metadata or another verified source.
    /// </summary>
    Exact = 0,

    /// <summary>
    /// The mapping was derived from package metadata with high confidence.
    /// </summary>
    Inferred = 1,

    /// <summary>
    /// The mapping is a best guess (version pattern matching and similar heuristics).
    /// </summary>
    Heuristic = 2
}
/// <summary>
/// Result of a Build-ID lookup.
/// </summary>
/// <remarks>
/// Instances are produced from index entries by <c>BuildIdIndexEntry.ToLookupResult()</c>, which copies
/// the identifiers as written in the index (the index key is normalised separately by the index implementation).
/// </remarks>
/// <param name="BuildId">The Build-ID as recorded in the index entry (ELF build-id, PE GUID+Age, Mach-O UUID).</param>
/// <param name="Purl">Package URL for the binary.</param>
/// <param name="Version">Package version if known.</param>
/// <param name="SourceDistro">Source distribution (debian, alpine, fedora, etc.).</param>
/// <param name="Confidence">Confidence level of the match.</param>
/// <param name="IndexedAt">When this mapping was indexed; <see cref="DateTimeOffset.MinValue"/> when the index entry carried no timestamp.</param>
public sealed record BuildIdLookupResult(
string BuildId,
string Purl,
string? Version,
string? SourceDistro,
BuildIdConfidence Confidence,
DateTimeOffset IndexedAt);

View File

@@ -0,0 +1,42 @@
namespace StellaOps.Scanner.Analyzers.Native.Index;
/// <summary>
/// Contract for an index that resolves Build-IDs to package URLs.
/// This is what allows binaries in distroless/scratch images to be identified.
/// </summary>
public interface IBuildIdIndex
{
    /// <summary>
    /// Number of entries currently held by the index.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// <c>true</c> once the index has been loaded.
    /// </summary>
    bool IsLoaded { get; }

    /// <summary>
    /// Loads the index from its configured source, or reloads it if already loaded.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the load.</param>
    Task LoadAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Resolves one Build-ID.
    /// </summary>
    /// <param name="buildId">Build-ID to resolve (e.g., "gnu-build-id:abc123", "pe-cv:guid-age", "macho-uuid:xyz").</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>The matching result, or <c>null</c> when the Build-ID is not in the index.</returns>
    Task<BuildIdLookupResult?> LookupAsync(string buildId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Resolves several Build-IDs in one call.
    /// </summary>
    /// <param name="buildIds">Build-IDs to resolve.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>Only the results that were found; Build-IDs without a match are omitted.</returns>
    Task<IReadOnlyList<BuildIdLookupResult>> BatchLookupAsync(
        IEnumerable<string> buildIds,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,207 @@
using System.Collections.Frozen;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scanner.Analyzers.Native.Index;
/// <summary>
/// Offline Build-ID index that loads from NDJSON files.
/// Enables binary identification in distroless/scratch images.
/// </summary>
/// <remarks>
/// The whole file is parsed into an immutable <see cref="FrozenDictionary{TKey,TValue}"/> on
/// <see cref="LoadAsync"/>; lookups are then pure in-memory reads. Keys are normalised with
/// <see cref="NormalizeBuildId"/> both when loading and when querying.
/// </remarks>
public sealed class OfflineBuildIdIndex : IBuildIdIndex
{
    /// <summary>Maximum number of per-line problems that are logged individually during one load.</summary>
    private const int MaxLoggedLineErrors = 10;

    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly BuildIdIndexOptions _options;
    private readonly ILogger<OfflineBuildIdIndex> _logger;
    private FrozenDictionary<string, BuildIdLookupResult> _index = FrozenDictionary<string, BuildIdLookupResult>.Empty;
    private bool _isLoaded;

    /// <summary>
    /// Creates a new offline Build-ID index.
    /// </summary>
    /// <param name="options">Index configuration (path, signature and freshness settings).</param>
    /// <param name="logger">Logger for load diagnostics.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> or <paramref name="logger"/> is null.</exception>
    public OfflineBuildIdIndex(IOptions<BuildIdIndexOptions> options, ILogger<OfflineBuildIdIndex> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);

        _options = options.Value;
        _logger = logger;
    }

    /// <inheritdoc />
    public int Count => _index.Count;

    /// <inheritdoc />
    public bool IsLoaded => _isLoaded;

    /// <inheritdoc />
    public Task<BuildIdLookupResult?> LookupAsync(string buildId, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(buildId))
        {
            return Task.FromResult<BuildIdLookupResult?>(null);
        }

        var result = _index.TryGetValue(NormalizeBuildId(buildId), out var entry) ? entry : null;
        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<BuildIdLookupResult>> BatchLookupAsync(
        IEnumerable<string> buildIds,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(buildIds);

        var results = new List<BuildIdLookupResult>();

        foreach (var buildId in buildIds)
        {
            // Blank IDs are ignored rather than treated as errors, mirroring LookupAsync.
            if (string.IsNullOrWhiteSpace(buildId))
            {
                continue;
            }

            if (_index.TryGetValue(NormalizeBuildId(buildId), out var entry))
            {
                results.Add(entry);
            }
        }

        return Task.FromResult<IReadOnlyList<BuildIdLookupResult>>(results);
    }

    /// <inheritdoc />
    /// <remarks>
    /// A missing path or missing file is not an error: the index is published empty and marked loaded.
    /// Malformed lines are skipped and counted; the first few are logged with their line number.
    /// </remarks>
    public async Task LoadAsync(CancellationToken cancellationToken = default)
    {
        var indexPath = _options.IndexPath;

        if (string.IsNullOrWhiteSpace(indexPath))
        {
            _logger.LogWarning("No Build-ID index path configured; index will be empty");
            _index = FrozenDictionary<string, BuildIdLookupResult>.Empty;
            _isLoaded = true;
            return;
        }

        if (!File.Exists(indexPath))
        {
            _logger.LogWarning("Build-ID index file not found at {IndexPath}; index will be empty", indexPath);
            _index = FrozenDictionary<string, BuildIdLookupResult>.Empty;
            _isLoaded = true;
            return;
        }

        // TODO: BID-006 - Verify DSSE signature if RequireSignature is true
        // Until that exists, make the gap visible instead of silently loading unverified data
        // while the configuration claims verification is mandatory.
        if (_options.RequireSignature)
        {
            _logger.LogWarning(
                "Build-ID index signature verification is required but not implemented yet (BID-006); loading {IndexPath} without verifying signature {SignaturePath}",
                indexPath,
                _options.SignaturePath);
        }

        var entries = new Dictionary<string, BuildIdLookupResult>(StringComparer.OrdinalIgnoreCase);
        var lineNumber = 0;
        var errorCount = 0;

        await using var stream = File.OpenRead(indexPath);
        using var reader = new StreamReader(stream);

        while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
        {
            lineNumber++;

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // Skip comment lines (for manifest headers)
            if (line.StartsWith('#') || line.StartsWith("//", StringComparison.Ordinal))
            {
                continue;
            }

            try
            {
                var entry = JsonSerializer.Deserialize<BuildIdIndexEntry>(line, JsonOptions);
                if (entry is null || string.IsNullOrWhiteSpace(entry.BuildId) || string.IsNullOrWhiteSpace(entry.Purl))
                {
                    errorCount++;
                    if (errorCount <= MaxLoggedLineErrors)
                    {
                        _logger.LogWarning("Build-ID index line {LineNumber} has no usable build_id/purl; skipping", lineNumber);
                    }

                    continue;
                }

                // Later lines win when the same normalised Build-ID appears more than once.
                entries[NormalizeBuildId(entry.BuildId)] = entry.ToLookupResult();
            }
            catch (JsonException ex)
            {
                errorCount++;
                if (errorCount <= MaxLoggedLineErrors)
                {
                    _logger.LogWarning(ex, "Failed to parse Build-ID index line {LineNumber}", lineNumber);
                }
            }
        }

        if (errorCount > 0)
        {
            _logger.LogWarning("Build-ID index had {ErrorCount} parse errors out of {TotalLines} lines", errorCount, lineNumber);
        }

        _index = entries.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
        _isLoaded = true;

        _logger.LogInformation("Loaded Build-ID index with {EntryCount} entries from {IndexPath}", _index.Count, indexPath);

        // Check index freshness
        if (_options.MaxIndexAge > TimeSpan.Zero && entries.Count > 0)
        {
            // Entries without indexed_at are materialised with DateTimeOffset.MinValue; they carry
            // no freshness information and must not be reported as "latest entry from year 1".
            DateTimeOffset? latestIndexedAt = null;
            foreach (var result in entries.Values)
            {
                if (result.IndexedAt != DateTimeOffset.MinValue
                    && (latestIndexedAt is null || result.IndexedAt > latestIndexedAt))
                {
                    latestIndexedAt = result.IndexedAt;
                }
            }

            if (latestIndexedAt is null)
            {
                _logger.LogWarning(
                    "Build-ID index entries carry no indexed_at timestamps; freshness cannot be checked (max age is {MaxAge})",
                    _options.MaxIndexAge);
            }
            else if (latestIndexedAt.Value < DateTimeOffset.UtcNow - _options.MaxIndexAge)
            {
                _logger.LogWarning(
                    "Build-ID index may be stale. Latest entry from {LatestDate}, max age is {MaxAge}",
                    latestIndexedAt.Value,
                    _options.MaxIndexAge);
            }
        }
    }

    /// <summary>
    /// Normalize a Build-ID for consistent lookup.
    /// </summary>
    /// <remarks>
    /// The value is trimmed and lower-cased. Values that already contain a ':' prefix are otherwise
    /// left alone. Bare hex strings get a prefix chosen by length: 40 hex characters become
    /// "gnu-build-id:..."; 32 hex characters become "build-id:..." because that length is ambiguous
    /// (Mach-O UUID or short ELF build-id). NOTE(review): a bare 32-hex query therefore never matches
    /// an index entry written with the "macho-uuid:" prefix - confirm this is intended.
    /// </remarks>
    private static string NormalizeBuildId(string buildId)
    {
        var normalized = buildId.Trim().ToLowerInvariant();

        if (normalized.Contains(':'))
        {
            return normalized;
        }

        if (normalized.Length == 32 && IsHex(normalized))
        {
            // Could be Mach-O UUID or short ELF build-id
            return $"build-id:{normalized}";
        }

        if (normalized.Length == 40 && IsHex(normalized))
        {
            // 40 hex chars = ELF SHA-1 build-id
            return $"gnu-build-id:{normalized}";
        }

        return normalized;
    }

    /// <summary>Returns true when every character of <paramref name="s"/> is an ASCII hex digit.</summary>
    private static bool IsHex(string s)
    {
        foreach (var c in s)
        {
            if (!char.IsAsciiHexDigit(c))
            {
                return false;
            }
        }

        return true;
    }
}

View File

@@ -0,0 +1,16 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Code signature information from LC_CODE_SIGNATURE.
/// </summary>
/// <remarks>
/// Values are extracted by <c>MachOReader</c> from the embedded signature super-blob that the
/// LC_CODE_SIGNATURE load command points at. Every string member may be <c>null</c> when the
/// corresponding field is absent or could not be read.
/// </remarks>
/// <param name="TeamId">Team identifier (10-character Apple team ID).</param>
/// <param name="SigningId">Signing identifier (usually bundle ID), read as a null-terminated string from the CodeDirectory.</param>
/// <param name="CdHash">Code Directory hash (SHA-256 over the whole CodeDirectory blob, lowercase hex).</param>
/// <param name="HasHardenedRuntime">Whether hardened runtime is enabled (CodeDirectory flag 0x10000).</param>
/// <param name="Entitlements">Entitlements keys (not values, for privacy), taken from the <c>&lt;key&gt;</c> elements of the embedded entitlements plist.</param>
public sealed record MachOCodeSignature(
string? TeamId,
string? SigningId,
string? CdHash,
bool HasHardenedRuntime,
IReadOnlyList<string> Entitlements);

View File

@@ -0,0 +1,24 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Full identity information extracted from a Mach-O file.
/// </summary>
/// <remarks>
/// One instance describes one architecture slice. For a fat/universal binary the reader produces
/// one instance per slice it could parse.
/// </remarks>
/// <param name="CpuType">CPU type name: "i386", "x86_64", "arm", "arm64", or "cpu_&lt;n&gt;" for values the reader does not name.</param>
/// <param name="CpuSubtype">Raw CPU subtype value from the Mach header, for variant detection.</param>
/// <param name="Uuid">LC_UUID in lowercase hex (no dashes); <c>null</c> when the binary has no LC_UUID command.</param>
/// <param name="IsFatBinary">Whether this identity was read from a slice of a fat/universal binary.</param>
/// <param name="Platform">Platform from LC_BUILD_VERSION, or inferred from an LC_VERSION_MIN_* command when no build version was seen first.</param>
/// <param name="MinOsVersion">Minimum OS version from LC_VERSION_MIN_* or LC_BUILD_VERSION.</param>
/// <param name="SdkVersion">SDK version from LC_BUILD_VERSION (or LC_VERSION_MIN_* when not already set).</param>
/// <param name="CodeSignature">Code signature information (if signed).</param>
/// <param name="Exports">Exported symbols from LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE. NOTE(review): MachOReader currently never fills this list, so it is always empty.</param>
public sealed record MachOIdentity(
string? CpuType,
uint CpuSubtype,
string? Uuid,
bool IsFatBinary,
MachOPlatform Platform,
string? MinOsVersion,
string? SdkVersion,
MachOCodeSignature? CodeSignature,
IReadOnlyList<string> Exports);

View File

@@ -0,0 +1,46 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Mach-O platform values from LC_BUILD_VERSION.
/// </summary>
/// <remarks>
/// MachOReader casts the raw 32-bit platform field of LC_BUILD_VERSION directly to this enum without
/// range checking, so a value that is not one of the named members below can occur at runtime.
/// When only an LC_VERSION_MIN_* command is present, the reader maps it to
/// <see cref="MacOS"/>, <see cref="iOS"/>, <see cref="WatchOS"/> or <see cref="TvOS"/>.
/// </remarks>
public enum MachOPlatform : uint
{
/// <summary>Unknown platform.</summary>
Unknown = 0,
/// <summary>macOS.</summary>
MacOS = 1,
/// <summary>iOS.</summary>
iOS = 2,
/// <summary>tvOS.</summary>
TvOS = 3,
/// <summary>watchOS.</summary>
WatchOS = 4,
/// <summary>BridgeOS.</summary>
BridgeOS = 5,
/// <summary>Mac Catalyst (iPad apps on Mac).</summary>
MacCatalyst = 6,
/// <summary>iOS Simulator.</summary>
iOSSimulator = 7,
/// <summary>tvOS Simulator.</summary>
TvOSSimulator = 8,
/// <summary>watchOS Simulator.</summary>
WatchOSSimulator = 9,
/// <summary>DriverKit.</summary>
DriverKit = 10,
/// <summary>visionOS.</summary>
VisionOS = 11,
/// <summary>visionOS Simulator.</summary>
VisionOSSimulator = 12
}

View File

@@ -0,0 +1,640 @@
using System.Buffers.Binary;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Result from parsing a Mach-O file.
/// </summary>
/// <remarks>
/// Returned by <c>MachOReader.Parse</c>, which yields <c>null</c> instead of a result with an empty
/// identity list. <paramref name="Path"/> and <paramref name="LayerDigest"/> are passed through from
/// the caller unchanged.
/// </remarks>
/// <param name="Path">File path.</param>
/// <param name="LayerDigest">Container layer digest if applicable.</param>
/// <param name="Identities">List of identities (one per successfully parsed slice in a fat binary; exactly one for a single-architecture file).</param>
public sealed record MachOParseResult(
string Path,
string? LayerDigest,
IReadOnlyList<MachOIdentity> Identities);
/// <summary>
/// Full Mach-O file reader with identity extraction.
/// Handles both single-arch and fat (universal) binaries.
/// </summary>
/// <remarks>
/// Every entry point repositions the stream (to offset 0, to slice offsets and to the code-signature
/// blob), so the stream must be seekable. Sizes and offsets read from the file are validated before
/// they are used to seek, slice or allocate, because the input is untrusted binary data.
/// </remarks>
public static class MachOReader
{
    // Mach-O magic numbers
    private const uint MH_MAGIC = 0xFEEDFACE; // 32-bit, native endian
    private const uint MH_CIGAM = 0xCEFAEDFE; // 32-bit, reversed endian
    private const uint MH_MAGIC_64 = 0xFEEDFACF; // 64-bit, native endian
    private const uint MH_CIGAM_64 = 0xCFFAEDFE; // 64-bit, reversed endian

    // Fat binary magic numbers
    private const uint FAT_MAGIC = 0xCAFEBABE; // Big-endian
    private const uint FAT_CIGAM = 0xBEBAFECA; // Little-endian

    // Load command types
    private const uint LC_UUID = 0x1B;
    private const uint LC_CODE_SIGNATURE = 0x1D;
    private const uint LC_VERSION_MIN_MACOSX = 0x24;
    private const uint LC_VERSION_MIN_IPHONEOS = 0x25;
    private const uint LC_VERSION_MIN_WATCHOS = 0x30;
    private const uint LC_VERSION_MIN_TVOS = 0x2F;
    private const uint LC_BUILD_VERSION = 0x32;
    private const uint LC_DYLD_INFO = 0x22;
    private const uint LC_DYLD_INFO_ONLY = 0x80000022;
    private const uint LC_DYLD_EXPORTS_TRIE = 0x80000033;

    // Code signature blob types
    private const uint CSMAGIC_CODEDIRECTORY = 0xFADE0C02;
    private const uint CSMAGIC_EMBEDDED_SIGNATURE = 0xFADE0CC0;
    private const uint CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xFADE7171;

    // CodeDirectory flag marking the hardened runtime
    private const uint CS_RUNTIME = 0x10000;

    // CPU types
    private const int CPU_TYPE_X86 = 7;
    private const int CPU_TYPE_X86_64 = CPU_TYPE_X86 | 0x01000000;
    private const int CPU_TYPE_ARM = 12;
    private const int CPU_TYPE_ARM64 = CPU_TYPE_ARM | 0x01000000;

    // Sanity limits for counts read from untrusted input
    private const uint MaxFatArchitectures = 100;
    private const uint MaxSignatureBlobs = 100;

    /// <summary>
    /// Parse a Mach-O file and extract full identity information.
    /// For fat binaries, returns identities for all slices.
    /// </summary>
    /// <param name="stream">Seekable stream positioned at the start of the file.</param>
    /// <param name="path">File path, copied into the result.</param>
    /// <param name="layerDigest">Container layer digest, copied into the result.</param>
    /// <returns>The parse result, or <c>null</c> when no Mach-O identity could be read.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static MachOParseResult? Parse(Stream stream, string path, string? layerDigest = null)
    {
        ArgumentNullException.ThrowIfNull(stream);

        if (!TryReadBytes(stream, 4, out var magicBytes))
        {
            return null;
        }

        stream.Position = 0;

        var magic = BinaryPrimitives.ReadUInt32BigEndian(magicBytes);

        // Check for fat binary
        if (magic is FAT_MAGIC or FAT_CIGAM)
        {
            var identities = ParseFatBinary(stream);
            return identities.Count > 0
                ? new MachOParseResult(path, layerDigest, identities)
                : null;
        }

        // Single architecture binary
        var identity = ParseSingleMachO(stream);
        return identity is not null
            ? new MachOParseResult(path, layerDigest, [identity])
            : null;
    }

    /// <summary>
    /// Extract a single identity from the stream.
    /// For a fat binary this is the identity of the first slice that parses successfully.
    /// </summary>
    /// <param name="stream">Seekable stream positioned at the start of the file.</param>
    /// <param name="identity">The extracted identity, or <c>null</c> when the method returns false.</param>
    /// <returns><c>true</c> when an identity was extracted.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static bool TryExtractIdentity(Stream stream, out MachOIdentity? identity)
    {
        ArgumentNullException.ThrowIfNull(stream);

        identity = null;

        if (!TryReadBytes(stream, 4, out var magicBytes))
        {
            return false;
        }

        stream.Position = 0;

        var magic = BinaryPrimitives.ReadUInt32BigEndian(magicBytes);

        if (magic is FAT_MAGIC or FAT_CIGAM)
        {
            var identities = ParseFatBinary(stream);
            identity = identities.Count > 0 ? identities[0] : null;
            return identity is not null;
        }

        identity = ParseSingleMachO(stream);
        return identity is not null;
    }

    /// <summary>
    /// Parse a fat binary and return all slice identities.
    /// </summary>
    /// <param name="stream">Seekable stream positioned at the fat header; slice offsets are applied as absolute stream positions.</param>
    /// <returns>Identities of the slices that could be parsed; empty when the header is unreadable or implausible.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static IReadOnlyList<MachOIdentity> ParseFatBinary(Stream stream)
    {
        ArgumentNullException.ThrowIfNull(stream);

        var identities = new List<MachOIdentity>();

        if (!TryReadBytes(stream, 8, out var headerBytes))
        {
            return identities;
        }

        var littleEndian = BinaryPrimitives.ReadUInt32BigEndian(headerBytes) == FAT_CIGAM;
        var nfatArch = ReadFatUInt32(headerBytes, 4, littleEndian);

        if (nfatArch > MaxFatArchitectures)
        {
            // Sanity check
            return identities;
        }

        for (uint i = 0; i < nfatArch; i++)
        {
            // fat_arch: cputype(4), cpusubtype(4), offset(4), size(4), align(4)
            if (!TryReadBytes(stream, 20, out var archBytes))
            {
                break;
            }

            var sliceOffset = ReadFatUInt32(archBytes, 8, littleEndian);

            // Remember where the next fat_arch entry starts, parse the slice, then come back.
            var nextArchPosition = stream.Position;
            stream.Position = sliceOffset;

            var sliceIdentity = ParseSingleMachO(stream, isFatSlice: true);
            if (sliceIdentity is not null)
            {
                identities.Add(sliceIdentity);
            }

            stream.Position = nextArchPosition;
        }

        return identities;
    }

    /// <summary>
    /// Parse a single Mach-O binary (not fat) starting at the current stream position.
    /// </summary>
    /// <param name="stream">Seekable stream positioned at the Mach header.</param>
    /// <param name="isFatSlice">Whether the image is a slice of a fat binary; copied into the identity.</param>
    /// <returns>The identity, or <c>null</c> when the magic is not a Mach-O magic or the header is truncated.</returns>
    private static MachOIdentity? ParseSingleMachO(Stream stream, bool isFatSlice = false)
    {
        var startOffset = stream.Position;

        if (!TryReadBytes(stream, 4, out var magicBytes))
        {
            return null;
        }

        bool is64Bit;
        bool swapBytes;

        // The magic is read little-endian, so MH_MAGIC* means a little-endian image.
        switch (BinaryPrimitives.ReadUInt32LittleEndian(magicBytes))
        {
            case MH_MAGIC:
                is64Bit = false;
                swapBytes = false;
                break;
            case MH_CIGAM:
                is64Bit = false;
                swapBytes = true;
                break;
            case MH_MAGIC_64:
                is64Bit = true;
                swapBytes = false;
                break;
            case MH_CIGAM_64:
                is64Bit = true;
                swapBytes = true;
                break;
            default:
                return null;
        }

        // Re-read the complete Mach header (32 bytes for 64-bit, 28 for 32-bit).
        var headerSize = is64Bit ? 32 : 28;
        stream.Position = startOffset;

        if (!TryReadBytes(stream, headerSize, out var headerBytes))
        {
            return null;
        }

        var cpuType = ReadInt32(headerBytes, 4, swapBytes);
        var cpuSubtype = ReadUInt32(headerBytes, 8, swapBytes);
        var ncmds = ReadUInt32(headerBytes, 16, swapBytes);
        var sizeofcmds = ReadUInt32(headerBytes, 20, swapBytes);

        string? uuid = null;
        var platform = MachOPlatform.Unknown;
        string? minOsVersion = null;
        string? sdkVersion = null;
        MachOCodeSignature? codeSignature = null;
        var exports = new List<string>();

        var loadCommandsEnd = stream.Position + sizeofcmds;

        for (uint cmd = 0; cmd < ncmds; cmd++)
        {
            var cmdStart = stream.Position;
            if (cmdStart >= loadCommandsEnd)
            {
                break;
            }

            if (!TryReadBytes(stream, 8, out var cmdHeader))
            {
                break;
            }

            var cmdType = ReadUInt32(cmdHeader, 0, swapBytes);
            var cmdSize = ReadUInt32(cmdHeader, 4, swapBytes);
            var cmdEnd = cmdStart + cmdSize;

            // A command must at least hold its own header, must stay inside the load-command
            // area announced by the Mach header, and must be small enough to index with an int.
            if (cmdSize < 8 || cmdSize > int.MaxValue || cmdEnd > loadCommandsEnd)
            {
                break;
            }

            var cmdDataSize = (int)(cmdSize - 8);

            switch (cmdType)
            {
                case LC_UUID when cmdDataSize >= 16:
                    if (TryReadBytes(stream, 16, out var uuidBytes))
                    {
                        uuid = Convert.ToHexStringLower(uuidBytes);
                    }

                    break;

                case LC_BUILD_VERSION when cmdDataSize >= 16:
                    // platform(4), minos(4), sdk(4), ntools(4); trailing tool entries are not needed.
                    if (TryReadBytes(stream, 16, out var buildVersionBytes))
                    {
                        platform = (MachOPlatform)ReadUInt32(buildVersionBytes, 0, swapBytes);
                        minOsVersion = FormatVersion(ReadUInt32(buildVersionBytes, 4, swapBytes));
                        sdkVersion = FormatVersion(ReadUInt32(buildVersionBytes, 8, swapBytes));
                    }

                    break;

                case LC_VERSION_MIN_MACOSX:
                case LC_VERSION_MIN_IPHONEOS:
                case LC_VERSION_MIN_WATCHOS:
                case LC_VERSION_MIN_TVOS:
                    if (TryReadBytes(stream, cmdDataSize, out var versionMinBytes))
                    {
                        // LC_BUILD_VERSION takes precedence when it was seen earlier.
                        if (platform == MachOPlatform.Unknown)
                        {
                            platform = cmdType switch
                            {
                                LC_VERSION_MIN_MACOSX => MachOPlatform.MacOS,
                                LC_VERSION_MIN_IPHONEOS => MachOPlatform.iOS,
                                LC_VERSION_MIN_WATCHOS => MachOPlatform.WatchOS,
                                LC_VERSION_MIN_TVOS => MachOPlatform.TvOS,
                                _ => MachOPlatform.Unknown
                            };
                        }

                        if (versionMinBytes.Length >= 8)
                        {
                            minOsVersion ??= FormatVersion(ReadUInt32(versionMinBytes, 0, swapBytes));
                            sdkVersion ??= FormatVersion(ReadUInt32(versionMinBytes, 4, swapBytes));
                        }
                    }

                    break;

                case LC_CODE_SIGNATURE when cmdDataSize >= 8:
                    if (TryReadBytes(stream, 8, out var codeSignBytes))
                    {
                        var dataOff = ReadUInt32(codeSignBytes, 0, swapBytes);
                        var dataSize = ReadUInt32(codeSignBytes, 4, swapBytes);
                        var signatureSize = dataSize > int.MaxValue ? int.MaxValue : (int)dataSize;

                        // dataoff is relative to the start of this Mach-O image (slice).
                        stream.Position = startOffset + dataOff;
                        codeSignature = ParseCodeSignature(stream, signatureSize);
                    }

                    break;
            }

            // Always continue at the start of the next command, independent of how many bytes
            // the handler above consumed or where it moved the stream.
            stream.Position = cmdEnd;
        }

        return new MachOIdentity(
            GetCpuTypeName(cpuType),
            cpuSubtype,
            uuid,
            isFatSlice,
            platform,
            minOsVersion,
            sdkVersion,
            codeSignature,
            exports);
    }

    /// <summary>
    /// Parse the embedded code signature super-blob located at the current stream position.
    /// </summary>
    /// <param name="stream">Stream positioned at the super-blob.</param>
    /// <param name="size">Size of the signature data as announced by LC_CODE_SIGNATURE.</param>
    /// <returns>
    /// The signature details, or <c>null</c> when the blob is not an embedded signature, is implausible,
    /// or yields neither team ID, signing ID nor CDHash.
    /// </returns>
    private static MachOCodeSignature? ParseCodeSignature(Stream stream, int size)
    {
        var blobStart = stream.Position;

        // SuperBlob header: magic(4), length(4), count(4) - all big-endian.
        if (!TryReadBytes(stream, 8, out var superBlobHeader))
        {
            return null;
        }

        if (BinaryPrimitives.ReadUInt32BigEndian(superBlobHeader) != CSMAGIC_EMBEDDED_SIGNATURE)
        {
            return null;
        }

        var length = BinaryPrimitives.ReadUInt32BigEndian(superBlobHeader.AsSpan(4));
        if (length > size || length < 12)
        {
            return null;
        }

        if (!TryReadBytes(stream, 4, out var countBytes))
        {
            return null;
        }

        var count = BinaryPrimitives.ReadUInt32BigEndian(countBytes);
        if (count > MaxSignatureBlobs)
        {
            return null;
        }

        // Blob index entries: type(4), offset(4). Dispatch below is on the blob's own magic,
        // so only the offsets are kept.
        var blobOffsets = new List<uint>((int)count);
        for (uint i = 0; i < count; i++)
        {
            if (!TryReadBytes(stream, 8, out var indexEntry))
            {
                break;
            }

            blobOffsets.Add(BinaryPrimitives.ReadUInt32BigEndian(indexEntry.AsSpan(4)));
        }

        string? teamId = null;
        string? signingId = null;
        string? cdHash = null;
        var hasHardenedRuntime = false;
        var entitlements = new List<string>();

        foreach (var blobOffset in blobOffsets)
        {
            // The blob header must lie inside the super-blob.
            if ((ulong)blobOffset + 8 > length)
            {
                continue;
            }

            stream.Position = blobStart + blobOffset;

            if (!TryReadBytes(stream, 8, out var blobHeader))
            {
                continue;
            }

            var blobMagic = BinaryPrimitives.ReadUInt32BigEndian(blobHeader);
            var blobLength = BinaryPrimitives.ReadUInt32BigEndian(blobHeader.AsSpan(4));

            // Reject blobs that are shorter than their header or spill past the super-blob;
            // this also guarantees blobLength fits in an int (length <= size).
            if (blobLength < 8 || (ulong)blobOffset + blobLength > length)
            {
                continue;
            }

            switch (blobMagic)
            {
                case CSMAGIC_CODEDIRECTORY:
                    (teamId, signingId, cdHash, hasHardenedRuntime) =
                        ParseCodeDirectory(stream, blobStart + blobOffset, (int)blobLength);
                    break;

                case CSMAGIC_EMBEDDED_ENTITLEMENTS:
                    entitlements = ParseEntitlements(stream, (int)blobLength - 8);
                    break;
            }
        }

        if (teamId is null && signingId is null && cdHash is null)
        {
            return null;
        }

        return new MachOCodeSignature(teamId, signingId, cdHash, hasHardenedRuntime, entitlements);
    }

    /// <summary>
    /// Parse a CodeDirectory blob.
    /// </summary>
    /// <param name="stream">Seekable stream containing the blob.</param>
    /// <param name="blobStart">Absolute stream position of the blob (its magic field).</param>
    /// <param name="length">Blob length in bytes, including the 8-byte blob header.</param>
    /// <returns>Team ID, signing ID, SHA-256 of the whole blob (lowercase hex) and the hardened-runtime flag.</returns>
    private static (string? TeamId, string? SigningId, string? CdHash, bool HasHardenedRuntime) ParseCodeDirectory(
        Stream stream, long blobStart, int length)
    {
        // CodeDirectory layout (all big-endian):
        //  +0 magic        +4 length       +8 version      +12 flags
        // +16 hashOffset  +20 identOffset +24 nSpecialSlots +28 nCodeSlots
        // +32 codeLimit   +36 hashSize    +37 hashType    +38 platform   +39 pageSize
        // +40 spare2      +44 scatterOffset (0x20100+)    +48 teamOffset (0x20200+)
        const int MinimumLength = 24; // everything up to and including identOffset
        const int TeamOffsetPosition = 48;
        const int PrefixLength = TeamOffsetPosition + 4;

        if (length < MinimumLength)
        {
            return (null, null, null, false);
        }

        stream.Position = blobStart;

        if (!TryReadBytes(stream, Math.Min(length, PrefixLength), out var cdBytes))
        {
            return (null, null, null, false);
        }

        var version = BinaryPrimitives.ReadUInt32BigEndian(cdBytes.AsSpan(8));
        var flags = BinaryPrimitives.ReadUInt32BigEndian(cdBytes.AsSpan(12));
        var identOffset = BinaryPrimitives.ReadUInt32BigEndian(cdBytes.AsSpan(20));

        var hasHardenedRuntime = (flags & CS_RUNTIME) != 0;

        // Read signing identifier
        string? signingId = null;
        if (identOffset > 0 && identOffset < length)
        {
            stream.Position = blobStart + identOffset;
            signingId = ReadNullTerminatedString(stream, 256);
        }

        // Read team ID (version 0x20200 and later)
        string? teamId = null;
        if (version >= 0x20200 && cdBytes.Length >= PrefixLength)
        {
            var teamOffset = BinaryPrimitives.ReadUInt32BigEndian(cdBytes.AsSpan(TeamOffsetPosition));
            if (teamOffset > 0 && teamOffset < length)
            {
                stream.Position = blobStart + teamOffset;
                teamId = ReadNullTerminatedString(stream, 20);
            }
        }

        // Compute CDHash (SHA-256 of the entire CodeDirectory blob)
        stream.Position = blobStart;
        var cdHash = TryReadBytes(stream, length, out var fullCdBytes)
            ? Convert.ToHexStringLower(SHA256.HashData(fullCdBytes))
            : null;

        return (teamId, signingId, cdHash, hasHardenedRuntime);
    }

    /// <summary>
    /// Parse entitlements plist and extract keys.
    /// </summary>
    /// <param name="stream">Stream positioned at the plist payload (after the blob header).</param>
    /// <param name="length">Payload length in bytes.</param>
    /// <returns>The text of each non-blank <c>&lt;key&gt;</c> element, in document order.</returns>
    private static List<string> ParseEntitlements(Stream stream, int length)
    {
        var keys = new List<string>();

        if (!TryReadBytes(stream, length, out var plistBytes))
        {
            return keys;
        }

        // Simple plist key extraction (looks for <key>...</key> patterns)
        var plist = Encoding.UTF8.GetString(plistBytes);

        var keyStart = 0;
        while ((keyStart = plist.IndexOf("<key>", keyStart, StringComparison.Ordinal)) >= 0)
        {
            keyStart += 5;
            var keyEnd = plist.IndexOf("</key>", keyStart, StringComparison.Ordinal);
            if (keyEnd <= keyStart)
            {
                break;
            }

            var key = plist[keyStart..keyEnd];
            if (!string.IsNullOrWhiteSpace(key))
            {
                keys.Add(key);
            }

            keyStart = keyEnd + 6;
        }

        return keys;
    }

    /// <summary>
    /// Get CPU type name from CPU type value.
    /// </summary>
    private static string? GetCpuTypeName(int cpuType) => cpuType switch
    {
        CPU_TYPE_X86 => "i386",
        CPU_TYPE_X86_64 => "x86_64",
        CPU_TYPE_ARM => "arm",
        CPU_TYPE_ARM64 => "arm64",
        _ => $"cpu_{cpuType}"
    };

    /// <summary>
    /// Format version number (major.minor.patch from packed uint32: 16 bits major, 8 minor, 8 patch).
    /// A zero patch component is omitted.
    /// </summary>
    private static string FormatVersion(uint version)
    {
        var major = (version >> 16) & 0xFFFF;
        var minor = (version >> 8) & 0xFF;
        var patch = version & 0xFF;

        return patch == 0 ? $"{major}.{minor}" : $"{major}.{minor}.{patch}";
    }

    /// <summary>
    /// Read a null-terminated string from stream, stopping at NUL, end of stream or
    /// <paramref name="maxLength"/> bytes, whichever comes first.
    /// </summary>
    /// <returns>The UTF-8 decoded string, or <c>null</c> when no bytes were read.</returns>
    private static string? ReadNullTerminatedString(Stream stream, int maxLength)
    {
        var bytes = new byte[maxLength];
        var count = 0;

        while (count < maxLength)
        {
            var b = stream.ReadByte();
            if (b <= 0)
            {
                // 0 is the terminator, -1 is end of stream.
                break;
            }

            bytes[count++] = (byte)b;
        }

        return count > 0 ? Encoding.UTF8.GetString(bytes, 0, count) : null;
    }

    /// <summary>
    /// Try to read exactly the specified number of bytes.
    /// </summary>
    /// <remarks>
    /// Returns <c>false</c> without allocating when <paramref name="count"/> is negative or, for a
    /// seekable stream, exceeds the bytes remaining - both can be provoked by corrupt size fields.
    /// </remarks>
    private static bool TryReadBytes(Stream stream, int count, out byte[] bytes)
    {
        if (count < 0 || (stream.CanSeek && stream.Length - stream.Position < count))
        {
            bytes = [];
            return false;
        }

        bytes = new byte[count];
        var totalRead = 0;

        while (totalRead < count)
        {
            var read = stream.Read(bytes, totalRead, count - totalRead);
            if (read == 0)
            {
                return false;
            }

            totalRead += read;
        }

        return true;
    }

    /// <summary>
    /// Read int32 with optional byte swapping (<paramref name="swap"/> selects big-endian).
    /// </summary>
    private static int ReadInt32(byte[] data, int offset, bool swap) =>
        swap
            ? BinaryPrimitives.ReadInt32BigEndian(data.AsSpan(offset))
            : BinaryPrimitives.ReadInt32LittleEndian(data.AsSpan(offset));

    /// <summary>
    /// Read uint32 with optional byte swapping (<paramref name="swap"/> selects big-endian).
    /// </summary>
    private static uint ReadUInt32(byte[] data, int offset, bool swap) =>
        swap
            ? BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(offset))
            : BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(offset));

    /// <summary>
    /// Read a uint32 field of the fat header / fat_arch table, which is big-endian unless the
    /// file started with FAT_CIGAM.
    /// </summary>
    private static uint ReadFatUInt32(byte[] data, int offset, bool littleEndian) =>
        littleEndian
            ? BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(offset))
            : BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(offset));
}

View File

@@ -1,5 +1,23 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Identity information extracted from a native binary (ELF, PE, Mach-O).
/// </summary>
/// <param name="Format">Binary format (ELF, PE, Mach-O).</param>
/// <param name="CpuArchitecture">CPU architecture (x86, x86_64, arm64, etc.).</param>
/// <param name="OperatingSystem">Target OS (linux, windows, darwin, etc.).</param>
/// <param name="Endianness">Byte order (le, be).</param>
/// <param name="BuildId">ELF GNU Build-ID (hex string).</param>
/// <param name="Uuid">Mach-O LC_UUID (hex string).</param>
/// <param name="InterpreterPath">ELF interpreter path (e.g., /lib64/ld-linux-x86-64.so.2).</param>
/// <param name="CodeViewGuid">PE CodeView GUID (lowercase hex, no dashes).</param>
/// <param name="CodeViewAge">PE CodeView Age (increments on rebuild).</param>
/// <param name="ProductVersion">PE version resource ProductVersion.</param>
/// <param name="MachOPlatform">Mach-O platform (macOS, iOS, etc.).</param>
/// <param name="MachOMinOsVersion">Mach-O minimum OS version.</param>
/// <param name="MachOSdkVersion">Mach-O SDK version.</param>
/// <param name="MachOCdHash">Mach-O CodeDirectory hash (SHA-256).</param>
/// <param name="MachOTeamId">Mach-O code signing Team ID.</param>
public sealed record NativeBinaryIdentity(
NativeFormat Format,
string? CpuArchitecture,
@@ -7,4 +25,13 @@ public sealed record NativeBinaryIdentity(
string? Endianness,
string? BuildId,
string? Uuid,
string? InterpreterPath);
string? InterpreterPath,
string? CodeViewGuid = null,
int? CodeViewAge = null,
string? ProductVersion = null,
MachOPlatform? MachOPlatform = null,
string? MachOMinOsVersion = null,
string? MachOSdkVersion = null,
string? MachOCdHash = null,
string? MachOTeamId = null);

View File

@@ -180,6 +180,24 @@ public static class NativeFormatDetector
return false;
}
// Try full PE parsing for CodeView GUID and other identity info
if (PeReader.TryExtractIdentity(span, out var peIdentity) && peIdentity is not null)
{
identity = new NativeBinaryIdentity(
NativeFormat.Pe,
peIdentity.Machine,
"windows",
Endianness: "le",
BuildId: null,
Uuid: null,
InterpreterPath: null,
CodeViewGuid: peIdentity.CodeViewGuid,
CodeViewAge: peIdentity.CodeViewAge,
ProductVersion: peIdentity.ProductVersion);
return true;
}
// Fallback to basic parsing
var machine = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(peHeaderOffset + 4, 2));
var arch = MapPeMachine(machine);
@@ -205,6 +223,30 @@ public static class NativeFormatDetector
return false;
}
// Try full parsing with MachOReader
using var stream = new MemoryStream(span.ToArray());
if (MachOReader.TryExtractIdentity(stream, out var machOIdentity) && machOIdentity is not null)
{
var endianness = magic is 0xCAFEBABE or 0xFEEDFACE or 0xFEEDFACF ? "be" : "le";
var prefixedUuid = machOIdentity.Uuid is not null ? $"macho-uuid:{machOIdentity.Uuid}" : null;
identity = new NativeBinaryIdentity(
NativeFormat.MachO,
machOIdentity.CpuType,
"darwin",
Endianness: endianness,
BuildId: prefixedUuid,
Uuid: prefixedUuid,
InterpreterPath: null,
MachOPlatform: machOIdentity.Platform,
MachOMinOsVersion: machOIdentity.MinOsVersion,
MachOSdkVersion: machOIdentity.SdkVersion,
MachOCdHash: machOIdentity.CodeSignature?.CdHash,
MachOTeamId: machOIdentity.CodeSignature?.TeamId);
return true;
}
// Fallback to basic parsing
bool bigEndian = magic is 0xCAFEBABE or 0xFEEDFACE or 0xFEEDFACF;
uint cputype;
@@ -229,7 +271,7 @@ public static class NativeFormatDetector
}
var arch = MapMachCpuType(cputype);
var endianness = bigEndian ? "be" : "le";
var fallbackEndianness = bigEndian ? "be" : "le";
string? uuid = null;
if (!isFat)
@@ -269,7 +311,7 @@ public static class NativeFormatDetector
}
// Store Mach-O UUID in BuildId field (prefixed) and also in Uuid for backwards compatibility
identity = new NativeBinaryIdentity(NativeFormat.MachO, arch, "darwin", Endianness: endianness, BuildId: uuid, Uuid: uuid, InterpreterPath: null);
identity = new NativeBinaryIdentity(NativeFormat.MachO, arch, "darwin", Endianness: fallbackEndianness, BuildId: uuid, Uuid: uuid, InterpreterPath: null);
return true;
}

View File

@@ -0,0 +1,12 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Compiler/linker hint extracted from PE Rich Header.
/// One instance is produced per non-zero 8-byte Rich header entry decoded by <c>PeReader</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>PeReader.ParseRichHeader</c> fills <c>ToolId</c> from the low 16 bits and
/// <c>ToolVersion</c> from the high 16 bits of the decoded comp.id DWORD. Common Rich header
/// descriptions place the product ID in the high word and the build number in the low word -
/// confirm that this mapping is the intended one.
/// </remarks>
/// <param name="ToolId">Tool ID (@comp.id) - identifies the compiler/linker. Populated from the low 16 bits of the decoded comp.id.</param>
/// <param name="ToolVersion">Tool version (@prod.id) - identifies the version. Populated from the high 16 bits of the decoded comp.id.</param>
/// <param name="UseCount">Number of times this tool was used (the entry's 32-bit count, cast to <c>int</c>).</param>
public sealed record PeCompilerHint(
ushort ToolId,
ushort ToolVersion,
int UseCount);

View File

@@ -0,0 +1,34 @@
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Full identity information extracted from a PE (Portable Executable) file.
/// Instances are built by <c>PeReader.TryExtractIdentity</c>; every optional field is null when the
/// corresponding structure is absent or could not be read.
/// </summary>
/// <param name="Machine">Machine type (x86, x86_64, arm64, etc.); null when the COFF machine value is not recognised.</param>
/// <param name="Is64Bit">Whether this is a 64-bit PE (PE32+, optional header magic 0x20b).</param>
/// <param name="Subsystem">PE subsystem (Console, GUI, Native, etc.).</param>
/// <param name="CodeViewGuid">CodeView PDB70 GUID in lowercase hex (no dashes).</param>
/// <param name="CodeViewAge">CodeView Age field (increments on rebuild).</param>
/// <param name="PdbPath">Original PDB path from debug directory.</param>
/// <param name="ProductVersion">Product version from version resource.</param>
/// <param name="FileVersion">File version from version resource.</param>
/// <param name="CompanyName">Company name from version resource.</param>
/// <param name="ProductName">Product name from version resource.</param>
/// <param name="OriginalFilename">Original filename from version resource.</param>
/// <param name="RichHeaderHash">Rich header XOR key: the 32-bit value stored right after the "Rich" marker (the reader does not recompute it from the entries).</param>
/// <param name="CompilerHints">Compiler hints from rich header; empty when no Rich header was decoded.</param>
/// <param name="Exports">Exported symbol names from export directory (the reader caps this at 10000 names).</param>
public sealed record PeIdentity(
string? Machine,
bool Is64Bit,
PeSubsystem Subsystem,
string? CodeViewGuid,
int? CodeViewAge,
string? PdbPath,
string? ProductVersion,
string? FileVersion,
string? CompanyName,
string? ProductName,
string? OriginalFilename,
uint? RichHeaderHash,
IReadOnlyList<PeCompilerHint> CompilerHints,
IReadOnlyList<string> Exports);

View File

@@ -0,0 +1,757 @@
using System.Buffers.Binary;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Full PE file reader with identity extraction including CodeView GUID, Rich header, and version resources.
/// </summary>
public static class PeReader
{
// PE Data Directory Indices
// Each index selects one 8-byte entry (4-byte RVA followed by 4-byte size) in the
// optional header's data directory array; the parsers below multiply the index by 8.
private const int IMAGE_DIRECTORY_ENTRY_EXPORT = 0;
private const int IMAGE_DIRECTORY_ENTRY_DEBUG = 6;
private const int IMAGE_DIRECTORY_ENTRY_RESOURCE = 2;
// Debug Types
// Value of the Type field of a debug directory entry that carries CodeView data.
private const uint IMAGE_DEBUG_TYPE_CODEVIEW = 2;
// CodeView Signatures
// First DWORD of a PDB 7.0 CodeView record.
private const uint RSDS_SIGNATURE = 0x53445352; // "RSDS" in little-endian
// Rich Header Markers
// "Rich" is stored in the clear; "DanS" is stored XOR'd with the key that follows "Rich".
private const uint RICH_MARKER = 0x68636952; // "Rich" in little-endian
private const uint DANS_MARKER = 0x536E6144; // "DanS" in little-endian
/// <summary>
/// Parse result containing identity and any parsing metadata.
/// </summary>
/// <param name="Identity">Identity extracted from the PE image.</param>
/// <param name="ParseWarning">Optional warning text; <c>Parse</c> currently always passes null.</param>
public sealed record PeParseResult(
PeIdentity Identity,
string? ParseWarning);
/// <summary>
/// Buffers the remainder of <paramref name="stream"/> in memory and extracts the PE identity from it.
/// </summary>
/// <param name="stream">Stream containing PE file data; it is copied from its current position to the end.</param>
/// <param name="path">File path for context (not accessed).</param>
/// <param name="layerDigest">Optional container layer digest (not used during parsing).</param>
/// <returns>Parse result, or null if not a valid PE file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static PeParseResult? Parse(Stream stream, string path, string? layerDigest = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    byte[] image;
    using (var copy = new MemoryStream())
    {
        stream.CopyTo(copy);
        image = copy.ToArray();
    }

    // The result carries no warning today; the slot exists for future parse diagnostics.
    return TryExtractIdentity(image, out var parsed) && parsed is not null
        ? new PeParseResult(parsed, null)
        : null;
}
/// <summary>
/// Try to extract identity from PE file data.
/// </summary>
/// <remarks>
/// The DOS header, PE signature, COFF header and optional header must all validate; everything
/// after that (Rich header, CodeView record, version resource, exports) is best effort and simply
/// leaves the corresponding fields null/empty when missing.
/// </remarks>
/// <param name="data">PE file bytes.</param>
/// <param name="identity">Extracted identity if successful; null otherwise.</param>
/// <returns>True if valid PE file, false otherwise.</returns>
public static bool TryExtractIdentity(ReadOnlySpan<byte> data, out PeIdentity? identity)
{
    identity = null;

    // Mandatory header chain: DOS header -> "PE\0\0" -> COFF header -> optional header.
    if (!ValidateDosHeader(data, out var peOffset)
        || !ValidatePeSignature(data, peOffset)
        || !ParseCoffHeader(data, peOffset, out var machineCode, out var sectionCount, out var optionalHeaderSize)
        || !ParseOptionalHeader(data, peOffset, optionalHeaderSize,
            out var pe32Plus, out var peSubsystem, out var directoryCount, out var directoriesOffset))
    {
        return false;
    }

    var architecture = MapPeMachine(machineCode);

    // The section table follows the optional header (4-byte signature + 20-byte COFF header = 24);
    // it is needed to translate RVAs into file offsets.
    var sectionTable = ParseSectionHeaders(data, peOffset + 24 + optionalHeaderSize, sectionCount);

    // Rich header lives in the DOS stub, before the PE header.
    var hints = new List<PeCompilerHint>();
    ParseRichHeader(data, peOffset, out var richKey, hints);

    // CodeView debug record.
    string? cvGuid = null;
    int? cvAge = null;
    string? pdb = null;
    if (directoryCount > IMAGE_DIRECTORY_ENTRY_DEBUG)
    {
        ParseDebugDirectory(data, directoriesOffset, directoryCount, sectionTable,
            out cvGuid, out cvAge, out pdb);
    }

    // Version resource strings.
    string? productVer = null;
    string? fileVer = null;
    string? company = null;
    string? product = null;
    string? originalName = null;
    if (directoryCount > IMAGE_DIRECTORY_ENTRY_RESOURCE)
    {
        ParseVersionResource(data, directoriesOffset, sectionTable, pe32Plus,
            out productVer, out fileVer, out company, out product, out originalName);
    }

    // Exported symbol names.
    var exportedNames = new List<string>();
    if (directoryCount > IMAGE_DIRECTORY_ENTRY_EXPORT)
    {
        ParseExportDirectory(data, directoriesOffset, sectionTable, exportedNames);
    }

    identity = new PeIdentity(
        Machine: architecture,
        Is64Bit: pe32Plus,
        Subsystem: peSubsystem,
        CodeViewGuid: cvGuid,
        CodeViewAge: cvAge,
        PdbPath: pdb,
        ProductVersion: productVer,
        FileVersion: fileVer,
        CompanyName: company,
        ProductName: product,
        OriginalFilename: originalName,
        RichHeaderHash: richKey,
        CompilerHints: hints,
        Exports: exportedNames);
    return true;
}
/// <summary>
/// Validate DOS header and extract PE header offset.
/// </summary>
/// <param name="data">Candidate PE image bytes.</param>
/// <param name="peHeaderOffset">
/// Receives e_lfanew (the file offset of the "PE\0\0" signature). Only meaningful when the method returns true.
/// </param>
/// <returns>
/// True when the buffer starts with "MZ" and e_lfanew leaves room for the 4-byte signature
/// plus the 20-byte COFF header inside <paramref name="data"/>.
/// </returns>
private static bool ValidateDosHeader(ReadOnlySpan<byte> data, out int peHeaderOffset)
{
    peHeaderOffset = 0;

    // The DOS header is 0x40 bytes long; e_lfanew occupies its last four bytes (0x3C..0x3F).
    if (data.Length < 0x40)
    {
        return false;
    }

    // Check MZ signature
    if (data[0] != 'M' || data[1] != 'Z')
    {
        return false;
    }

    // Read e_lfanew (offset to PE header) at offset 0x3C
    peHeaderOffset = BinaryPrimitives.ReadInt32LittleEndian(data.Slice(0x3C, 4));

    // e_lfanew is untrusted. Compare against (Length - 24) rather than computing
    // (offset + 24): the addition wraps to a negative number for offsets close to
    // int.MaxValue and would let a bogus offset through. Length >= 0x40 here, so the
    // subtraction cannot underflow.
    return peHeaderOffset >= 0 && peHeaderOffset <= data.Length - 24;
}
/// <summary>
/// Validate PE signature at the given offset.
/// </summary>
/// <param name="data">Candidate PE image bytes.</param>
/// <param name="peHeaderOffset">File offset at which the "PE\0\0" signature is expected.</param>
/// <returns>True when the four bytes at <paramref name="peHeaderOffset"/> are 'P', 'E', 0, 0.</returns>
private static bool ValidatePeSignature(ReadOnlySpan<byte> data, int peHeaderOffset)
{
    // Reject negative offsets and offsets that leave fewer than four bytes. The comparison is
    // written as (Length - 4) so that a huge offset cannot wrap the way (offset + 4) would.
    if (peHeaderOffset < 0 || peHeaderOffset > data.Length - 4)
    {
        return false;
    }

    // Check "PE\0\0" signature
    return data[peHeaderOffset] == 'P'
        && data[peHeaderOffset + 1] == 'E'
        && data[peHeaderOffset + 2] == 0
        && data[peHeaderOffset + 3] == 0;
}
/// <summary>
/// Parse COFF header.
/// </summary>
/// <param name="data">PE image bytes.</param>
/// <param name="peHeaderOffset">File offset of the "PE\0\0" signature; the COFF header starts 4 bytes later.</param>
/// <param name="machine">Receives the raw Machine field.</param>
/// <param name="numberOfSections">Receives the NumberOfSections field.</param>
/// <param name="sizeOfOptionalHeader">Receives the SizeOfOptionalHeader field.</param>
/// <returns>
/// True when the 20-byte COFF header fits in <paramref name="data"/> and declares a non-empty
/// optional header; false otherwise.
/// </returns>
private static bool ParseCoffHeader(ReadOnlySpan<byte> data, int peHeaderOffset,
    out ushort machine, out ushort numberOfSections, out ushort sizeOfOptionalHeader)
{
    machine = 0;
    numberOfSections = 0;
    sizeOfOptionalHeader = 0;

    // Signature (4) + COFF header (20) must fit. Written as (Length - 24) so an offset near
    // int.MaxValue cannot wrap the sum and slip past the bounds check.
    if (peHeaderOffset < 0 || peHeaderOffset > data.Length - 24)
    {
        return false;
    }

    var coffHeader = data.Slice(peHeaderOffset + 4, 20);
    machine = BinaryPrimitives.ReadUInt16LittleEndian(coffHeader);
    numberOfSections = BinaryPrimitives.ReadUInt16LittleEndian(coffHeader.Slice(2, 2));
    sizeOfOptionalHeader = BinaryPrimitives.ReadUInt16LittleEndian(coffHeader.Slice(16, 2));

    // An image without an optional header cannot be parsed further by this reader.
    return sizeOfOptionalHeader > 0;
}
/// <summary>
/// Parse Optional header.
/// </summary>
/// <param name="data">PE image bytes.</param>
/// <param name="peHeaderOffset">File offset of the "PE\0\0" signature; the optional header starts 24 bytes later.</param>
/// <param name="sizeOfOptionalHeader">SizeOfOptionalHeader from the COFF header.</param>
/// <param name="is64Bit">Receives true for PE32+ (magic 0x20b), false for PE32 (magic 0x10b).</param>
/// <param name="subsystem">Receives the Subsystem field, or Unknown when it lies outside the buffer.</param>
/// <param name="numberOfRvaAndSizes">Receives NumberOfRvaAndSizes, or 0 when it lies outside the buffer.</param>
/// <param name="dataDirectoryOffset">Receives the file offset of the first data directory entry.</param>
/// <returns>True when the optional header fits in the buffer and carries a PE32/PE32+ magic.</returns>
private static bool ParseOptionalHeader(ReadOnlySpan<byte> data, int peHeaderOffset, ushort sizeOfOptionalHeader,
    out bool is64Bit, out PeSubsystem subsystem, out uint numberOfRvaAndSizes, out int dataDirectoryOffset)
{
    is64Bit = false;
    subsystem = PeSubsystem.Unknown;
    numberOfRvaAndSizes = 0;
    dataDirectoryOffset = 0;

    // The magic alone is two bytes: a declared size of 1 at the very end of the buffer
    // would otherwise pass the range check below and make the magic read throw.
    if (peHeaderOffset < 0 || sizeOfOptionalHeader < sizeof(ushort))
    {
        return false;
    }

    // 64-bit sum so a large (untrusted) header offset cannot wrap.
    if ((long)peHeaderOffset + 24 + sizeOfOptionalHeader > data.Length)
    {
        return false;
    }

    var optionalHeaderOffset = peHeaderOffset + 24;

    var magic = BinaryPrimitives.ReadUInt16LittleEndian(data.Slice(optionalHeaderOffset, 2));
    if (magic != 0x10b && magic != 0x20b) // PE32 or PE32+
    {
        return false;
    }

    is64Bit = magic == 0x20b; // PE32+

    // Subsystem offset: 68 for both PE32 and PE32+
    var subsystemOffset = optionalHeaderOffset + 68;
    if (subsystemOffset + 2 <= data.Length)
    {
        subsystem = (PeSubsystem)BinaryPrimitives.ReadUInt16LittleEndian(data.Slice(subsystemOffset, 2));
    }

    // NumberOfRvaAndSizes (PE32+ has wider fields before it, hence the different offset)
    var rvaAndSizesOffset = optionalHeaderOffset + (is64Bit ? 108 : 92);
    if (rvaAndSizesOffset + 4 <= data.Length)
    {
        numberOfRvaAndSizes = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(rvaAndSizesOffset, 4));
    }

    // Data directories start after the numberOfRvaAndSizes field
    dataDirectoryOffset = optionalHeaderOffset + (is64Bit ? 112 : 96);

    return true;
}
/// <summary>
/// Reads the section table so that RVAs can later be translated to file offsets.
/// </summary>
/// <param name="data">PE image bytes.</param>
/// <param name="offset">File offset of the first section header.</param>
/// <param name="numberOfSections">Number of headers announced by the COFF header.</param>
/// <returns>
/// The headers that fit completely inside <paramref name="data"/>; reading stops at the first
/// header that would run past the end of the buffer.
/// </returns>
private static List<SectionHeader> ParseSectionHeaders(ReadOnlySpan<byte> data, int offset, ushort numberOfSections)
{
    const int HeaderSize = 40;

    var table = new List<SectionHeader>();
    var cursor = offset;
    var remaining = (int)numberOfSections;

    while (remaining-- > 0 && cursor + HeaderSize <= data.Length)
    {
        var header = data.Slice(cursor, HeaderSize);

        // Field layout inside a section header: VirtualSize @8, VirtualAddress @12,
        // SizeOfRawData @16, PointerToRawData @20 (all little-endian DWORDs).
        table.Add(new SectionHeader(
            VirtualAddress: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(12, 4)),
            VirtualSize: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(8, 4)),
            RawDataPointer: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(20, 4)),
            RawDataSize: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(16, 4))));

        cursor += HeaderSize;
    }

    return table;
}
/// <summary>
/// Convert RVA to file offset using section headers.
/// </summary>
/// <param name="rva">Relative virtual address to translate.</param>
/// <param name="sections">Section table of the image.</param>
/// <param name="fileOffset">Receives the file offset when a containing section is found; 0 otherwise.</param>
/// <returns>
/// True when <paramref name="rva"/> falls inside a section and the translated offset fits in 32 bits.
/// The result is not checked against the file length; callers must do that.
/// </returns>
private static bool TryRvaToFileOffset(uint rva, List<SectionHeader> sections, out uint fileOffset)
{
    fileOffset = 0;

    foreach (var section in sections)
    {
        // Some linkers leave VirtualSize at 0; the raw size is then the only extent available.
        var extent = section.VirtualSize != 0 ? section.VirtualSize : section.RawDataSize;

        // 64-bit end bound: VirtualAddress + size can exceed uint.MaxValue in crafted headers,
        // and a wrapped sum would make the section look empty.
        var sectionEnd = (ulong)section.VirtualAddress + extent;
        if (rva < section.VirtualAddress || rva >= sectionEnd)
        {
            continue;
        }

        var translated = (ulong)(rva - section.VirtualAddress) + section.RawDataPointer;
        if (translated > uint.MaxValue)
        {
            // A wrapped offset would point at unrelated bytes; treat it as untranslatable.
            return false;
        }

        fileOffset = (uint)translated;
        return true;
    }

    return false;
}
/// <summary>
/// Decodes the Rich header that sits in the DOS stub, between the DOS header and the PE header.
/// </summary>
/// <param name="data">PE image bytes.</param>
/// <param name="peHeaderOffset">File offset of the PE header; the search never looks at or beyond it.</param>
/// <param name="richHeaderHash">
/// Receives the XOR key stored right after the "Rich" marker, or null when no marker is found.
/// It is set as soon as the marker is located, even if the matching "DanS" marker is missing.
/// </param>
/// <param name="compilerHints">List that receives one hint per non-zero entry.</param>
private static void ParseRichHeader(ReadOnlySpan<byte> data, int peHeaderOffset,
    out uint? richHeaderHash, List<PeCompilerHint> compilerHints)
{
    richHeaderHash = null;

    // Walk backwards, one byte at a time, from just below the PE header down to the end of
    // the DOS header (0x40) looking for the plain-text "Rich" marker.
    var richPos = -1;
    var probe = Math.Min(peHeaderOffset, data.Length) - 4;
    while (probe >= 0x40)
    {
        if (BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(probe, 4)) == RICH_MARKER)
        {
            richPos = probe;
            break;
        }

        probe--;
    }

    // The marker must be followed by the 4-byte XOR key.
    if (richPos < 0 || richPos + 8 > data.Length)
    {
        return;
    }

    var key = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(richPos + 4, 4));
    richHeaderHash = key;

    // "DanS" is stored XOR'd with the key; step back in DWORDs from the Rich marker.
    // Every read here ends at or before richPos, so it is always inside the buffer.
    var dansPos = -1;
    for (var cursor = richPos - 4; cursor >= 0x40; cursor -= 4)
    {
        var decoded = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(cursor, 4)) ^ key;
        if (decoded == DANS_MARKER)
        {
            dansPos = cursor;
            break;
        }
    }

    if (dansPos < 0)
    {
        return;
    }

    // Entries are 8 bytes each and start 16 bytes after "DanS" (the marker plus padding).
    for (var entry = dansPos + 16; entry < richPos; entry += 8)
    {
        var packedId = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(entry, 4)) ^ key;
        var uses = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(entry + 4, 4)) ^ key;

        if (packedId != 0 || uses != 0)
        {
            // NOTE(review): the low word goes to ToolId and the high word to ToolVersion.
            // Rich header write-ups usually describe the high word as the product ID and the
            // low word as the build number - confirm this mapping is intended.
            var lowWord = (ushort)(packedId & 0xFFFF);
            var highWord = (ushort)((packedId >> 16) & 0xFFFF);
            compilerHints.Add(new PeCompilerHint(lowWord, highWord, (int)uses));
        }
    }
}
/// <summary>
/// Parse debug directory for CodeView GUID.
/// </summary>
/// <remarks>
/// All offsets and sizes come from the (untrusted) image, so every range is validated with
/// 64-bit arithmetic before it is narrowed to <c>int</c> and used to slice the buffer.
/// </remarks>
/// <param name="data">PE image bytes.</param>
/// <param name="dataDirectoryOffset">File offset of the first data directory entry.</param>
/// <param name="numberOfRvaAndSizes">Number of data directory entries declared by the optional header.</param>
/// <param name="sections">Section table used for RVA translation.</param>
/// <param name="codeViewGuid">Receives the PDB70 GUID as 32 lowercase hex digits, or null.</param>
/// <param name="codeViewAge">Receives the PDB70 age, or null.</param>
/// <param name="pdbPath">Receives the PDB path stored in the record, or null.</param>
private static void ParseDebugDirectory(ReadOnlySpan<byte> data, int dataDirectoryOffset, uint numberOfRvaAndSizes,
    List<SectionHeader> sections, out string? codeViewGuid, out int? codeViewAge, out string? pdbPath)
{
    codeViewGuid = null;
    codeViewAge = null;
    pdbPath = null;

    if (numberOfRvaAndSizes <= IMAGE_DIRECTORY_ENTRY_DEBUG)
    {
        return;
    }

    var debugDirOffset = dataDirectoryOffset + IMAGE_DIRECTORY_ENTRY_DEBUG * 8;
    if (debugDirOffset < 0 || debugDirOffset + 8 > data.Length)
    {
        return;
    }

    var debugRva = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(debugDirOffset, 4));
    var debugSize = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(debugDirOffset + 4, 4));

    if (debugRva == 0 || debugSize == 0)
    {
        return;
    }

    if (!TryRvaToFileOffset(debugRva, sections, out var debugFileOffset))
    {
        return;
    }

    // Each debug directory entry is 28 bytes
    const int DEBUG_ENTRY_SIZE = 28;
    // RSDS signature (4) + GUID (16) + age (4)
    const int CODEVIEW_FIXED_SIZE = 24;

    long numEntries = debugSize / DEBUG_ENTRY_SIZE;

    for (long i = 0; i < numEntries; i++)
    {
        // Keep the offset in 64 bits: a file offset of 0x80000000 or more would turn negative
        // when cast to int and then slip past the bounds check.
        var entryOffset = debugFileOffset + i * DEBUG_ENTRY_SIZE;
        if (entryOffset + DEBUG_ENTRY_SIZE > data.Length)
        {
            break;
        }

        var entry = data.Slice((int)entryOffset, DEBUG_ENTRY_SIZE);

        var debugType = BinaryPrimitives.ReadUInt32LittleEndian(entry.Slice(12, 4));
        if (debugType != IMAGE_DEBUG_TYPE_CODEVIEW)
        {
            continue;
        }

        var sizeOfData = BinaryPrimitives.ReadUInt32LittleEndian(entry.Slice(16, 4));
        var pointerToRawData = BinaryPrimitives.ReadUInt32LittleEndian(entry.Slice(24, 4));

        if (pointerToRawData == 0 || sizeOfData < CODEVIEW_FIXED_SIZE)
        {
            continue;
        }

        // uint + uint can wrap; widen before comparing against the buffer length.
        if ((ulong)pointerToRawData + sizeOfData > (ulong)data.Length)
        {
            continue;
        }

        var cvSpan = data.Slice((int)pointerToRawData, (int)sizeOfData);

        // Check for RSDS signature (PDB70)
        var signature = BinaryPrimitives.ReadUInt32LittleEndian(cvSpan);
        if (signature != RSDS_SIGNATURE)
        {
            continue;
        }

        // GUID is 16 bytes at offset 4
        codeViewGuid = FormatGuidAsLowercaseHex(cvSpan.Slice(4, 16));

        // Age is 4 bytes at offset 20
        codeViewAge = (int)BinaryPrimitives.ReadUInt32LittleEndian(cvSpan.Slice(20, 4));

        // PDB path is null-terminated string starting at offset 24; if the terminator is
        // missing the rest of the record is taken as the path.
        var pdbPathSpan = cvSpan[CODEVIEW_FIXED_SIZE..];
        var nullTerminator = pdbPathSpan.IndexOf((byte)0);
        var pathLength = nullTerminator >= 0 ? nullTerminator : pdbPathSpan.Length;
        if (pathLength > 0)
        {
            pdbPath = Encoding.UTF8.GetString(pdbPathSpan[..pathLength]);
        }

        break; // Found CodeView, done
    }
}
/// <summary>
/// Renders the first 16 bytes of <paramref name="guidBytes"/> as a GUID in lowercase hex without dashes.
/// </summary>
/// <param name="guidBytes">GUID in its on-disk layout; at least 16 bytes.</param>
/// <returns>32 lowercase hexadecimal digits.</returns>
private static string FormatGuidAsLowercaseHex(ReadOnlySpan<byte> guidBytes)
{
    // On-disk GUID layout: Data1 (4 bytes LE), Data2 (2 bytes LE), Data3 (2 bytes LE),
    // Data4 (8 bytes, in order). That is exactly the layout Guid's span constructor reads,
    // and the "N" format prints the fields as 32 lowercase hex digits with no separators.
    var guid = new Guid(guidBytes[..16]);
    return guid.ToString("N");
}
/// <summary>
/// Parse version resource for product/file information.
/// </summary>
/// <remarks>
/// This does not walk the resource directory tree. It scans the raw bytes of the resource
/// directory range for the UTF-16 text "VS_VERSION_INFO" and hands everything from there on
/// to <c>ParseVersionStrings</c>.
/// </remarks>
/// <param name="data">PE image bytes.</param>
/// <param name="dataDirectoryOffset">File offset of the first data directory entry.</param>
/// <param name="sections">Section table used for RVA translation.</param>
/// <param name="is64Bit">Not used by the current implementation.</param>
/// <param name="productVersion">Receives the ProductVersion string, or null.</param>
/// <param name="fileVersion">Receives the FileVersion string, or null.</param>
/// <param name="companyName">Receives the CompanyName string, or null.</param>
/// <param name="productName">Receives the ProductName string, or null.</param>
/// <param name="originalFilename">Receives the OriginalFilename string, or null.</param>
private static void ParseVersionResource(ReadOnlySpan<byte> data, int dataDirectoryOffset,
    List<SectionHeader> sections, bool is64Bit,
    out string? productVersion, out string? fileVersion,
    out string? companyName, out string? productName, out string? originalFilename)
{
    productVersion = null;
    fileVersion = null;
    companyName = null;
    productName = null;
    originalFilename = null;

    var resourceDirOffset = dataDirectoryOffset + IMAGE_DIRECTORY_ENTRY_RESOURCE * 8;
    if (resourceDirOffset < 0 || resourceDirOffset + 8 > data.Length)
    {
        return;
    }

    var resourceRva = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(resourceDirOffset, 4));
    var resourceSize = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(resourceDirOffset + 4, 4));

    if (resourceRva == 0 || resourceSize == 0)
    {
        return;
    }

    if (!TryRvaToFileOffset(resourceRva, sections, out var resourceFileOffset))
    {
        return;
    }

    // The translated offset is not guaranteed to lie inside the file (truncated or crafted
    // images). Without this check the length below would go negative and Slice would throw.
    if (resourceFileOffset >= (uint)data.Length)
    {
        return;
    }

    // Search for VS_VERSION_INFO signature in resources
    // This is a simplified approach - searching for the signature in the resource section
    var available = (uint)data.Length - resourceFileOffset;
    var searchLength = (int)Math.Min(resourceSize, available);
    var searchSpan = data.Slice((int)resourceFileOffset, searchLength);

    // Look for "VS_VERSION_INFO" signature (wide string)
    var vsVersionInfo = Encoding.Unicode.GetBytes("VS_VERSION_INFO");
    var vsInfoOffset = IndexOf(searchSpan, vsVersionInfo);

    if (vsInfoOffset < 0)
    {
        return;
    }

    // Parse StringFileInfo to extract version strings
    var versionInfoStart = (int)resourceFileOffset + vsInfoOffset;
    ParseVersionStrings(data, versionInfoStart, searchSpan.Length - vsInfoOffset,
        ref productVersion, ref fileVersion, ref companyName, ref productName, ref originalFilename);
}
/// <summary>
/// Parse version strings from VS_VERSION_INFO structure.
/// </summary>
/// <remarks>
/// For each known key the first UTF-16 occurrence of the key text inside the search window is
/// located, the key's null terminator is skipped, the position is rounded up to a 4-byte
/// boundary and the following null-terminated UTF-16 string is taken as the value.
/// The rounding is done on the file offset, not on the position inside the window: the window
/// normally starts at the "VS_VERSION_INFO" key, which sits 6 bytes into its structure, so
/// window-relative rounding would land 2 bytes away from the real value.
/// NOTE(review): this assumes the version resource data begins on a 4-byte aligned file offset.
/// </remarks>
/// <param name="data">PE image bytes.</param>
/// <param name="offset">File offset at which the search window starts.</param>
/// <param name="maxLength">Maximum length of the search window in bytes.</param>
/// <param name="productVersion">Set when a non-blank ProductVersion value is found.</param>
/// <param name="fileVersion">Set when a non-blank FileVersion value is found.</param>
/// <param name="companyName">Set when a non-blank CompanyName value is found.</param>
/// <param name="productName">Set when a non-blank ProductName value is found.</param>
/// <param name="originalFilename">Set when a non-blank OriginalFilename value is found.</param>
private static void ParseVersionStrings(ReadOnlySpan<byte> data, int offset, int maxLength,
    ref string? productVersion, ref string? fileVersion,
    ref string? companyName, ref string? productName, ref string? originalFilename)
{
    // Nothing to search: bail out instead of letting Slice throw on a bad window.
    if (offset < 0 || offset >= data.Length || maxLength <= 0)
    {
        return;
    }

    // Search for common version string keys
    var keys = new[] { "ProductVersion", "FileVersion", "CompanyName", "ProductName", "OriginalFilename" };

    var searchSpan = data.Slice(offset, Math.Min(maxLength, data.Length - offset));

    foreach (var key in keys)
    {
        ReadOnlySpan<byte> keyBytes = Encoding.Unicode.GetBytes(key);
        var keyOffset = searchSpan.IndexOf(keyBytes);

        if (keyOffset < 0)
        {
            continue;
        }

        // Value follows the key and its 2-byte null terminator ...
        var valueStart = keyOffset + keyBytes.Length + 2;

        // ... padded so that it starts on a 4-byte boundary of the file offset.
        var misalignment = (offset + valueStart) & 3;
        if (misalignment != 0)
        {
            valueStart += 4 - misalignment;
        }

        // The window may end before the value starts (key at the very end of the resource).
        if (valueStart >= searchSpan.Length)
        {
            continue;
        }

        // Read null-terminated wide string value
        var valueSpan = searchSpan[valueStart..];
        var nullTerm = -1;
        for (var i = 0; i < valueSpan.Length - 1; i += 2)
        {
            if (valueSpan[i] == 0 && valueSpan[i + 1] == 0)
            {
                nullTerm = i;
                break;
            }
        }

        // No terminator, or an empty value: leave the field untouched.
        if (nullTerm <= 0)
        {
            continue;
        }

        var value = Encoding.Unicode.GetString(valueSpan[..nullTerm]);
        if (string.IsNullOrWhiteSpace(value))
        {
            continue;
        }

        switch (key)
        {
            case "ProductVersion":
                productVersion = value;
                break;
            case "FileVersion":
                fileVersion = value;
                break;
            case "CompanyName":
                companyName = value;
                break;
            case "ProductName":
                productName = value;
                break;
            case "OriginalFilename":
                originalFilename = value;
                break;
        }
    }
}
/// <summary>
/// Parse export directory for exported symbols.
/// </summary>
/// <remarks>
/// Only the name pointer table is read (exports by ordinal only are not listed). At most
/// 10000 names are collected. Offsets derived from the image are range-checked in 64 bits
/// before they are narrowed to <c>int</c>.
/// </remarks>
/// <param name="data">PE image bytes.</param>
/// <param name="dataDirectoryOffset">File offset of the first data directory entry.</param>
/// <param name="sections">Section table used for RVA translation.</param>
/// <param name="exports">List that receives the exported names.</param>
private static void ParseExportDirectory(ReadOnlySpan<byte> data, int dataDirectoryOffset,
    List<SectionHeader> sections, List<string> exports)
{
    const int MAX_EXPORTS = 10000;
    const int EXPORT_DIRECTORY_SIZE = 40;

    var exportDirOffset = dataDirectoryOffset + IMAGE_DIRECTORY_ENTRY_EXPORT * 8;
    if (exportDirOffset < 0 || exportDirOffset + 8 > data.Length)
    {
        return;
    }

    var exportRva = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(exportDirOffset, 4));
    var exportSize = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(exportDirOffset + 4, 4));

    if (exportRva == 0 || exportSize == 0)
    {
        return;
    }

    if (!TryRvaToFileOffset(exportRva, sections, out var exportFileOffset))
    {
        return;
    }

    if ((long)exportFileOffset + EXPORT_DIRECTORY_SIZE > data.Length)
    {
        return;
    }

    var exportSpan = data.Slice((int)exportFileOffset, EXPORT_DIRECTORY_SIZE);

    // NumberOfNames @24, AddressOfNames @32 inside the export directory table.
    var numberOfNames = BinaryPrimitives.ReadUInt32LittleEndian(exportSpan.Slice(24, 4));
    var addressOfNames = BinaryPrimitives.ReadUInt32LittleEndian(exportSpan.Slice(32, 4));

    if (numberOfNames == 0 || addressOfNames == 0)
    {
        return;
    }

    if (!TryRvaToFileOffset(addressOfNames, sections, out var namesFileOffset))
    {
        return;
    }

    // Clamp while still unsigned: casting a count >= 0x80000000 to int first would turn it
    // negative and silently skip every export.
    var count = (int)Math.Min(numberOfNames, (uint)MAX_EXPORTS);

    for (var i = 0; i < count; i++)
    {
        // 64-bit offset so a large table offset cannot turn negative and bypass the check.
        var nameRvaOffset = (long)namesFileOffset + (long)i * 4;
        if (nameRvaOffset + 4 > data.Length)
        {
            break;
        }

        var nameRva = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice((int)nameRvaOffset, 4));
        if (!TryRvaToFileOffset(nameRva, sections, out var nameFileOffset))
        {
            continue;
        }

        if (nameFileOffset >= (uint)data.Length)
        {
            continue;
        }

        // Names are null-terminated ASCII; without a terminator at most 256 bytes are taken.
        var nameSpan = data[(int)nameFileOffset..];
        var nullTerm = nameSpan.IndexOf((byte)0);
        var nameLength = nullTerm >= 0 ? nullTerm : Math.Min(256, nameSpan.Length);

        if (nameLength > 0)
        {
            var name = Encoding.ASCII.GetString(nameSpan[..nameLength]);
            if (!string.IsNullOrWhiteSpace(name))
            {
                exports.Add(name);
            }
        }
    }
}
/// <summary>
/// Simple byte sequence search.
/// </summary>
/// <param name="haystack">Bytes to search in.</param>
/// <param name="needle">Byte sequence to look for.</param>
/// <returns>
/// Index of the first occurrence of <paramref name="needle"/> in <paramref name="haystack"/>,
/// 0 for an empty needle, or -1 when there is no match.
/// </returns>
private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle)
{
    // Delegates to the span search from System.MemoryExtensions instead of re-comparing
    // a slice at every position.
    return haystack.IndexOf(needle);
}
/// <summary>
/// Translates the COFF Machine field into the architecture label used by the scanner.
/// </summary>
/// <param name="machine">Raw Machine value from the COFF header.</param>
/// <returns>The architecture label, or null for a value this reader does not know.</returns>
private static string? MapPeMachine(ushort machine)
{
    switch (machine)
    {
        case 0x014c:
            return "x86";
        case 0x0200:
            return "ia64";
        case 0x8664:
            return "x86_64";
        case 0x01c0:
            return "arm";
        case 0x01c2:
            return "thumb";
        case 0x01c4:
            return "armnt";
        case 0xaa64:
            return "arm64";
        case 0x5032:
            return "riscv32";
        case 0x5064:
            return "riscv64";
        case 0x5128:
            return "riscv128";
        default:
            return null;
    }
}
/// <summary>
/// Section header for RVA translation.
/// Holds the four section table fields that <c>ParseSectionHeaders</c> reads from each 40-byte entry.
/// </summary>
/// <param name="VirtualAddress">RVA at which the section starts (header bytes 12..15).</param>
/// <param name="VirtualSize">Size of the section in the RVA space (header bytes 8..11).</param>
/// <param name="RawDataPointer">File offset of the section's data (header bytes 20..23).</param>
/// <param name="RawDataSize">Size of the section's data in the file (header bytes 16..19).</param>
private sealed record SectionHeader(
uint VirtualAddress,
uint VirtualSize,
uint RawDataPointer,
uint RawDataSize);
}