using System.Buffers.Binary;
using System.Text;
using System.Xml;

namespace StellaOps.Scanner.Analyzers.Native;

/// <summary>
/// Parses PE import tables, delay-load imports, and embedded manifests.
/// </summary>
/// <remarks>
/// All parsing is best-effort and defensive: the input is treated as untrusted, every read is
/// bounds-checked, and malformed structures result in partial or empty output instead of exceptions.
/// </remarks>
public static class PeImportParser
{
    // Data directory indices and resource type id; names mirror the Win32 headers.
    private const int IMAGE_DIRECTORY_ENTRY_IMPORT = 1;
    private const int IMAGE_DIRECTORY_ENTRY_RESOURCE = 2;
    private const int IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT = 13;
    private const int RT_MANIFEST = 24;

    private const int CoffHeaderSize = 24;            // 4-byte signature + 20-byte COFF file header
    private const int DataDirectoryEntrySize = 8;     // DWORD rva + DWORD size
    private const int SectionHeaderSize = 40;
    private const int ResourceDirectoryHeaderSize = 16;
    private const int ResourceDirectoryEntrySize = 8;
    private const int ResourceDataEntrySize = 16;

    // Upper bound on thunk entries read per DLL so a missing terminator cannot run away.
    private const int MaxImportedFunctions = 1000;

    // Upper bound on nested resource directories followed; stops cyclic directory offsets.
    private const int MaxResourceDirectoryDepth = 8;

    /// <summary>
    /// Parses PE import information from a stream.
    /// </summary>
    /// <param name="stream">Stream positioned at the start of the PE image. It is read to the end.</param>
    /// <param name="importInfo">
    /// Receives the parsed information on success; on failure it holds an empty placeholder instance.
    /// </param>
    /// <param name="cancellationToken">Checked once before the stream is buffered.</param>
    /// <returns>
    /// <c>true</c> when the data has a valid MZ/PE signature and a readable PE32 or PE32+ optional
    /// header; <c>false</c> otherwise (including truncated headers).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
    public static bool TryParse(Stream stream, out PeImportInfo importInfo, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        importInfo = new PeImportInfo(null, PeSubsystem.Unknown, false, [], [], []);

        cancellationToken.ThrowIfCancellationRequested();

        using var buffer = new MemoryStream();
        stream.CopyTo(buffer);

        // View the stream's internal buffer directly instead of copying it again with ToArray().
        ReadOnlySpan<byte> span = buffer.GetBuffer().AsSpan(0, (int)buffer.Length);

        if (!IsValidPe(span, out var peHeaderOffset))
        {
            return false;
        }

        // COFF header; IsValidPe guarantees these 24 bytes are readable.
        var machine = ReadUInt16(span, peHeaderOffset + 4);
        var numberOfSections = ReadUInt16(span, peHeaderOffset + 6);
        var sizeOfOptionalHeader = ReadUInt16(span, peHeaderOffset + 20);

        if (sizeOfOptionalHeader == 0)
        {
            return false;
        }

        var optionalHeaderOffset = peHeaderOffset + CoffHeaderSize;
        if (!IsInRange(span, optionalHeaderOffset, 2))
        {
            return false;
        }

        var magic = ReadUInt16(span, optionalHeaderOffset);
        if (magic != 0x10b && magic != 0x20b) // PE32 or PE32+
        {
            return false;
        }

        var is64Bit = magic == 0x20b; // PE32+

        // Size of the optional header up to (and including) NumberOfRvaAndSizes.
        // A truncated file must yield 'false' rather than an out-of-range exception.
        var fixedFieldsSize = is64Bit ? 112 : 96;
        if (!IsInRange(span, optionalHeaderOffset, fixedFieldsSize))
        {
            return false;
        }

        // Subsystem sits at the same offset (68) in both PE32 and PE32+.
        var subsystem = (PeSubsystem)ReadUInt16(span, optionalHeaderOffset + 68);

        // NumberOfRvaAndSizes is the last fixed field; the data directories follow it.
        var numberOfRvaAndSizes = ReadUInt32(span, optionalHeaderOffset + fixedFieldsSize - 4);
        var dataDirectoryOffset = optionalHeaderOffset + fixedFieldsSize;

        // Section headers start after the optional header.
        var sectionHeadersOffset = optionalHeaderOffset + sizeOfOptionalHeader;

        // Sections are needed for RVA-to-file-offset translation.
        var sections = ParseSectionHeaders(span, sectionHeadersOffset, numberOfSections);

        // Import directory
        var dependencies = new List<PeDeclaredDependency>();
        if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_IMPORT, out var importRva))
        {
            dependencies = ParseImportDirectory(span, importRva, sections, "pe-import", is64Bit);
        }

        // Delay-load import directory
        var delayLoadDependencies = new List<PeDeclaredDependency>();
        if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT, out var delayImportRva))
        {
            delayLoadDependencies = ParseDelayImportDirectory(span, delayImportRva, sections, is64Bit);
        }

        // Embedded manifest (RT_MANIFEST resource) for SxS dependencies
        var sxsDependencies = new List<PeSxsDependency>();
        if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_RESOURCE, out var resourceRva))
        {
            sxsDependencies = ParseManifestFromResources(span, resourceRva, sections);
        }

        // Fallback: scan the raw bytes for manifest XML when the resource tree yielded nothing.
        if (sxsDependencies.Count == 0)
        {
            var manifestData = SearchForManifestXml(span);
            if (manifestData is not null && manifestData.Length > 0)
            {
                sxsDependencies = ParseManifestXml(manifestData);
            }
        }

        importInfo = new PeImportInfo(
            MapPeMachine(machine),
            subsystem,
            is64Bit,
            dependencies,
            delayLoadDependencies,
            sxsDependencies);
        return true;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="length"/> bytes starting at <paramref name="offset"/>
    /// lie entirely inside <paramref name="span"/>. Written so that no intermediate sum can overflow.
    /// </summary>
    private static bool IsInRange(ReadOnlySpan<byte> span, int offset, int length)
        => offset >= 0 && length >= 0 && offset <= span.Length - length;

    /// <summary>Reads a little-endian 16-bit value; the caller has already validated the range.</summary>
    private static ushort ReadUInt16(ReadOnlySpan<byte> span, int offset)
        => BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(offset, 2));

    /// <summary>Reads a little-endian 32-bit value; the caller has already validated the range.</summary>
    private static uint ReadUInt32(ReadOnlySpan<byte> span, int offset)
        => BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset, 4));

    /// <summary>
    /// Reads data directory entry <paramref name="index"/> and reports whether it is usable:
    /// declared by NumberOfRvaAndSizes, inside the file, and with a non-zero RVA and size.
    /// </summary>
    private static bool TryReadDataDirectory(
        ReadOnlySpan<byte> span,
        int dataDirectoryOffset,
        uint numberOfRvaAndSizes,
        int index,
        out uint rva)
    {
        rva = 0;

        if ((uint)index >= numberOfRvaAndSizes)
        {
            return false;
        }

        var entryOffset = dataDirectoryOffset + index * DataDirectoryEntrySize;
        if (!IsInRange(span, entryOffset, DataDirectoryEntrySize))
        {
            return false;
        }

        rva = ReadUInt32(span, entryOffset);
        var size = ReadUInt32(span, entryOffset + 4);
        return rva > 0 && size > 0;
    }

    /// <summary>
    /// Checks for the "MZ" DOS signature and the "PE\0\0" signature at the offset stored at 0x3C.
    /// On success at least 24 bytes (signature + COFF header) are readable at
    /// <paramref name="peHeaderOffset"/>.
    /// </summary>
    private static bool IsValidPe(ReadOnlySpan<byte> span, out int peHeaderOffset)
    {
        peHeaderOffset = 0;

        if (span.Length < 0x40)
        {
            return false;
        }

        if (span[0] != 'M' || span[1] != 'Z')
        {
            return false;
        }

        peHeaderOffset = BinaryPrimitives.ReadInt32LittleEndian(span.Slice(0x3C, 4));

        // Compare against (Length - 24) so a huge e_lfanew cannot overflow the sum.
        if (peHeaderOffset < 0 || peHeaderOffset > span.Length - CoffHeaderSize)
        {
            return false;
        }

        // Check PE signature
        return span[peHeaderOffset] == 'P'
            && span[peHeaderOffset + 1] == 'E'
            && span[peHeaderOffset + 2] == 0
            && span[peHeaderOffset + 3] == 0;
    }

    /// <summary>The subset of a section header needed for RVA translation.</summary>
    private sealed record SectionInfo(string Name, uint VirtualAddress, uint VirtualSize, uint RawDataOffset, uint RawDataSize);

    /// <summary>
    /// Reads up to <paramref name="count"/> 40-byte section headers starting at
    /// <paramref name="offset"/>, stopping early at the first header that does not fit in the data.
    /// </summary>
    private static List<SectionInfo> ParseSectionHeaders(ReadOnlySpan<byte> span, int offset, int count)
    {
        var sections = new List<SectionInfo>(count);

        for (var i = 0; i < count; i++)
        {
            var sectionOffset = offset + i * SectionHeaderSize;
            if (!IsInRange(span, sectionOffset, SectionHeaderSize))
            {
                break;
            }

            // Name is 8 bytes, NUL-padded (not necessarily NUL-terminated).
            var nameBytes = span.Slice(sectionOffset, 8);
            var nameEnd = nameBytes.IndexOf((byte)0);
            var name = nameEnd >= 0
                ? Encoding.ASCII.GetString(nameBytes[..nameEnd])
                : Encoding.ASCII.GetString(nameBytes);

            var virtualSize = ReadUInt32(span, sectionOffset + 8);
            var virtualAddress = ReadUInt32(span, sectionOffset + 12);
            var rawDataSize = ReadUInt32(span, sectionOffset + 16);
            var rawDataOffset = ReadUInt32(span, sectionOffset + 20);

            sections.Add(new SectionInfo(name, virtualAddress, virtualSize, rawDataOffset, rawDataSize));
        }

        return sections;
    }

    /// <summary>
    /// Translates an RVA to a file offset using the section table.
    /// </summary>
    /// <returns>The file offset, or -1 when no section contains the RVA or the offset does not fit in an <c>int</c>.</returns>
    private static int RvaToFileOffset(uint rva, List<SectionInfo> sections)
    {
        foreach (var section in sections)
        {
            // Fall back to the raw size when VirtualSize is zero, otherwise the section would
            // never match any RVA.
            var mappedSize = section.VirtualSize != 0 ? section.VirtualSize : section.RawDataSize;

            // 64-bit arithmetic: VirtualAddress + size may exceed uint.MaxValue in a crafted file.
            var sectionEnd = (ulong)section.VirtualAddress + mappedSize;
            if (rva < section.VirtualAddress || rva >= sectionEnd)
            {
                continue;
            }

            var fileOffset = (long)section.RawDataOffset + (rva - section.VirtualAddress);
            return fileOffset <= int.MaxValue ? (int)fileOffset : -1;
        }

        return -1;
    }

    /// <summary>
    /// Walks the import directory table (20-byte descriptors terminated by a null entry) and
    /// returns one dependency per distinct DLL name (case-insensitive), tagged with
    /// <paramref name="reasonCode"/>.
    /// </summary>
    private static List<PeDeclaredDependency> ParseImportDirectory(
        ReadOnlySpan<byte> span,
        uint importRva,
        List<SectionInfo> sections,
        string reasonCode,
        bool is64Bit)
    {
        const int descriptorSize = 20;

        var dependencies = new List<PeDeclaredDependency>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        var offset = RvaToFileOffset(importRva, sections);
        if (offset < 0)
        {
            return dependencies;
        }

        while (IsInRange(span, offset, descriptorSize))
        {
            var originalFirstThunk = ReadUInt32(span, offset);
            var nameRva = ReadUInt32(span, offset + 12);
            var firstThunk = ReadUInt32(span, offset + 16);

            // End of import directory (null entry)
            if (nameRva == 0)
            {
                break;
            }

            var nameOffset = RvaToFileOffset(nameRva, sections);
            if (nameOffset >= 0 && nameOffset < span.Length)
            {
                var dllName = ReadNullTerminatedString(span, nameOffset);
                if (!string.IsNullOrEmpty(dllName) && seen.Add(dllName))
                {
                    // Some linkers emit no import lookup table (OriginalFirstThunk == 0). On disk
                    // the import address table has the same content until the image is bound,
                    // so use it as the source of function names in that case.
                    var lookupTableRva = originalFirstThunk != 0 ? originalFirstThunk : firstThunk;
                    var functions = ParseImportedFunctions(span, lookupTableRva, sections, is64Bit);
                    dependencies.Add(new PeDeclaredDependency(dllName, reasonCode, functions));
                }
            }

            offset += descriptorSize;
        }

        return dependencies;
    }

    /// <summary>
    /// Walks the delay-load directory table (32-byte descriptors terminated by a null entry) and
    /// returns one "pe-delayimport" dependency per distinct DLL name (case-insensitive).
    /// </summary>
    private static List<PeDeclaredDependency> ParseDelayImportDirectory(
        ReadOnlySpan<byte> span,
        uint delayImportRva,
        List<SectionInfo> sections,
        bool is64Bit)
    {
        const int descriptorSize = 32;

        var dependencies = new List<PeDeclaredDependency>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        var offset = RvaToFileOffset(delayImportRva, sections);
        if (offset < 0)
        {
            return dependencies;
        }

        while (IsInRange(span, offset, descriptorSize))
        {
            // Layout: +0 attributes, +4 name RVA, +8 module handle RVA,
            // +12 delay import address table RVA, +16 delay import name table RVA.
            // Only the name and the name table are needed here.
            var nameRva = ReadUInt32(span, offset + 4);
            var delayImportNameTableRva = ReadUInt32(span, offset + 16);

            // End of delay import directory (null entry)
            if (nameRva == 0)
            {
                break;
            }

            var nameOffset = RvaToFileOffset(nameRva, sections);
            if (nameOffset >= 0 && nameOffset < span.Length)
            {
                var dllName = ReadNullTerminatedString(span, nameOffset);
                if (!string.IsNullOrEmpty(dllName) && seen.Add(dllName))
                {
                    var functions = ParseImportedFunctions(span, delayImportNameTableRva, sections, is64Bit);
                    dependencies.Add(new PeDeclaredDependency(dllName, "pe-delayimport", functions));
                }
            }

            offset += descriptorSize;
        }

        return dependencies;
    }

    /// <summary>
    /// Reads a thunk array (4-byte entries for PE32, 8-byte for PE32+) terminated by a zero entry.
    /// Imports by name yield the function name; imports by ordinal yield <c>#ord{n}</c>.
    /// At most <see cref="MaxImportedFunctions"/> entries are returned.
    /// </summary>
    private static List<string> ParseImportedFunctions(
        ReadOnlySpan<byte> span,
        uint thunkRva,
        List<SectionInfo> sections,
        bool is64Bit)
    {
        var functions = new List<string>();

        if (thunkRva == 0)
        {
            return functions;
        }

        var offset = RvaToFileOffset(thunkRva, sections);
        if (offset < 0)
        {
            return functions;
        }

        var entrySize = is64Bit ? 8 : 4;
        var ordinalFlag = is64Bit ? 0x8000000000000000UL : 0x80000000UL;

        while (IsInRange(span, offset, entrySize) && functions.Count < MaxImportedFunctions)
        {
            ulong thunkData = is64Bit
                ? BinaryPrimitives.ReadUInt64LittleEndian(span.Slice(offset, 8))
                : BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset, 4));

            if (thunkData == 0)
            {
                break;
            }

            if ((thunkData & ordinalFlag) != 0)
            {
                // Import by ordinal: low 16 bits hold the ordinal number.
                var ordinal = (ushort)(thunkData & 0xFFFF);
                functions.Add($"#ord{ordinal}");
            }
            else
            {
                // Import by name: low 31 bits are the RVA of a hint/name entry.
                var hintNameRva = (uint)(thunkData & 0x7FFFFFFFUL);
                var hintNameOffset = RvaToFileOffset(hintNameRva, sections);
                if (hintNameOffset >= 0 && hintNameOffset + 2 < span.Length)
                {
                    // Skip 2-byte hint, read function name
                    var funcName = ReadNullTerminatedString(span, hintNameOffset + 2);
                    if (!string.IsNullOrEmpty(funcName))
                    {
                        functions.Add(funcName);
                    }
                }
            }

            offset += entrySize;
        }

        return functions;
    }

    /// <summary>
    /// Locates the RT_MANIFEST resource through the resource directory and parses it.
    /// Returns an empty list when no manifest resource can be located; the caller is
    /// responsible for any raw-byte fallback search.
    /// </summary>
    private static List<PeSxsDependency> ParseManifestFromResources(
        ReadOnlySpan<byte> span,
        uint resourceRva,
        List<SectionInfo> sections)
    {
        var resourceOffset = RvaToFileOffset(resourceRva, sections);
        if (!IsInRange(span, resourceOffset, ResourceDirectoryHeaderSize))
        {
            return [];
        }

        var manifestData = FindManifestResource(span, resourceOffset, sections);
        if (manifestData is null || manifestData.Length == 0)
        {
            // TryParse performs the whole-file XML scan when this returns nothing,
            // so it is not repeated here.
            return [];
        }

        return ParseManifestXml(manifestData);
    }

    /// <summary>
    /// Scans the ID entries of the root resource directory for RT_MANIFEST (24) and, when it
    /// points at a subdirectory, returns the first data item found beneath it.
    /// </summary>
    private static byte[]? FindManifestResource(
        ReadOnlySpan<byte> span,
        int resourceOffset,
        List<SectionInfo> sections)
    {
        // Resource directory structure:
        //   DWORD Characteristics, DWORD TimeDateStamp, WORD MajorVersion, WORD MinorVersion
        //   WORD NumberOfNamedEntries, WORD NumberOfIdEntries
        //   Then the entries (named entries first, then ID entries), 8 bytes each.
        if (!IsInRange(span, resourceOffset, ResourceDirectoryHeaderSize))
        {
            return null;
        }

        var numberOfNamedEntries = ReadUInt16(span, resourceOffset + 12);
        var numberOfIdEntries = ReadUInt16(span, resourceOffset + 14);

        // Skip named entries, look for RT_MANIFEST among the ID entries.
        var entryOffset = resourceOffset
            + ResourceDirectoryHeaderSize
            + numberOfNamedEntries * ResourceDirectoryEntrySize;

        for (var i = 0; i < numberOfIdEntries; i++)
        {
            if (!IsInRange(span, entryOffset, ResourceDirectoryEntrySize))
            {
                break;
            }

            var id = ReadUInt32(span, entryOffset);
            var offsetOrData = ReadUInt32(span, entryOffset + 4);

            // High bit set means the entry points at a subdirectory.
            if (id == RT_MANIFEST && (offsetOrData & 0x80000000u) != 0)
            {
                var subDirOffset = ResolveResourceOffset(resourceOffset, offsetOrData & 0x7FFFFFFFu);
                return FindFirstResourceData(span, subDirOffset, resourceOffset, sections);
            }

            entryOffset += ResourceDirectoryEntrySize;
        }

        return null;
    }

    /// <summary>
    /// Adds a resource-relative offset to the resource section's file offset.
    /// Returns -1 when the result does not fit in an <c>int</c>.
    /// </summary>
    private static int ResolveResourceOffset(int resourceBase, uint relativeOffset)
    {
        var absolute = (long)resourceBase + relativeOffset;
        return absolute <= int.MaxValue ? (int)absolute : -1;
    }

    /// <summary>
    /// Follows the first ID entry of each nested resource directory, starting at
    /// <paramref name="dirOffset"/>, until a data entry is reached, then returns that entry's bytes.
    /// Gives up after <see cref="MaxResourceDirectoryDepth"/> levels so that a directory that
    /// points back at itself cannot loop forever.
    /// </summary>
    private static byte[]? FindFirstResourceData(
        ReadOnlySpan<byte> span,
        int dirOffset,
        int resourceBase,
        List<SectionInfo> sections)
    {
        for (var depth = 0; depth < MaxResourceDirectoryDepth; depth++)
        {
            if (!IsInRange(span, dirOffset, ResourceDirectoryHeaderSize))
            {
                return null;
            }

            var numberOfNamedEntries = ReadUInt16(span, dirOffset + 12);
            var numberOfIdEntries = ReadUInt16(span, dirOffset + 14);

            var entryOffset = dirOffset
                + ResourceDirectoryHeaderSize
                + numberOfNamedEntries * ResourceDirectoryEntrySize;

            if (numberOfIdEntries == 0 || !IsInRange(span, entryOffset, ResourceDirectoryEntrySize))
            {
                return null;
            }

            var offsetOrData = ReadUInt32(span, entryOffset + 4);

            if ((offsetOrData & 0x80000000u) != 0)
            {
                // Another subdirectory (e.g. the language level): descend.
                dirOffset = ResolveResourceOffset(resourceBase, offsetOrData & 0x7FFFFFFFu);
                continue;
            }

            // Data entry - IMAGE_RESOURCE_DATA_ENTRY structure (data RVA, size, code page, reserved)
            var dataEntryOffset = ResolveResourceOffset(resourceBase, offsetOrData);
            if (!IsInRange(span, dataEntryOffset, ResourceDataEntrySize))
            {
                return null;
            }

            var dataRva = ReadUInt32(span, dataEntryOffset);
            var dataSize = ReadUInt32(span, dataEntryOffset + 4);

            // Convert RVA to file offset using section headers
            var dataOffset = RvaToFileOffset(dataRva, sections);
            if (dataOffset >= 0 && dataSize > 0 && (long)dataOffset + dataSize <= span.Length)
            {
                return span.Slice(dataOffset, (int)dataSize).ToArray();
            }

            return null;
        }

        return null;
    }

    /// <summary>
    /// Scans the raw file bytes for manifest XML and returns the bytes from the first start
    /// marker through the first closing assembly tag that follows it, or <c>null</c> when
    /// either marker is missing.
    /// </summary>
    private static byte[]? SearchForManifestXml(ReadOnlySpan<byte> span)
    {
        // NOTE(review): the marker literals below were reconstructed; confirm them against the
        // upstream file before merging (in particular whether "<assembly" is accepted as a
        // start marker in addition to "<?xml").
        ReadOnlySpan<byte> xmlMarker = "<?xml"u8;
        ReadOnlySpan<byte> assemblyMarker = "<assembly"u8;
        ReadOnlySpan<byte> endMarker = "</assembly>"u8;

        var xmlStart = span.IndexOf(xmlMarker);
        var assemblyStart = span.IndexOf(assemblyMarker);

        // Earliest occurrence of either start marker.
        int start;
        if (xmlStart < 0)
        {
            start = assemblyStart;
        }
        else if (assemblyStart < 0)
        {
            start = xmlStart;
        }
        else
        {
            start = Math.Min(xmlStart, assemblyStart);
        }

        if (start < 0)
        {
            return null;
        }

        var candidate = span[start..];
        var end = candidate.IndexOf(endMarker);
        if (end < 0)
        {
            return null;
        }

        // Include the end marker itself; a manifest ending exactly at end-of-file is accepted.
        return candidate[..(end + endMarker.Length)].ToArray();
    }

    /// <summary>
    /// Decodes <paramref name="manifestData"/> as UTF-8 XML and returns one entry per
    /// <c>assemblyIdentity</c> element that has a non-empty <c>name</c> attribute.
    /// Malformed XML is tolerated: entries read before the error are returned.
    /// </summary>
    private static List<PeSxsDependency> ParseManifestXml(byte[] manifestData)
    {
        var sxsDependencies = new List<PeSxsDependency>();

        try
        {
            var xmlString = Encoding.UTF8.GetString(manifestData);

            // Handle BOM if present
            if (xmlString.Length > 0 && xmlString[0] == '\uFEFF')
            {
                xmlString = xmlString[1..];
            }

            using var reader = XmlReader.Create(new StringReader(xmlString), new XmlReaderSettings
            {
                IgnoreWhitespace = true,
                IgnoreComments = true,
                DtdProcessing = DtdProcessing.Ignore,
            });

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "assemblyIdentity")
                {
                    continue;
                }

                // NOTE(review): the parent element is not inspected, so every assemblyIdentity
                // element is reported, not only those nested in a dependency element. The only
                // self-reference filtered out is the literal name "MyApplication".
                var name = reader.GetAttribute("name");
                var version = reader.GetAttribute("version");
                var publicKeyToken = reader.GetAttribute("publicKeyToken");
                var processorArchitecture = reader.GetAttribute("processorArchitecture");
                var type = reader.GetAttribute("type");

                if (!string.IsNullOrEmpty(name) && name != "MyApplication")
                {
                    sxsDependencies.Add(new PeSxsDependency(
                        name, version, publicKeyToken, processorArchitecture, type));
                }
            }
        }
        catch (XmlException)
        {
            // Best effort: the bytes were not well-formed XML (common for the raw-byte
            // fallback search). Keep whatever was collected before the error.
        }

        return sxsDependencies;
    }

    /// <summary>
    /// Reads an ASCII string starting at <paramref name="offset"/> up to the next NUL byte.
    /// When no terminator exists in the remaining data, at most 256 bytes are returned.
    /// Returns an empty string for an out-of-range offset.
    /// </summary>
    private static string ReadNullTerminatedString(ReadOnlySpan<byte> span, int offset)
    {
        if (offset < 0 || offset >= span.Length)
        {
            return string.Empty;
        }

        var remaining = span[offset..];
        var terminator = remaining.IndexOf((byte)0);
        var length = terminator >= 0 ? terminator : Math.Min(remaining.Length, 256);

        return Encoding.ASCII.GetString(remaining[..length]);
    }

    /// <summary>
    /// Maps the COFF Machine field to an architecture name, or <c>null</c> for unrecognised values.
    /// </summary>
    private static string? MapPeMachine(ushort machine) => machine switch
    {
        0x014c => "x86",
        0x0200 => "ia64",
        0x8664 => "x86_64",
        0x01c0 => "arm",
        0x01c4 => "armv7",
        0xAA64 => "arm64",
        _ => null,
    };
}