Files
git.stella-ops.org/src/Scanner/StellaOps.Scanner.Analyzers.Native/PeImportParser.cs
StellaOps Bot e0f6efecce Add comprehensive tests for Go and Python version conflict detection and licensing normalization
- Implemented GoVersionConflictDetectorTests to validate pseudo-version detection, conflict analysis, and conflict retrieval for Go modules.
- Created VersionConflictDetectorTests for Python to assess conflict detection across various version scenarios, including major, minor, and patch differences.
- Added SpdxLicenseNormalizerTests to ensure accurate normalization of SPDX license strings and classifiers.
- Developed VendoredPackageDetectorTests to identify vendored packages and extract embedded packages from Python packages, including handling of vendor directories and known vendored packages.
2025-12-07 01:51:37 +02:00

572 lines
21 KiB
C#

using System.Buffers.Binary;
using System.Text;
using System.Xml;
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Parses PE import tables, delay-load imports, and embedded manifests.
/// </summary>
public static class PeImportParser
{
private const int IMAGE_DIRECTORY_ENTRY_IMPORT = 1;
private const int IMAGE_DIRECTORY_ENTRY_RESOURCE = 2;
private const int IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT = 13;
private const int RT_MANIFEST = 24;
/// <summary>
/// Parses PE import information from a stream.
/// </summary>
/// <param name="stream">Stream containing the PE image; it is copied into memory in full before parsing.</param>
/// <param name="importInfo">
/// Receives the machine type, subsystem, bitness, import-table dependencies, delay-load dependencies and
/// side-by-side (manifest) dependencies when parsing succeeds; otherwise an empty placeholder.
/// </param>
/// <param name="cancellationToken">Checked before the stream is buffered and between parsing stages.</param>
/// <returns>
/// <c>true</c> when the data carries valid MZ/PE signatures and a complete PE32 or PE32+ optional header;
/// <c>false</c> when it is not a PE image or its headers are truncated.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public static bool TryParse(Stream stream, out PeImportInfo importInfo, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(stream);
    importInfo = new PeImportInfo(null, PeSubsystem.Unknown, false, [], [], []);
    cancellationToken.ThrowIfCancellationRequested();

    using var buffer = new MemoryStream();
    stream.CopyTo(buffer);
    var data = buffer.ToArray();
    ReadOnlySpan<byte> span = data;
    cancellationToken.ThrowIfCancellationRequested();

    if (!IsValidPe(span, out var peHeaderOffset))
    {
        return false;
    }

    // COFF file header: IsValidPe guarantees the 24 bytes starting at the PE signature are present.
    var machine = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(peHeaderOffset + 4, 2));
    var numberOfSections = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(peHeaderOffset + 6, 2));
    var sizeOfOptionalHeader = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(peHeaderOffset + 20, 2));
    if (sizeOfOptionalHeader == 0)
    {
        return false;
    }

    var optionalHeaderOffset = peHeaderOffset + 24;
    if (span.Length - optionalHeaderOffset < 2)
    {
        // Truncated right after the COFF header: there is no optional-header magic to read.
        return false;
    }

    var magic = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(optionalHeaderOffset, 2));
    if (magic != 0x10b && magic != 0x20b) // PE32 or PE32+
    {
        return false;
    }

    var is64Bit = magic == 0x20b; // PE32+

    // Size of the fixed optional-header fields, i.e. the offset at which the data directories start.
    var fixedFieldsSize = is64Bit ? 112 : 96;
    if (span.Length - optionalHeaderOffset < fixedFieldsSize)
    {
        // Truncated optional header: report "not parseable" instead of throwing from a slice.
        return false;
    }

    // The subsystem field sits at the same offset in PE32 and PE32+.
    const int subsystemFieldOffset = 68;
    var subsystem = (PeSubsystem)BinaryPrimitives.ReadUInt16LittleEndian(
        span.Slice(optionalHeaderOffset + subsystemFieldOffset, 2));

    // NumberOfRvaAndSizes is the last fixed field, immediately before the data directories.
    var numberOfRvaAndSizes = BinaryPrimitives.ReadUInt32LittleEndian(
        span.Slice(optionalHeaderOffset + fixedFieldsSize - 4, 4));
    var dataDirectoryOffset = optionalHeaderOffset + fixedFieldsSize;

    // Section headers start after the optional header; they drive RVA-to-file-offset translation.
    var sectionHeadersOffset = optionalHeaderOffset + sizeOfOptionalHeader;
    var sections = ParseSectionHeaders(span, sectionHeadersOffset, numberOfSections);

    // Import directory
    var dependencies = new List<PeDeclaredDependency>();
    if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_IMPORT, out var importRva))
    {
        dependencies = ParseImportDirectory(span, importRva, sections, "pe-import", is64Bit);
    }

    // Delay-load import directory
    var delayLoadDependencies = new List<PeDeclaredDependency>();
    if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT, out var delayImportRva))
    {
        delayLoadDependencies = ParseDelayImportDirectory(span, delayImportRva, sections, is64Bit);
    }

    cancellationToken.ThrowIfCancellationRequested();

    // Embedded manifest (resource directory) for SxS dependencies
    var sxsDependencies = new List<PeSxsDependency>();
    if (TryReadDataDirectory(span, dataDirectoryOffset, numberOfRvaAndSizes, IMAGE_DIRECTORY_ENTRY_RESOURCE, out var resourceRva))
    {
        sxsDependencies = ParseManifestFromResources(span, resourceRva, sections);
    }

    // Fallback: search the raw bytes for manifest XML if the resources yielded no dependencies.
    if (sxsDependencies.Count == 0)
    {
        var manifestData = SearchForManifestXml(span);
        if (manifestData is not null && manifestData.Length > 0)
        {
            sxsDependencies = ParseManifestXml(manifestData);
        }
    }

    var machineStr = MapPeMachine(machine);
    importInfo = new PeImportInfo(machineStr, subsystem, is64Bit, dependencies, delayLoadDependencies, sxsDependencies);
    return true;
}

/// <summary>
/// Reads the RVA of one data-directory entry, bounds-checked against the image.
/// </summary>
/// <returns>
/// <c>true</c> when the entry index is below <paramref name="numberOfRvaAndSizes"/>, the 8-byte entry lies
/// inside <paramref name="span"/>, and both its RVA and size are non-zero.
/// </returns>
private static bool TryReadDataDirectory(
    ReadOnlySpan<byte> span, int dataDirectoryOffset, uint numberOfRvaAndSizes, int index, out uint rva)
{
    const int entrySize = 8; // DWORD RVA followed by DWORD size
    rva = 0;
    if (index < 0 || numberOfRvaAndSizes <= (uint)index)
    {
        return false;
    }

    var entryOffset = dataDirectoryOffset + index * entrySize;
    if (entryOffset < 0 || span.Length - entryOffset < entrySize)
    {
        return false;
    }

    var entryRva = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(entryOffset, 4));
    var entryLength = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(entryOffset + 4, 4));
    if (entryRva == 0 || entryLength == 0)
    {
        return false;
    }

    rva = entryRva;
    return true;
}
/// <summary>
/// Checks for the DOS "MZ" signature and the "PE\0\0" signature referenced by the 32-bit offset at 0x3C.
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <param name="peHeaderOffset">
/// Offset of the PE signature. Only meaningful when the method returns <c>true</c>; in that case at least
/// 24 bytes (signature plus COFF header) are available from this offset.
/// </param>
/// <returns><c>true</c> when both signatures are present and the COFF header fits inside the data.</returns>
private static bool IsValidPe(ReadOnlySpan<byte> span, out int peHeaderOffset)
{
    const int dosHeaderSize = 0x40;
    const int signatureAndCoffHeaderSize = 24;

    peHeaderOffset = 0;
    if (span.Length < dosHeaderSize)
    {
        return false;
    }

    if (span[0] != 'M' || span[1] != 'Z')
    {
        return false;
    }

    peHeaderOffset = BinaryPrimitives.ReadInt32LittleEndian(span.Slice(0x3C, 4));

    // Compare by subtraction: "peHeaderOffset + 24" wraps to a negative number for offsets close to
    // int.MaxValue, which would slip past the check and make the signature read below throw.
    if (peHeaderOffset < 0 || peHeaderOffset > span.Length - signatureAndCoffHeaderSize)
    {
        return false;
    }

    // Check PE signature
    return span.Slice(peHeaderOffset, 4).SequenceEqual("PE\0\0"u8);
}
/// <summary>
/// The section-table fields this parser keeps for each section; used to translate RVAs to file offsets.
/// </summary>
/// <param name="Name">Section name decoded as ASCII from the 8-byte name field, cut at the first NUL byte.</param>
/// <param name="VirtualAddress">DWORD read at offset 12 of the section header.</param>
/// <param name="VirtualSize">DWORD read at offset 8 of the section header.</param>
/// <param name="RawDataOffset">DWORD read at offset 20 of the section header.</param>
/// <param name="RawDataSize">DWORD read at offset 16 of the section header.</param>
private sealed record SectionInfo(string Name, uint VirtualAddress, uint VirtualSize, uint RawDataOffset, uint RawDataSize);
/// <summary>
/// Reads up to <paramref name="count"/> 40-byte section headers starting at <paramref name="offset"/>.
/// Stops early at the first header that does not fit inside <paramref name="span"/>.
/// </summary>
private static List<SectionInfo> ParseSectionHeaders(ReadOnlySpan<byte> span, int offset, int count)
{
    const int headerLength = 40;
    const int nameFieldLength = 8;

    var result = new List<SectionInfo>(count);
    var cursor = offset;
    for (var remaining = count; remaining > 0; remaining--, cursor += headerLength)
    {
        if (cursor + headerLength > span.Length)
        {
            break;
        }

        var header = span.Slice(cursor, headerLength);

        // The name field is NUL-padded, but not NUL-terminated when all 8 bytes are used.
        var rawName = header.Slice(0, nameFieldLength);
        var terminator = rawName.IndexOf((byte)0);
        if (terminator >= 0)
        {
            rawName = rawName.Slice(0, terminator);
        }

        result.Add(new SectionInfo(
            Name: Encoding.ASCII.GetString(rawName),
            VirtualAddress: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(12, 4)),
            VirtualSize: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(8, 4)),
            RawDataOffset: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(20, 4)),
            RawDataSize: BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(16, 4))));
    }

    return result;
}
/// <summary>
/// Translates a relative virtual address into a file offset using the section table.
/// </summary>
/// <param name="rva">Relative virtual address to translate.</param>
/// <param name="sections">Parsed section headers.</param>
/// <returns>
/// The file offset, or -1 when no section contains <paramref name="rva"/> or the resulting offset does not
/// fit in a non-negative <see cref="int"/>.
/// </returns>
private static int RvaToFileOffset(uint rva, List<SectionInfo> sections)
{
    foreach (var section in sections)
    {
        // Some linkers leave VirtualSize at zero; fall back to the raw data size so such sections still map.
        var mappedSize = section.VirtualSize != 0 ? section.VirtualSize : section.RawDataSize;

        // 64-bit arithmetic: VirtualAddress + size can exceed uint.MaxValue in a crafted header, and a
        // wrapped sum would make the range check reject (or wrongly accept) addresses.
        var sectionEnd = (ulong)section.VirtualAddress + mappedSize;
        if (rva < section.VirtualAddress || rva >= sectionEnd)
        {
            continue;
        }

        var fileOffset = (ulong)section.RawDataOffset + (rva - section.VirtualAddress);
        return fileOffset <= int.MaxValue ? (int)fileOffset : -1;
    }

    return -1;
}
/// <summary>
/// Walks the import directory table and returns one dependency per distinct DLL name (case-insensitive),
/// together with the function names or ordinals imported through that descriptor.
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <param name="importRva">RVA of the import directory table.</param>
/// <param name="sections">Parsed section headers used for RVA translation.</param>
/// <param name="reasonCode">Reason code stored on every returned dependency.</param>
/// <param name="is64Bit">Whether thunk entries are 8 bytes (PE32+) or 4 bytes (PE32).</param>
/// <returns>The dependencies found; empty when the table cannot be located in the file.</returns>
private static List<PeDeclaredDependency> ParseImportDirectory(
    ReadOnlySpan<byte> span, uint importRva, List<SectionInfo> sections, string reasonCode, bool is64Bit)
{
    var dependencies = new List<PeDeclaredDependency>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var importOffset = RvaToFileOffset(importRva, sections);
    if (importOffset < 0)
    {
        return dependencies;
    }

    // Each import descriptor is 20 bytes:
    // OriginalFirstThunk, TimeDateStamp, ForwarderChain, Name, FirstThunk (all DWORDs).
    const int descriptorSize = 20;
    var offset = importOffset;
    while (offset + descriptorSize <= span.Length)
    {
        var originalFirstThunk = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset, 4));
        var timeDateStamp = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset + 4, 4));
        var nameRva = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset + 12, 4));
        var firstThunk = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset + 16, 4));

        // End of import directory (null entry)
        if (nameRva == 0)
        {
            break;
        }

        var nameOffset = RvaToFileOffset(nameRva, sections);
        if (nameOffset >= 0 && nameOffset < span.Length)
        {
            var dllName = ReadNullTerminatedString(span, nameOffset);
            if (!string.IsNullOrEmpty(dllName) && seen.Add(dllName))
            {
                // Some linkers emit no import lookup table (OriginalFirstThunk == 0). On disk the import
                // address table then carries the same name/ordinal entries, unless the image is bound
                // (non-zero TimeDateStamp), in which case it holds resolved addresses and is not usable.
                var lookupTableRva = originalFirstThunk;
                if (lookupTableRva == 0 && timeDateStamp == 0)
                {
                    lookupTableRva = firstThunk;
                }

                // Parse imported function names (optional, for detailed analysis)
                var functions = ParseImportedFunctions(span, lookupTableRva, sections, is64Bit);
                dependencies.Add(new PeDeclaredDependency(dllName, reasonCode, functions));
            }
        }

        offset += descriptorSize;
    }

    return dependencies;
}
/// <summary>
/// Walks the delay-load directory table and returns one dependency per distinct DLL name
/// (case-insensitive), each tagged with the reason code "pe-delayimport".
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <param name="delayImportRva">RVA of the delay-load directory table.</param>
/// <param name="sections">Parsed section headers used for RVA translation.</param>
/// <param name="is64Bit">Whether thunk entries are 8 bytes (PE32+) or 4 bytes (PE32).</param>
/// <returns>The dependencies found; empty when the table cannot be located in the file.</returns>
private static List<PeDeclaredDependency> ParseDelayImportDirectory(
    ReadOnlySpan<byte> span, uint delayImportRva, List<SectionInfo> sections, bool is64Bit)
{
    var dependencies = new List<PeDeclaredDependency>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var delayImportOffset = RvaToFileOffset(delayImportRva, sections);
    if (delayImportOffset < 0)
    {
        return dependencies;
    }

    // Delay import descriptor is 32 bytes. Only two of its DWORD fields are needed here:
    // the DLL name RVA at offset 4 and the delay import name table RVA at offset 16.
    const int descriptorSize = 32;
    const int nameRvaFieldOffset = 4;
    const int nameTableRvaFieldOffset = 16;

    var offset = delayImportOffset;
    while (offset + descriptorSize <= span.Length)
    {
        var nameRva = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(offset + nameRvaFieldOffset, 4));
        var delayImportNameTableRva = BinaryPrimitives.ReadUInt32LittleEndian(
            span.Slice(offset + nameTableRvaFieldOffset, 4));

        // End of delay import directory (null entry)
        if (nameRva == 0)
        {
            break;
        }

        var nameOffset = RvaToFileOffset(nameRva, sections);
        if (nameOffset >= 0 && nameOffset < span.Length)
        {
            var dllName = ReadNullTerminatedString(span, nameOffset);
            if (!string.IsNullOrEmpty(dllName) && seen.Add(dllName))
            {
                var functions = ParseImportedFunctions(span, delayImportNameTableRva, sections, is64Bit);
                dependencies.Add(new PeDeclaredDependency(dllName, "pe-delayimport", functions));
            }
        }

        offset += descriptorSize;
    }

    return dependencies;
}
/// <summary>
/// Reads a thunk table and returns the imported symbols: names for by-name imports and
/// "#ord&lt;n&gt;" markers for by-ordinal imports. Collection stops at the first zero entry, at the end
/// of the data, or once 1000 symbols have been gathered.
/// </summary>
private static List<string> ParseImportedFunctions(
    ReadOnlySpan<byte> span, uint thunkRva, List<SectionInfo> sections, bool is64Bit)
{
    const int maxCollected = 1000;
    var names = new List<string>();
    if (thunkRva == 0)
    {
        return names;
    }

    var cursor = RvaToFileOffset(thunkRva, sections);
    if (cursor < 0)
    {
        return names;
    }

    var stride = is64Bit ? sizeof(ulong) : sizeof(uint);

    // The top bit of an entry marks an import by ordinal.
    var ordinalMask = is64Bit ? 1UL << 63 : 1UL << 31;

    for (; cursor + stride <= span.Length && names.Count < maxCollected; cursor += stride)
    {
        var entry = span.Slice(cursor, stride);
        ulong thunk = is64Bit
            ? BinaryPrimitives.ReadUInt64LittleEndian(entry)
            : BinaryPrimitives.ReadUInt32LittleEndian(entry);
        if (thunk == 0)
        {
            break;
        }

        if ((thunk & ordinalMask) != 0)
        {
            // Import by ordinal: the low 16 bits carry the ordinal number.
            var ordinal = (ushort)(thunk & 0xFFFF);
            names.Add($"#ord{ordinal}");
            continue;
        }

        // Import by name: the low 31 bits are the RVA of a hint/name entry.
        var hintNameOffset = RvaToFileOffset((uint)(thunk & 0x7FFFFFFF), sections);
        if (hintNameOffset < 0 || hintNameOffset + 2 >= span.Length)
        {
            continue;
        }

        // Skip the 2-byte hint that precedes the name.
        var symbol = ReadNullTerminatedString(span, hintNameOffset + 2);
        if (!string.IsNullOrEmpty(symbol))
        {
            names.Add(symbol);
        }
    }

    return names;
}
/// <summary>
/// Locates the embedded manifest (through the resource directory first, then by scanning the raw bytes)
/// and parses its SxS dependencies. Returns an empty list when no manifest bytes are found.
/// </summary>
private static List<PeSxsDependency> ParseManifestFromResources(
    ReadOnlySpan<byte> span, uint resourceRva, List<SectionInfo> sections)
{
    var directoryOffset = RvaToFileOffset(resourceRva, sections);

    // Only walk the resource directory when its 16-byte header lies inside the data.
    var directoryReadable = directoryOffset >= 0 && directoryOffset + 16 <= span.Length;
    var manifestBytes = directoryReadable
        ? FindManifestResource(span, directoryOffset, resourceRva, sections)
        : null;

    // Fallback: search for manifest XML anywhere in the binary.
    if (manifestBytes is not { Length: > 0 })
    {
        manifestBytes = SearchForManifestXml(span);
    }

    return manifestBytes is { Length: > 0 }
        ? ParseManifestXml(manifestBytes)
        : new List<PeSxsDependency>();
}
/// <summary>
/// Scans the ID entries of the root resource directory for RT_MANIFEST and returns the bytes of the first
/// data entry found beneath it.
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <param name="resourceOffset">File offset of the root resource directory.</param>
/// <param name="resourceRva">RVA of the resource directory (not used by the lookup itself).</param>
/// <param name="sections">Parsed section headers used to translate the data RVA.</param>
/// <returns>The manifest bytes, or <c>null</c> when no usable RT_MANIFEST entry exists.</returns>
private static byte[]? FindManifestResource(
    ReadOnlySpan<byte> span, int resourceOffset, uint resourceRva, List<SectionInfo> sections)
{
    // Resource directory structure:
    // DWORD Characteristics, DWORD TimeDateStamp, WORD MajorVersion, WORD MinorVersion
    // WORD NumberOfNamedEntries, WORD NumberOfIdEntries
    // Then 8-byte entries: named entries first, ID entries after them.
    const int directoryHeaderSize = 16;
    const int directoryEntrySize = 8;

    if (resourceOffset < 0 || resourceOffset > span.Length - directoryHeaderSize)
    {
        return null;
    }

    var numberOfNamedEntries = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(resourceOffset + 12, 2));
    var numberOfIdEntries = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(resourceOffset + 14, 2));

    // Offsets come from untrusted data; compute them as long so they cannot wrap to negative ints.
    // Skip named entries, look for RT_MANIFEST (ID = 24).
    var entryOffset = (long)resourceOffset + directoryHeaderSize + (long)numberOfNamedEntries * directoryEntrySize;
    for (var i = 0; i < numberOfIdEntries; i++, entryOffset += directoryEntrySize)
    {
        if (entryOffset > span.Length - directoryEntrySize)
        {
            break;
        }

        var entry = span.Slice((int)entryOffset, directoryEntrySize);
        var id = BinaryPrimitives.ReadUInt32LittleEndian(entry);
        var offsetOrData = BinaryPrimitives.ReadUInt32LittleEndian(entry.Slice(4, 4));

        // High bit set means the entry points at a subdirectory; only that form is followed.
        if (id != RT_MANIFEST || (offsetOrData & 0x80000000) == 0)
        {
            continue;
        }

        var subDirOffset = (long)resourceOffset + (offsetOrData & 0x7FFFFFFF);
        if (subDirOffset > span.Length - directoryHeaderSize)
        {
            // Subdirectory lies outside the file: malformed resource tree.
            return null;
        }

        return FindFirstResourceData(span, (int)subDirOffset, resourceOffset, sections);
    }

    return null;
}
/// <summary>
/// Follows the first ID entry of a resource directory, descending through subdirectories until a data
/// entry is reached, and returns that entry's bytes.
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <param name="dirOffset">File offset of the directory to start from.</param>
/// <param name="resourceBase">File offset of the root resource directory; entry offsets are relative to it.</param>
/// <param name="sections">Parsed section headers used to translate the data RVA.</param>
/// <returns>
/// The resource bytes, or <c>null</c> when the tree is truncated, points outside the file, has no ID
/// entries, or nests deeper than the supported limit.
/// </returns>
private static byte[]? FindFirstResourceData(ReadOnlySpan<byte> span, int dirOffset, int resourceBase, List<SectionInfo> sections)
{
    const int directoryHeaderSize = 16;
    const int directoryEntrySize = 8;
    const int dataEntrySize = 16;

    // The walk is iterative and depth-limited: a directory entry that points back at itself (or at an
    // ancestor) would otherwise recurse until the stack overflows, which cannot be caught in .NET.
    // Resource trees are conventionally three levels deep (type, name, language), so 4 is generous.
    const int maxDirectoryDepth = 4;

    var currentDir = dirOffset;
    for (var depth = 0; depth < maxDirectoryDepth; depth++)
    {
        if (currentDir < 0 || currentDir > span.Length - directoryHeaderSize)
        {
            return null;
        }

        var numberOfNamedEntries = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(currentDir + 12, 2));
        var numberOfIdEntries = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(currentDir + 14, 2));

        // First ID entry follows the header and all named entries. long arithmetic avoids int wrap-around.
        var entryOffset = (long)currentDir + directoryHeaderSize + (long)numberOfNamedEntries * directoryEntrySize;
        if (numberOfIdEntries == 0 || entryOffset > span.Length - directoryEntrySize)
        {
            return null;
        }

        var offsetOrData = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice((int)entryOffset + 4, 4));
        var target = (long)resourceBase + (offsetOrData & 0x7FFFFFFF);

        if ((offsetOrData & 0x80000000) != 0)
        {
            // Another subdirectory (e.g. the language level): descend into it.
            if (target < 0 || target > span.Length - directoryHeaderSize)
            {
                return null;
            }

            currentDir = (int)target;
            continue;
        }

        // Data entry - IMAGE_RESOURCE_DATA_ENTRY structure (data RVA, size, code page, reserved).
        if (target < 0 || target > span.Length - dataEntrySize)
        {
            return null;
        }

        var dataEntryOffset = (int)target;
        var dataRva = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(dataEntryOffset, 4));
        var dataSize = BinaryPrimitives.ReadUInt32LittleEndian(span.Slice(dataEntryOffset + 4, 4));

        // Convert RVA to file offset using section headers
        var dataOffset = RvaToFileOffset(dataRva, sections);
        if (dataOffset >= 0 && dataSize > 0 && (long)dataOffset + dataSize <= span.Length)
        {
            return span.Slice(dataOffset, (int)dataSize).ToArray();
        }

        return null;
    }

    return null;
}
/// <summary>
/// Scans raw bytes for an embedded manifest: the earliest "&lt;?xml" or "&lt;assembly" marker, up to and
/// including the first "&lt;/assembly&gt;" that follows it.
/// </summary>
/// <param name="span">Raw image bytes.</param>
/// <returns>A copy of the manifest bytes, or <c>null</c> when no start marker or no closing tag is found.</returns>
private static byte[]? SearchForManifestXml(ReadOnlySpan<byte> span)
{
    var xmlIndex = span.IndexOf("<?xml"u8);
    var assemblyIndex = span.IndexOf("<assembly"u8);

    // Earliest occurrence of either start marker.
    int start;
    if (xmlIndex < 0)
    {
        start = assemblyIndex;
    }
    else if (assemblyIndex < 0)
    {
        start = xmlIndex;
    }
    else
    {
        start = Math.Min(xmlIndex, assemblyIndex);
    }

    if (start < 0)
    {
        return null;
    }

    // If no closing tag follows the earliest marker, none follows any later marker either, so a single
    // search is enough. IndexOf also matches a closing tag that ends exactly at the end of the data.
    var endMarker = "</assembly>"u8;
    var relativeEnd = span.Slice(start).IndexOf(endMarker);
    if (relativeEnd < 0)
    {
        return null;
    }

    return span.Slice(start, relativeEnd + endMarker.Length).ToArray();
}
/// <summary>
/// Extracts side-by-side dependencies from manifest XML: every <c>assemblyIdentity</c> element nested
/// inside a <c>dependentAssembly</c> element.
/// </summary>
/// <param name="manifestData">Manifest bytes, decoded as UTF-8 (a leading BOM is tolerated).</param>
/// <returns>
/// The dependencies read. Parsing is best-effort: if the XML is malformed, the entries read before the
/// error are returned.
/// </returns>
private static List<PeSxsDependency> ParseManifestXml(byte[] manifestData)
{
    var sxsDependencies = new List<PeSxsDependency>();
    try
    {
        var xmlString = Encoding.UTF8.GetString(manifestData);

        // Handle BOM if present
        if (xmlString.Length > 0 && xmlString[0] == '\uFEFF')
        {
            xmlString = xmlString[1..];
        }

        using var reader = XmlReader.Create(new StringReader(xmlString), new XmlReaderSettings
        {
            IgnoreWhitespace = true,
            IgnoreComments = true,
            DtdProcessing = DtdProcessing.Ignore,
        });

        // Number of currently open <dependentAssembly> elements. The manifest's own identity is an
        // <assemblyIdentity> outside any <dependentAssembly> and must not be reported as a dependency.
        var dependentAssemblyDepth = 0;
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.EndElement)
            {
                if (reader.LocalName == "dependentAssembly" && dependentAssemblyDepth > 0)
                {
                    dependentAssemblyDepth--;
                }

                continue;
            }

            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            if (reader.LocalName == "dependentAssembly")
            {
                // A self-closing element has no matching EndElement node and no children to track.
                if (!reader.IsEmptyElement)
                {
                    dependentAssemblyDepth++;
                }

                continue;
            }

            if (reader.LocalName != "assemblyIdentity" || dependentAssemblyDepth == 0)
            {
                continue;
            }

            var name = reader.GetAttribute("name");
            var version = reader.GetAttribute("version");
            var publicKeyToken = reader.GetAttribute("publicKeyToken");
            var processorArchitecture = reader.GetAttribute("processorArchitecture");
            var type = reader.GetAttribute("type");
            if (!string.IsNullOrEmpty(name) && name != "MyApplication") // Skip self-reference
            {
                sxsDependencies.Add(new PeSxsDependency(
                    name, version, publicKeyToken, processorArchitecture, type));
            }
        }
    }
    catch (XmlException)
    {
        // Malformed manifest XML: keep whatever was read before the error.
    }

    return sxsDependencies;
}
/// <summary>
/// Decodes the ASCII string that starts at <paramref name="offset"/> and runs to the next NUL byte.
/// When no NUL byte follows, at most 256 bytes are decoded. Returns an empty string for an offset
/// outside <paramref name="span"/>.
/// </summary>
private static string ReadNullTerminatedString(ReadOnlySpan<byte> span, int offset)
{
    const int unterminatedLimit = 256;

    // A single unsigned comparison rejects both negative offsets and offsets at or past the end.
    if ((uint)offset >= (uint)span.Length)
    {
        return string.Empty;
    }

    var tail = span.Slice(offset);
    var byteCount = tail.IndexOf((byte)0);
    if (byteCount < 0)
    {
        byteCount = tail.Length < unterminatedLimit ? tail.Length : unterminatedLimit;
    }

    return Encoding.ASCII.GetString(tail.Slice(0, byteCount));
}
/// <summary>
/// Maps a COFF machine value to the architecture label used by this parser; unmapped values give <c>null</c>.
/// </summary>
private static string? MapPeMachine(ushort machine)
{
    switch (machine)
    {
        case 0x014c:
            return "x86";
        case 0x0200:
            return "ia64";
        case 0x8664:
            return "x86_64";
        case 0x01c0:
            return "arm";
        case 0x01c4:
            return "armv7";
        case 0xAA64:
            return "arm64";
        default:
            return null;
    }
}
}