// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.

using Iced.Intel;
using Microsoft.Extensions.Logging;
using System.Collections.Immutable;
using System.Text;

namespace StellaOps.BinaryIndex.Disassembly.Iced;

/// <summary>
/// Iced-based disassembly plugin for x86/x64 binaries.
/// Iced is a pure .NET, high-performance x86/x64 disassembler/assembler.
/// </summary>
public sealed class IcedDisassemblyPlugin : IDisassemblyPlugin
{
    /// <summary>
    /// Plugin identifier.
    /// </summary>
    public const string PluginId = "stellaops.disasm.iced";

    // Number of bytes disassembled for a symbol that reports no size.
    private const ulong DefaultSymbolSize = 4096UL;

    // ELF layout constants (sizes in bytes).
    private const byte ElfClass64 = 2;
    private const int ElfHeaderSize32 = 52;
    private const int ElfHeaderSize64 = 64;
    private const int ElfSectionHeaderSize32 = 40;
    private const int ElfSectionHeaderSize64 = 64;

    // Size in bytes of one PE section table entry.
    private const int PeSectionHeaderSize = 40;

    private readonly ILogger<IcedDisassemblyPlugin> _logger;

    private static readonly DisassemblyCapabilities s_capabilities = new()
    {
        PluginId = PluginId,
        Name = "Iced Disassembler",
        Version = "1.21.0",
        SupportedArchitectures = [CpuArchitecture.X86, CpuArchitecture.X86_64],
        SupportedFormats = [BinaryFormat.ELF, BinaryFormat.PE, BinaryFormat.MachO, BinaryFormat.Raw],
        SupportsLifting = false,
        SupportsCfgRecovery = false,
        Priority = 100 // High priority for x86/x64
    };

    /// <summary>
    /// Creates a new Iced disassembly plugin.
    /// </summary>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public IcedDisassemblyPlugin(ILogger<IcedDisassemblyPlugin> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Static description of this plugin (identifier, supported architectures/formats, priority).
    /// </summary>
    public DisassemblyCapabilities Capabilities => s_capabilities;

    /// <summary>
    /// Reads <paramref name="stream"/> to its end and loads the resulting bytes as a binary.
    /// </summary>
    /// <param name="stream">Source stream; read from its current position, not disposed.</param>
    /// <param name="archHint">Architecture to use instead of auto-detection.</param>
    /// <param name="formatHint">Container format to use instead of auto-detection.</param>
    /// <returns>Binary description whose handle owns a private copy of the bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        ArgumentNullException.ThrowIfNull(stream);

        using var memStream = new MemoryStream();
        stream.CopyTo(memStream);

        // ToArray already yields a fresh array, so hand it over without copying again.
        return LoadBinaryCore(memStream.ToArray(), archHint, formatHint);
    }

    /// <summary>
    /// Loads a binary from an in-memory buffer. The buffer is copied.
    /// </summary>
    /// <param name="bytes">Raw file contents.</param>
    /// <param name="archHint">Architecture to use instead of auto-detection.</param>
    /// <param name="formatHint">Container format to use instead of auto-detection.</param>
    /// <returns>Binary description whose handle owns a private copy of the bytes.</returns>
    public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        return LoadBinaryCore(bytes.ToArray(), archHint, formatHint);
    }

    /// <summary>
    /// Returns the regions of the binary: allocated ELF sections, PE sections with raw data,
    /// or a single region spanning the whole file for Mach-O and raw input.
    /// </summary>
    /// <param name="binary">A binary previously returned by <c>LoadBinary</c> of this plugin.</param>
    /// <exception cref="ArgumentNullException"><paramref name="binary"/> is null.</exception>
    /// <exception cref="ArgumentException">The binary's handle was not created by this plugin.</exception>
    public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
    {
        ArgumentNullException.ThrowIfNull(binary);
        var handle = GetHandle(binary);

        return binary.Format switch
        {
            BinaryFormat.ELF => ParseElfSections(handle.Bytes),
            BinaryFormat.PE => ParsePeSections(handle.Bytes),
            BinaryFormat.MachO => ParseMachOSections(handle.Bytes),
            _ => [new CodeRegion(".text", 0, 0, (ulong)handle.Bytes.Length, true, true, false)]
        };
    }

    /// <summary>
    /// Returns the symbols of the binary. Symbol parsing is not implemented yet,
    /// so the result is currently always empty.
    /// </summary>
    /// <param name="binary">A binary previously returned by <c>LoadBinary</c> of this plugin.</param>
    /// <exception cref="ArgumentNullException"><paramref name="binary"/> is null.</exception>
    /// <exception cref="ArgumentException">The binary's handle was not created by this plugin.</exception>
    public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
    {
        ArgumentNullException.ThrowIfNull(binary);
        var handle = GetHandle(binary);

        return binary.Format switch
        {
            BinaryFormat.ELF => ParseElfSymbols(handle.Bytes),
            BinaryFormat.PE => ParsePeExports(handle.Bytes),
            _ => []
        };
    }

    /// <summary>
    /// Lazily disassembles <paramref name="region"/>. Bytes that do not decode to a valid
    /// instruction are skipped. A region that starts outside the file (or is empty) logs a
    /// warning and yields nothing; a region that runs past the end of the file is truncated.
    /// </summary>
    /// <param name="binary">A binary previously returned by <c>LoadBinary</c> of this plugin.</param>
    /// <param name="region">Region to decode; instruction addresses start at its virtual address.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The binary's handle was not created by this plugin.</exception>
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
    {
        // Validate here rather than inside the iterator so bad arguments fail at the call
        // site instead of on the first MoveNext.
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(region);

        return DisassembleCore(GetHandle(binary), region);
    }

    /// <summary>
    /// Disassembles <paramref name="length"/> bytes starting at <paramref name="startAddress"/>.
    /// Simplified: the address is used both as the virtual address and as the file offset.
    /// </summary>
    /// <param name="binary">A binary previously returned by <c>LoadBinary</c> of this plugin.</param>
    /// <param name="startAddress">Start address, also used as the file offset.</param>
    /// <param name="length">Number of bytes to decode.</param>
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length)
    {
        var region = new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress, // Simplified - assumes VA == file offset
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }

    /// <summary>
    /// Disassembles the code of <paramref name="symbol"/>. A symbol without a size is decoded
    /// for 4096 bytes. Simplified: the symbol address is also used as the file offset.
    /// </summary>
    /// <param name="binary">A binary previously returned by <c>LoadBinary</c> of this plugin.</param>
    /// <param name="symbol">Symbol whose address and size delimit the code to decode.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(symbol);

        var size = symbol.Size > 0 ? symbol.Size : DefaultSymbolSize;
        var region = new CodeRegion(
            Name: symbol.Name,
            VirtualAddress: symbol.Address,
            FileOffset: symbol.Address, // Simplified - assumes VA == file offset
            Size: size,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }

    /// <summary>
    /// Shared implementation of both <c>LoadBinary</c> overloads.
    /// Takes ownership of <paramref name="byteArray"/>; callers must pass a private copy.
    /// </summary>
    private BinaryInfo LoadBinaryCore(byte[] byteArray, CpuArchitecture? archHint, BinaryFormat? formatHint)
    {
        var format = formatHint ?? DetectFormat(byteArray);
        var architecture = archHint ?? DetectArchitecture(byteArray, format);
        var bitness = GetBitness(architecture);
        var endianness = Endianness.Little; // x86/x64 is always little-endian
        var abi = DetectAbi(format);

        _logger.LogDebug(
            "Loaded binary with Iced plugin: Format={Format}, Architecture={Architecture}, Size={Size}",
            format,
            architecture,
            byteArray.Length);

        var metadata = new Dictionary<string, object>
        {
            ["size"] = byteArray.Length,
            ["bitness"] = bitness
        };

        return new BinaryInfo(
            Format: format,
            Architecture: architecture,
            Bitness: bitness,
            Endianness: endianness,
            Abi: abi,
            EntryPoint: TryGetEntryPoint(byteArray, format),
            BuildId: null,
            Metadata: metadata,
            Handle: new IcedBinaryHandle(byteArray, bitness));
    }

    /// <summary>
    /// Iterator behind <see cref="Disassemble(BinaryInfo, CodeRegion)"/>; arguments are already validated.
    /// </summary>
    private IEnumerable<DisassembledInstruction> DisassembleCore(IcedBinaryHandle handle, CodeRegion region)
    {
        var image = handle.Bytes;

        // Compare in 64-bit space: narrowing the offset to int first would wrap for large
        // values and let an out-of-range region through.
        if (region.FileOffset >= (ulong)image.Length || region.Size == 0)
        {
            _logger.LogWarning("Region {Name} is outside binary bounds", region.Name);
            yield break;
        }

        var regionOffset = (int)region.FileOffset;
        var regionSize = (int)Math.Min(region.Size, (ulong)(image.Length - regionOffset));

        // Decode straight out of the image; no per-region copy is needed.
        var codeReader = new ByteArrayCodeReader(image, regionOffset, regionSize);
        var decoder = global::Iced.Intel.Decoder.Create(handle.Bitness, codeReader);
        decoder.IP = region.VirtualAddress;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} ({Size} bytes, {Bitness}-bit)",
            region.Name,
            region.VirtualAddress,
            regionSize,
            handle.Bitness);

        // One formatter/output pair per enumeration instead of one per instruction.
        var formatter = new NasmFormatter();
        var output = new StringOutput();

        while (codeReader.CanReadByte)
        {
            decoder.Decode(out var instruction);

            if (instruction.IsInvalid)
            {
                // The decoder has already consumed the undecodable bytes and moved IP past
                // them; adjusting IP again would shift every following address.
                continue;
            }

            // Translate the instruction's virtual address back to its position in the file.
            var fileOffset = regionOffset + (long)(instruction.IP - region.VirtualAddress);
            yield return MapInstruction(instruction, image, fileOffset, formatter, output);
        }
    }

    #region Format/Architecture Detection

    /// <summary>
    /// Identifies the container format from the leading magic bytes; anything unrecognised is raw.
    /// </summary>
    private static BinaryFormat DetectFormat(byte[] bytes)
    {
        if (bytes.Length < 4)
        {
            return BinaryFormat.Raw;
        }

        // ELF magic: 0x7F 'E' 'L' 'F'
        if (bytes[0] == 0x7F && bytes[1] == 'E' && bytes[2] == 'L' && bytes[3] == 'F')
        {
            return BinaryFormat.ELF;
        }

        // PE magic: 'M' 'Z'
        if (bytes[0] == 'M' && bytes[1] == 'Z')
        {
            return BinaryFormat.PE;
        }

        // Mach-O magic (FEEDFACE / FEEDFACF) in either byte order
        var bigEndianMagic = bytes[0] == 0xFE && bytes[1] == 0xED && bytes[2] == 0xFA
            && (bytes[3] == 0xCE || bytes[3] == 0xCF);
        var littleEndianMagic = bytes[3] == 0xFE && bytes[2] == 0xED && bytes[1] == 0xFA
            && (bytes[0] == 0xCE || bytes[0] == 0xCF);

        return bigEndianMagic || littleEndianMagic ? BinaryFormat.MachO : BinaryFormat.Raw;
    }

    /// <summary>
    /// Determines the CPU architecture from the format's header, defaulting to x86-64 when
    /// the header is too short or the format carries no architecture field.
    /// </summary>
    private static CpuArchitecture DetectArchitecture(byte[] bytes, BinaryFormat format)
    {
        return format switch
        {
            // e_machine occupies bytes 18..19, so 20 bytes are required.
            BinaryFormat.ELF when bytes.Length >= 20 => DetectElfArchitecture(bytes),
            BinaryFormat.PE when bytes.Length > 0x40 => DetectPeArchitecture(bytes),
            BinaryFormat.MachO when bytes.Length > 8 => DetectMachOArchitecture(bytes),
            _ => CpuArchitecture.X86_64 // Default
        };
    }

    /// <summary>
    /// Maps the ELF <c>e_machine</c> field to an architecture. Caller guarantees at least 20 bytes.
    /// </summary>
    private static CpuArchitecture DetectElfArchitecture(byte[] bytes)
    {
        // e_machine at offset 18 (2 bytes)
        var machine = BitConverter.ToUInt16(bytes, 18);

        return machine switch
        {
            0x03 => CpuArchitecture.X86, // EM_386
            0x3E => CpuArchitecture.X86_64, // EM_X86_64
            0x28 => CpuArchitecture.ARM32, // EM_ARM
            0xB7 => CpuArchitecture.ARM64, // EM_AARCH64
            0x08 => CpuArchitecture.MIPS32, // EM_MIPS
            0xF3 => CpuArchitecture.RISCV64, // EM_RISCV
            // Unknown machine: fall back on the ELF class byte (2 = 64-bit).
            _ => bytes[4] == ElfClass64 ? CpuArchitecture.X86_64 : CpuArchitecture.X86
        };
    }

    /// <summary>
    /// Maps the PE COFF <c>Machine</c> field to an architecture, defaulting to x86.
    /// Caller guarantees more than 0x40 bytes.
    /// </summary>
    private static CpuArchitecture DetectPeArchitecture(byte[] bytes)
    {
        var peOffset = BitConverter.ToInt32(bytes, 0x3C);

        // Written as a subtraction so a huge e_lfanew cannot overflow the comparison.
        if (peOffset < 0 || peOffset > bytes.Length - 6)
        {
            return CpuArchitecture.X86;
        }

        var machine = BitConverter.ToUInt16(bytes, peOffset + 4);

        return machine switch
        {
            0x014c => CpuArchitecture.X86, // IMAGE_FILE_MACHINE_I386
            0x8664 => CpuArchitecture.X86_64, // IMAGE_FILE_MACHINE_AMD64
            0xaa64 => CpuArchitecture.ARM64, // IMAGE_FILE_MACHINE_ARM64
            0x01c4 => CpuArchitecture.ARM32, // IMAGE_FILE_MACHINE_ARMNT
            _ => CpuArchitecture.X86
        };
    }

    /// <summary>
    /// Maps the Mach-O <c>cputype</c> field to an architecture, defaulting to x86-64.
    /// Caller guarantees more than 8 bytes.
    /// </summary>
    private static CpuArchitecture DetectMachOArchitecture(byte[] bytes)
    {
        // Check if big-endian or little-endian magic
        var isBigEndian = bytes[0] == 0xFE;
        const int cpuTypeOffset = 4;

        uint cpuType;
        if (isBigEndian)
        {
            cpuType = (uint)((bytes[cpuTypeOffset] << 24)
                | (bytes[cpuTypeOffset + 1] << 16)
                | (bytes[cpuTypeOffset + 2] << 8)
                | bytes[cpuTypeOffset + 3]);
        }
        else
        {
            cpuType = BitConverter.ToUInt32(bytes, cpuTypeOffset);
        }

        return cpuType switch
        {
            0x00000007 => CpuArchitecture.X86, // CPU_TYPE_X86
            0x01000007 => CpuArchitecture.X86_64, // CPU_TYPE_X86_64
            0x0000000C => CpuArchitecture.ARM32, // CPU_TYPE_ARM
            0x0100000C => CpuArchitecture.ARM64, // CPU_TYPE_ARM64
            _ => CpuArchitecture.X86_64
        };
    }

    /// <summary>
    /// Returns 32 for the 32-bit architectures listed and 64 for everything else.
    /// </summary>
    private static int GetBitness(CpuArchitecture arch)
    {
        return arch switch
        {
            CpuArchitecture.X86 or CpuArchitecture.ARM32 or CpuArchitecture.MIPS32 or CpuArchitecture.PPC32 => 32,
            _ => 64
        };
    }

    /// <summary>
    /// Returns the ABI label associated with a container format, or null for raw input.
    /// </summary>
    private static string? DetectAbi(BinaryFormat format)
    {
        return format switch
        {
            BinaryFormat.ELF => "gnu",
            BinaryFormat.PE => "msvc",
            BinaryFormat.MachO => "darwin",
            _ => null
        };
    }

    /// <summary>
    /// Reads the entry point from an ELF or PE header. Returns null when the format has no
    /// supported entry-point field or the header is truncated.
    /// </summary>
    private static ulong? TryGetEntryPoint(byte[] bytes, BinaryFormat format)
    {
        switch (format)
        {
            case BinaryFormat.ELF when bytes.Length > 24:
                // e_entry sits at offset 24 and is 8 bytes wide for ELF64, 4 for ELF32.
                if (bytes[4] == ElfClass64)
                {
                    return bytes.Length >= 32 ? BitConverter.ToUInt64(bytes, 24) : null;
                }

                return bytes.Length >= 28 ? BitConverter.ToUInt32(bytes, 24) : null;

            case BinaryFormat.PE when bytes.Length > 0x40:
                return GetPeEntryPoint(bytes);

            default:
                return null;
        }
    }

    /// <summary>
    /// Reads <c>AddressOfEntryPoint</c> from the PE optional header, or returns null when the
    /// header does not fit in the file.
    /// </summary>
    private static ulong? GetPeEntryPoint(byte[] bytes)
    {
        if (bytes.Length < 0x40)
        {
            return null;
        }

        var peOffset = BitConverter.ToInt32(bytes, 0x3C);

        // The field lives at peOffset + 24 + 16 and is 4 bytes wide, so peOffset + 44 bytes
        // must exist. Subtraction form avoids int overflow on a hostile e_lfanew.
        if (peOffset < 0 || peOffset > bytes.Length - 44)
        {
            return null;
        }

        var optionalHeaderOffset = peOffset + 24;
        return BitConverter.ToUInt32(bytes, optionalHeaderOffset + 16);
    }

    #endregion

    #region Section/Symbol Parsing

    /// <summary>
    /// Enumerates the allocated, non-empty sections of an ELF file. Yields nothing when the
    /// ELF header is truncated, and a single whole-file region when the section header table
    /// is absent or does not fit inside the file.
    /// </summary>
    private static IEnumerable<CodeRegion> ParseElfSections(byte[] bytes)
    {
        if (bytes.Length < ElfHeaderSize32)
        {
            yield break;
        }

        var is64Bit = bytes[4] == ElfClass64;

        // The ELF64 header fields read below extend to byte 63.
        if (is64Bit && bytes.Length < ElfHeaderSize64)
        {
            yield break;
        }

        ulong shoff = is64Bit ? BitConverter.ToUInt64(bytes, 40) : BitConverter.ToUInt32(bytes, 32);
        int shentsize = BitConverter.ToUInt16(bytes, is64Bit ? 58 : 46);
        int shnum = BitConverter.ToUInt16(bytes, is64Bit ? 60 : 48);
        int shstrndx = BitConverter.ToUInt16(bytes, is64Bit ? 62 : 50);

        var minEntrySize = is64Bit ? ElfSectionHeaderSize64 : ElfSectionHeaderSize32;
        var fileLength = (ulong)bytes.Length;
        var tableSize = (ulong)shnum * (ulong)shentsize;

        // All comparisons stay unsigned and subtraction-based so that neither a huge e_shoff
        // nor the table size can wrap. Entries shorter than a real section header are
        // rejected because the field reads below would run past them.
        var tableIsUsable = shoff != 0
            && shnum != 0
            && shentsize >= minEntrySize
            && shoff <= fileLength
            && tableSize <= fileLength - shoff;

        if (!tableIsUsable)
        {
            yield return new CodeRegion(".text", 0, 0, (ulong)bytes.Length, true, true, false);
            yield break;
        }

        var tableStart = (long)shoff;

        // Locate the section-name string table, if the header designates a valid one.
        var hasStringTable = shstrndx < shnum;
        ulong strtabOffset = 0;
        if (hasStringTable)
        {
            var strtabHeader = (int)(tableStart + (long)shstrndx * shentsize);
            strtabOffset = is64Bit
                ? BitConverter.ToUInt64(bytes, strtabHeader + 24)
                : BitConverter.ToUInt32(bytes, strtabHeader + 16);
        }

        for (var i = 0; i < shnum; i++)
        {
            // In range by construction: the whole table was validated above.
            var sectionOffset = (int)(tableStart + (long)i * shentsize);

            var nameOffset = BitConverter.ToUInt32(bytes, sectionOffset);

            // sh_flags starts at offset 8 in both classes; only the low 32 bits are needed.
            var flags = BitConverter.ToUInt32(bytes, sectionOffset + 8);
            ulong addr = is64Bit
                ? BitConverter.ToUInt64(bytes, sectionOffset + 16)
                : BitConverter.ToUInt32(bytes, sectionOffset + 12);
            ulong offset = is64Bit
                ? BitConverter.ToUInt64(bytes, sectionOffset + 24)
                : BitConverter.ToUInt32(bytes, sectionOffset + 16);
            ulong size = is64Bit
                ? BitConverter.ToUInt64(bytes, sectionOffset + 32)
                : BitConverter.ToUInt32(bytes, sectionOffset + 20);

            var name = string.Empty;
            if (hasStringTable)
            {
                var nameAt = strtabOffset + nameOffset;
                if (nameAt < fileLength)
                {
                    name = ReadNullTerminatedString(bytes, (int)nameAt);
                }
            }

            if (string.IsNullOrEmpty(name))
            {
                name = $".section{i}";
            }

            // SHF_ALLOC = 2, SHF_EXECINSTR = 4, SHF_WRITE = 1
            var isAllocated = (flags & 2) != 0;
            if (isAllocated && size > 0)
            {
                yield return new CodeRegion(
                    name,
                    addr,
                    offset,
                    size,
                    IsExecutable: (flags & 4) != 0,
                    IsReadable: true,
                    IsWritable: (flags & 1) != 0);
            }
        }
    }

    /// <summary>
    /// Enumerates the sections of a PE file that have raw data. Yields nothing when the
    /// DOS/PE headers are truncated or the PE signature is missing.
    /// </summary>
    private static IEnumerable<CodeRegion> ParsePeSections(byte[] bytes)
    {
        if (bytes.Length < 64)
        {
            yield break;
        }

        var peOffset = BitConverter.ToInt32(bytes, 0x3C);

        // Subtraction form so a huge e_lfanew cannot overflow the comparison.
        if (peOffset < 0 || peOffset > bytes.Length - 24)
        {
            yield break;
        }

        if (bytes[peOffset] != 'P' || bytes[peOffset + 1] != 'E')
        {
            yield break;
        }

        int numSections = BitConverter.ToUInt16(bytes, peOffset + 6);
        int optHeaderSize = BitConverter.ToUInt16(bytes, peOffset + 20);
        var sectionTableOffset = (long)peOffset + 24 + optHeaderSize;

        for (var i = 0; i < numSections; i++)
        {
            var entryStart = sectionTableOffset + (long)i * PeSectionHeaderSize;
            if (entryStart + PeSectionHeaderSize > bytes.Length)
            {
                break;
            }

            var sectionOffset = (int)entryStart;
            var name = Encoding.ASCII.GetString(bytes, sectionOffset, 8).TrimEnd('\0');
            var virtualAddress = BitConverter.ToUInt32(bytes, sectionOffset + 12);
            var rawSize = BitConverter.ToUInt32(bytes, sectionOffset + 16);
            var rawOffset = BitConverter.ToUInt32(bytes, sectionOffset + 20);
            var characteristics = BitConverter.ToUInt32(bytes, sectionOffset + 36);

            if (rawSize > 0)
            {
                yield return new CodeRegion(
                    name,
                    virtualAddress,
                    rawOffset,
                    rawSize,
                    IsExecutable: (characteristics & 0x20000000) != 0,
                    IsReadable: (characteristics & 0x40000000) != 0,
                    IsWritable: (characteristics & 0x80000000) != 0);
            }
        }
    }

    /// <summary>
    /// Simplified Mach-O handling: the whole file is reported as a single region.
    /// </summary>
    private static IEnumerable<CodeRegion> ParseMachOSections(byte[] bytes)
    {
        // Simplified - return entire binary as code for now
        yield return new CodeRegion(".text", 0, 0, (ulong)bytes.Length, true, true, false);
    }

    /// <summary>
    /// Placeholder: ELF symbol table parsing is not implemented; always returns no symbols.
    /// </summary>
    private static IEnumerable<SymbolInfo> ParseElfSymbols(byte[] bytes)
    {
        // Simplified - symbol parsing is complex
        return [];
    }

    /// <summary>
    /// Placeholder: PE export table parsing is not implemented; always returns no symbols.
    /// </summary>
    private static IEnumerable<SymbolInfo> ParsePeExports(byte[] bytes)
    {
        // Simplified - export parsing is complex
        return [];
    }

    /// <summary>
    /// Reads an ASCII string starting at <paramref name="offset"/>, stopping at the first NUL
    /// byte, the end of the buffer, or 256 characters. Returns an empty string when the
    /// offset is out of range.
    /// </summary>
    private static string ReadNullTerminatedString(byte[] bytes, int offset)
    {
        if (offset < 0 || offset >= bytes.Length)
        {
            return string.Empty;
        }

        var end = Array.IndexOf(bytes, (byte)0, offset);
        if (end < 0)
        {
            end = bytes.Length;
        }

        var length = Math.Min(end - offset, 256);
        return length <= 0 ? string.Empty : Encoding.ASCII.GetString(bytes, offset, length);
    }

    #endregion

    #region Instruction Mapping

    /// <summary>
    /// Extracts this plugin's handle from <paramref name="binary"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The handle was not created by this plugin.</exception>
    private static IcedBinaryHandle GetHandle(BinaryInfo binary)
    {
        if (binary.Handle is not IcedBinaryHandle handle)
        {
            throw new ArgumentException("Invalid binary handle - not an Iced handle", nameof(binary));
        }

        return handle;
    }

    /// <summary>
    /// Converts a decoded Iced instruction into the plugin-neutral representation.
    /// </summary>
    /// <param name="instruction">Decoded instruction.</param>
    /// <param name="image">Complete file contents.</param>
    /// <param name="fileOffset">Offset of the instruction's first byte within <paramref name="image"/>.</param>
    /// <param name="formatter">Formatter used for the operand text.</param>
    /// <param name="output">Scratch output reused between instructions.</param>
    private static DisassembledInstruction MapInstruction(
        Instruction instruction,
        byte[] image,
        long fileOffset,
        NasmFormatter formatter,
        StringOutput output)
    {
        var length = instruction.Length;
        var rawBytes = fileOffset >= 0 && fileOffset + length <= image.Length
            ? ImmutableArray.Create(image, (int)fileOffset, length)
            : ImmutableArray<byte>.Empty;

        return new DisassembledInstruction(
            Address: instruction.IP,
            RawBytes: rawBytes,
            Mnemonic: instruction.Mnemonic.ToString(),
            OperandsText: FormatOperands(instruction, formatter, output),
            Kind: ClassifyInstruction(instruction),
            Operands: MapOperands(instruction));
    }

    /// <summary>
    /// Formats only the operands of <paramref name="instruction"/> in NASM syntax.
    /// </summary>
    private static string FormatOperands(Instruction instruction, NasmFormatter formatter, StringOutput output)
    {
        // Ask the formatter for the operands directly. Splitting the full text at the first
        // space would return the mnemonic as "operands" for prefixed instructions
        // (e.g. "lock add ..." or "rep movsb").
        formatter.FormatAllOperands(instruction, output);
        return output.ToStringAndReset();
    }

    /// <summary>
    /// Buckets an instruction into a coarse <see cref="InstructionKind"/>; anything not
    /// recognised is <see cref="InstructionKind.Unknown"/>.
    /// </summary>
    private static InstructionKind ClassifyInstruction(Instruction instruction)
    {
        if (instruction.IsCallNear || instruction.IsCallFar)
        {
            return InstructionKind.Call;
        }

        var mnemonic = instruction.Mnemonic;

        if (mnemonic is Mnemonic.Ret or Mnemonic.Retf)
        {
            return InstructionKind.Return;
        }

        if (instruction.IsJmpShort || instruction.IsJmpNear || instruction.IsJmpFar
            || instruction.IsJmpShortOrNear || instruction.IsJmpNearIndirect || instruction.IsJmpFarIndirect)
        {
            return InstructionKind.Branch;
        }

        if (instruction.IsJccShort || instruction.IsJccNear || instruction.IsJccShortOrNear)
        {
            return InstructionKind.ConditionalBranch;
        }

        return mnemonic switch
        {
            Mnemonic.Nop => InstructionKind.Nop,

            Mnemonic.Syscall or Mnemonic.Sysenter => InstructionKind.Syscall,

            Mnemonic.Add or Mnemonic.Sub or Mnemonic.Mul or Mnemonic.Imul
                or Mnemonic.Div or Mnemonic.Idiv or Mnemonic.Inc or Mnemonic.Dec => InstructionKind.Arithmetic,

            Mnemonic.And or Mnemonic.Or or Mnemonic.Xor or Mnemonic.Not or Mnemonic.Test => InstructionKind.Logic,

            Mnemonic.Shl or Mnemonic.Shr or Mnemonic.Sal or Mnemonic.Sar
                or Mnemonic.Rol or Mnemonic.Ror => InstructionKind.Shift,

            Mnemonic.Cmp => InstructionKind.Compare,

            Mnemonic.Mov or Mnemonic.Movzx or Mnemonic.Movsx or Mnemonic.Lea
                or Mnemonic.Push or Mnemonic.Pop or Mnemonic.Xchg => InstructionKind.Move,

            _ => InstructionKind.Unknown
        };
    }

    /// <summary>
    /// Maps every operand of <paramref name="instruction"/>, preserving operand order.
    /// </summary>
    private static ImmutableArray<Operand> MapOperands(Instruction instruction)
    {
        var builder = ImmutableArray.CreateBuilder<Operand>(instruction.OpCount);

        for (var i = 0; i < instruction.OpCount; i++)
        {
            builder.Add(MapOperand(instruction, i, instruction.GetOpKind(i)));
        }

        return builder.ToImmutable();
    }

    /// <summary>
    /// Maps one operand. Register, immediate, near-branch and memory operands are decoded;
    /// every other kind is reported as unknown with the Iced kind name as its text.
    /// </summary>
    private static Operand MapOperand(Instruction instruction, int index, OpKind kind)
    {
        return kind switch
        {
            OpKind.Register => new Operand(
                OperandType.Register,
                instruction.GetOpRegister(index).ToString(),
                Register: instruction.GetOpRegister(index).ToString()),

            OpKind.Immediate8 or OpKind.Immediate16 or OpKind.Immediate32 or OpKind.Immediate64
                or OpKind.Immediate8to16 or OpKind.Immediate8to32 or OpKind.Immediate8to64
                or OpKind.Immediate32to64 => new Operand(
                    OperandType.Immediate,
                    $"0x{instruction.GetImmediate(index):X}",
                    Value: (long)instruction.GetImmediate(index)),

            OpKind.NearBranch16 or OpKind.NearBranch32 or OpKind.NearBranch64 => new Operand(
                OperandType.Address,
                $"0x{instruction.NearBranchTarget:X}",
                Value: (long)instruction.NearBranchTarget),

            OpKind.Memory => new Operand(
                OperandType.Memory,
                FormatMemoryOperand(instruction),
                MemoryBase: instruction.MemoryBase != global::Iced.Intel.Register.None
                    ? instruction.MemoryBase.ToString()
                    : null,
                MemoryIndex: instruction.MemoryIndex != global::Iced.Intel.Register.None
                    ? instruction.MemoryIndex.ToString()
                    : null,
                MemoryScale: instruction.MemoryIndexScale,
                MemoryDisplacement: (long)instruction.MemoryDisplacement64),

            _ => new Operand(OperandType.Unknown, kind.ToString())
        };
    }

    /// <summary>
    /// Renders a memory operand as <c>[base+index*scale+0xDISP]</c>, omitting absent parts.
    /// The displacement is printed as an unsigned 64-bit hexadecimal value.
    /// </summary>
    private static string FormatMemoryOperand(Instruction instruction)
    {
        var sb = new StringBuilder();
        sb.Append('[');

        if (instruction.MemoryBase != global::Iced.Intel.Register.None)
        {
            sb.Append(instruction.MemoryBase);
        }

        if (instruction.MemoryIndex != global::Iced.Intel.Register.None)
        {
            // Length > 1 means something already follows the opening bracket.
            if (sb.Length > 1)
            {
                sb.Append('+');
            }

            sb.Append(instruction.MemoryIndex);

            if (instruction.MemoryIndexScale > 1)
            {
                sb.Append('*').Append(instruction.MemoryIndexScale);
            }
        }

        if (instruction.MemoryDisplacement64 != 0)
        {
            if (sb.Length > 1)
            {
                sb.Append('+');
            }

            sb.Append($"0x{instruction.MemoryDisplacement64:X}");
        }

        sb.Append(']');
        return sb.ToString();
    }

    #endregion
}

/// <summary>
/// Internal handle for Iced binary data.
/// </summary>
/// <param name="Bytes">Complete file contents owned by the handle.</param>
/// <param name="Bitness">Decoder bitness (32 or 64) derived from the detected architecture.</param>
internal sealed record IcedBinaryHandle(byte[] Bytes, int Bitness);