Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.Iced/IcedDisassemblyPlugin.cs
2026-02-01 21:37:40 +02:00

598 lines
22 KiB
C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Iced.Intel;
using Microsoft.Extensions.Logging;
using System.Collections.Immutable;
using System.Text;
namespace StellaOps.BinaryIndex.Disassembly.Iced;
/// <summary>
/// Iced-based disassembly plugin for x86/x64 binaries.
/// Iced is a pure .NET, high-performance x86/x64 disassembler/assembler.
/// </summary>
public sealed class IcedDisassemblyPlugin : IDisassemblyPlugin
{
/// <summary>
/// Plugin identifier.
/// </summary>
public const string PluginId = "stellaops.disasm.iced";
private readonly ILogger<IcedDisassemblyPlugin> _logger;
// Shared, immutable capability descriptor handed out by the Capabilities property.
// It advertises x86 / x86-64 decoding for ELF, PE, Mach-O and raw buffers, and reports
// both lifting and CFG recovery as unsupported.
private static readonly DisassemblyCapabilities s_capabilities = new()
{
PluginId = PluginId,
Name = "Iced Disassembler",
// NOTE(review): presumably mirrors the referenced Iced package version - confirm it is kept in sync.
Version = "1.21.0",
SupportedArchitectures = [CpuArchitecture.X86, CpuArchitecture.X86_64],
SupportedFormats = [BinaryFormat.ELF, BinaryFormat.PE, BinaryFormat.MachO, BinaryFormat.Raw],
SupportsLifting = false,
SupportsCfgRecovery = false,
Priority = 100 // High priority for x86/x64
};
/// <summary>
/// Creates a new Iced disassembly plugin.
/// </summary>
/// <param name="logger">Logger instance.</param>
public IcedDisassemblyPlugin(ILogger<IcedDisassemblyPlugin> logger)
{
    // A missing logger is rejected up front with ArgumentNullException("logger").
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public DisassemblyCapabilities Capabilities
{
    // Every instance exposes the same static descriptor.
    get { return s_capabilities; }
}
/// <inheritdoc />
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    // Drain the stream (from its current position) into memory.
    using var buffered = new MemoryStream();
    stream.CopyTo(buffered);

    // The span overload takes its own private copy of the data, so hand it a view over the
    // MemoryStream's internal buffer instead of materialising a second full-size array with
    // ToArray(). GetBuffer() is permitted because the stream was created with the default ctor.
    var contents = new ReadOnlySpan<byte>(buffered.GetBuffer(), 0, (int)buffered.Length);
    return LoadBinary(contents, archHint, formatHint);
}
/// <inheritdoc />
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    // Take a private copy: the handle stored in BinaryInfo must outlive the caller's span.
    var image = bytes.ToArray();

    // Caller-supplied hints win; otherwise the header is sniffed.
    var format = formatHint ?? DetectFormat(image);
    var architecture = archHint ?? DetectArchitecture(image, format);
    var bitness = GetBitness(architecture);
    var abi = DetectAbi(format);

    _logger.LogDebug(
        "Loaded binary with Iced plugin: Format={Format}, Architecture={Architecture}, Size={Size}",
        format, architecture, image.Length);

    var entryPoint = TryGetEntryPoint(image, format);
    var handle = new IcedBinaryHandle(image, bitness);
    var metadata = new Dictionary<string, object>
    {
        ["size"] = image.Length,
        ["bitness"] = bitness
    };

    return new BinaryInfo(
        Format: format,
        Architecture: architecture,
        Bitness: bitness,
        // x86/x64 is always little-endian
        Endianness: Endianness.Little,
        Abi: abi,
        EntryPoint: entryPoint,
        BuildId: null,
        Metadata: metadata,
        Handle: handle);
}
/// <inheritdoc />
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    ArgumentNullException.ThrowIfNull(binary);
    var image = GetHandle(binary).Bytes;

    switch (binary.Format)
    {
        case BinaryFormat.ELF:
            return ParseElfSections(image);
        case BinaryFormat.PE:
            return ParsePeSections(image);
        case BinaryFormat.MachO:
            return ParseMachOSections(image);
        default:
            // Any other format: expose the whole buffer as one readable, executable region at address 0.
            return [new CodeRegion(".text", 0, 0, (ulong)image.Length, true, true, false)];
    }
}
/// <inheritdoc />
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    ArgumentNullException.ThrowIfNull(binary);
    var image = GetHandle(binary).Bytes;

    // Only ELF and PE have a symbol source; every other format yields no symbols.
    if (binary.Format == BinaryFormat.ELF)
    {
        return ParseElfSymbols(image);
    }

    if (binary.Format == BinaryFormat.PE)
    {
        return ParsePeExports(image);
    }

    return [];
}
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    // Validation runs eagerly here (this method is deliberately NOT an iterator), so bad
    // arguments fail at the call site instead of at the first MoveNext().
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    var handle = GetHandle(binary);

    // Compare in ulong space first: casting an oversized FileOffset to int could wrap to a
    // negative value and slip past an int-based bounds check.
    var fileLength = (ulong)handle.Bytes.Length;
    if (region.FileOffset >= fileLength || region.Size == 0)
    {
        _logger.LogWarning("Region {Name} is outside binary bounds", region.Name);
        return [];
    }

    // Both casts are safe now: offset < fileLength <= int.MaxValue, and the size is clamped
    // to the bytes that actually remain in the file.
    var regionOffset = (int)region.FileOffset;
    var regionSize = (int)Math.Min(region.Size, fileLength - region.FileOffset);
    var regionBytes = handle.Bytes.AsSpan(regionOffset, regionSize).ToArray();

    return DecodeRegion(regionBytes, handle.Bitness, region);
}

/// <summary>
/// Lazily decodes <paramref name="regionBytes"/> and yields every valid instruction.
/// </summary>
/// <param name="regionBytes">Private copy of the region's file contents.</param>
/// <param name="bitness">Decoder bitness taken from the binary handle.</param>
/// <param name="region">Region being decoded; supplies the name and starting virtual address.</param>
private IEnumerable<DisassembledInstruction> DecodeRegion(byte[] regionBytes, int bitness, CodeRegion region)
{
    var codeReader = new ByteArrayCodeReader(regionBytes);
    var decoder = global::Iced.Intel.Decoder.Create(bitness, codeReader);
    decoder.IP = region.VirtualAddress;

    _logger.LogDebug(
        "Disassembling region {Name} from 0x{Start:X} ({Size} bytes, {Bitness}-bit)",
        region.Name, region.VirtualAddress, regionBytes.Length, bitness);

    // MapInstruction locates an instruction's bytes at index ((int)IP - base) inside the array
    // it is given. Passing the region copy together with the (truncated) region virtual address
    // makes that index the offset inside the region; the wrap-around of both casts cancels out.
    var ipBase = unchecked((int)region.VirtualAddress);

    while (codeReader.CanReadByte)
    {
        decoder.Decode(out var instruction);

        // Invalid encodings are skipped. The decoder has already consumed the offending bytes
        // and advanced its IP, so IP must not be bumped again here or every following address
        // would be off by one.
        if (instruction.IsInvalid)
        {
            continue;
        }

        yield return MapInstruction(instruction, regionBytes, ipBase);
    }
}
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length)
{
    // Simplified mapping: the start address doubles as the file offset (assumes VA == file offset).
    // The synthetic region is named after the address and marked readable + executable, not writable.
    var label = $"0x{startAddress:X}";
    var adHocRegion = new CodeRegion(label, startAddress, startAddress, length, true, true, false);
    return Disassemble(binary, adHocRegion);
}
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);

    // Symbols without a recorded size get a 4096-byte window.
    var byteCount = symbol.Size > 0 ? symbol.Size : 4096UL;

    // The symbol address is used both as the virtual address and as the file offset.
    var symbolRegion = new CodeRegion(symbol.Name, symbol.Address, symbol.Address, byteCount, true, true, false);
    return Disassemble(binary, symbolRegion);
}
#region Format/Architecture Detection
/// <summary>
/// Identifies the container format from the first four bytes of the image.
/// </summary>
/// <param name="bytes">Raw file contents.</param>
/// <returns>ELF, PE or Mach-O when a known magic matches; otherwise <see cref="BinaryFormat.Raw"/>.</returns>
private static BinaryFormat DetectFormat(byte[] bytes)
{
    // Fewer than four bytes cannot carry any of the magics below.
    if (bytes.Length < 4)
    {
        return BinaryFormat.Raw;
    }

    return bytes switch
    {
        // 0x7F 'E' 'L' 'F'
        [0x7F, 0x45, 0x4C, 0x46, ..] => BinaryFormat.ELF,
        // 'M' 'Z'
        [0x4D, 0x5A, ..] => BinaryFormat.PE,
        // Mach-O magic FE ED FA CE/CF ...
        [0xFE, 0xED, 0xFA, 0xCE or 0xCF, ..] => BinaryFormat.MachO,
        // ... or the same four bytes in reverse order
        [0xCE or 0xCF, 0xFA, 0xED, 0xFE, ..] => BinaryFormat.MachO,
        _ => BinaryFormat.Raw
    };
}
/// <summary>
/// Dispatches to the format-specific architecture probe.
/// </summary>
/// <param name="bytes">Raw file contents.</param>
/// <param name="format">Container format already determined for <paramref name="bytes"/>.</param>
/// <returns>
/// The detected architecture, or <see cref="CpuArchitecture.X86_64"/> when the format is unknown
/// or the file is too short to contain the field being probed.
/// </returns>
private static CpuArchitecture DetectArchitecture(byte[] bytes, BinaryFormat format)
{
    return format switch
    {
        // e_machine is a 2-byte field at offset 18, so at least 20 bytes are required
        // (a 19-byte file previously passed the guard and made the read throw).
        BinaryFormat.ELF when bytes.Length >= 20 => DetectElfArchitecture(bytes),
        BinaryFormat.PE when bytes.Length > 0x40 => DetectPeArchitecture(bytes),
        BinaryFormat.MachO when bytes.Length > 8 => DetectMachOArchitecture(bytes),
        _ => CpuArchitecture.X86_64 // Default
    };
}
/// <summary>
/// Maps the ELF header's <c>e_machine</c> field to a <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="bytes">Raw ELF file contents.</param>
/// <returns>
/// The architecture for known machine codes; for unknown codes, x86-64 when the ELF class byte
/// (offset 4) is 2 and x86 otherwise. Files too short to hold <c>e_machine</c> yield x86-64.
/// </returns>
private static CpuArchitecture DetectElfArchitecture(byte[] bytes)
{
    // e_machine at offset 18 (2 bytes)
    const int MachineOffset = 18;
    if (bytes.Length < MachineOffset + 2)
    {
        return CpuArchitecture.X86_64;
    }

    // e_ident[EI_DATA] (offset 5) == 2 marks a big-endian file, whose header fields are stored
    // most-significant byte first. Reading them little-endian would turn e.g. EM_MIPS (0x0008)
    // into 0x0800 and misreport the file as x86.
    var isBigEndian = bytes[5] == 2;
    var machine = isBigEndian
        ? (ushort)((bytes[MachineOffset] << 8) | bytes[MachineOffset + 1])
        : (ushort)(bytes[MachineOffset] | (bytes[MachineOffset + 1] << 8));

    return machine switch
    {
        0x03 => CpuArchitecture.X86, // EM_386
        0x3E => CpuArchitecture.X86_64, // EM_X86_64
        0x28 => CpuArchitecture.ARM32, // EM_ARM
        0xB7 => CpuArchitecture.ARM64, // EM_AARCH64
        0x08 => CpuArchitecture.MIPS32, // EM_MIPS
        0xF3 => CpuArchitecture.RISCV64, // EM_RISCV
        _ => bytes[4] == 2 ? CpuArchitecture.X86_64 : CpuArchitecture.X86
    };
}
/// <summary>
/// Reads the COFF <c>Machine</c> field of a PE image and maps it to a <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="bytes">Raw PE file contents.</param>
/// <returns>
/// The mapped architecture; <see cref="CpuArchitecture.X86"/> when the header offset is invalid,
/// the file is truncated, or the machine code is not recognised.
/// </returns>
private static CpuArchitecture DetectPeArchitecture(byte[] bytes)
{
    // The 4-byte offset of the PE header lives at 0x3C in the DOS header.
    if (bytes.Length < 0x40)
    {
        return CpuArchitecture.X86;
    }

    var peOffset = BitConverter.ToInt32(bytes, 0x3C);

    // Machine is the 2 bytes at peOffset + 4. The comparison is written as a subtraction on the
    // length side so a huge attacker-controlled peOffset cannot overflow int and pass the check.
    if (peOffset < 0 || peOffset > bytes.Length - 6)
    {
        return CpuArchitecture.X86;
    }

    var machine = BitConverter.ToUInt16(bytes, peOffset + 4);
    return machine switch
    {
        0x014c => CpuArchitecture.X86, // IMAGE_FILE_MACHINE_I386
        0x8664 => CpuArchitecture.X86_64, // IMAGE_FILE_MACHINE_AMD64
        0xaa64 => CpuArchitecture.ARM64, // IMAGE_FILE_MACHINE_ARM64
        0x01c4 => CpuArchitecture.ARM32, // IMAGE_FILE_MACHINE_ARMNT
        _ => CpuArchitecture.X86
    };
}
/// <summary>
/// Maps the 32-bit CPU type stored at offset 4 of a Mach-O header to a <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="bytes">Raw Mach-O file contents; the caller guarantees more than 8 bytes.</param>
/// <returns>The mapped architecture, or <see cref="CpuArchitecture.X86_64"/> for unknown CPU types.</returns>
private static CpuArchitecture DetectMachOArchitecture(byte[] bytes)
{
    const int CpuTypeOffset = 4;

    uint cpuType;
    if (bytes[0] == 0xFE)
    {
        // Magic begins with 0xFE: the header is stored most-significant byte first.
        cpuType = (uint)((bytes[CpuTypeOffset] << 24)
            | (bytes[CpuTypeOffset + 1] << 16)
            | (bytes[CpuTypeOffset + 2] << 8)
            | bytes[CpuTypeOffset + 3]);
    }
    else
    {
        cpuType = BitConverter.ToUInt32(bytes, CpuTypeOffset);
    }

    switch (cpuType)
    {
        case 0x00000007: // CPU_TYPE_X86
            return CpuArchitecture.X86;
        case 0x01000007: // CPU_TYPE_X86_64
            return CpuArchitecture.X86_64;
        case 0x0000000C: // CPU_TYPE_ARM
            return CpuArchitecture.ARM32;
        case 0x0100000C: // CPU_TYPE_ARM64
            return CpuArchitecture.ARM64;
        default:
            return CpuArchitecture.X86_64;
    }
}
/// <summary>
/// Returns the word size for an architecture: 32 for the listed 32-bit targets, 64 for everything else.
/// </summary>
/// <param name="arch">Architecture to classify.</param>
private static int GetBitness(CpuArchitecture arch)
    => arch is CpuArchitecture.X86
        or CpuArchitecture.ARM32
        or CpuArchitecture.MIPS32
        or CpuArchitecture.PPC32
        ? 32
        : 64;
/// <summary>
/// Derives an ABI label purely from the container format.
/// </summary>
/// <param name="format">Container format of the binary.</param>
/// <returns>"gnu" for ELF, "msvc" for PE, "darwin" for Mach-O, otherwise <c>null</c>.</returns>
private static string? DetectAbi(BinaryFormat format)
{
    switch (format)
    {
        case BinaryFormat.ELF:
            return "gnu";
        case BinaryFormat.PE:
            return "msvc";
        case BinaryFormat.MachO:
            return "darwin";
        default:
            return null;
    }
}
/// <summary>
/// Best-effort extraction of the entry-point address from an ELF or PE header.
/// </summary>
/// <param name="bytes">Raw file contents.</param>
/// <param name="format">Container format of <paramref name="bytes"/>.</param>
/// <returns>
/// The entry point as stored in the header, or <c>null</c> when the format has no supported
/// header, the file is truncated, or the header cannot be read.
/// </returns>
private static ulong? TryGetEntryPoint(byte[] bytes, BinaryFormat format)
{
    // e_entry starts at offset 24 and is 8 bytes wide for ELF64 (class byte == 2), 4 for ELF32.
    const int ElfEntryOffset = 24;

    try
    {
        switch (format)
        {
            case BinaryFormat.ELF:
                if (bytes.Length <= ElfEntryOffset)
                {
                    return null;
                }

                if (bytes[4] == 2)
                {
                    // 64-bit entry point; require the full 8 bytes instead of relying on an exception.
                    if (bytes.Length < ElfEntryOffset + 8)
                    {
                        return null;
                    }

                    return BitConverter.ToUInt64(bytes, ElfEntryOffset);
                }

                // 32-bit entry point
                if (bytes.Length < ElfEntryOffset + 4)
                {
                    return null;
                }

                return BitConverter.ToUInt32(bytes, ElfEntryOffset);

            case BinaryFormat.PE:
                if (bytes.Length <= 0x40)
                {
                    return null;
                }

                return GetPeEntryPoint(bytes);

            default:
                return null;
        }
    }
    catch (Exception ex) when (ex is ArgumentException or IndexOutOfRangeException)
    {
        // Only the out-of-range failures a malformed header can provoke are treated as
        // "no entry point"; anything else is a real bug and is allowed to surface.
        return null;
    }
}
/// <summary>
/// Reads <c>AddressOfEntryPoint</c> from the optional header of a PE image.
/// </summary>
/// <param name="bytes">Raw PE file contents.</param>
/// <returns>
/// The 32-bit value stored at PE-header offset 40 (optional header offset 16), or <c>null</c>
/// when the PE header offset is invalid or the file ends before that field.
/// </returns>
private static ulong? GetPeEntryPoint(byte[] bytes)
{
    const int PeHeaderPointerOffset = 0x3C;
    // Signature + COFF header occupy 24 bytes; the field sits 16 bytes into the optional header.
    const int EntryPointFieldOffset = 24 + 16;

    if (bytes.Length < PeHeaderPointerOffset + 4)
    {
        return null;
    }

    var peOffset = BitConverter.ToInt32(bytes, PeHeaderPointerOffset);

    // The field spans [peOffset + 40, peOffset + 44). Subtracting on the length side keeps the
    // comparison overflow-free even for a hostile peOffset close to int.MaxValue.
    if (peOffset < 0 || peOffset > bytes.Length - (EntryPointFieldOffset + 4))
    {
        return null;
    }

    return BitConverter.ToUInt32(bytes, peOffset + EntryPointFieldOffset);
}
#endregion
#region Section/Symbol Parsing
/// <summary>
/// Enumerates the allocated, non-empty sections of an ELF image as code regions.
/// </summary>
/// <param name="bytes">Raw ELF file contents.</param>
/// <returns>
/// One region per section with SHF_ALLOC set and a non-zero size. When the section header table
/// is absent or does not fit inside the file, a single region covering the whole file is
/// returned. Nothing is returned when the file is too short to contain the ELF header.
/// </returns>
private static IEnumerable<CodeRegion> ParseElfSections(byte[] bytes)
{
    // ELF32 header is 52 bytes, ELF64 header is 64 bytes; the fields read below extend to the
    // end of the respective header.
    if (bytes.Length < 52) yield break;
    var is64Bit = bytes[4] == 2;
    if (is64Bit && bytes.Length < 64) yield break;

    var fileLength = (ulong)bytes.Length;
    ulong shoff = is64Bit ? BitConverter.ToUInt64(bytes, 40) : BitConverter.ToUInt32(bytes, 32);
    int shentsize = BitConverter.ToUInt16(bytes, is64Bit ? 58 : 46);
    int shnum = BitConverter.ToUInt16(bytes, is64Bit ? 60 : 48);
    int shstrndx = BitConverter.ToUInt16(bytes, is64Bit ? 62 : 50);

    // Smallest entry that still contains every field read below (ELF64: 64 bytes, ELF32: 40).
    var minEntrySize = is64Bit ? 64 : 40;

    // All bounds maths is done in ulong: shnum * shentsize can exceed int.MaxValue and shoff is
    // attacker-controlled, so int/long arithmetic could wrap and defeat the check.
    var tableSize = (ulong)shnum * (ulong)shentsize;
    if (shoff == 0 || shnum == 0 || shentsize < minEntrySize ||
        shoff > fileLength || tableSize > fileLength - shoff)
    {
        yield return new CodeRegion(".text", 0, 0, fileLength, true, true, false);
        yield break;
    }

    // Safe: the whole table lies inside the file, whose length fits in an int.
    var tableStart = (int)shoff;

    // Get string table offset
    ulong strtabOffset = 0;
    if (shstrndx < shnum)
    {
        var strtabHeaderOff = tableStart + shstrndx * shentsize;
        strtabOffset = is64Bit
            ? BitConverter.ToUInt64(bytes, strtabHeaderOff + 24)
            : BitConverter.ToUInt32(bytes, strtabHeaderOff + 16);
    }

    for (int i = 0; i < shnum; i++)
    {
        var sectionOffset = tableStart + i * shentsize;

        uint nameOffset = BitConverter.ToUInt32(bytes, sectionOffset);
        // sh_flags sits at offset 8 in both layouts; only its low 32 bits are needed.
        uint flags = BitConverter.ToUInt32(bytes, sectionOffset + 8);
        ulong addr = is64Bit ? BitConverter.ToUInt64(bytes, sectionOffset + 16) : BitConverter.ToUInt32(bytes, sectionOffset + 12);
        ulong offset = is64Bit ? BitConverter.ToUInt64(bytes, sectionOffset + 24) : BitConverter.ToUInt32(bytes, sectionOffset + 16);
        ulong size = is64Bit ? BitConverter.ToUInt64(bytes, sectionOffset + 32) : BitConverter.ToUInt32(bytes, sectionOffset + 20);

        // Resolve the name only when its position lies inside the file; otherwise fall back to
        // a synthetic name rather than letting the offset wrap during the int cast.
        var name = strtabOffset < fileLength && nameOffset < fileLength - strtabOffset
            ? ReadNullTerminatedString(bytes, (int)(strtabOffset + nameOffset))
            : string.Empty;
        if (string.IsNullOrEmpty(name)) name = $".section{i}";

        // SHF_ALLOC = 2, SHF_EXECINSTR = 4, SHF_WRITE = 1
        var isAllocated = (flags & 2) != 0;
        if (isAllocated && size > 0)
        {
            yield return new CodeRegion(
                name, addr, offset, size,
                IsExecutable: (flags & 4) != 0,
                IsReadable: true,
                IsWritable: (flags & 1) != 0);
        }
    }
}
/// <summary>
/// Enumerates the sections of a PE image that have raw data in the file.
/// </summary>
/// <param name="bytes">Raw PE file contents.</param>
/// <returns>
/// One region per section header whose SizeOfRawData is non-zero, using the section's
/// VirtualAddress, PointerToRawData and SizeOfRawData. Nothing is returned when the PE header
/// cannot be located or its first two signature bytes are not "PE".
/// </returns>
private static IEnumerable<CodeRegion> ParsePeSections(byte[] bytes)
{
    const int SectionHeaderSize = 40;

    if (bytes.Length < 64) yield break;
    var peOffset = BitConverter.ToInt32(bytes, 0x3C);

    // Subtract on the length side: "peOffset + 24" could overflow int for a hostile header and
    // let an out-of-range offset through.
    if (peOffset < 0 || peOffset > bytes.Length - 24) yield break;
    if (bytes[peOffset] != 'P' || bytes[peOffset + 1] != 'E') yield break;

    var numSections = BitConverter.ToUInt16(bytes, peOffset + 6);
    var optHeaderSize = BitConverter.ToUInt16(bytes, peOffset + 20);

    // The section table follows the 24-byte signature + COFF header and the optional header.
    // Tracked as long so the running offset cannot wrap near the 2 GiB array limit.
    long sectionTableOffset = (long)peOffset + 24 + optHeaderSize;

    for (int i = 0; i < numSections; i++)
    {
        long headerStart = sectionTableOffset + (long)i * SectionHeaderSize;
        if (headerStart + SectionHeaderSize > bytes.Length) break;
        var sectionOffset = (int)headerStart;

        var name = Encoding.ASCII.GetString(bytes, sectionOffset, 8).TrimEnd('\0');
        var virtualAddress = BitConverter.ToUInt32(bytes, sectionOffset + 12);
        var rawSize = BitConverter.ToUInt32(bytes, sectionOffset + 16);
        var rawOffset = BitConverter.ToUInt32(bytes, sectionOffset + 20);
        var characteristics = BitConverter.ToUInt32(bytes, sectionOffset + 36);

        // Sections without file-backed data are skipped.
        if (rawSize > 0)
        {
            yield return new CodeRegion(
                name, virtualAddress, rawOffset, rawSize,
                IsExecutable: (characteristics & 0x20000000) != 0,
                IsReadable: (characteristics & 0x40000000) != 0,
                IsWritable: (characteristics & 0x80000000) != 0);
        }
    }
}
/// <summary>
/// Placeholder Mach-O section parser: load commands are not inspected.
/// </summary>
/// <param name="bytes">Raw Mach-O file contents.</param>
/// <returns>A single readable, executable region named ".text" that spans the entire file.</returns>
private static IEnumerable<CodeRegion> ParseMachOSections(byte[] bytes)
{
    // Simplified - the whole binary is treated as code for now.
    var wholeFile = new CodeRegion(".text", 0, 0, (ulong)bytes.Length, true, true, false);
    yield return wholeFile;
}
/// <summary>
/// Placeholder ELF symbol parser; symbol tables are not read yet.
/// </summary>
/// <param name="bytes">Raw ELF file contents (currently unused).</param>
/// <returns>Always an empty sequence.</returns>
private static IEnumerable<SymbolInfo> ParseElfSymbols(byte[] bytes)
    => Array.Empty<SymbolInfo>();
/// <summary>
/// Placeholder PE export parser; the export directory is not read yet.
/// </summary>
/// <param name="bytes">Raw PE file contents (currently unused).</param>
/// <returns>Always an empty sequence.</returns>
private static IEnumerable<SymbolInfo> ParsePeExports(byte[] bytes)
    => Array.Empty<SymbolInfo>();
/// <summary>
/// Decodes an ASCII string that starts at <paramref name="offset"/> and ends at the first zero
/// byte, the end of the buffer, or after 256 bytes, whichever comes first.
/// </summary>
/// <param name="bytes">Buffer to read from.</param>
/// <param name="offset">Index of the first character.</param>
/// <returns>The decoded text, or an empty string when the offset is out of range or the string is empty.</returns>
private static string ReadNullTerminatedString(byte[] bytes, int offset)
{
    if (offset < 0 || offset >= bytes.Length)
    {
        return string.Empty;
    }

    // Only the first 256 bytes after the offset can ever contribute to the result.
    var window = bytes.AsSpan(offset, Math.Min(bytes.Length - offset, 256));
    var terminator = window.IndexOf((byte)0);
    var text = terminator < 0 ? window : window[..terminator];

    return text.IsEmpty ? string.Empty : Encoding.ASCII.GetString(text);
}
#endregion
#region Instruction Mapping
/// <summary>
/// Extracts this plugin's handle from a <see cref="BinaryInfo"/>.
/// </summary>
/// <param name="binary">Binary whose <c>Handle</c> is expected to be an <see cref="IcedBinaryHandle"/>.</param>
/// <returns>The typed handle.</returns>
/// <exception cref="ArgumentException">The handle is missing or is not an <see cref="IcedBinaryHandle"/>.</exception>
private static IcedBinaryHandle GetHandle(BinaryInfo binary)
    => binary.Handle as IcedBinaryHandle
        ?? throw new ArgumentException("Invalid binary handle - not an Iced handle", nameof(binary));
/// <summary>
/// Converts a decoded Iced instruction into the plugin-neutral <see cref="DisassembledInstruction"/>.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <param name="bytes">Buffer the raw instruction bytes are sliced from.</param>
/// <param name="regionOffset">
/// Value subtracted from the (int-truncated) instruction IP to obtain the index into
/// <paramref name="bytes"/>.
/// </param>
/// <returns>
/// The mapped instruction. Its raw bytes are empty when the computed slice would fall outside
/// <paramref name="bytes"/>.
/// </returns>
private static DisassembledInstruction MapInstruction(Instruction instruction, byte[] bytes, int regionOffset)
{
    var length = instruction.Length;
    var start = (int)instruction.IP - regionOffset;

    ImmutableArray<byte> encoding;
    if (start < 0 || start + length > bytes.Length)
    {
        // The slice is not inside the buffer: report no raw bytes instead of failing.
        encoding = ImmutableArray<byte>.Empty;
    }
    else
    {
        encoding = ImmutableArray.Create(bytes, start, length);
    }

    return new DisassembledInstruction(
        Address: instruction.IP,
        RawBytes: encoding,
        Mnemonic: instruction.Mnemonic.ToString(),
        OperandsText: FormatOperands(instruction),
        Kind: ClassifyInstruction(instruction),
        Operands: MapOperands(instruction));
}
/// <summary>
/// Renders the operands of an instruction in NASM syntax, without the mnemonic.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>The operand list, or an empty string for instructions without operands.</returns>
private static string FormatOperands(Instruction instruction)
{
    var formatter = new NasmFormatter();
    var output = new StringOutput();

    // Ask the formatter for the operands only. Formatting the whole instruction and cutting at
    // the first space breaks for prefixed instructions: "rep movsb" would report "movsb" as its
    // operand text.
    formatter.FormatAllOperands(instruction, output);
    return output.ToStringAndReset();
}
/// <summary>
/// Buckets an instruction into a coarse <see cref="InstructionKind"/>.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>
/// The first matching category, tested in this order: call, return, unconditional branch,
/// conditional branch, nop, syscall, arithmetic, logic, shift, compare, move. Anything else is
/// <see cref="InstructionKind.Unknown"/>.
/// </returns>
private static InstructionKind ClassifyInstruction(Instruction instruction)
{
    // Direct and indirect calls alike. Indirect forms (call rax / call [mem]) were previously
    // missed and fell through to Unknown, even though indirect jumps were already handled.
    if (instruction.IsCallNear || instruction.IsCallFar ||
        instruction.IsCallNearIndirect || instruction.IsCallFarIndirect)
        return InstructionKind.Call;

    var mnemonic = instruction.Mnemonic;

    if (mnemonic is Mnemonic.Ret or Mnemonic.Retf) return InstructionKind.Return;

    if (instruction.IsJmpShort || instruction.IsJmpNear || instruction.IsJmpFar ||
        instruction.IsJmpShortOrNear || instruction.IsJmpNearIndirect || instruction.IsJmpFarIndirect)
        return InstructionKind.Branch;

    if (instruction.IsJccShort || instruction.IsJccNear || instruction.IsJccShortOrNear)
        return InstructionKind.ConditionalBranch;

    if (mnemonic is Mnemonic.Nop) return InstructionKind.Nop;
    if (mnemonic is Mnemonic.Syscall or Mnemonic.Sysenter) return InstructionKind.Syscall;

    if (mnemonic is Mnemonic.Add or Mnemonic.Sub or Mnemonic.Mul or Mnemonic.Imul or
        Mnemonic.Div or Mnemonic.Idiv or Mnemonic.Inc or Mnemonic.Dec)
        return InstructionKind.Arithmetic;

    if (mnemonic is Mnemonic.And or Mnemonic.Or or Mnemonic.Xor or Mnemonic.Not or Mnemonic.Test)
        return InstructionKind.Logic;

    if (mnemonic is Mnemonic.Shl or Mnemonic.Shr or Mnemonic.Sal or Mnemonic.Sar or Mnemonic.Rol or Mnemonic.Ror)
        return InstructionKind.Shift;

    if (mnemonic is Mnemonic.Cmp) return InstructionKind.Compare;

    if (mnemonic is Mnemonic.Mov or Mnemonic.Movzx or Mnemonic.Movsx or
        Mnemonic.Lea or Mnemonic.Push or Mnemonic.Pop or Mnemonic.Xchg)
        return InstructionKind.Move;

    return InstructionKind.Unknown;
}
/// <summary>
/// Maps every operand of an instruction, preserving operand order.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>An immutable array with one entry per operand.</returns>
private static ImmutableArray<Operand> MapOperands(Instruction instruction)
{
    var operandCount = instruction.OpCount;
    var mapped = ImmutableArray.CreateBuilder<Operand>(operandCount);

    var slot = 0;
    while (slot < operandCount)
    {
        mapped.Add(MapOperand(instruction, slot, instruction.GetOpKind(slot)));
        slot++;
    }

    return mapped.ToImmutable();
}
/// <summary>
/// Maps a single Iced operand to the plugin-neutral <see cref="Operand"/>.
/// </summary>
/// <param name="instruction">Instruction that owns the operand.</param>
/// <param name="index">Zero-based operand index.</param>
/// <param name="kind">Iced operand kind at <paramref name="index"/>.</param>
/// <returns>
/// A register, immediate, address (near branch target) or memory operand; any other kind becomes
/// an <see cref="OperandType.Unknown"/> operand whose text is the kind's name.
/// </returns>
private static Operand MapOperand(Instruction instruction, int index, OpKind kind)
{
    switch (kind)
    {
        case OpKind.Register:
        {
            var registerName = instruction.GetOpRegister(index).ToString();
            return new Operand(
                OperandType.Register,
                registerName,
                Register: registerName);
        }

        case OpKind.Immediate8:
        case OpKind.Immediate16:
        case OpKind.Immediate32:
        case OpKind.Immediate64:
        case OpKind.Immediate8to16:
        case OpKind.Immediate8to32:
        case OpKind.Immediate8to64:
        case OpKind.Immediate32to64:
        {
            var immediate = instruction.GetImmediate(index);
            return new Operand(
                OperandType.Immediate,
                $"0x{immediate:X}",
                Value: (long)immediate);
        }

        case OpKind.NearBranch16:
        case OpKind.NearBranch32:
        case OpKind.NearBranch64:
        {
            var target = instruction.NearBranchTarget;
            return new Operand(
                OperandType.Address,
                $"0x{target:X}",
                Value: (long)target);
        }

        case OpKind.Memory:
        {
            // Absent base/index registers are reported as null rather than "None".
            var baseRegister = instruction.MemoryBase;
            var indexRegister = instruction.MemoryIndex;
            return new Operand(
                OperandType.Memory,
                FormatMemoryOperand(instruction),
                MemoryBase: baseRegister != global::Iced.Intel.Register.None ? baseRegister.ToString() : null,
                MemoryIndex: indexRegister != global::Iced.Intel.Register.None ? indexRegister.ToString() : null,
                MemoryScale: instruction.MemoryIndexScale,
                MemoryDisplacement: (long)instruction.MemoryDisplacement64);
        }

        default:
            return new Operand(OperandType.Unknown, kind.ToString());
    }
}
/// <summary>
/// Builds a bracketed textual form of an instruction's memory operand.
/// </summary>
/// <param name="instruction">Instruction that has a memory operand.</param>
/// <returns>
/// "[base+index*scale+0xDISP]" where each term is present only when set: the base and index
/// registers when they are not None, the scale when it is greater than 1, and the displacement
/// (upper-case hex) when it is non-zero.
/// </returns>
private static string FormatMemoryOperand(Instruction instruction)
{
    // Collect the terms that are present, then join them with '+'.
    var terms = new List<string>(3);

    var baseRegister = instruction.MemoryBase;
    if (baseRegister != global::Iced.Intel.Register.None)
    {
        terms.Add(baseRegister.ToString());
    }

    var indexRegister = instruction.MemoryIndex;
    if (indexRegister != global::Iced.Intel.Register.None)
    {
        var scale = instruction.MemoryIndexScale;
        terms.Add(scale > 1 ? $"{indexRegister}*{scale}" : indexRegister.ToString());
    }

    var displacement = instruction.MemoryDisplacement64;
    if (displacement != 0)
    {
        terms.Add($"0x{displacement:X}");
    }

    return "[" + string.Join("+", terms) + "]";
}
#endregion
}
/// <summary>
/// Internal handle for Iced binary data.
/// </summary>
/// <param name="Bytes">Complete contents of the loaded binary; regions are sliced from this array when disassembling.</param>
/// <param name="Bitness">Bitness value passed to the Iced decoder when it is created for this binary.</param>
internal sealed record IcedBinaryHandle(byte[] Bytes, int Bitness);