save progress

This commit is contained in:
StellaOps Bot
2026-01-02 21:06:27 +02:00
parent f46bde5575
commit 3f197814c5
441 changed files with 21545 additions and 4306 deletions

View File

@@ -0,0 +1,476 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Frozen;
using System.Collections.Immutable;
using B2R2;
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
using B2R2.FrontEnd.BinInterface;
using B2R2.FrontEnd.BinLifter;
using Microsoft.Extensions.Logging;
using Microsoft.FSharp.Collections;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// B2R2-based disassembly engine implementation.
/// B2R2 is a pure .NET binary analysis framework supporting ELF, PE, and Mach-O on x86-64 and ARM64.
/// </summary>
/// <remarks>
/// The <see cref="BinaryInfo.Handle"/> produced by <see cref="LoadBinary"/> is a B2R2 <c>BinHandle</c>;
/// every other method rejects a <see cref="BinaryInfo"/> whose handle is of a different type.
/// </remarks>
public sealed class B2R2DisassemblyEngine : IDisassemblyEngine
{
    /// <summary>Number of bytes disassembled for a symbol that reports a size of zero.</summary>
    private const ulong UnknownSymbolSpan = 4096;

    private static readonly FrozenSet<string> s_supportedArchitectures = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "x86_64", "x64", "amd64",
        "aarch64", "arm64"
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

    private static readonly FrozenSet<string> s_supportedFormats = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "ELF", "PE", "MachO", "Mach-O"
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

    private readonly ILogger<B2R2DisassemblyEngine> _logger;

    /// <summary>
    /// Creates a new B2R2 disassembly engine.
    /// </summary>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public B2R2DisassemblyEngine(ILogger<B2R2DisassemblyEngine> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public IReadOnlySet<string> SupportedArchitectures => s_supportedArchitectures;

    /// <inheritdoc />
    public IReadOnlySet<string> SupportedFormats => s_supportedFormats;

    /// <inheritdoc />
    public bool SupportsArchitecture(string architecture) =>
        s_supportedArchitectures.Contains(architecture);

    /// <inheritdoc />
    public bool SupportsFormat(string format) =>
        s_supportedFormats.Contains(format);

    /// <inheritdoc />
    /// <remarks>
    /// The whole stream is buffered in memory before being handed to B2R2.
    /// <paramref name="hint"/> is only logged; format and architecture come from B2R2's own detection.
    /// </remarks>
    public BinaryInfo LoadBinary(Stream stream, string? hint = null)
    {
        ArgumentNullException.ThrowIfNull(stream);
        _logger.LogDebug("Loading binary from stream (hint: {Hint})", hint ?? "none");

        // Read stream to byte array for B2R2
        using var memStream = new MemoryStream();
        stream.CopyTo(memStream);
        var bytes = memStream.ToArray();

        // Use B2R2 to detect and load the binary
        var binHandle = BinHandle.Init(ISA.DefaultISA, bytes);
        var binFile = binHandle.File;
        var format = DetectFormat(binFile);
        var architecture = MapArchitecture(binFile.ISA);
        var abi = DetectAbi(binFile, format);
        var buildId = ExtractBuildId(binFile);
        var metadata = ExtractMetadata(binFile, binHandle);

        _logger.LogInformation(
            "Loaded binary: Format={Format}, Architecture={Architecture}, ABI={Abi}",
            format, architecture, abi ?? "unknown");

        return new BinaryInfo(
            Format: format,
            Architecture: architecture,
            Abi: abi,
            BuildId: buildId,
            Metadata: metadata,
            Handle: binHandle);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Yields sections recognised by name as executable (<c>.text</c>, <c>.init</c>, ...) and the
    /// well-known data sections (<c>.data</c>, <c>.rodata</c>, <c>.bss</c>); check
    /// <see cref="CodeRegion.IsExecutable"/> before disassembling.
    /// </remarks>
    public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
    {
        // Validate here rather than inside the iterator so bad arguments fail at the call site,
        // not on first enumeration.
        ArgumentNullException.ThrowIfNull(binary);
        var handle = GetHandle(binary);
        return Enumerate();

        IEnumerable<CodeRegion> Enumerate()
        {
            foreach (var section in handle.File.GetSections())
            {
                var isExecutable = IsExecutableSection(section, binary.Format);
                if (!isExecutable && !IsDataSection(section))
                {
                    continue;
                }

                yield return new CodeRegion(
                    Name: section.Name,
                    VirtualAddress: section.Address,
                    FileOffset: (ulong)section.Offset,
                    Size: section.Size,
                    IsExecutable: isExecutable,
                    IsReadable: true, // Most sections are readable
                    IsWritable: IsWritableSection(section, binary.Format));
            }
        }
    }

    /// <inheritdoc />
    /// <remarks>Symbols with a null or empty name are skipped.</remarks>
    public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
    {
        // Eager validation; see GetCodeRegions.
        ArgumentNullException.ThrowIfNull(binary);
        var handle = GetHandle(binary);
        return Enumerate();

        IEnumerable<SymbolInfo> Enumerate()
        {
            foreach (var symbol in handle.File.GetSymbols())
            {
                if (string.IsNullOrEmpty(symbol.Name))
                {
                    continue;
                }

                yield return new SymbolInfo(
                    Name: symbol.Name,
                    Address: symbol.Address,
                    Size: symbol.Size,
                    Type: MapSymbolType(symbol),
                    Binding: MapSymbolBinding(symbol),
                    Section: GetSymbolSection(handle, symbol));
            }
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Decodes linearly from <see cref="CodeRegion.VirtualAddress"/>. Undecodable bytes are skipped:
    /// one byte at a time on x86, one 4-byte word at a time on AArch64 so that decoding stays
    /// aligned to the fixed instruction width.
    /// </remarks>
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
    {
        // Eager validation; see GetCodeRegions.
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(region);
        var handle = GetHandle(binary);

        var startAddr = region.VirtualAddress;
        // Saturate instead of wrapping when the region reaches the top of the address space.
        var endAddr = region.Size > ulong.MaxValue - startAddr
            ? ulong.MaxValue
            : startAddr + region.Size;
        var errorStep = IsAarch64(binary.Architecture) ? 4UL : 1UL;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            region.Name, startAddr, endAddr);

        return Decode();

        IEnumerable<DisassembledInstruction> Decode()
        {
            var addr = startAddr;
            while (addr < endAddr)
            {
                ulong step;
                var result = handle.TryParseInstr(addr);
                if (result.IsError)
                {
                    // Skip the undecodable unit and try again at the next candidate address.
                    step = errorStep;
                }
                else
                {
                    var instr = result.ResultValue;
                    var instrBytes = handle.File.Slice(addr, (int)instr.Length);
                    yield return MapInstruction(instr, instrBytes, addr);
                    step = instr.Length;
                }

                // A zero-length step would loop forever; a step past the end finishes the region
                // (and would otherwise risk wrapping addr).
                if (step == 0 || step > endAddr - addr)
                {
                    yield break;
                }

                addr += step;
            }
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// A symbol with size zero is disassembled for at most 4096 bytes; no function-boundary
    /// detection is performed beyond that cap.
    /// </remarks>
    public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(symbol);

        if (symbol.Size == 0)
        {
            _logger.LogWarning(
                "Symbol {Name} has zero size, attempting heuristic boundary detection",
                symbol.Name);
        }

        // Create a virtual code region for the symbol
        var region = new CodeRegion(
            Name: symbol.Name,
            VirtualAddress: symbol.Address,
            FileOffset: 0, // Not used for disassembly
            Size: symbol.Size > 0 ? symbol.Size : UnknownSymbolSpan,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }

    /// <summary>Unwraps the B2R2 handle stored in <paramref name="binary"/>.</summary>
    /// <exception cref="ArgumentException">The handle was not created by this engine.</exception>
    private static BinHandle GetHandle(BinaryInfo binary)
    {
        if (binary.Handle is not BinHandle handle)
        {
            throw new ArgumentException("Invalid binary handle - not a B2R2 BinHandle", nameof(binary));
        }

        return handle;
    }

    /// <summary>True when <paramref name="architecture"/> names AArch64 (fixed 4-byte instructions).</summary>
    private static bool IsAarch64(string architecture) =>
        string.Equals(architecture, "aarch64", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(architecture, "arm64", StringComparison.OrdinalIgnoreCase);

    /// <summary>Maps the B2R2 file format to the engine's format name ("ELF", "PE", "MachO" or "Unknown").</summary>
    private static string DetectFormat(IBinFile file)
    {
        return file.Format switch
        {
            FileFormat.ELFBinary => "ELF",
            FileFormat.PEBinary => "PE",
            FileFormat.MachBinary => "MachO",
            _ => "Unknown"
        };
    }

    /// <summary>Maps the B2R2 architecture to a lower-case architecture name, or "unknown".</summary>
    private static string MapArchitecture(ISA isa)
    {
        return isa.Arch switch
        {
            Architecture.IntelX64 => "x86_64",
            Architecture.IntelX86 => "x86",
            Architecture.AARCH64 => "aarch64",
            Architecture.ARMv7 => "arm",
            Architecture.MIPS32 => "mips",
            Architecture.MIPS64 => "mips64",
            Architecture.RISCV64 => "riscv64",
            _ => "unknown"
        };
    }

    /// <summary>
    /// Returns a default ABI name per container format. The file itself is not inspected yet
    /// (no OSABI / interpreter lookup), so every ELF is reported as "gnu".
    /// </summary>
    private static string? DetectAbi(IBinFile file, string format)
    {
        _ = file; // reserved for real OSABI / interpreter detection
        return format switch
        {
            "ELF" => "gnu",
            "PE" => "msvc",
            "MachO" => "darwin",
            _ => null
        };
    }

    /// <summary>
    /// Build-id extraction is not implemented; always returns <c>null</c>.
    /// </summary>
    /// <remarks>
    /// The previous body looked up the note section and then returned <c>null</c> on every path,
    /// relying on a swallowed exception when the section was missing. The lookup was removed
    /// because it could not influence the result.
    /// </remarks>
    private static string? ExtractBuildId(IBinFile file)
    {
        // TODO: locate ".note.gnu.build-id" / ".note.go.buildid" and parse the ELF NOTE payload.
        _ = file;
        return null;
    }

    /// <summary>Collects entry point, stripped flag (no symbols at all) and section count.</summary>
    private static IReadOnlyDictionary<string, object> ExtractMetadata(IBinFile file, BinHandle handle)
    {
        var metadata = new Dictionary<string, object>
        {
            ["entryPoint"] = file.EntryPoint,
            ["isStripped"] = !handle.File.GetSymbols().Any(),
            ["sectionCount"] = file.GetSections().Count()
        };
        return metadata;
    }

    /// <summary>
    /// Name-based executable check: the conventional code sections for any format, plus any
    /// section whose name contains "CODE" (case-insensitive) for PE. Section flags are not consulted.
    /// </summary>
    private static bool IsExecutableSection(Section section, string format)
    {
        var name = section.Name;
        if (name is ".text" or ".init" or ".fini" or ".plt")
        {
            return true;
        }

        return format == "PE" && name.Contains("CODE", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>True for the well-known data sections <c>.data</c>, <c>.rodata</c> and <c>.bss</c>.</summary>
    private static bool IsDataSection(Section section)
    {
        return section.Name is ".data" or ".rodata" or ".bss";
    }

    /// <summary>
    /// Name-based writable check (<c>.data</c>, <c>.bss</c>, or a name containing "rw").
    /// <paramref name="format"/> is currently unused.
    /// </summary>
    private static bool IsWritableSection(Section section, string format)
    {
        var name = section.Name;
        return name == ".data" || name == ".bss" || name.Contains("rw", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Maps a B2R2 symbol kind to <see cref="SymbolType"/>.</summary>
    private static SymbolType MapSymbolType(Symbol symbol)
    {
        return symbol.Kind switch
        {
            SymbolKind.FunctionType => SymbolType.Function,
            SymbolKind.ObjectType => SymbolType.Object,
            SymbolKind.SectionType => SymbolType.Section,
            SymbolKind.FileType => SymbolType.File,
            _ => SymbolType.Unknown
        };
    }

    /// <summary>
    /// Derives a <see cref="SymbolBinding"/> from the symbol's visibility.
    /// <see cref="SymbolBinding.Weak"/> is never produced by this mapping.
    /// </summary>
    private static SymbolBinding MapSymbolBinding(Symbol symbol)
    {
        return symbol.Visibility switch
        {
            SymbolVisibility.VisibilityLocal or
            SymbolVisibility.HiddenVisibility or
            SymbolVisibility.InternalVisibility => SymbolBinding.Local,
            SymbolVisibility.DefaultVisibility => SymbolBinding.Global,
            _ => SymbolBinding.Unknown
        };
    }

    /// <summary>
    /// Returns the name of the first section whose address range contains the symbol, or
    /// <c>null</c> when no section matches or the section table cannot be read.
    /// </summary>
    private static string? GetSymbolSection(BinHandle handle, Symbol symbol)
    {
        try
        {
            foreach (var section in handle.File.GetSections())
            {
                // Subtract instead of adding so a section ending at the top of the address
                // space cannot wrap the comparison.
                if (symbol.Address >= section.Address &&
                    symbol.Address - section.Address < section.Size)
                {
                    return section.Name;
                }
            }
        }
        catch (Exception)
        {
            // Section lookup is best-effort: a symbol without a section is still useful.
        }

        return null;
    }

    /// <summary>Converts a B2R2 instruction plus its raw bytes into the engine-neutral record.</summary>
    private static DisassembledInstruction MapInstruction(Instruction instr, FSharpList<byte> rawBytes, ulong address)
    {
        var mnemonic = instr.Mnemonic;
        var operands = instr.Operands.ToImmutableArray();
        var operandsText = string.Join(", ", operands.Select(op => op.ToString()));

        return new DisassembledInstruction(
            Address: address,
            RawBytes: rawBytes.ToImmutableArray(),
            Mnemonic: mnemonic,
            OperandsText: operandsText,
            Kind: ClassifyInstruction(mnemonic),
            Operands: operands.Select(MapOperand).ToImmutableArray());
    }

    /// <summary>
    /// Classifies an instruction by mnemonic only (case-insensitive). Checks run top to bottom,
    /// so the first matching rule wins; anything unmatched is <see cref="InstructionKind.Unknown"/>.
    /// </summary>
    private static InstructionKind ClassifyInstruction(string mnemonic)
    {
        var upper = mnemonic.ToUpperInvariant();

        // Returns
        if (upper is "RET" or "RETN" or "RETF")
        {
            return InstructionKind.Return;
        }

        // Calls: x86 CALL*, AArch64 branch-with-link
        if (upper.StartsWith("CALL", StringComparison.Ordinal) || upper is "BL" or "BLR")
        {
            return InstructionKind.Call;
        }

        // Unconditional jumps
        if (upper is "JMP" or "B" or "BR")
        {
            return InstructionKind.Branch;
        }

        // Conditional jumps (x86 Jcc)
        if (upper.Length > 1 && upper.StartsWith("J", StringComparison.Ordinal))
        {
            return InstructionKind.ConditionalBranch;
        }

        // ARM conditional branches (B.cond, CBZ/CBNZ, TBZ/TBNZ)
        if (upper.StartsWith("B.", StringComparison.Ordinal) ||
            upper.StartsWith("CB", StringComparison.Ordinal) ||
            upper.StartsWith("TB", StringComparison.Ordinal))
        {
            return InstructionKind.ConditionalBranch;
        }

        // NOPs
        if (upper is "NOP" or "FNOP")
        {
            return InstructionKind.Nop;
        }

        // System calls
        if (upper is "SYSCALL" or "SYSENTER" or "INT" or "SVC")
        {
            return InstructionKind.Syscall;
        }

        // Arithmetic
        if (upper is "ADD" or "SUB" or "MUL" or "DIV" or "IMUL" or "IDIV" or
            "INC" or "DEC" or "NEG" or "ADC" or "SBB")
        {
            return InstructionKind.Arithmetic;
        }

        // Logic. TEST is classified here; it used to be listed again under compares,
        // where it could never be reached.
        if (upper is "AND" or "OR" or "XOR" or "NOT" or "TEST")
        {
            return InstructionKind.Logic;
        }

        // Shifts
        if (upper is "SHL" or "SHR" or "SAL" or "SAR" or "ROL" or "ROR" or
            "LSL" or "LSR" or "ASR")
        {
            return InstructionKind.Shift;
        }

        // Moves
        if (upper.StartsWith("MOV", StringComparison.Ordinal) ||
            upper is "LEA" or "PUSH" or "POP" or "XCHG")
        {
            return InstructionKind.Move;
        }

        // Loads (ARM LDR/LDP/LDUR...); the "LD" prefix already covers "LDR"
        if (upper.StartsWith("LD", StringComparison.Ordinal))
        {
            return InstructionKind.Load;
        }

        // Stores (ARM STR/STP/STUR...); the "ST" prefix already covers "STR"
        if (upper.StartsWith("ST", StringComparison.Ordinal))
        {
            return InstructionKind.Store;
        }

        // Compares
        if (upper is "CMP" or "CMPS" or "SCAS")
        {
            return InstructionKind.Compare;
        }

        // Vector/SIMD
        if (upper.StartsWith("V", StringComparison.Ordinal) ||
            upper.Contains("XMM", StringComparison.Ordinal) ||
            upper.Contains("YMM", StringComparison.Ordinal) ||
            upper.Contains("ZMM", StringComparison.Ordinal))
        {
            return InstructionKind.Vector;
        }

        // Floating point (x87-style F* arithmetic)
        if (upper.StartsWith("F", StringComparison.Ordinal) &&
            (upper.Contains("ADD", StringComparison.Ordinal) ||
             upper.Contains("SUB", StringComparison.Ordinal) ||
             upper.Contains("MUL", StringComparison.Ordinal) ||
             upper.Contains("DIV", StringComparison.Ordinal)))
        {
            return InstructionKind.FloatingPoint;
        }

        return InstructionKind.Unknown;
    }

    /// <summary>
    /// Wraps an operand as text only; structured fields are not populated and the type is
    /// always <see cref="OperandType.Unknown"/>.
    /// </summary>
    private static Operand MapOperand(IOperand operand)
    {
        return new Operand(
            Type: OperandType.Unknown,
            Text: operand.ToString());
    }
}

View File

@@ -0,0 +1,45 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.BinaryIndex.Disassembly.Iced;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Dependency-injection helpers that register an <see cref="IDisassemblyEngine"/>.
/// </summary>
/// <remarks>
/// Both helpers use try-add semantics: if an <see cref="IDisassemblyEngine"/> is already
/// registered, the call leaves the existing registration in place.
/// </remarks>
public static class DisassemblyServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="IcedDisassemblyEngine"/> (x86 / x86-64) as the singleton
    /// <see cref="IDisassemblyEngine"/>, unless one is already registered.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddIcedDisassembly(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var descriptor = ServiceDescriptor.Singleton<IDisassemblyEngine, IcedDisassemblyEngine>();
        services.TryAdd(descriptor);
        return services;
    }

    /// <summary>
    /// Registers <typeparamref name="TEngine"/> as the singleton
    /// <see cref="IDisassemblyEngine"/>, unless one is already registered.
    /// </summary>
    /// <typeparam name="TEngine">The engine implementation type.</typeparam>
    /// <param name="services">The service collection.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddDisassemblyEngine<TEngine>(this IServiceCollection services)
        where TEngine : class, IDisassemblyEngine
    {
        ArgumentNullException.ThrowIfNull(services);

        var descriptor = ServiceDescriptor.Singleton<IDisassemblyEngine, TEngine>();
        services.TryAdd(descriptor);
        return services;
    }
}

View File

@@ -0,0 +1,256 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Abstraction over binary disassembly engines.
/// Hides implementation details (B2R2's F#) from C# consumers.
/// </summary>
/// <remarks>
/// A <see cref="BinaryInfo"/> is tied to the engine that produced it: each engine stores its own
/// handle type in <see cref="BinaryInfo.Handle"/> and throws <see cref="ArgumentException"/> when
/// given a handle it did not create.
/// </remarks>
public interface IDisassemblyEngine
{
/// <summary>
/// Gets the architecture names this engine accepts (for example "x86_64" or "aarch64").
/// </summary>
IReadOnlySet<string> SupportedArchitectures { get; }
/// <summary>
/// Gets the binary format names this engine accepts (for example "ELF" or "PE").
/// </summary>
IReadOnlySet<string> SupportedFormats { get; }
/// <summary>
/// Loads a binary from a stream and detects format/architecture.
/// </summary>
/// <param name="stream">The binary stream to load; it is read to the end.</param>
/// <param name="hint">
/// Optional hint for format/architecture detection. How the hint is used is engine-specific;
/// an engine may ignore it.
/// </param>
/// <returns>Binary information including format, architecture, and metadata.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
BinaryInfo LoadBinary(Stream stream, string? hint = null);
/// <summary>
/// Gets the regions (sections) of the binary.
/// </summary>
/// <remarks>
/// Despite the name, implementations may also return non-executable sections such as
/// <c>.data</c> or <c>.rodata</c>; filter on <see cref="CodeRegion.IsExecutable"/> before disassembling.
/// </remarks>
/// <param name="binary">The loaded binary information.</param>
/// <returns>Enumerable of code regions.</returns>
IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary);
/// <summary>
/// Gets symbols (functions) from the binary.
/// </summary>
/// <param name="binary">The loaded binary information.</param>
/// <returns>Enumerable of symbol information; may be empty when the engine cannot read symbols.</returns>
IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary);
/// <summary>
/// Disassembles a code region to instructions.
/// </summary>
/// <param name="binary">The loaded binary information.</param>
/// <param name="region">The code region to disassemble.</param>
/// <returns>Enumerable of disassembled instructions; undecodable bytes are skipped.</returns>
IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region);
/// <summary>
/// Disassembles a specific symbol/function.
/// </summary>
/// <param name="binary">The loaded binary information.</param>
/// <param name="symbol">
/// The symbol to disassemble. When <see cref="SymbolInfo.Size"/> is 0 the engines cap the
/// disassembly at 4096 bytes.
/// </param>
/// <returns>Enumerable of disassembled instructions.</returns>
IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol);
/// <summary>
/// Checks if the engine supports the given architecture.
/// </summary>
/// <param name="architecture">Architecture name to look up.</param>
/// <returns><c>true</c> when the name is in <see cref="SupportedArchitectures"/>.</returns>
bool SupportsArchitecture(string architecture);
/// <summary>
/// Checks if the engine supports the given format.
/// </summary>
/// <param name="format">Format name to look up.</param>
/// <returns><c>true</c> when the name is in <see cref="SupportedFormats"/>.</returns>
bool SupportsFormat(string format);
}
/// <summary>
/// Information about a loaded binary.
/// </summary>
/// <remarks>
/// Instances are produced by <see cref="IDisassemblyEngine.LoadBinary"/> and must be passed back
/// only to the engine that created them, because <paramref name="Handle"/> is engine-specific.
/// </remarks>
/// <param name="Format">Binary format: ELF, PE, MachO; engines may also report "Raw" or "Unknown".</param>
/// <param name="Architecture">CPU architecture: x86_64, aarch64.</param>
/// <param name="Abi">Application binary interface: gnu, musl, msvc; null when not determined.</param>
/// <param name="BuildId">Build identifier if present; null otherwise.</param>
/// <param name="Metadata">Additional metadata from the binary; the set of keys is engine-specific.</param>
/// <param name="Handle">
/// Internal handle for the disassembly engine (a B2R2 <c>BinHandle</c> for the B2R2 engine,
/// the raw <c>byte[]</c> image for the Iced engine). Treat as opaque.
/// </param>
public sealed record BinaryInfo(
string Format,
string Architecture,
string? Abi,
string? BuildId,
IReadOnlyDictionary<string, object> Metadata,
object Handle);
/// <summary>
/// Represents a code region (section) in a binary.
/// </summary>
/// <param name="Name">Section name: .text, .rodata, etc.</param>
/// <param name="VirtualAddress">
/// Virtual address in memory, as recorded in the section header.
/// NOTE(review): for PE the Iced engine passes the header's VirtualAddress field unchanged,
/// which looks like an RVA without the image base added - confirm before relying on it as an absolute address.
/// </param>
/// <param name="FileOffset">Offset in the binary file.</param>
/// <param name="Size">Size in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
public sealed record CodeRegion(
string Name,
ulong VirtualAddress,
ulong FileOffset,
ulong Size,
bool IsExecutable,
bool IsReadable,
bool IsWritable);
/// <summary>
/// Information about a symbol in the binary.
/// </summary>
/// <param name="Name">Symbol name.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">
/// Size in bytes (0 if unknown). A size of 0 makes
/// <see cref="IDisassemblyEngine.DisassembleSymbol"/> fall back to a fixed 4096-byte window.
/// </param>
/// <param name="Type">Symbol type.</param>
/// <param name="Binding">Symbol binding.</param>
/// <param name="Section">Name of the section containing the symbol; null when it could not be resolved.</param>
public sealed record SymbolInfo(
string Name,
ulong Address,
ulong Size,
SymbolType Type,
SymbolBinding Binding,
string? Section);
/// <summary>
/// Type of symbol.
/// </summary>
/// <remarks>
/// <see cref="Unknown"/> is the first member and therefore the default value (0).
/// </remarks>
public enum SymbolType
{
/// <summary>Unknown or unspecified type.</summary>
Unknown,
/// <summary>Function/procedure.</summary>
Function,
/// <summary>Data object.</summary>
Object,
/// <summary>Section symbol.</summary>
Section,
/// <summary>Source file name.</summary>
File,
/// <summary>Common block symbol.</summary>
Common,
/// <summary>Thread-local storage.</summary>
Tls
}
/// <summary>
/// Symbol binding/visibility.
/// </summary>
/// <remarks>
/// <see cref="Unknown"/> is the first member and therefore the default value (0).
/// The B2R2 engine derives this value from symbol visibility and never reports <see cref="Weak"/>.
/// </remarks>
public enum SymbolBinding
{
/// <summary>Unknown binding.</summary>
Unknown,
/// <summary>Local symbol (not visible outside the object).</summary>
Local,
/// <summary>Global symbol (visible to other objects).</summary>
Global,
/// <summary>Weak symbol (can be overridden).</summary>
Weak
}
/// <summary>
/// A disassembled instruction.
/// </summary>
/// <param name="Address">Virtual address of the instruction.</param>
/// <param name="RawBytes">Raw bytes of the instruction.</param>
/// <param name="Mnemonic">Instruction mnemonic (e.g., MOV, ADD, JMP); casing is whatever the engine emits.</param>
/// <param name="OperandsText">Text representation of operands.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Parsed operands, in the order the engine reports them.</param>
public sealed record DisassembledInstruction(
ulong Address,
ImmutableArray<byte> RawBytes,
string Mnemonic,
string OperandsText,
InstructionKind Kind,
ImmutableArray<Operand> Operands);
/// <summary>
/// Classification of instruction types.
/// </summary>
/// <remarks>
/// <see cref="Unknown"/> is the first member and therefore the default value (0).
/// An instruction receives exactly one kind, chosen by the engine's own classifier.
/// </remarks>
public enum InstructionKind
{
/// <summary>Unknown or unclassified instruction.</summary>
Unknown,
/// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
Arithmetic,
/// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
Logic,
/// <summary>Data movement (MOV, PUSH, POP).</summary>
Move,
/// <summary>Memory load operation.</summary>
Load,
/// <summary>Memory store operation.</summary>
Store,
/// <summary>Unconditional branch (JMP).</summary>
Branch,
/// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
ConditionalBranch,
/// <summary>Function call.</summary>
Call,
/// <summary>Function return.</summary>
Return,
/// <summary>No operation.</summary>
Nop,
/// <summary>System call.</summary>
Syscall,
/// <summary>Software interrupt. NOTE(review): the B2R2 engine reports INT as <see cref="Syscall"/>, not this kind - confirm which is intended.</summary>
Interrupt,
/// <summary>Compare operation.</summary>
Compare,
/// <summary>Shift operation.</summary>
Shift,
/// <summary>Vector/SIMD operation.</summary>
Vector,
/// <summary>Floating point operation.</summary>
FloatingPoint
}
/// <summary>
/// An instruction operand.
/// </summary>
/// <remarks>
/// Only <paramref name="Type"/> and <paramref name="Text"/> are required; every structured field
/// defaults to null. The B2R2 engine currently fills in the text only and reports
/// <see cref="OperandType.Unknown"/>.
/// </remarks>
/// <param name="Type">Operand type.</param>
/// <param name="Text">Text representation.</param>
/// <param name="Value">Immediate value if applicable.</param>
/// <param name="Register">Register name if applicable.</param>
/// <param name="MemoryBase">Base register for memory operand.</param>
/// <param name="MemoryIndex">Index register for memory operand.</param>
/// <param name="MemoryScale">Scale factor for indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement for memory operand.</param>
public sealed record Operand(
OperandType Type,
string Text,
long? Value = null,
string? Register = null,
string? MemoryBase = null,
string? MemoryIndex = null,
int? MemoryScale = null,
long? MemoryDisplacement = null);
/// <summary>
/// Type of operand.
/// </summary>
/// <remarks>
/// <see cref="Unknown"/> is the first member and therefore the default value (0).
/// </remarks>
public enum OperandType
{
/// <summary>Unknown operand type.</summary>
Unknown,
/// <summary>CPU register.</summary>
Register,
/// <summary>Immediate value.</summary>
Immediate,
/// <summary>Memory reference.</summary>
Memory,
/// <summary>Address/label.</summary>
Address
}

View File

@@ -0,0 +1,597 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Frozen;
using System.Collections.Immutable;
using System.Text;
using Iced.Intel;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Disassembly.Iced;
/// <summary>
/// Iced-based disassembly engine for x86/x64 binaries.
/// Iced is a pure .NET, high-performance x86/x64 disassembler.
/// </summary>
public sealed class IcedDisassemblyEngine : IDisassemblyEngine
{
private readonly ILogger<IcedDisassemblyEngine> _logger;

// Architecture aliases accepted by this engine (compared case-insensitively).
private static readonly FrozenSet<string> s_supportedArchitectures = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "x86_64", "x64", "amd64",
    "x86", "i386", "i686"
}.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

// Container formats accepted by this engine (compared case-insensitively).
private static readonly FrozenSet<string> s_supportedFormats = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "ELF", "PE", "Raw"
}.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Initializes the Iced engine with the logger it reports to.
/// </summary>
/// <param name="logger">Logger instance; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public IcedDisassemblyEngine(ILogger<IcedDisassemblyEngine> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}

/// <inheritdoc />
public IReadOnlySet<string> SupportedArchitectures
{
    get { return s_supportedArchitectures; }
}

/// <inheritdoc />
public IReadOnlySet<string> SupportedFormats
{
    get { return s_supportedFormats; }
}

/// <inheritdoc />
public bool SupportsArchitecture(string architecture)
{
    return s_supportedArchitectures.Contains(architecture);
}

/// <inheritdoc />
public bool SupportsFormat(string format)
{
    return s_supportedFormats.Contains(format);
}
/// <inheritdoc />
/// <remarks>
/// Buffers the whole stream, sniffs the container format from its magic bytes and derives the
/// architecture from <paramref name="hint"/> or the headers. The resulting handle is the raw image.
/// </remarks>
public BinaryInfo LoadBinary(Stream stream, string? hint = null)
{
    ArgumentNullException.ThrowIfNull(stream);
    _logger.LogDebug("Loading binary from stream (hint: {Hint})", hint ?? "none");

    // Materialise the stream; every later step works on the in-memory image.
    byte[] image;
    using (var buffer = new MemoryStream())
    {
        stream.CopyTo(buffer);
        image = buffer.ToArray();
    }

    var detectedFormat = DetectFormat(image);
    var detectedArchitecture = DetectArchitecture(image, detectedFormat, hint);
    var detectedAbi = DetectAbi(detectedFormat);

    var details = new Dictionary<string, object>();
    details["size"] = image.Length;
    details["format"] = detectedFormat;
    details["architecture"] = detectedArchitecture;

    _logger.LogInformation(
        "Loaded binary: Format={Format}, Architecture={Architecture}, Size={Size}",
        detectedFormat, detectedArchitecture, image.Length);

    return new BinaryInfo(detectedFormat, detectedArchitecture, detectedAbi, null, details, image);
}
/// <inheritdoc />
/// <remarks>
/// ELF and PE images are parsed for their section tables. Any other format is treated as a raw
/// blob and reported as a single executable <c>.text</c> region covering the whole image at
/// virtual address 0.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="binary"/> is null.</exception>
/// <exception cref="ArgumentException">The handle was not created by this engine.</exception>
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    // This method must not be an iterator: it forwards the parsers' sequences with plain
    // `return`, which cannot be combined with `yield return` in one body (CS1622).
    // Being a regular method also means the checks below run at call time.
    ArgumentNullException.ThrowIfNull(binary);
    var bytes = GetBytes(binary);

    if (binary.Format == "ELF")
    {
        return ParseElfSections(bytes);
    }

    if (binary.Format == "PE")
    {
        return ParsePeSections(bytes);
    }

    // Raw binary - treat entire content as code
    return new[]
    {
        new CodeRegion(
            Name: ".text",
            VirtualAddress: 0,
            FileOffset: 0,
            Size: (ulong)bytes.Length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false)
    };
}
/// <inheritdoc />
/// <remarks>Dispatches on the container format; formats other than ELF and PE carry no symbols.</remarks>
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    ArgumentNullException.ThrowIfNull(binary);
    var image = GetBytes(binary);

    return binary.Format switch
    {
        "ELF" => ParseElfSymbols(image),
        "PE" => ParsePeExports(image),
        // Raw binaries have no symbol information
        _ => Array.Empty<SymbolInfo>(),
    };
}
/// <inheritdoc />
/// <remarks>
/// Decodes the bytes at <see cref="CodeRegion.FileOffset"/> (clamped to the end of the image),
/// reporting addresses relative to <see cref="CodeRegion.VirtualAddress"/>. When a byte sequence
/// does not decode, decoding resumes exactly one byte after the failed start position.
/// A region that is empty or starts beyond the image yields no instructions.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="binary"/> or <paramref name="region"/> is null.</exception>
/// <exception cref="ArgumentException">The handle was not created by this engine.</exception>
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    // Validation and bounds checks run at call time; only the decode loop is lazy.
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    var bytes = GetBytes(binary);
    var bitness = GetBitness(binary.Architecture);

    // Compare as ulong before narrowing: casting a large offset to int first can produce a
    // negative index that slips past the bounds check.
    if (region.Size == 0 || region.FileOffset >= (ulong)bytes.Length)
    {
        _logger.LogWarning("Region {Name} is outside binary bounds", region.Name);
        return Array.Empty<DisassembledInstruction>();
    }

    var regionOffset = (int)region.FileOffset;
    var regionSize = (int)Math.Min(region.Size, (ulong)(bytes.Length - regionOffset));
    return DecodeRegion();

    IEnumerable<DisassembledInstruction> DecodeRegion()
    {
        // Read straight from the image slice; no intermediate copy or Span local is needed.
        var codeReader = new ByteArrayCodeReader(bytes, regionOffset, regionSize);
        var decoder = Decoder.Create(bitness, codeReader);
        decoder.IP = region.VirtualAddress;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} ({Size} bytes, {Bitness}-bit)",
            region.Name, region.VirtualAddress, regionSize, bitness);

        while (codeReader.CanReadByte)
        {
            var startPosition = codeReader.Position;
            var startIp = decoder.IP;

            decoder.Decode(out var instruction);
            if (instruction.IsInvalid)
            {
                // A failed decode has already consumed bytes. Rewind the reader and the address
                // counter together so both resume one byte past the failed start; bumping only
                // decoder.IP would leave every later address out of step with the bytes.
                codeReader.Position = startPosition + 1;
                decoder.IP = startIp + 1;
                continue;
            }

            yield return MapInstruction(instruction, bytes, regionOffset);
        }
    }
}
/// <inheritdoc />
/// <remarks>
/// Wraps the symbol in a synthetic region and delegates to <see cref="Disassemble"/>.
/// The symbol address is used as the file offset as well, and a symbol of unknown size is
/// capped at 4096 bytes.
/// </remarks>
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);

    // Default max if unknown
    ulong span = symbol.Size;
    if (span == 0)
    {
        span = 4096UL;
    }

    // Simplified - assumes VA == file offset for now
    var symbolRegion = new CodeRegion(
        symbol.Name,
        symbol.Address,
        symbol.Address,
        span,
        true,
        true,
        false);

    return Disassemble(binary, symbolRegion);
}
/// <summary>Returns the raw image stored in <paramref name="binary"/>'s handle.</summary>
/// <exception cref="ArgumentException">The handle is not a byte array (not created by this engine).</exception>
private static byte[] GetBytes(BinaryInfo binary)
{
    return binary.Handle as byte[]
        ?? throw new ArgumentException("Invalid binary handle - not a byte array", nameof(binary));
}
/// <summary>
/// Identifies the container format from the first four bytes of the image.
/// </summary>
/// <param name="bytes">The binary image.</param>
/// <returns>"ELF", "PE", "MachO", or "Raw" when nothing matches or fewer than four bytes are present.</returns>
private static string DetectFormat(byte[] bytes)
{
    if (bytes.Length < 4)
    {
        return "Raw";
    }

    // Fold the first four bytes into one big-endian word so each signature is a single constant.
    var magic = ((uint)bytes[0] << 24) | ((uint)bytes[1] << 16) | ((uint)bytes[2] << 8) | bytes[3];

    // "MZ" only fixes the first two bytes.
    if ((magic >> 16) == 0x4D5A)
    {
        return "PE";
    }

    return magic switch
    {
        0x7F454C46u => "ELF",                 // 0x7F 'E' 'L' 'F'
        0xFEEDFACEu or 0xFEEDFACFu => "MachO", // 32-/64-bit Mach-O
        0xCEFAEDFEu or 0xCFFAEDFEu => "MachO", // same magics, byte-swapped
        _ => "Raw",
    };
}
/// <summary>
/// Chooses between "x86_64" and "x86" for the image.
/// </summary>
/// <remarks>
/// Precedence: a hint containing "64" selects x86_64; a hint containing "32", "i386" or "i686"
/// selects x86; otherwise the ELF class byte or the PE machine field decides. Anything that
/// cannot be determined defaults to "x86_64". Only bitness is inspected for ELF - the machine
/// type is not checked.
/// </remarks>
/// <param name="bytes">The binary image.</param>
/// <param name="format">Format name as returned by <c>DetectFormat</c>.</param>
/// <param name="hint">Optional caller-supplied hint; may be null or empty.</param>
/// <returns>"x86_64" or "x86".</returns>
private static string DetectArchitecture(byte[] bytes, string format, string? hint)
{
    if (!string.IsNullOrEmpty(hint))
    {
        if (hint.Contains("64", StringComparison.OrdinalIgnoreCase))
        {
            return "x86_64";
        }

        if (hint.Contains("32", StringComparison.OrdinalIgnoreCase) ||
            hint.Contains("i386", StringComparison.OrdinalIgnoreCase) ||
            hint.Contains("i686", StringComparison.OrdinalIgnoreCase))
        {
            return "x86";
        }
    }

    if (format == "ELF" && bytes.Length > 5)
    {
        // ELF class: bytes[4] - 1=32-bit, 2=64-bit
        return bytes[4] == 2 ? "x86_64" : "x86";
    }

    if (format == "PE" && bytes.Length > 0x40)
    {
        // e_lfanew (offset 0x3C) comes from the file and may be hostile. Compare it against
        // "length - 6" rather than computing "peOffset + 6", which overflows for values near
        // int.MaxValue and would then let an out-of-range read through.
        var peOffset = BitConverter.ToInt32(bytes, 0x3C);
        if (peOffset > 0 && peOffset < bytes.Length - 6)
        {
            // Machine field follows the 4-byte PE signature; 0x8664 = AMD64.
            var machine = BitConverter.ToUInt16(bytes, peOffset + 4);
            return machine == 0x8664 ? "x86_64" : "x86";
        }
    }

    // Default to 64-bit
    return "x86_64";
}
/// <summary>
/// Maps a container format to its default ABI name.
/// </summary>
/// <param name="format">Format name ("ELF", "PE", "MachO", ...); matched case-sensitively.</param>
/// <returns>"gnu", "msvc" or "darwin"; null for any other format.</returns>
private static string? DetectAbi(string format)
{
    if (format == "ELF")
    {
        return "gnu";
    }

    if (format == "PE")
    {
        return "msvc";
    }

    if (format == "MachO")
    {
        return "darwin";
    }

    return null;
}
/// <summary>
/// Derives the decoder bitness from an architecture name.
/// </summary>
/// <param name="architecture">Architecture name such as "x86_64" or "x86".</param>
/// <returns>64 when the name contains "64"; otherwise 32.</returns>
private static int GetBitness(string architecture)
{
    var mentions64 = architecture.IndexOf("64", StringComparison.OrdinalIgnoreCase) >= 0;
    if (mentions64)
    {
        return 64;
    }

    return 32;
}
/// <summary>
/// Reads the ELF section header table and yields one region per allocated (SHF_ALLOC),
/// non-empty section.
/// </summary>
/// <remarks>
/// <para>
/// If the image has no section table, or the table described by the header does not fit inside
/// the image (including a section-header entry size smaller than the ELF32/ELF64 minimum), a
/// single executable <c>.text</c> region covering the whole image is yielded instead.
/// An image shorter than the ELF header yields nothing.
/// </para>
/// <para>
/// All header values come from the file and are validated as unsigned 64-bit quantities before
/// being narrowed, so a crafted offset cannot wrap into a valid-looking index.
/// Fields are read with <see cref="BitConverter"/>, i.e. in host byte order.
/// NOTE(review): big-endian ELF images (EI_DATA = 2) are not handled - confirm that is acceptable.
/// </para>
/// </remarks>
/// <param name="bytes">The ELF image.</param>
/// <returns>Regions in section-table order; names missing from the string table become <c>.sectionN</c>.</returns>
private static IEnumerable<CodeRegion> ParseElfSections(byte[] bytes)
{
    const int Elf32HeaderSize = 52;
    const int Elf64HeaderSize = 64;
    const int Elf32SectionHeaderSize = 40;
    const int Elf64SectionHeaderSize = 64;

    if (bytes.Length < Elf32HeaderSize)
    {
        yield break;
    }

    var is64Bit = bytes[4] == 2; // EI_CLASS: 2 = ELFCLASS64
    if (bytes.Length < (is64Bit ? Elf64HeaderSize : Elf32HeaderSize))
    {
        yield break;
    }

    // e_shoff / e_shentsize / e_shnum / e_shstrndx
    ulong shoff;
    ushort shentsize, shnum, shstrndx;
    if (is64Bit)
    {
        shoff = BitConverter.ToUInt64(bytes, 40);
        shentsize = BitConverter.ToUInt16(bytes, 58);
        shnum = BitConverter.ToUInt16(bytes, 60);
        shstrndx = BitConverter.ToUInt16(bytes, 62);
    }
    else
    {
        shoff = BitConverter.ToUInt32(bytes, 32);
        shentsize = BitConverter.ToUInt16(bytes, 46);
        shnum = BitConverter.ToUInt16(bytes, 48);
        shstrndx = BitConverter.ToUInt16(bytes, 50);
    }

    var fileLength = (ulong)bytes.Length;
    var minEntrySize = is64Bit ? Elf64SectionHeaderSize : Elf32SectionHeaderSize;
    var tableSize = (ulong)shnum * shentsize;

    // Everything is compared in ulong; "fileLength - shoff" cannot underflow because
    // shoff <= fileLength is tested first.
    var hasUsableTable =
        shoff != 0 &&
        shnum != 0 &&
        shentsize >= minEntrySize &&
        shoff <= fileLength &&
        tableSize <= fileLength - shoff;

    if (!hasUsableTable)
    {
        // No section headers or invalid
        yield return new CodeRegion(".text", 0, 0, fileLength, true, true, false);
        yield break;
    }

    // Safe to narrow: the table lies inside the image, whose length fits in an int.
    var tableOffset = (int)shoff;

    // File offset of the section-name string table (sh_offset of entry e_shstrndx).
    ulong strtabOffset = 0;
    if (shstrndx < shnum)
    {
        var strtabHeaderOff = tableOffset + shstrndx * shentsize;
        strtabOffset = is64Bit
            ? BitConverter.ToUInt64(bytes, strtabHeaderOff + 24)
            : BitConverter.ToUInt32(bytes, strtabHeaderOff + 16);
    }

    for (var i = 0; i < shnum; i++)
    {
        var sectionOffset = tableOffset + i * shentsize;

        uint nameOffset = BitConverter.ToUInt32(bytes, sectionOffset);
        // Only the low flag bits are needed, so a 32-bit read covers ELF64's 64-bit sh_flags too.
        uint flags = BitConverter.ToUInt32(bytes, sectionOffset + 8);
        ulong addr, offset, size;
        if (is64Bit)
        {
            addr = BitConverter.ToUInt64(bytes, sectionOffset + 16);
            offset = BitConverter.ToUInt64(bytes, sectionOffset + 24);
            size = BitConverter.ToUInt64(bytes, sectionOffset + 32);
        }
        else
        {
            addr = BitConverter.ToUInt32(bytes, sectionOffset + 12);
            offset = BitConverter.ToUInt32(bytes, sectionOffset + 16);
            size = BitConverter.ToUInt32(bytes, sectionOffset + 20);
        }

        // Resolve the name only when the string-table position is inside the image; the sum
        // cannot wrap once strtabOffset is known to be below the image length.
        var name = string.Empty;
        if (strtabOffset < fileLength && strtabOffset + nameOffset < fileLength)
        {
            name = ReadNullTerminatedString(bytes, (int)(strtabOffset + nameOffset));
        }

        if (string.IsNullOrEmpty(name))
        {
            name = $".section{i}";
        }

        // SHF_WRITE = 1, SHF_ALLOC = 2, SHF_EXECINSTR = 4
        var isWritable = (flags & 1) != 0;
        var isAllocated = (flags & 2) != 0;
        var isExecutable = (flags & 4) != 0;

        if (isAllocated && size > 0)
        {
            yield return new CodeRegion(name, addr, offset, size, isExecutable, true, isWritable);
        }
    }
}
/// <summary>
/// Reads the PE section table and yields one region per section that has raw data on disk.
/// </summary>
/// <remarks>
/// <para>
/// Yields nothing when the image is too short, <c>e_lfanew</c> points outside the image, or the
/// "PE" signature is missing. Enumeration stops at the first section header that would extend
/// past the end of the image.
/// </para>
/// <para>
/// <c>e_lfanew</c> and the optional-header size come from the file, so offsets are checked by
/// subtraction or in 64-bit arithmetic to rule out integer overflow.
/// The region's virtual address is the section header's VirtualAddress field as stored.
/// </para>
/// </remarks>
/// <param name="bytes">The PE image.</param>
/// <returns>Regions in section-table order.</returns>
private static IEnumerable<CodeRegion> ParsePeSections(byte[] bytes)
{
    const int FileHeaderEnd = 24;      // 4-byte signature + 20-byte COFF file header
    const int SectionHeaderSize = 40;

    if (bytes.Length < 64)
    {
        yield break;
    }

    // "peOffset + 24 > Length" would wrap for offsets near int.MaxValue; compare by subtraction.
    var peOffset = BitConverter.ToInt32(bytes, 0x3C);
    if (peOffset < 0 || peOffset > bytes.Length - FileHeaderEnd)
    {
        yield break;
    }

    // Check PE signature
    if (bytes[peOffset] != 'P' || bytes[peOffset + 1] != 'E')
    {
        yield break;
    }

    var numSections = BitConverter.ToUInt16(bytes, peOffset + 6);
    var optHeaderSize = BitConverter.ToUInt16(bytes, peOffset + 20);
    long sectionTableOffset = (long)peOffset + FileHeaderEnd + optHeaderSize;

    for (var i = 0; i < numSections; i++)
    {
        var headerStart = sectionTableOffset + (long)i * SectionHeaderSize;
        if (headerStart + SectionHeaderSize > bytes.Length)
        {
            break;
        }

        // In range of the array, so the narrowing is safe.
        var sectionOffset = (int)headerStart;

        var name = Encoding.ASCII.GetString(bytes, sectionOffset, 8).TrimEnd('\0');
        var virtualAddress = BitConverter.ToUInt32(bytes, sectionOffset + 12);
        var rawSize = BitConverter.ToUInt32(bytes, sectionOffset + 16);
        var rawOffset = BitConverter.ToUInt32(bytes, sectionOffset + 20);
        var characteristics = BitConverter.ToUInt32(bytes, sectionOffset + 36);

        // IMAGE_SCN_MEM_EXECUTE = 0x20000000
        // IMAGE_SCN_MEM_READ    = 0x40000000
        // IMAGE_SCN_MEM_WRITE   = 0x80000000
        var isExecutable = (characteristics & 0x20000000) != 0;
        var isReadable = (characteristics & 0x40000000) != 0;
        var isWritable = (characteristics & 0x80000000) != 0;

        // Sections without raw data (e.g. uninitialised data) have nothing to disassemble.
        if (rawSize > 0)
        {
            yield return new CodeRegion(name, virtualAddress, rawOffset, rawSize, isExecutable, isReadable, isWritable);
        }
    }
}
/// <summary>
/// Placeholder for ELF symbol extraction; currently always yields no symbols.
/// </summary>
/// <remarks>
/// A real implementation would need full ELF symbol table parsing. Returning nothing is acceptable
/// for now because symbols are optional for delta signatures.
/// </remarks>
/// <param name="bytes">Contents of the ELF file (not inspected yet).</param>
/// <returns>An empty sequence.</returns>
private static IEnumerable<SymbolInfo> ParseElfSymbols(byte[] bytes) => Array.Empty<SymbolInfo>();
/// <summary>
/// Placeholder for PE export extraction; currently always yields no symbols.
/// </summary>
/// <remarks>
/// A real implementation would need full PE export table parsing. Returning nothing is acceptable
/// for now because exports are optional for delta signatures.
/// </remarks>
/// <param name="bytes">Contents of the PE file (not inspected yet).</param>
/// <returns>An empty sequence.</returns>
private static IEnumerable<SymbolInfo> ParsePeExports(byte[] bytes) => Array.Empty<SymbolInfo>();
/// <summary>
/// Reads an ASCII string that starts at <paramref name="offset"/> and runs up to, but not including,
/// the next zero byte (or the end of the buffer when no zero byte follows).
/// </summary>
/// <param name="bytes">Buffer to read from.</param>
/// <param name="offset">Index of the first character.</param>
/// <returns>
/// The decoded text, or <see cref="string.Empty"/> when the offset is outside the buffer, the string
/// is empty, or it is longer than 256 bytes.
/// </returns>
private static string ReadNullTerminatedString(byte[] bytes, int offset)
{
    const int MaxLength = 256;

    var offsetInRange = offset >= 0 && offset < bytes.Length;
    if (!offsetInRange)
    {
        return string.Empty;
    }

    // Everything from the requested offset to the end of the buffer.
    var tail = bytes.AsSpan(offset);

    // No terminator means the string extends to the end of the buffer.
    var terminator = tail.IndexOf((byte)0);
    var length = terminator >= 0 ? terminator : tail.Length;

    return length is > 0 and <= MaxLength
        ? Encoding.ASCII.GetString(tail[..length])
        : string.Empty;
}
/// <summary>
/// Converts a decoded instruction into the engine-neutral <see cref="DisassembledInstruction"/> model.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <param name="bytes">Buffer the raw instruction bytes are copied from.</param>
/// <param name="regionOffset">Value subtracted from the instruction pointer to obtain an index into <paramref name="bytes"/>.</param>
/// <returns>
/// The mapped instruction. Its raw bytes are empty when the computed slice does not lie entirely
/// inside <paramref name="bytes"/>.
/// </returns>
private static DisassembledInstruction MapInstruction(Instruction instruction, byte[] bytes, int regionOffset)
{
    // Get raw instruction bytes
    // NOTE(review): the instruction pointer is narrowed to int before the subtraction; presumably
    // regionOffset is the (equally narrowed) base the decoder was seeded with - confirm with the caller.
    var instrOffset = (int)(instruction.IP) - regionOffset;
    var instrLength = instruction.Length;

    // Written as "offset <= Length - length" because "offset + length" can wrap to a negative
    // number for offsets near int.MaxValue, which would pass the check and then throw on the slice.
    var sliceInBounds = instrOffset >= 0
        && instrLength <= bytes.Length
        && instrOffset <= bytes.Length - instrLength;

    var rawBytes = sliceInBounds
        ? ImmutableArray.Create(bytes, instrOffset, instrLength)
        : ImmutableArray<byte>.Empty;

    var kind = ClassifyInstruction(instruction);
    var operands = MapOperands(instruction);

    return new DisassembledInstruction(
        Address: instruction.IP,
        RawBytes: rawBytes,
        Mnemonic: instruction.Mnemonic.ToString(),
        OperandsText: FormatOperands(instruction),
        Kind: kind,
        Operands: operands);
}
/// <summary>
/// Assigns a coarse <see cref="InstructionKind"/> to a decoded instruction.
/// </summary>
/// <remarks>
/// Control-flow instructions are recognised first (calls, returns, unconditional and conditional
/// jumps), followed by NOPs and system-call style instructions. Everything else is classified by
/// mnemonic. Direct and indirect forms, and short and near encodings, are treated alike so that
/// the encoding chosen by the assembler does not change the classification.
/// </remarks>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>The matching kind, or <see cref="InstructionKind.Unknown"/> when no rule applies.</returns>
private static InstructionKind ClassifyInstruction(Instruction instruction)
{
    // Calls: direct (rel16/rel32, far pointer) and indirect (through register or memory).
    if (instruction.IsCallNear || instruction.IsCallFar ||
        instruction.IsCallNearIndirect || instruction.IsCallFarIndirect)
        return InstructionKind.Call;

    if (instruction.Mnemonic == Mnemonic.Ret || instruction.Mnemonic == Mnemonic.Retf)
        return InstructionKind.Return;

    // Unconditional jumps: the short (rel8) encoding and the indirect forms must be listed
    // explicitly, they are not covered by IsJmpNear / IsJmpFar.
    if (instruction.IsJmpShort || instruction.IsJmpNear || instruction.IsJmpFar ||
        instruction.IsJmpNearIndirect || instruction.IsJmpFarIndirect)
        return InstructionKind.Branch;

    if (instruction.IsJccShort || instruction.IsJccNear)
        return InstructionKind.ConditionalBranch;

    // TEST is listed once, under Logic. It previously also appeared under Compare, but that entry
    // could never match because Logic was evaluated first; the effective classification is kept.
    return instruction.Mnemonic switch
    {
        Mnemonic.Nop or Mnemonic.Fnop
            => InstructionKind.Nop,

        Mnemonic.Syscall or Mnemonic.Sysenter or Mnemonic.Int
            => InstructionKind.Syscall,

        // Arithmetic
        Mnemonic.Add or Mnemonic.Sub or Mnemonic.Mul or Mnemonic.Imul or
        Mnemonic.Div or Mnemonic.Idiv or Mnemonic.Inc or Mnemonic.Dec or
        Mnemonic.Neg or Mnemonic.Adc or Mnemonic.Sbb
            => InstructionKind.Arithmetic,

        // Logic
        Mnemonic.And or Mnemonic.Or or Mnemonic.Xor or Mnemonic.Not or Mnemonic.Test
            => InstructionKind.Logic,

        // Shifts
        Mnemonic.Shl or Mnemonic.Shr or Mnemonic.Sal or Mnemonic.Sar or
        Mnemonic.Rol or Mnemonic.Ror
            => InstructionKind.Shift,

        // Compare
        Mnemonic.Cmp
            => InstructionKind.Compare,

        // Move/Load/Store
        Mnemonic.Mov or Mnemonic.Movzx or Mnemonic.Movsx or
        Mnemonic.Lea or Mnemonic.Push or Mnemonic.Pop or Mnemonic.Xchg
            => InstructionKind.Move,

        _ => InstructionKind.Unknown,
    };
}
/// <summary>
/// Maps every operand of <paramref name="instruction"/> to the engine-neutral <see cref="Operand"/> model.
/// </summary>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>The mapped operands in operand order; empty when the instruction has none.</returns>
private static ImmutableArray<Operand> MapOperands(Instruction instruction)
{
    var operandCount = instruction.OpCount;
    var mapped = ImmutableArray.CreateBuilder<Operand>(operandCount);

    var position = 0;
    while (position < operandCount)
    {
        mapped.Add(MapOperand(instruction, position, instruction.GetOpKind(position)));
        position++;
    }

    return mapped.ToImmutable();
}
/// <summary>
/// Maps a single operand of <paramref name="instruction"/> to the engine-neutral <see cref="Operand"/> model.
/// </summary>
/// <param name="instruction">Decoded instruction that owns the operand.</param>
/// <param name="index">Zero-based operand index.</param>
/// <param name="kind">Operand kind reported by the decoder for that index.</param>
/// <returns>
/// A register, immediate, address (near branch target) or memory operand; any other kind is
/// returned as an unknown operand whose text is the kind name.
/// </returns>
private static Operand MapOperand(Instruction instruction, int index, OpKind kind)
{
    switch (kind)
    {
        case OpKind.Register:
        {
            var registerName = instruction.GetOpRegister(index).ToString();
            return new Operand(
                Type: OperandType.Register,
                Text: registerName,
                Register: registerName);
        }

        case OpKind.Immediate8:
        case OpKind.Immediate16:
        case OpKind.Immediate32:
        case OpKind.Immediate64:
        case OpKind.Immediate8to16:
        case OpKind.Immediate8to32:
        case OpKind.Immediate8to64:
        case OpKind.Immediate32to64:
        {
            var immediate = instruction.GetImmediate(index);
            return new Operand(
                Type: OperandType.Immediate,
                Text: $"0x{immediate:X}",
                Value: (long)immediate);
        }

        case OpKind.NearBranch16:
        case OpKind.NearBranch32:
        case OpKind.NearBranch64:
        {
            var target = instruction.NearBranchTarget;
            return new Operand(
                Type: OperandType.Address,
                Text: $"0x{target:X}",
                Value: (long)target);
        }

        case OpKind.Memory:
        {
            // Absent base/index registers are reported as null rather than as "None".
            var baseRegister = instruction.MemoryBase;
            var indexRegister = instruction.MemoryIndex;
            var baseName = baseRegister != Register.None ? baseRegister.ToString() : null;
            var indexName = indexRegister != Register.None ? indexRegister.ToString() : null;

            return new Operand(
                Type: OperandType.Memory,
                Text: FormatMemoryOperand(instruction),
                MemoryBase: baseName,
                MemoryIndex: indexName,
                MemoryScale: instruction.MemoryIndexScale,
                MemoryDisplacement: (long)instruction.MemoryDisplacement64);
        }

        default:
            return new Operand(Type: OperandType.Unknown, Text: kind.ToString());
    }
}
/// <summary>
/// Renders the operands of <paramref name="instruction"/> in NASM syntax, without the mnemonic.
/// </summary>
/// <remarks>
/// Only the operands are formatted. Formatting the whole instruction and cutting at the first
/// space is not reliable: for prefixed instructions such as <c>rep movsb</c> or <c>lock add</c>
/// the first token is the prefix, so the mnemonic would end up in the operand text.
/// </remarks>
/// <param name="instruction">Decoded instruction.</param>
/// <returns>The operand text, or an empty string when the instruction has no visible operands.</returns>
private static string FormatOperands(Instruction instruction)
{
    // A fresh formatter per call keeps this helper free of shared mutable state.
    var formatter = new NasmFormatter();
    var output = new StringOutput();

    formatter.FormatAllOperands(instruction, output);

    return output.ToStringAndReset();
}
/// <summary>
/// Builds a bracketed textual form of the instruction's memory operand from its base register,
/// scaled index register and displacement, e.g. <c>[RAX+RCX*4+0x10]</c>.
/// </summary>
/// <param name="instruction">Decoded instruction that has a memory operand.</param>
/// <returns>
/// The present components joined with <c>+</c> inside square brackets. Absent registers, a scale
/// of one and a zero displacement are omitted; with nothing present the result is <c>[]</c>.
/// </returns>
private static string FormatMemoryOperand(Instruction instruction)
{
    // At most three terms: base, index(*scale), displacement.
    var terms = new List<string>(3);

    var baseRegister = instruction.MemoryBase;
    if (baseRegister != Register.None)
    {
        terms.Add(baseRegister.ToString());
    }

    var indexRegister = instruction.MemoryIndex;
    if (indexRegister != Register.None)
    {
        var scale = instruction.MemoryIndexScale;
        terms.Add(scale > 1 ? $"{indexRegister}*{scale}" : indexRegister.ToString());
    }

    // The displacement is printed as an unsigned 64-bit hexadecimal value.
    var displacement = instruction.MemoryDisplacement64;
    if (displacement != 0)
    {
        terms.Add($"0x{displacement:X}");
    }

    return $"[{string.Join("+", terms)}]";
}
}

View File

@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Description>Binary disassembly abstraction layer for StellaOps. Provides a unified interface over multiple disassembly engines (B2R2) for ELF, PE, and Mach-O binaries on x86-64 and ARM64 architectures.</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<!-- Iced for x86/x64 disassembly - pure .NET, highly performant -->
<ItemGroup>
<PackageReference Include="Iced" />
</ItemGroup>
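<!-- Mono.Cecil reads PE/CLI (.NET) metadata only. NOTE(review): it does not parse ELF or Mach-O; confirm this is the intended dependency for native binary parsing. -->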
<ItemGroup>
<PackageReference Include="Mono.Cecil" />
</ItemGroup>
</Project>