// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.

using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Identifies the CPU architecture (instruction set) of a binary.
/// </summary>
/// <remarks>
/// Every member carries an explicitly assigned numeric value, so reordering
/// the declarations does not change the underlying values.
/// </remarks>
public enum CpuArchitecture
{
    /// <summary>Unknown architecture.</summary>
    Unknown = 0,

    /// <summary>Intel/AMD 32-bit x86.</summary>
    X86 = 1,

    /// <summary>Intel/AMD 64-bit x86-64 (amd64).</summary>
    X86_64 = 2,

    /// <summary>ARM 32-bit (ARMv7).</summary>
    ARM32 = 3,

    /// <summary>ARM 64-bit (AArch64/ARMv8).</summary>
    ARM64 = 4,

    /// <summary>MIPS 32-bit.</summary>
    MIPS32 = 5,

    /// <summary>MIPS 64-bit.</summary>
    MIPS64 = 6,

    /// <summary>RISC-V 64-bit.</summary>
    RISCV64 = 7,

    /// <summary>PowerPC 32-bit.</summary>
    PPC32 = 8,

    /// <summary>PowerPC 64-bit.</summary>
    PPC64 = 9,

    /// <summary>SPARC.</summary>
    SPARC = 10,

    /// <summary>SuperH SH4.</summary>
    SH4 = 11,

    /// <summary>AVR microcontroller.</summary>
    AVR = 12,

    /// <summary>Ethereum Virtual Machine.</summary>
    EVM = 13,

    /// <summary>WebAssembly.</summary>
    WASM = 14
}

/// <summary>
/// Identifies the container format of a binary executable.
/// </summary>
/// <remarks>
/// Every member carries an explicitly assigned numeric value, so reordering
/// the declarations does not change the underlying values.
/// </remarks>
public enum BinaryFormat
{
    /// <summary>Unknown format.</summary>
    Unknown = 0,

    /// <summary>Raw binary data (no format metadata).</summary>
    Raw = 1,

    /// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
    ELF = 2,

    /// <summary>Portable Executable (Windows).</summary>
    PE = 3,

    /// <summary>Mach-O (macOS, iOS).</summary>
    MachO = 4,

    /// <summary>WebAssembly module.</summary>
    WASM = 5
}

/// <summary>
/// Describes the capabilities of a disassembly plugin: its identity, the CPU
/// architectures and binary formats it supports, and its optional features.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>
    /// The unique identifier of the plugin.
    /// </summary>
    public required string PluginId { get; init; }

    /// <summary>
    /// Display name of the disassembly engine.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Version of the underlying disassembly library.
    /// </summary>
    public required string Version { get; init; }

    /// <summary>
    /// Supported CPU architectures.
    /// </summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>
    /// Supported binary formats.
    /// </summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>
    /// Whether the plugin supports lifting to intermediate representation.
    /// </summary>
    public bool SupportsLifting { get; init; }

    /// <summary>
    /// Whether the plugin supports control flow graph recovery.
    /// </summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Priority for plugin selection when multiple plugins support the same arch/format.
    /// Higher values indicate higher priority. Defaults to 0 when not set.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Checks if this plugin supports the given architecture.
    /// </summary>
    /// <param name="arch">Architecture to look up in <see cref="SupportedArchitectures"/>.</param>
    /// <returns><c>true</c> if <paramref name="arch"/> is contained in <see cref="SupportedArchitectures"/>.</returns>
    public bool SupportsArchitecture(CpuArchitecture arch) =>
        SupportedArchitectures.Contains(arch);

    /// <summary>
    /// Checks if this plugin supports the given format.
    /// </summary>
    /// <param name="format">Format to look up in <see cref="SupportedFormats"/>.</param>
    /// <returns><c>true</c> if <paramref name="format"/> is contained in <see cref="SupportedFormats"/>.</returns>
    public bool SupportsFormat(BinaryFormat format) =>
        SupportedFormats.Contains(format);

    /// <summary>
    /// Checks if this plugin can handle the given architecture and format combination.
    /// </summary>
    /// <param name="arch">Architecture of the binary.</param>
    /// <param name="format">Format of the binary.</param>
    /// <returns>
    /// <c>true</c> only when both <see cref="SupportsArchitecture"/> and
    /// <see cref="SupportsFormat"/> return <c>true</c>.
    /// </returns>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format) =>
        SupportsArchitecture(arch) && SupportsFormat(format);
}

/// <summary>
/// Information about a loaded binary.
/// </summary>
/// <param name="Format">Binary format: ELF, PE, MachO, etc.</param>
/// <param name="Architecture">CPU architecture.</param>
/// <param name="Bitness">32 or 64 bit.</param>
/// <param name="Endianness">Byte order.</param>
/// <param name="Abi">Application binary interface hint (gnu, musl, msvc, darwin); may be <c>null</c>.</param>
/// <param name="EntryPoint">Entry point address if available; otherwise <c>null</c>.</param>
/// <param name="BuildId">Build identifier if present (e.g., GNU build-id); otherwise <c>null</c>.</param>
/// <param name="Metadata">Additional metadata from the binary.</param>
/// <param name="Handle">Internal handle for the disassembly engine (engine-specific).</param>
public sealed record BinaryInfo(
    BinaryFormat Format,
    CpuArchitecture Architecture,
    int Bitness,
    Endianness Endianness,
    string? Abi,
    ulong? EntryPoint,
    string? BuildId,
    IReadOnlyDictionary<string, object> Metadata,
    object Handle);

/// <summary>
/// Byte order.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, starting at 0 in declaration order.
/// <see cref="Little"/> has the value 0 and is therefore the value of
/// <c>default(Endianness)</c>.
/// </remarks>
public enum Endianness
{
    /// <summary>Little-endian (LSB first).</summary>
    Little = 0,

    /// <summary>Big-endian (MSB first).</summary>
    Big = 1
}

/// <summary>
/// Represents a code region (section) in a binary.
/// </summary>
/// <param name="Name">Section name: .text, .rodata, etc.</param>
/// <param name="VirtualAddress">Virtual address in memory.</param>
/// <param name="FileOffset">Offset in the binary file.</param>
/// <param name="Size">Size in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
public sealed record CodeRegion(
    string Name,
    ulong VirtualAddress,
    ulong FileOffset,
    ulong Size,
    bool IsExecutable,
    bool IsReadable,
    bool IsWritable);

/// <summary>
/// Information about a symbol in the binary.
/// </summary>
/// <param name="Name">Symbol name.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">Size in bytes (0 if unknown).</param>
/// <param name="Type">Symbol type.</param>
/// <param name="Binding">Symbol binding.</param>
/// <param name="Section">Section containing the symbol; may be <c>null</c>.</param>
public sealed record SymbolInfo(
    string Name,
    ulong Address,
    ulong Size,
    SymbolType Type,
    SymbolBinding Binding,
    string? Section);

/// <summary>
/// Type of symbol.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, starting at 0 in declaration order.
/// </remarks>
public enum SymbolType
{
    /// <summary>Unknown or unspecified type.</summary>
    Unknown = 0,

    /// <summary>Function/procedure.</summary>
    Function = 1,

    /// <summary>Data object.</summary>
    Object = 2,

    /// <summary>Section symbol.</summary>
    Section = 3,

    /// <summary>Source file name.</summary>
    File = 4,

    /// <summary>Common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6
}

/// <summary>
/// Symbol binding/visibility.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, starting at 0 in declaration order.
/// </remarks>
public enum SymbolBinding
{
    /// <summary>Unknown binding.</summary>
    Unknown = 0,

    /// <summary>Local symbol (not visible outside the object).</summary>
    Local = 1,

    /// <summary>Global symbol (visible to other objects).</summary>
    Global = 2,

    /// <summary>Weak symbol (can be overridden).</summary>
    Weak = 3
}

/// <summary>
/// A disassembled instruction.
/// </summary>
/// <param name="Address">Virtual address of the instruction.</param>
/// <param name="RawBytes">Raw bytes of the instruction.</param>
/// <param name="Mnemonic">Instruction mnemonic (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Text representation of operands.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Parsed operands.</param>
public sealed record DisassembledInstruction(
    ulong Address,
    ImmutableArray<byte> RawBytes,
    string Mnemonic,
    string OperandsText,
    InstructionKind Kind,
    ImmutableArray<Operand> Operands);

/// <summary>
/// Classification of instruction types.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, starting at 0 in declaration order.
/// </remarks>
public enum InstructionKind
{
    /// <summary>Unknown or unclassified instruction.</summary>
    Unknown = 0,

    /// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Memory load operation.</summary>
    Load = 4,

    /// <summary>Memory store operation.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Function return.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Compare operation.</summary>
    Compare = 13,

    /// <summary>Shift operation.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16
}

/// <summary>
/// An instruction operand.
/// </summary>
/// <param name="Type">Operand type.</param>
/// <param name="Text">Text representation.</param>
/// <param name="Value">Immediate value if applicable; defaults to <c>null</c>.</param>
/// <param name="Register">Register name if applicable; defaults to <c>null</c>.</param>
/// <param name="MemoryBase">Base register for memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryIndex">Index register for memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryScale">Scale factor for indexed memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryDisplacement">Displacement for memory operand; defaults to <c>null</c>.</param>
public sealed record Operand(
    OperandType Type,
    string Text,
    long? Value = null,
    string? Register = null,
    string? MemoryBase = null,
    string? MemoryIndex = null,
    int? MemoryScale = null,
    long? MemoryDisplacement = null);

/// <summary>
/// Type of operand.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, starting at 0 in declaration order.
/// </remarks>
public enum OperandType
{
    /// <summary>Unknown operand type.</summary>
    Unknown = 0,

    /// <summary>CPU register.</summary>
    Register = 1,

    /// <summary>Immediate value.</summary>
    Immediate = 2,

    /// <summary>Memory reference.</summary>
    Memory = 3,

    /// <summary>Address/label.</summary>
    Address = 4
}