// Files
//
// 349 lines
// 10 KiB
// C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Identifies a CPU architecture.
/// </summary>
public enum CpuArchitecture
{
    /// <summary>Architecture is not known.</summary>
    Unknown = 0,

    /// <summary>32-bit x86 (Intel/AMD).</summary>
    X86 = 1,

    /// <summary>64-bit x86-64, also called amd64 (Intel/AMD).</summary>
    X86_64 = 2,

    /// <summary>32-bit ARM (ARMv7).</summary>
    ARM32 = 3,

    /// <summary>64-bit ARM (AArch64/ARMv8).</summary>
    ARM64 = 4,

    /// <summary>32-bit MIPS.</summary>
    MIPS32 = 5,

    /// <summary>64-bit MIPS.</summary>
    MIPS64 = 6,

    /// <summary>64-bit RISC-V.</summary>
    RISCV64 = 7,

    /// <summary>32-bit PowerPC.</summary>
    PPC32 = 8,

    /// <summary>64-bit PowerPC.</summary>
    PPC64 = 9,

    /// <summary>SPARC.</summary>
    SPARC = 10,

    /// <summary>SuperH SH4.</summary>
    SH4 = 11,

    /// <summary>AVR microcontroller.</summary>
    AVR = 12,

    /// <summary>Ethereum Virtual Machine.</summary>
    EVM = 13,

    /// <summary>WebAssembly.</summary>
    WASM = 14,
}
/// <summary>
/// Identifies the container format of a binary executable.
/// </summary>
public enum BinaryFormat
{
    /// <summary>Format is not known.</summary>
    Unknown = 0,

    /// <summary>Raw binary data that carries no format metadata.</summary>
    Raw = 1,

    /// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
    ELF = 2,

    /// <summary>Portable Executable (Windows).</summary>
    PE = 3,

    /// <summary>Mach-O (macOS, iOS).</summary>
    MachO = 4,

    /// <summary>WebAssembly module.</summary>
    WASM = 5,
}
/// <summary>
/// Capability descriptor for a disassembly plugin: its identity, the
/// architectures and formats it supports, and its optional features.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>Unique identifier of the plugin.</summary>
    public required string PluginId { get; init; }

    /// <summary>Display name of the disassembly engine.</summary>
    public required string Name { get; init; }

    /// <summary>Version of the underlying disassembly library.</summary>
    public required string Version { get; init; }

    /// <summary>Set of CPU architectures the plugin supports.</summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>Set of binary formats the plugin supports.</summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>Whether the plugin can lift code to an intermediate representation.</summary>
    public bool SupportsLifting { get; init; }

    /// <summary>Whether the plugin can recover control flow graphs.</summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Selection priority used when several plugins support the same arch/format;
    /// a higher value means a higher priority. Defaults to 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Tests whether <paramref name="arch"/> is in <see cref="SupportedArchitectures"/>.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <returns><c>true</c> when the architecture is supported.</returns>
    public bool SupportsArchitecture(CpuArchitecture arch)
    {
        return SupportedArchitectures.Contains(arch);
    }

    /// <summary>
    /// Tests whether <paramref name="format"/> is in <see cref="SupportedFormats"/>.
    /// </summary>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> when the format is supported.</returns>
    public bool SupportsFormat(BinaryFormat format)
    {
        return SupportedFormats.Contains(format);
    }

    /// <summary>
    /// Tests whether the plugin supports both the given architecture and the given format.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> only when both are supported.</returns>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format)
    {
        // The format is consulted only after the architecture check passes.
        if (!SupportsArchitecture(arch))
        {
            return false;
        }

        return SupportsFormat(format);
    }
}
/// <summary>
/// Describes a binary that has been loaded.
/// </summary>
/// <param name="Format">Format of the binary (ELF, PE, MachO, etc.).</param>
/// <param name="Architecture">CPU architecture of the binary.</param>
/// <param name="Bitness">Bitness of the binary: 32 or 64.</param>
/// <param name="Endianness">Byte order of the binary.</param>
/// <param name="Abi">Optional application binary interface hint (gnu, musl, msvc, darwin).</param>
/// <param name="EntryPoint">Entry point address, when one is available.</param>
/// <param name="BuildId">Build identifier, when present (e.g., a GNU build-id).</param>
/// <param name="Metadata">Further metadata taken from the binary.</param>
/// <param name="Handle">Engine-specific internal handle for the disassembly engine.</param>
public sealed record BinaryInfo(
    BinaryFormat Format,
    CpuArchitecture Architecture,
    int Bitness,
    Endianness Endianness,
    string? Abi,
    ulong? EntryPoint,
    string? BuildId,
    IReadOnlyDictionary<string, object> Metadata,
    object Handle);
/// <summary>
/// Byte ordering.
/// </summary>
public enum Endianness
{
    /// <summary>Little-endian: least significant byte first.</summary>
    Little = 0,

    /// <summary>Big-endian: most significant byte first.</summary>
    Big = 1,
}
/// <summary>
/// A code region (section) of a binary.
/// </summary>
/// <param name="Name">Name of the section, such as .text or .rodata.</param>
/// <param name="VirtualAddress">Address of the region in virtual memory.</param>
/// <param name="FileOffset">Offset of the region within the binary file.</param>
/// <param name="Size">Size of the region in bytes.</param>
/// <param name="IsExecutable">True when the region contains executable code.</param>
/// <param name="IsReadable">True when the region is readable.</param>
/// <param name="IsWritable">True when the region is writable.</param>
public sealed record CodeRegion(
    string Name,
    ulong VirtualAddress,
    ulong FileOffset,
    ulong Size,
    bool IsExecutable,
    bool IsReadable,
    bool IsWritable);
/// <summary>
/// Describes a symbol found in the binary.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Virtual address at which the symbol resides.</param>
/// <param name="Size">Symbol size in bytes; 0 when the size is unknown.</param>
/// <param name="Type">Type of the symbol.</param>
/// <param name="Binding">Binding of the symbol.</param>
/// <param name="Section">Section that contains the symbol.</param>
public sealed record SymbolInfo(
    string Name,
    ulong Address,
    ulong Size,
    SymbolType Type,
    SymbolBinding Binding,
    string? Section);
/// <summary>
/// Kinds of symbol.
/// </summary>
public enum SymbolType
{
    /// <summary>Type is unknown or was not specified.</summary>
    Unknown = 0,

    /// <summary>A function or procedure.</summary>
    Function = 1,

    /// <summary>A data object.</summary>
    Object = 2,

    /// <summary>A section symbol.</summary>
    Section = 3,

    /// <summary>A source file name.</summary>
    File = 4,

    /// <summary>A common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6,
}
/// <summary>
/// Binding (visibility) of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local symbol: not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global symbol: visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak symbol: can be overridden.</summary>
    Weak = 3,
}
/// <summary>
/// One instruction produced by disassembly.
/// </summary>
/// <param name="Address">Virtual address at which the instruction is located.</param>
/// <param name="RawBytes">The instruction's raw bytes.</param>
/// <param name="Mnemonic">Mnemonic of the instruction (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Operands rendered as text.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Operands in parsed form.</param>
public sealed record DisassembledInstruction(
    ulong Address,
    ImmutableArray<byte> RawBytes,
    string Mnemonic,
    string OperandsText,
    InstructionKind Kind,
    ImmutableArray<Operand> Operands);
/// <summary>
/// Categories used to classify instructions.
/// </summary>
public enum InstructionKind
{
    /// <summary>Instruction is unknown or has not been classified.</summary>
    Unknown = 0,

    /// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Load from memory.</summary>
    Load = 4,

    /// <summary>Store to memory.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Call to a function.</summary>
    Call = 8,

    /// <summary>Return from a function.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Compare operation.</summary>
    Compare = 13,

    /// <summary>Shift operation.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16,
}
/// <summary>
/// A single operand of an instruction.
/// </summary>
/// <param name="Type">Type of the operand.</param>
/// <param name="Text">Operand rendered as text.</param>
/// <param name="Value">Immediate value, where applicable; defaults to <c>null</c>.</param>
/// <param name="Register">Register name, where applicable; defaults to <c>null</c>.</param>
/// <param name="MemoryBase">Base register of a memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryIndex">Index register of a memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryScale">Scale factor of an indexed memory operand; defaults to <c>null</c>.</param>
/// <param name="MemoryDisplacement">Displacement of a memory operand; defaults to <c>null</c>.</param>
public sealed record Operand(
    OperandType Type,
    string Text,
    long? Value = null,
    string? Register = null,
    string? MemoryBase = null,
    string? MemoryIndex = null,
    int? MemoryScale = null,
    long? MemoryDisplacement = null);
/// <summary>
/// Kinds of instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand type is unknown.</summary>
    Unknown = 0,

    /// <summary>A CPU register.</summary>
    Register = 1,

    /// <summary>An immediate value.</summary>
    Immediate = 2,

    /// <summary>A memory reference.</summary>
    Memory = 3,

    /// <summary>An address or label.</summary>
    Address = 4,
}