save progress

This commit is contained in:
StellaOps Bot
2026-01-03 00:47:24 +02:00
parent 3f197814c5
commit ca578801fd
319 changed files with 32478 additions and 2202 deletions

View File

@@ -0,0 +1,35 @@
# Disassembly Abstractions Charter
## Mission
Define the platform-agnostic disassembly interfaces and models for binary analysis. Enable multiple disassembly backends (Iced, B2R2) to be plugged in without changing consuming code.
## Responsibilities
- Maintain `IDisassemblyPlugin` interface defining disassembly capabilities
- Define `IDisassemblyService` for coordinated plugin selection and fallback
- Provide format-neutral models: `DisassembledInstruction`, `BinaryInfo`, `SymbolInfo`, `CodeRegion`
- Keep interfaces stable to minimize breaking changes for plugin implementations
- Ensure deterministic output contracts
## Key Paths
- `IDisassemblyPlugin.cs` - Plugin contract with capability reporting
- `IDisassemblyService.cs` - Service coordinating multiple plugins
- `Models/BinaryInfo.cs` - Binary metadata (format, architecture, ABI)
- `Models/DisassembledInstruction.cs` - Decoded instruction with operands
- `Models/SymbolInfo.cs` - Function/symbol metadata
- `Models/CpuArchitecture.cs` - Supported architecture enum
## Coordination
- Disassembly plugin implementers (Iced, B2R2)
- Normalization pipeline consumers
- Scanner team for binary vulnerability analysis
## Required Reading
- `docs/modules/binaryindex/architecture.md`
- `docs/implplan/SPRINT_20260102_001_BE_binary_delta_signatures.md`
## Working Agreement
1. Update the task status to `DOING`/`DONE` in the sprint file when starting/finishing work.
2. Review this charter and Required Reading before coding.
3. Keep models immutable and serialization-friendly.
4. Add capability flags to `DisassemblyCapabilities` (exposed via `IDisassemblyPlugin.Capabilities`) rather than adding members to the `IDisassemblyPlugin` interface.
5. Document all public types with XML doc comments.

View File

@@ -0,0 +1,140 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Contract implemented by every disassembly engine plugin.
/// A plugin loads a binary, enumerates its code regions and symbols, and decodes instructions.
/// </summary>
public interface IDisassemblyPlugin
{
    /// <summary>
    /// Descriptor of what this plugin supports (see <see cref="DisassemblyCapabilities"/>).
    /// </summary>
    DisassemblyCapabilities Capabilities { get; }

    /// <summary>
    /// Loads a binary from a stream, detecting its format and architecture.
    /// </summary>
    /// <param name="stream">Stream containing the binary image.</param>
    /// <param name="archHint">Optional architecture hint used during detection.</param>
    /// <param name="formatHint">Optional format hint used during detection.</param>
    /// <returns>A <see cref="BinaryInfo"/> carrying format, architecture, and metadata.</returns>
    BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null);

    /// <summary>
    /// Loads a binary from an in-memory read-only span of bytes.
    /// </summary>
    /// <param name="bytes">The binary image.</param>
    /// <param name="archHint">Optional architecture hint used during detection.</param>
    /// <param name="formatHint">Optional format hint used during detection.</param>
    /// <returns>A <see cref="BinaryInfo"/> carrying format, architecture, and metadata.</returns>
    BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null);

    /// <summary>
    /// Enumerates the executable code regions (sections) of a loaded binary.
    /// </summary>
    /// <param name="binary">Binary previously returned by one of the <c>LoadBinary</c> overloads.</param>
    /// <returns>The code regions of <paramref name="binary"/>.</returns>
    IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary);

    /// <summary>
    /// Enumerates the symbols (functions) of a loaded binary.
    /// </summary>
    /// <param name="binary">Binary previously returned by one of the <c>LoadBinary</c> overloads.</param>
    /// <returns>The symbols of <paramref name="binary"/>.</returns>
    IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary);

    /// <summary>
    /// Decodes the instructions contained in a code region.
    /// </summary>
    /// <param name="binary">The loaded binary.</param>
    /// <param name="region">Region whose contents are disassembled.</param>
    /// <returns>The decoded instructions.</returns>
    IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region);

    /// <summary>
    /// Decodes instructions from a given virtual address, covering at most a given number of bytes.
    /// </summary>
    /// <param name="binary">The loaded binary.</param>
    /// <param name="startAddress">Virtual address at which decoding starts.</param>
    /// <param name="length">Upper bound, in bytes, on how much is disassembled.</param>
    /// <returns>The decoded instructions.</returns>
    IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length);

    /// <summary>
    /// Decodes the instructions belonging to one symbol/function.
    /// </summary>
    /// <param name="binary">The loaded binary.</param>
    /// <param name="symbol">Symbol to disassemble.</param>
    /// <returns>The decoded instructions.</returns>
    IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol);
}
/// <summary>
/// Registry of disassembly plugins; handles plugin discovery and selection.
/// </summary>
public interface IDisassemblyPluginRegistry
{
    /// <summary>
    /// Every plugin currently registered.
    /// </summary>
    IReadOnlyList<IDisassemblyPlugin> Plugins { get; }

    /// <summary>
    /// Selects the best plugin for an architecture/format pair.
    /// </summary>
    /// <param name="architecture">CPU architecture of the target binary.</param>
    /// <param name="format">Binary format of the target binary.</param>
    /// <returns>The best matching plugin, or <c>null</c> when no plugin matches.</returns>
    IDisassemblyPlugin? FindPlugin(CpuArchitecture architecture, BinaryFormat format);

    /// <summary>
    /// Lists the plugins that support a given architecture.
    /// </summary>
    /// <param name="architecture">CPU architecture of the target binary.</param>
    /// <returns>The matching plugins, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForArchitecture(CpuArchitecture architecture);

    /// <summary>
    /// Lists the plugins that support a given binary format.
    /// </summary>
    /// <param name="format">Binary format of the target binary.</param>
    /// <returns>The matching plugins, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForFormat(BinaryFormat format);

    /// <summary>
    /// Looks a plugin up by its unique identifier.
    /// </summary>
    /// <param name="pluginId">Identifier of the plugin.</param>
    /// <returns>The plugin when found; otherwise <c>null</c>.</returns>
    IDisassemblyPlugin? GetPlugin(string pluginId);
}
/// <summary>
/// Facade over disassembly operations that picks the best plugin automatically.
/// </summary>
public interface IDisassemblyService
{
    /// <summary>
    /// Loads a binary from a stream, selecting the best plugin automatically.
    /// </summary>
    /// <param name="stream">Stream containing the binary image.</param>
    /// <param name="preferredPluginId">Optional ID of the plugin the caller prefers.</param>
    /// <returns>The loaded binary together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(Stream stream, string? preferredPluginId = null);

    /// <summary>
    /// Loads a binary from in-memory bytes, selecting the best plugin automatically.
    /// </summary>
    /// <param name="bytes">The binary image.</param>
    /// <param name="preferredPluginId">Optional ID of the plugin the caller prefers.</param>
    /// <returns>The loaded binary together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(ReadOnlySpan<byte> bytes, string? preferredPluginId = null);

    /// <summary>
    /// The plugin registry associated with this service.
    /// </summary>
    IDisassemblyPluginRegistry Registry { get; }
}

View File

@@ -0,0 +1,348 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Identifies a CPU architecture. Numeric values are assigned explicitly.
/// </summary>
public enum CpuArchitecture
{
    /// <summary>Architecture is not known.</summary>
    Unknown = 0,

    /// <summary>32-bit x86 (Intel/AMD).</summary>
    X86 = 1,

    /// <summary>64-bit x86-64, also called amd64 (Intel/AMD).</summary>
    X86_64 = 2,

    /// <summary>32-bit ARM (ARMv7).</summary>
    ARM32 = 3,

    /// <summary>64-bit ARM (AArch64/ARMv8).</summary>
    ARM64 = 4,

    /// <summary>32-bit MIPS.</summary>
    MIPS32 = 5,

    /// <summary>64-bit MIPS.</summary>
    MIPS64 = 6,

    /// <summary>64-bit RISC-V.</summary>
    RISCV64 = 7,

    /// <summary>32-bit PowerPC.</summary>
    PPC32 = 8,

    /// <summary>64-bit PowerPC.</summary>
    PPC64 = 9,

    /// <summary>SPARC.</summary>
    SPARC = 10,

    /// <summary>SuperH SH4.</summary>
    SH4 = 11,

    /// <summary>AVR microcontroller.</summary>
    AVR = 12,

    /// <summary>Ethereum Virtual Machine.</summary>
    EVM = 13,

    /// <summary>WebAssembly.</summary>
    WASM = 14,
}
/// <summary>
/// Identifies the container format of a binary executable. Numeric values are assigned explicitly.
/// </summary>
public enum BinaryFormat
{
    /// <summary>Format is not known.</summary>
    Unknown = 0,

    /// <summary>Raw binary data carrying no format metadata.</summary>
    Raw = 1,

    /// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
    ELF = 2,

    /// <summary>Portable Executable (Windows).</summary>
    PE = 3,

    /// <summary>Mach-O (macOS, iOS).</summary>
    MachO = 4,

    /// <summary>WebAssembly module.</summary>
    WASM = 5,
}
/// <summary>
/// Describes the capabilities of a disassembly plugin: its identity, the architectures and
/// formats it supports, optional features, and its selection priority.
/// </summary>
/// <remarks>
/// Equality is defined explicitly. The compiler-synthesized record equality would compare
/// <see cref="SupportedArchitectures"/> and <see cref="SupportedFormats"/> by reference, so two
/// descriptors with identical contents would never be equal. Here the sets are compared by content.
/// </remarks>
public sealed record DisassemblyCapabilities
{
    /// <summary>
    /// The unique identifier of the plugin.
    /// </summary>
    public required string PluginId { get; init; }

    /// <summary>
    /// Display name of the disassembly engine.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Version of the underlying disassembly library.
    /// </summary>
    public required string Version { get; init; }

    /// <summary>
    /// Supported CPU architectures.
    /// </summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>
    /// Supported binary formats.
    /// </summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>
    /// Whether the plugin supports lifting to intermediate representation.
    /// </summary>
    public bool SupportsLifting { get; init; }

    /// <summary>
    /// Whether the plugin supports control flow graph recovery.
    /// </summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Priority for plugin selection when multiple plugins support the same arch/format.
    /// Higher values indicate higher priority. Defaults to 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Checks if this plugin supports the given architecture.
    /// </summary>
    /// <param name="arch">Architecture to look up in <see cref="SupportedArchitectures"/>.</param>
    /// <returns><c>true</c> when <paramref name="arch"/> is in the supported set.</returns>
    public bool SupportsArchitecture(CpuArchitecture arch) =>
        SupportedArchitectures.Contains(arch);

    /// <summary>
    /// Checks if this plugin supports the given format.
    /// </summary>
    /// <param name="format">Format to look up in <see cref="SupportedFormats"/>.</param>
    /// <returns><c>true</c> when <paramref name="format"/> is in the supported set.</returns>
    public bool SupportsFormat(BinaryFormat format) =>
        SupportedFormats.Contains(format);

    /// <summary>
    /// Checks if this plugin can handle the given architecture and format combination.
    /// </summary>
    /// <param name="arch">Architecture of the binary.</param>
    /// <param name="format">Format of the binary.</param>
    /// <returns><c>true</c> only when both the architecture and the format are supported.</returns>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format) =>
        SupportsArchitecture(arch) && SupportsFormat(format);

    /// <summary>
    /// Value equality: scalar members are compared directly and the supported sets by content.
    /// </summary>
    /// <param name="other">Descriptor to compare with.</param>
    /// <returns><c>true</c> when every member, including set contents, matches.</returns>
    public bool Equals(DisassemblyCapabilities? other)
    {
        if (other is null)
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        return string.Equals(PluginId, other.PluginId, StringComparison.Ordinal)
            && string.Equals(Name, other.Name, StringComparison.Ordinal)
            && string.Equals(Version, other.Version, StringComparison.Ordinal)
            && SupportsLifting == other.SupportsLifting
            && SupportsCfgRecovery == other.SupportsCfgRecovery
            && Priority == other.Priority
            && SetsEqual(SupportedArchitectures, other.SupportedArchitectures)
            && SetsEqual(SupportedFormats, other.SupportedFormats);
    }

    /// <summary>
    /// Hash code consistent with <see cref="Equals(DisassemblyCapabilities?)"/>.
    /// </summary>
    /// <returns>A hash built from the scalar members and the sizes of the supported sets.</returns>
    public override int GetHashCode() =>
        // Set sizes are used instead of element hashes: equal sets always have equal counts,
        // and the result does not depend on enumeration order.
        HashCode.Combine(
            PluginId,
            Name,
            Version,
            SupportsLifting,
            SupportsCfgRecovery,
            Priority,
            SupportedArchitectures?.Count ?? 0,
            SupportedFormats?.Count ?? 0);

    // Content comparison that tolerates a null set (possible only if a caller bypassed nullability).
    private static bool SetsEqual<T>(ImmutableHashSet<T>? left, ImmutableHashSet<T>? right) =>
        ReferenceEquals(left, right)
        || (left is not null && right is not null && left.SetEquals(right));
}
/// <summary>
/// Describes a binary after it has been loaded.
/// </summary>
/// <param name="Format">Container format of the binary (ELF, PE, MachO, etc.).</param>
/// <param name="Architecture">CPU architecture of the binary.</param>
/// <param name="Bitness">Word size: 32 or 64 bit.</param>
/// <param name="Endianness">Byte order of the binary.</param>
/// <param name="Abi">Application binary interface hint (gnu, musl, msvc, darwin), if any.</param>
/// <param name="EntryPoint">Address of the entry point, when available.</param>
/// <param name="BuildId">Build identifier when present (e.g., GNU build-id).</param>
/// <param name="Metadata">Additional metadata extracted from the binary.</param>
/// <param name="Handle">Engine-specific internal handle for the disassembly engine.</param>
public sealed record BinaryInfo(
    BinaryFormat Format,
    CpuArchitecture Architecture,
    int Bitness,
    Endianness Endianness,
    string? Abi,
    ulong? EntryPoint,
    string? BuildId,
    IReadOnlyDictionary<string, object> Metadata,
    object Handle);
/// <summary>
/// Byte order of a binary.
/// </summary>
public enum Endianness
{
    /// <summary>Little-endian: least significant byte first.</summary>
    Little = 0,

    /// <summary>Big-endian: most significant byte first.</summary>
    Big = 1,
}
/// <summary>
/// A region (section) of a binary, described by its location, size, and access flags.
/// </summary>
/// <param name="Name">Name of the section, such as .text or .rodata.</param>
/// <param name="VirtualAddress">Address of the region in memory.</param>
/// <param name="FileOffset">Offset of the region within the binary file.</param>
/// <param name="Size">Length of the region in bytes.</param>
/// <param name="IsExecutable">True when the region contains executable code.</param>
/// <param name="IsReadable">True when the region is readable.</param>
/// <param name="IsWritable">True when the region is writable.</param>
public sealed record CodeRegion(
    string Name,
    ulong VirtualAddress,
    ulong FileOffset,
    ulong Size,
    bool IsExecutable,
    bool IsReadable,
    bool IsWritable);
/// <summary>
/// Describes one symbol found in a binary.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Virtual address at which the symbol resides.</param>
/// <param name="Size">Size of the symbol in bytes; 0 when unknown.</param>
/// <param name="Type">Kind of symbol.</param>
/// <param name="Binding">Binding of the symbol.</param>
/// <param name="Section">Section that contains the symbol, if any.</param>
public sealed record SymbolInfo(
    string Name,
    ulong Address,
    ulong Size,
    SymbolType Type,
    SymbolBinding Binding,
    string? Section);
/// <summary>
/// Kind of a symbol.
/// </summary>
public enum SymbolType
{
    /// <summary>Type is unknown or was not specified.</summary>
    Unknown = 0,

    /// <summary>A function or procedure.</summary>
    Function = 1,

    /// <summary>A data object.</summary>
    Object = 2,

    /// <summary>A section symbol.</summary>
    Section = 3,

    /// <summary>A source file name.</summary>
    File = 4,

    /// <summary>A common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6,
}
/// <summary>
/// Binding/visibility of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local symbol: not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global symbol: visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak symbol: can be overridden.</summary>
    Weak = 3,
}
/// <summary>
/// A disassembled instruction.
/// </summary>
/// <remarks>
/// Equality is defined explicitly. <c>ImmutableArray</c> equality compares the backing array
/// reference, so the compiler-synthesized record equality would treat two separately decoded but
/// identical instructions as different. Here <see cref="RawBytes"/> and <see cref="Operands"/>
/// are compared element by element.
/// </remarks>
/// <param name="Address">Virtual address of the instruction.</param>
/// <param name="RawBytes">Raw bytes of the instruction.</param>
/// <param name="Mnemonic">Instruction mnemonic (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Text representation of operands.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Parsed operands.</param>
public sealed record DisassembledInstruction(
    ulong Address,
    ImmutableArray<byte> RawBytes,
    string Mnemonic,
    string OperandsText,
    InstructionKind Kind,
    ImmutableArray<Operand> Operands)
{
    /// <summary>
    /// Value equality: scalar members are compared directly, array members by content.
    /// </summary>
    /// <param name="other">Instruction to compare with.</param>
    /// <returns><c>true</c> when every member, including array contents, matches.</returns>
    public bool Equals(DisassembledInstruction? other)
    {
        if (other is null)
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        // AsSpan() yields an empty span for a default (uninitialized) ImmutableArray,
        // so the comparison does not throw on default arrays.
        return Address == other.Address
            && Kind == other.Kind
            && string.Equals(Mnemonic, other.Mnemonic, StringComparison.Ordinal)
            && string.Equals(OperandsText, other.OperandsText, StringComparison.Ordinal)
            && RawBytes.AsSpan().SequenceEqual(other.RawBytes.AsSpan())
            && Operands.AsSpan().SequenceEqual(other.Operands.AsSpan());
    }

    /// <summary>
    /// Hash code consistent with <see cref="Equals(DisassembledInstruction?)"/>.
    /// </summary>
    /// <returns>A hash built from the scalar members and the contents of both arrays.</returns>
    public override int GetHashCode()
    {
        var hash = new HashCode();
        hash.Add(Address);
        hash.Add(Kind);
        hash.Add(Mnemonic);
        hash.Add(OperandsText);
        hash.AddBytes(RawBytes.AsSpan());

        foreach (var operand in Operands.AsSpan())
        {
            hash.Add(operand);
        }

        return hash.ToHashCode();
    }
}
/// <summary>
/// Coarse classification of an instruction.
/// </summary>
public enum InstructionKind
{
    /// <summary>Instruction is unknown or has not been classified.</summary>
    Unknown = 0,

    /// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Load from memory.</summary>
    Load = 4,

    /// <summary>Store to memory.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Function return.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Compare operation.</summary>
    Compare = 13,

    /// <summary>Shift operation.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16,
}
/// <summary>
/// One operand of an instruction. Only <paramref name="Type"/> and <paramref name="Text"/>
/// are mandatory; the remaining members default to <c>null</c>.
/// </summary>
/// <param name="Type">Kind of operand.</param>
/// <param name="Text">Textual form of the operand.</param>
/// <param name="Value">Immediate value, where applicable.</param>
/// <param name="Register">Name of the register, where applicable.</param>
/// <param name="MemoryBase">Base register of a memory operand.</param>
/// <param name="MemoryIndex">Index register of a memory operand.</param>
/// <param name="MemoryScale">Scale factor of an indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement of a memory operand.</param>
public sealed record Operand(
    OperandType Type,
    string Text,
    long? Value = null,
    string? Register = null,
    string? MemoryBase = null,
    string? MemoryIndex = null,
    int? MemoryScale = null,
    long? MemoryDisplacement = null);
/// <summary>
/// Kind of an instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand type is unknown.</summary>
    Unknown = 0,

    /// <summary>A CPU register.</summary>
    Register = 1,

    /// <summary>An immediate value.</summary>
    Immediate = 2,

    /// <summary>A memory reference.</summary>
    Memory = 3,

    /// <summary>An address or label.</summary>
    Address = 4,
}

View File

@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the disassembly abstractions library (see Description below). -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<!-- Implicit usings are enabled, so sources may omit common using directives such as System. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NOTE(review): preview language features are enabled; confirm this is intended for a contract library. -->
<LangVersion>preview</LangVersion>
<!-- XML docs are generated and warnings are errors, so undocumented public members will presumably fail the build (verify). -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Description>Abstractions and interfaces for binary disassembly plugins in StellaOps. Defines the plugin contract for disassembly engines.</Description>
</PropertyGroup>
<ItemGroup>
<!-- No Version attributes here; presumably versions come from central package management (e.g. Directory.Packages.props). TODO confirm. -->
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
</ItemGroup>
</Project>