Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Ghidra/Services/GhidraDisassemblyPlugin.cs
StellaOps Bot 37e11918e0 save progress
2026-01-06 09:42:20 +02:00

541 lines
20 KiB
C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.Disassembly;
namespace StellaOps.BinaryIndex.Ghidra;
/// <summary>
/// Ghidra-based disassembly plugin providing broad architecture support as a fallback backend.
/// Ghidra is used for complex cases where B2R2 has limited coverage, supports 20+ architectures,
/// and provides mature decompilation, Version Tracking, and BSim capabilities.
/// </summary>
/// <remarks>
/// <para>
/// This plugin has lower priority than B2R2 since Ghidra requires external process invocation
/// (Java-based headless analysis) which is slower than native .NET disassembly. It serves as
/// the fallback when B2R2 returns low-confidence results or for architectures B2R2 handles poorly.
/// </para>
/// <para>
/// Instruction-level output is currently synthetic: the analysis result exposes function
/// boundaries only, so the <c>Disassemble*</c> members emit fixed-width placeholder instructions
/// (mnemonic <c>GHIDRA</c>) rather than decoded instructions.
/// </para>
/// </remarks>
public sealed class GhidraDisassemblyPlugin : IDisassemblyPlugin, IDisposable
{
    /// <summary>
    /// Plugin identifier.
    /// </summary>
    public const string PluginId = "stellaops.disasm.ghidra";

    /// <summary>
    /// Upper bound on the number of placeholder instructions synthesized for a single function.
    /// </summary>
    private const int MaxSyntheticInstructionsPerFunction = 1000;

    private readonly IGhidraService _ghidraService;
    private readonly GhidraOptions _options;
    private readonly ILogger<GhidraDisassemblyPlugin> _logger;
    private readonly TimeProvider _timeProvider;
    private bool _disposed;

    private static readonly DisassemblyCapabilities s_capabilities = new()
    {
        PluginId = PluginId,
        Name = "Ghidra Disassembler",
        Version = "11.x", // Ghidra 11.x
        SupportedArchitectures =
        [
            // All architectures supported by both B2R2 and Ghidra
            CpuArchitecture.X86,
            CpuArchitecture.X86_64,
            CpuArchitecture.ARM32,
            CpuArchitecture.ARM64,
            CpuArchitecture.MIPS32,
            CpuArchitecture.MIPS64,
            CpuArchitecture.RISCV64,
            CpuArchitecture.PPC32,
            CpuArchitecture.PPC64, // Ghidra supports PPC64 better than B2R2
            CpuArchitecture.SPARC,
            CpuArchitecture.SH4,
            CpuArchitecture.AVR,
            // Additional architectures Ghidra supports
            CpuArchitecture.WASM
        ],
        SupportedFormats =
        [
            BinaryFormat.ELF,
            BinaryFormat.PE,
            BinaryFormat.MachO,
            BinaryFormat.WASM,
            BinaryFormat.Raw
        ],
        SupportsLifting = true, // P-Code lifting
        SupportsCfgRecovery = true, // Full CFG recovery and decompilation
        Priority = 25 // Lower than B2R2 (50) - used as fallback
    };

    /// <summary>
    /// Creates a new Ghidra disassembly plugin.
    /// </summary>
    /// <param name="ghidraService">The Ghidra analysis service.</param>
    /// <param name="options">Ghidra options.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="timeProvider">Time provider for timestamps.</param>
    /// <exception cref="ArgumentNullException">
    /// Any argument is <see langword="null"/>, or <paramref name="options"/> carries a null value.
    /// </exception>
    public GhidraDisassemblyPlugin(
        IGhidraService ghidraService,
        IOptions<GhidraOptions> options,
        ILogger<GhidraDisassemblyPlugin> logger,
        TimeProvider timeProvider)
    {
        _ghidraService = ghidraService ?? throw new ArgumentNullException(nameof(ghidraService));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <inheritdoc />
    public DisassemblyCapabilities Capabilities => s_capabilities;

    /// <inheritdoc />
    public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ObjectDisposedException.ThrowIf(_disposed, this);

        // Buffer the stream once; the resulting array is owned by the returned handle.
        using var buffer = new MemoryStream();
        stream.CopyTo(buffer);
        return LoadBinaryCore(buffer.ToArray(), archHint, formatHint);
    }

    /// <inheritdoc />
    public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // A span cannot outlive this call, so take a private copy for the handle.
        return LoadBinaryCore(bytes.ToArray(), archHint, formatHint);
    }

    /// <inheritdoc />
    public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
    {
        // Validate eagerly: in an iterator method these checks would be deferred
        // until the caller starts enumerating.
        ArgumentNullException.ThrowIfNull(binary);
        ObjectDisposedException.ThrowIf(_disposed, this);

        return EnumerateCodeRegions(GetHandle(binary));
    }

    /// <inheritdoc />
    public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ObjectDisposedException.ThrowIf(_disposed, this);

        return EnumerateSymbols(GetHandle(binary));
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(region);
        ObjectDisposedException.ThrowIf(_disposed, this);

        return DisassembleRegion(GetHandle(binary), region);
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length)
    {
        // Wrap the address range in an ad-hoc region and reuse the region overload,
        // which performs the argument and disposal checks.
        var region = new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);
        return Disassemble(binary, region);
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(symbol);
        ObjectDisposedException.ThrowIf(_disposed, this);

        var handle = GetHandle(binary);
        var functions = handle.Result.Functions;

        // Prefer an exact address match; fall back to the name only when no function
        // starts at the symbol's address, so a same-named function elsewhere cannot win.
        var func = functions.FirstOrDefault(f => f.Address == symbol.Address)
            ?? functions.FirstOrDefault(f => f.Name.Equals(symbol.Name, StringComparison.Ordinal));

        if (func is null)
        {
            _logger.LogWarning(
                "Function not found for symbol {Name} at 0x{Address:X}",
                symbol.Name, symbol.Address);
            return [];
        }

        return DisassembleFunctionInstructions(func, handle);
    }

    #region Private Methods

    /// <summary>
    /// Runs the Ghidra analysis for <paramref name="byteArray"/> and maps the result to a
    /// <see cref="BinaryInfo"/>. Takes ownership of <paramref name="byteArray"/>.
    /// </summary>
    private BinaryInfo LoadBinaryCore(byte[] byteArray, CpuArchitecture? archHint, BinaryFormat? formatHint)
    {
        _logger.LogDebug("Loading binary with Ghidra plugin (size: {Size} bytes)", byteArray.Length);

        // The IDisassemblyPlugin contract is synchronous, so block on the analysis here.
        var result = RunGhidraAnalysisAsync(byteArray, archHint, formatHint, CancellationToken.None)
            .GetAwaiter()
            .GetResult();

        // Map Ghidra metadata to BinaryInfo
        var format = MapFormat(result.Metadata.Format);
        var architecture = MapArchitecture(result.Metadata.Architecture, result.Metadata.AddressSize);
        var endianness = result.Metadata.Endianness.Equals("little", StringComparison.OrdinalIgnoreCase)
            ? Endianness.Little
            : Endianness.Big;
        var abi = DetectAbi(format);

        _logger.LogInformation(
            "Loaded binary with Ghidra: Format={Format}, Architecture={Architecture}, Processor={Processor}",
            format, architecture, result.Metadata.Processor);

        var metadata = new Dictionary<string, object>
        {
            ["size"] = byteArray.Length,
            ["ghidra_processor"] = result.Metadata.Processor,
            ["ghidra_version"] = result.Metadata.GhidraVersion,
            ["analysis_duration_ms"] = result.Metadata.AnalysisDuration.TotalMilliseconds,
            ["function_count"] = result.Functions.Length,
            ["import_count"] = result.Imports.Length,
            ["export_count"] = result.Exports.Length
        };

        if (result.Metadata.Compiler is not null)
        {
            metadata["compiler"] = result.Metadata.Compiler;
        }

        return new BinaryInfo(
            Format: format,
            Architecture: architecture,
            Bitness: result.Metadata.AddressSize,
            Endianness: endianness,
            Abi: abi,
            EntryPoint: result.Metadata.EntryPoint,
            BuildId: result.BinaryHash,
            Metadata: metadata,
            Handle: new GhidraBinaryHandle(result, byteArray));
    }

    /// <summary>
    /// Yields one <see cref="CodeRegion"/> per executable Ghidra memory block.
    /// </summary>
    private static IEnumerable<CodeRegion> EnumerateCodeRegions(GhidraBinaryHandle handle)
    {
        var imageBase = handle.Result.Metadata.ImageBase;

        foreach (var block in handle.Result.MemoryBlocks)
        {
            if (!block.IsExecutable)
            {
                continue;
            }

            yield return new CodeRegion(
                Name: block.Name,
                VirtualAddress: block.Start,
                // NOTE(review): this is the offset from the image base, used as an
                // approximation of the file offset - confirm it matches the on-disk layout.
                FileOffset: block.Start - imageBase,
                Size: (ulong)block.Size,
                IsExecutable: block.IsExecutable,
                IsReadable: true, // Executable sections are readable
                IsWritable: block.IsWritable);
        }
    }

    /// <summary>
    /// Yields a symbol for every analyzed function, followed by a symbol for every export.
    /// </summary>
    private static IEnumerable<SymbolInfo> EnumerateSymbols(GhidraBinaryHandle handle)
    {
        var blocks = handle.Result.MemoryBlocks;

        // Map functions to symbols
        foreach (var func in handle.Result.Functions)
        {
            yield return new SymbolInfo(
                Name: func.Name,
                Address: func.Address,
                Size: (ulong)func.Size,
                Type: SymbolType.Function,
                Binding: func.IsExternal ? SymbolBinding.Global : SymbolBinding.Local,
                Section: DetermineSection(blocks, func.Address));
        }

        // Also include exports as symbols
        foreach (var export in handle.Result.Exports)
        {
            yield return new SymbolInfo(
                Name: export.Name,
                Address: export.Address,
                Size: 0, // Unknown size for exports
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: DetermineSection(blocks, export.Address));
        }
    }

    /// <summary>
    /// Yields the placeholder instructions of every function whose entry address lies inside
    /// <paramref name="region"/>, restricted to instructions that fall inside the region.
    /// </summary>
    private IEnumerable<DisassembledInstruction> DisassembleRegion(GhidraBinaryHandle handle, CodeRegion region)
    {
        var regionStart = region.VirtualAddress;
        var regionEnd = regionStart + region.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            region.Name, regionStart, regionEnd);

        foreach (var func in handle.Result.Functions)
        {
            // Only functions that start inside the region are considered.
            if (func.Address < regionStart || func.Address >= regionEnd)
            {
                continue;
            }

            foreach (var instr in DisassembleFunctionInstructions(func, handle))
            {
                if (instr.Address >= regionStart && instr.Address < regionEnd)
                {
                    yield return instr;
                }
            }
        }
    }

    /// <summary>
    /// Writes <paramref name="bytes"/> to a temporary file under the configured work directory,
    /// runs the Ghidra analysis on it and deletes the file afterwards.
    /// </summary>
    /// <remarks>
    /// <paramref name="formatHint"/> is accepted for symmetry with the public API but is not
    /// currently forwarded to the analysis options.
    /// </remarks>
    private async Task<GhidraAnalysisResult> RunGhidraAnalysisAsync(
        byte[] bytes,
        CpuArchitecture? archHint,
        BinaryFormat? formatHint,
        CancellationToken ct)
    {
        // Format the timestamp with the invariant culture so the file name does not
        // depend on the calendar or digits of the current culture.
        var timestamp = _timeProvider.GetUtcNow().ToString(
            "yyyyMMddHHmmssfff",
            System.Globalization.CultureInfo.InvariantCulture);
        var tempPath = Path.Combine(
            _options.WorkDir,
            $"disasm_{timestamp}_{Guid.NewGuid():N}.bin");

        try
        {
            Directory.CreateDirectory(Path.GetDirectoryName(tempPath)!);

            // ConfigureAwait(false): LoadBinary blocks on this task, so the continuations
            // must not try to resume on the caller's synchronization context.
            await File.WriteAllBytesAsync(tempPath, bytes, ct).ConfigureAwait(false);

            var options = new GhidraAnalysisOptions
            {
                RunFullAnalysis = true,
                ExtractStrings = false, // Not needed for disassembly
                ExtractFunctions = true,
                ExtractDecompilation = false, // Can be expensive
                TimeoutSeconds = _options.DefaultTimeoutSeconds
            };

            // Add architecture hint if provided
            if (archHint.HasValue)
            {
                options = options with { ProcessorHint = MapToGhidraProcessor(archHint.Value) };
            }

            // The stream is disposed when the try block exits, i.e. before the file is deleted.
            using var stream = File.OpenRead(tempPath);
            return await _ghidraService.AnalyzeAsync(stream, options, ct).ConfigureAwait(false);
        }
        finally
        {
            TryDeleteFile(tempPath);
        }
    }

    /// <summary>
    /// Synthesizes fixed-width placeholder instructions covering <paramref name="func"/>.
    /// </summary>
    /// <remarks>
    /// Ghidra full analysis provides function boundaries but not individual instructions, so the
    /// function is sliced into chunks of an assumed average instruction size (4 bytes for 64-bit
    /// address spaces, otherwise 3). A full implementation would run a Ghidra script that exports
    /// the real instruction listing.
    /// </remarks>
    private static IEnumerable<DisassembledInstruction> DisassembleFunctionInstructions(
        GhidraFunction func,
        GhidraBinaryHandle handle)
    {
        var imageBase = handle.Result.Metadata.ImageBase;

        // x86/x64 average instruction size is ~3-4 bytes
        var avgInstructionSize = handle.Result.Metadata.AddressSize == 64 ? 4 : 3;
        var estimatedInstructions = Math.Max(1, func.Size / avgInstructionSize);
        var functionEnd = func.Address + (ulong)func.Size;

        var address = func.Address;
        for (var i = 0; i < estimatedInstructions && i < MaxSyntheticInstructionsPerFunction; i++)
        {
            // Without actual Ghidra instruction export, we create placeholder entries
            var rawBytes = ExtractBytes(handle.Bytes, address, imageBase, avgInstructionSize);

            yield return new DisassembledInstruction(
                Address: address,
                RawBytes: rawBytes,
                Mnemonic: "GHIDRA", // Placeholder - real impl would have actual mnemonics
                OperandsText: $"; function {func.Name} + 0x{address - func.Address:X}",
                Kind: i == 0 ? InstructionKind.Call : InstructionKind.Unknown,
                Operands: []);

            address += (ulong)avgInstructionSize;
            if (address >= functionEnd)
            {
                break;
            }
        }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes located at <paramref name="address"/>,
    /// treating <paramref name="address"/> minus <paramref name="imageBase"/> as an index
    /// into <paramref name="binary"/>.
    /// </summary>
    /// <returns>
    /// The available bytes, or an empty array when the address lies before the image base,
    /// beyond the end of the buffer, or <paramref name="count"/> is not positive.
    /// </returns>
    private static ImmutableArray<byte> ExtractBytes(byte[] binary, ulong address, ulong imageBase, int count)
    {
        // Guard explicitly instead of relying on unsigned wrap-around of the subtraction.
        if (count <= 0 || address < imageBase)
        {
            return [];
        }

        var offset = address - imageBase;
        if (offset >= (ulong)binary.Length)
        {
            return [];
        }

        var start = (int)offset;
        var available = Math.Min(count, binary.Length - start);
        return ImmutableArray.Create(binary, start, available);
    }

    /// <summary>
    /// Returns the name of the first memory block containing <paramref name="address"/>,
    /// or <see langword="null"/> when no block contains it.
    /// </summary>
    private static string? DetermineSection(ImmutableArray<GhidraMemoryBlock> blocks, ulong address)
    {
        foreach (var block in blocks)
        {
            if (address >= block.Start && address < block.End)
            {
                return block.Name;
            }
        }

        return null;
    }

    /// <summary>
    /// Retrieves the Ghidra handle stored on <paramref name="binary"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The binary was not loaded by this plugin.</exception>
    private static GhidraBinaryHandle GetHandle(BinaryInfo binary)
    {
        if (binary.Handle is not GhidraBinaryHandle handle)
        {
            throw new ArgumentException("Invalid binary handle - not a Ghidra handle", nameof(binary));
        }

        return handle;
    }

    /// <summary>
    /// Maps a Ghidra format name (case-insensitive) to a <see cref="BinaryFormat"/>.
    /// </summary>
    private static BinaryFormat MapFormat(string ghidraFormat) =>
        ghidraFormat.ToUpperInvariant() switch
        {
            "ELF" or "ELF32" or "ELF64" => BinaryFormat.ELF,
            "PE" or "PE32" or "PE64" or "COFF" => BinaryFormat.PE,
            "MACHO" or "MACH-O" or "MACHO32" or "MACHO64" => BinaryFormat.MachO,
            "WASM" or "WEBASSEMBLY" => BinaryFormat.WASM,
            "RAW" or "BINARY" => BinaryFormat.Raw,
            _ => BinaryFormat.Unknown
        };

    /// <summary>
    /// Maps a Ghidra architecture name or language ID (case-insensitive) to a
    /// <see cref="CpuArchitecture"/>, using <paramref name="addressSize"/> to disambiguate
    /// names that do not encode the width. Every language ID produced by
    /// <see cref="MapToGhidraProcessor"/> maps back to the architecture it was produced from.
    /// </summary>
    private static CpuArchitecture MapArchitecture(string ghidraArch, int addressSize)
    {
        var arch = ghidraArch.ToUpperInvariant();
        return arch switch
        {
            // Intel x86/x64
            "X86" or "X86:LE:32:DEFAULT" => CpuArchitecture.X86,
            "X86-64" or "X86:LE:64:DEFAULT" or "AMD64" => CpuArchitecture.X86_64,
            // 16-bit (real mode) and 32-bit variants are both plain x86.
            var x when x.StartsWith("X86", StringComparison.Ordinal) && addressSize is (16 or 32) => CpuArchitecture.X86,
            var x when x.StartsWith("X86", StringComparison.Ordinal) => CpuArchitecture.X86_64,
            // ARM
            "ARM" or "ARM:LE:32:V7" or "ARM:LE:32:V8" or "ARMV7" => CpuArchitecture.ARM32,
            "AARCH64" or "ARM:LE:64:V8A" or "ARM64" => CpuArchitecture.ARM64,
            var a when a.StartsWith("ARM", StringComparison.Ordinal) && addressSize == 32 => CpuArchitecture.ARM32,
            var a when a.StartsWith("ARM", StringComparison.Ordinal) || a.StartsWith("AARCH", StringComparison.Ordinal) => CpuArchitecture.ARM64,
            // MIPS
            "MIPS" or "MIPS:BE:32:DEFAULT" or "MIPS:LE:32:DEFAULT" => CpuArchitecture.MIPS32,
            "MIPS64" or "MIPS:BE:64:DEFAULT" or "MIPS:LE:64:DEFAULT" => CpuArchitecture.MIPS64,
            var m when m.StartsWith("MIPS", StringComparison.Ordinal) && addressSize == 64 => CpuArchitecture.MIPS64,
            var m when m.StartsWith("MIPS", StringComparison.Ordinal) => CpuArchitecture.MIPS32,
            // RISC-V
            "RISCV" or "RISCV:LE:64:RV64" or "RISCV64" => CpuArchitecture.RISCV64,
            var r when r.StartsWith("RISCV", StringComparison.Ordinal) => CpuArchitecture.RISCV64,
            // PowerPC ("PowerPC:..." is the spelling MapToGhidraProcessor emits)
            "PPC" or "POWERPC" or "PPC:BE:32:DEFAULT" or "POWERPC:BE:32:DEFAULT" => CpuArchitecture.PPC32,
            "PPC64" or "POWERPC64" or "PPC:BE:64:DEFAULT" or "POWERPC:BE:64:DEFAULT" => CpuArchitecture.PPC64,
            var p when (p.StartsWith("PPC", StringComparison.Ordinal) || p.StartsWith("POWERPC", StringComparison.Ordinal))
                && addressSize == 64 => CpuArchitecture.PPC64,
            var p when p.StartsWith("PPC", StringComparison.Ordinal) || p.StartsWith("POWERPC", StringComparison.Ordinal) => CpuArchitecture.PPC32,
            // SPARC
            "SPARC" or "SPARC:BE:32:DEFAULT" => CpuArchitecture.SPARC,
            var s when s.StartsWith("SPARC", StringComparison.Ordinal) => CpuArchitecture.SPARC,
            // SuperH
            "SH4" or "SUPERH" or "SH:LE:32:SH4" => CpuArchitecture.SH4,
            var s when s.StartsWith("SH", StringComparison.Ordinal) || s.StartsWith("SUPERH", StringComparison.Ordinal) => CpuArchitecture.SH4,
            // AVR
            "AVR" or "AVR8:LE:16:DEFAULT" => CpuArchitecture.AVR,
            var a when a.StartsWith("AVR", StringComparison.Ordinal) => CpuArchitecture.AVR,
            // WASM (prefix match covers full language IDs such as "Wasm:LE:32:default")
            "WASM" or "WEBASSEMBLY" => CpuArchitecture.WASM,
            var w when w.StartsWith("WASM", StringComparison.Ordinal) || w.StartsWith("WEBASSEMBLY", StringComparison.Ordinal) => CpuArchitecture.WASM,
            // EVM (Ethereum; prefix match covers "EVM:BE:256:default")
            "EVM" => CpuArchitecture.EVM,
            var e when e.StartsWith("EVM:", StringComparison.Ordinal) => CpuArchitecture.EVM,
            _ => CpuArchitecture.Unknown
        };
    }

    /// <summary>
    /// Maps a <see cref="CpuArchitecture"/> to the Ghidra language ID used as processor hint,
    /// or <see langword="null"/> when there is no mapping.
    /// </summary>
    private static string? MapToGhidraProcessor(CpuArchitecture arch) =>
        arch switch
        {
            CpuArchitecture.X86 => "x86:LE:32:default",
            CpuArchitecture.X86_64 => "x86:LE:64:default",
            CpuArchitecture.ARM32 => "ARM:LE:32:v7",
            CpuArchitecture.ARM64 => "AARCH64:LE:64:v8A",
            CpuArchitecture.MIPS32 => "MIPS:BE:32:default",
            CpuArchitecture.MIPS64 => "MIPS:BE:64:default",
            CpuArchitecture.RISCV64 => "RISCV:LE:64:RV64IC",
            CpuArchitecture.PPC32 => "PowerPC:BE:32:default",
            CpuArchitecture.PPC64 => "PowerPC:BE:64:default",
            CpuArchitecture.SPARC => "sparc:BE:32:default",
            CpuArchitecture.SH4 => "SuperH4:LE:32:default",
            CpuArchitecture.AVR => "avr8:LE:16:default",
            CpuArchitecture.WASM => "Wasm:LE:32:default",
            CpuArchitecture.EVM => "EVM:BE:256:default",
            _ => null
        };

    /// <summary>
    /// Derives a default ABI label from the container format, or <see langword="null"/>
    /// for formats without one.
    /// </summary>
    private static string? DetectAbi(BinaryFormat format) =>
        format switch
        {
            BinaryFormat.ELF => "gnu",
            BinaryFormat.PE => "msvc",
            BinaryFormat.MachO => "darwin",
            _ => null
        };

    /// <summary>
    /// Best-effort deletion of a temporary file. Never throws; failures are logged at debug level.
    /// </summary>
    private void TryDeleteFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex)
        {
            // Cleanup must not mask the outcome of the analysis, so only record the failure.
            _logger.LogDebug(ex, "Failed to delete temporary Ghidra input file {Path}", path);
        }
    }

    #endregion

    /// <summary>
    /// Disposes the plugin and releases resources.
    /// </summary>
    /// <remarks>
    /// After disposal the public members throw <see cref="ObjectDisposedException"/>.
    /// Calling this method more than once is a no-op.
    /// </remarks>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
    }
}
/// <summary>
/// Plugin-private handle that pairs a Ghidra analysis result with the bytes it was produced from.
/// </summary>
/// <param name="Result">Analysis result returned by the Ghidra service for the binary.</param>
/// <param name="Bytes">Raw content of the analyzed binary.</param>
internal sealed record GhidraBinaryHandle(GhidraAnalysisResult Result, byte[] Bytes);