// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.

using System.Collections.Immutable;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using StellaOps.BinaryIndex.Disassembly;
|
|
|
|
namespace StellaOps.BinaryIndex.Ghidra;
|
|
|
|
/// <summary>
/// Ghidra-based disassembly plugin providing broad architecture support as a fallback backend.
/// Ghidra is used for complex cases where B2R2 has limited coverage, supports 20+ architectures,
/// and provides mature decompilation, Version Tracking, and BSim capabilities.
/// </summary>
/// <remarks>
/// This plugin has lower priority than B2R2 since Ghidra requires external process invocation
/// (Java-based headless analysis) which is slower than native .NET disassembly. It serves as
/// the fallback when B2R2 returns low-confidence results or for architectures B2R2 handles poorly.
/// </remarks>
|
|
public sealed class GhidraDisassemblyPlugin : IDisassemblyPlugin, IDisposable
|
|
{
|
|
/// <summary>
/// Identifier this plugin reports through <see cref="Capabilities"/>.
/// </summary>
public const string PluginId = "stellaops.disasm.ghidra";

// Collaborators injected through the constructor.
private readonly IGhidraService _ghidraService;
private readonly GhidraOptions _options;
private readonly ILogger<GhidraDisassemblyPlugin> _logger;
private readonly TimeProvider _timeProvider;

// Flipped to true by Dispose(); public operations guard on it via ObjectDisposedException.ThrowIf.
private bool _disposed;

// Capability descriptor shared by every instance and returned from Capabilities.
private static readonly DisassemblyCapabilities s_capabilities = new DisassemblyCapabilities
{
    PluginId = PluginId,
    Name = "Ghidra Disassembler",
    Version = "11.x", // Ghidra 11.x
    SupportedArchitectures =
    [
        // Architectures covered by both B2R2 and Ghidra
        CpuArchitecture.X86,
        CpuArchitecture.X86_64,
        CpuArchitecture.ARM32,
        CpuArchitecture.ARM64,
        CpuArchitecture.MIPS32,
        CpuArchitecture.MIPS64,
        CpuArchitecture.RISCV64,
        CpuArchitecture.PPC32,
        CpuArchitecture.PPC64, // Ghidra supports PPC64 better than B2R2
        CpuArchitecture.SPARC,
        CpuArchitecture.SH4,
        CpuArchitecture.AVR,

        // Additional architectures Ghidra supports
        CpuArchitecture.WASM
    ],
    SupportedFormats =
    [
        BinaryFormat.ELF,
        BinaryFormat.PE,
        BinaryFormat.MachO,
        BinaryFormat.WASM,
        BinaryFormat.Raw
    ],
    SupportsLifting = true, // P-Code lifting
    SupportsCfgRecovery = true, // Full CFG recovery and decompilation
    Priority = 25 // Lower than B2R2 (50) - used as fallback
};
|
|
|
|
/// <summary>
/// Initializes the plugin with its required collaborators.
/// </summary>
/// <param name="ghidraService">Service that performs the Ghidra analysis.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is captured at construction.</param>
/// <param name="logger">Logger used for diagnostics.</param>
/// <param name="timeProvider">Clock used to timestamp temporary file names.</param>
/// <exception cref="ArgumentNullException">
/// Any argument, or the value wrapped by <paramref name="options"/>, is <see langword="null"/>.
/// </exception>
public GhidraDisassemblyPlugin(
    IGhidraService ghidraService,
    IOptions<GhidraOptions> options,
    ILogger<GhidraDisassemblyPlugin> logger,
    TimeProvider timeProvider)
{
    // Checks are interleaved with the assignments so the first offending parameter
    // (in declaration order) is the one reported.
    ArgumentNullException.ThrowIfNull(ghidraService);
    _ghidraService = ghidraService;

    _options = options?.Value ?? throw new ArgumentNullException(nameof(options));

    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;

    ArgumentNullException.ThrowIfNull(timeProvider);
    _timeProvider = timeProvider;
}
|
|
|
|
/// <inheritdoc />
public DisassemblyCapabilities Capabilities
{
    // Every instance exposes the same static descriptor.
    get { return s_capabilities; }
}
|
|
|
|
/// <inheritdoc />
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Drain the stream (from its current position) into memory, then hand the
    // bytes to the span-based overload, which does the actual analysis.
    using var buffer = new MemoryStream();
    stream.CopyTo(buffer);

    byte[] contents = buffer.ToArray();
    return LoadBinary(contents, archHint, formatHint);
}
|
|
|
|
/// <inheritdoc />
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Snapshot the span: the copy is passed to the analysis and retained in the returned handle.
    var image = bytes.ToArray();
    _logger.LogDebug("Loading binary with Ghidra plugin (size: {Size} bytes)", image.Length);

    // IDisassemblyPlugin is synchronous, so block until the asynchronous analysis completes.
    var analysis = RunGhidraAnalysisAsync(image, archHint, formatHint, CancellationToken.None)
        .GetAwaiter()
        .GetResult();
    var info = analysis.Metadata;

    // Translate Ghidra's metadata into the plugin-neutral model.
    var format = MapFormat(info.Format);
    var architecture = MapArchitecture(info.Architecture, info.AddressSize);

    // Anything other than "little" (case-insensitive) is treated as big-endian.
    var isLittleEndian = info.Endianness.Equals("little", StringComparison.OrdinalIgnoreCase);
    var endianness = isLittleEndian ? Endianness.Little : Endianness.Big;
    var abi = DetectAbi(format);

    _logger.LogInformation(
        "Loaded binary with Ghidra: Format={Format}, Architecture={Architecture}, Processor={Processor}",
        format, architecture, info.Processor);

    var details = new Dictionary<string, object>
    {
        { "size", image.Length },
        { "ghidra_processor", info.Processor },
        { "ghidra_version", info.GhidraVersion },
        { "analysis_duration_ms", info.AnalysisDuration.TotalMilliseconds },
        { "function_count", analysis.Functions.Length },
        { "import_count", analysis.Imports.Length },
        { "export_count", analysis.Exports.Length }
    };

    // The compiler entry is only present when Ghidra reported one.
    if (info.Compiler is { } compiler)
    {
        details["compiler"] = compiler;
    }

    return new BinaryInfo(
        Format: format,
        Architecture: architecture,
        Bitness: info.AddressSize,
        Endianness: endianness,
        Abi: abi,
        EntryPoint: info.EntryPoint,
        BuildId: analysis.BinaryHash,
        Metadata: details,
        Handle: new GhidraBinaryHandle(analysis, image));
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Arguments, disposal state and the binary handle are validated when the method is
/// called; only the enumeration of memory blocks is deferred.
/// </remarks>
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    ArgumentNullException.ThrowIfNull(binary);
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Resolve the handle eagerly so a foreign BinaryInfo fails here, not on first MoveNext().
    var handle = GetHandle(binary);
    return EnumerateExecutableBlocks(handle.Result);

    // Lazily projects every executable Ghidra memory block to a CodeRegion.
    static IEnumerable<CodeRegion> EnumerateExecutableBlocks(GhidraAnalysisResult analysis)
    {
        var imageBase = analysis.Metadata.ImageBase;

        foreach (var block in analysis.MemoryBlocks)
        {
            if (!block.IsExecutable)
            {
                continue;
            }

            yield return new CodeRegion(
                Name: block.Name,
                VirtualAddress: block.Start,
                // NOTE(review): this is the offset from the image base, which is used here
                // as the file offset - confirm that holds for the formats being analysed.
                FileOffset: block.Start - imageBase,
                Size: (ulong)block.Size,
                IsExecutable: block.IsExecutable,
                IsReadable: true, // Executable sections are readable
                IsWritable: block.IsWritable);
        }
    }
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Arguments, disposal state and the binary handle are validated when the method is
/// called; only the enumeration of symbols is deferred. Functions are yielded first,
/// followed by exports.
/// </remarks>
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    ArgumentNullException.ThrowIfNull(binary);
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Resolve the handle eagerly so a foreign BinaryInfo fails here, not on first MoveNext().
    var handle = GetHandle(binary);
    return EnumerateSymbols(handle.Result);

    // Lazily maps Ghidra functions and exports to SymbolInfo entries.
    static IEnumerable<SymbolInfo> EnumerateSymbols(GhidraAnalysisResult analysis)
    {
        var blocks = analysis.MemoryBlocks;

        foreach (var func in analysis.Functions)
        {
            yield return new SymbolInfo(
                Name: func.Name,
                Address: func.Address,
                Size: (ulong)func.Size,
                Type: SymbolType.Function,
                // External functions are reported as global, everything else as local.
                Binding: func.IsExternal ? SymbolBinding.Global : SymbolBinding.Local,
                Section: DetermineSection(blocks, func.Address));
        }

        // Exports are always reported as global function symbols.
        foreach (var export in analysis.Exports)
        {
            yield return new SymbolInfo(
                Name: export.Name,
                Address: export.Address,
                Size: 0, // Unknown size for exports
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: DetermineSection(blocks, export.Address));
        }
    }
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Arguments, disposal state and the binary handle are validated when the method is
/// called; the instructions themselves are produced lazily. Only functions whose entry
/// address lies inside <paramref name="region"/> are considered, and only instructions
/// whose address lies inside the region are yielded.
/// </remarks>
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Resolve the handle eagerly so a foreign BinaryInfo fails here, not on first MoveNext().
    var handle = GetHandle(binary);
    return DisassembleRegion(handle, region);

    // Lazily walks the analysed functions and yields the instructions inside the region.
    IEnumerable<DisassembledInstruction> DisassembleRegion(GhidraBinaryHandle source, CodeRegion target)
    {
        var regionStart = target.VirtualAddress;

        // Saturate instead of wrapping: a start + size that overflows ulong would
        // otherwise produce an end below the start and silently yield nothing.
        var regionEnd = regionStart > ulong.MaxValue - target.Size
            ? ulong.MaxValue
            : regionStart + target.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            target.Name, regionStart, regionEnd);

        foreach (var func in source.Result.Functions)
        {
            if (func.Address < regionStart || func.Address >= regionEnd)
            {
                continue;
            }

            foreach (var instr in DisassembleFunctionInstructions(func, source))
            {
                // A function may start inside the region but run past its end.
                if (instr.Address >= regionStart && instr.Address < regionEnd)
                {
                    yield return instr;
                }
            }
        }
    }
}
|
|
|
|
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length) =>
    // Wrap the raw address range in an ad-hoc region (named after its start address,
    // marked read/execute) and reuse the region-based overload.
    Disassemble(
        binary,
        new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false));
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Arguments, disposal state and the binary handle are validated when the method is
/// called; the function lookup and the instructions are produced lazily. A function
/// whose address equals the symbol's address is preferred; an ordinal name match is
/// used only as a fallback. When nothing matches, a warning is logged and the sequence
/// is empty.
/// </remarks>
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Resolve the handle eagerly so a foreign BinaryInfo fails here, not on first MoveNext().
    var handle = GetHandle(binary);
    return DisassembleMatchingFunction(handle, symbol);

    // Lazily locates the function for the symbol and yields its instructions.
    IEnumerable<DisassembledInstruction> DisassembleMatchingFunction(GhidraBinaryHandle source, SymbolInfo target)
    {
        var functions = source.Result.Functions;

        // Address first: a combined "address OR name" predicate could return an earlier,
        // unrelated function that merely shares the symbol's name.
        var func = functions.FirstOrDefault(f => f.Address == target.Address)
            ?? functions.FirstOrDefault(f => f.Name.Equals(target.Name, StringComparison.Ordinal));

        if (func is null)
        {
            _logger.LogWarning(
                "Function not found for symbol {Name} at 0x{Address:X}",
                target.Name, target.Address);
            yield break;
        }

        foreach (var instr in DisassembleFunctionInstructions(func, source))
        {
            yield return instr;
        }
    }
}
|
|
|
|
#region Private Methods
|
|
|
|
/// <summary>
/// Writes the binary to a uniquely named temporary file under the configured work
/// directory, runs the Ghidra analysis service over it, and removes the file afterwards.
/// </summary>
/// <param name="bytes">Binary contents to analyse.</param>
/// <param name="archHint">Optional architecture; forwarded to Ghidra as a processor hint when set.</param>
/// <param name="formatHint">Optional format hint. Currently not forwarded to the analysis.</param>
/// <param name="ct">Cancellation token passed to the file write and the analysis call.</param>
/// <returns>The analysis result returned by the Ghidra service.</returns>
private async Task<GhidraAnalysisResult> RunGhidraAnalysisAsync(
    byte[] bytes,
    CpuArchitecture? archHint,
    BinaryFormat? formatHint,
    CancellationToken ct)
{
    // Timestamp plus GUID keeps concurrent analyses from colliding on the file name.
    var tempPath = Path.Combine(
        _options.WorkDir,
        $"disasm_{_timeProvider.GetUtcNow():yyyyMMddHHmmssfff}_{Guid.NewGuid():N}.bin");

    try
    {
        Directory.CreateDirectory(Path.GetDirectoryName(tempPath)!);

        // ConfigureAwait(false): the synchronous LoadBinary caller blocks on this task, so
        // continuations must not be posted back to a captured synchronization context.
        await File.WriteAllBytesAsync(tempPath, bytes, ct).ConfigureAwait(false);

        var options = new GhidraAnalysisOptions
        {
            RunFullAnalysis = true,
            ExtractStrings = false, // Not needed for disassembly
            ExtractFunctions = true,
            ExtractDecompilation = false, // Can be expensive
            TimeoutSeconds = _options.DefaultTimeoutSeconds
        };

        // Add architecture hint if provided
        if (archHint.HasValue)
        {
            options = options with { ProcessorHint = MapToGhidraProcessor(archHint.Value) };
        }

        // The stream is disposed when the try block exits, i.e. before the finally
        // clause attempts to delete the file.
        using var stream = File.OpenRead(tempPath);
        return await _ghidraService.AnalyzeAsync(stream, options, ct).ConfigureAwait(false);
    }
    finally
    {
        TryDeleteFile(tempPath);
    }
}
|
|
|
|
/// <summary>
/// Produces placeholder instructions for a function. The analysis result supplies
/// function boundaries only, so the body is sliced into fixed-size chunks rather than
/// decoded into real instructions.
/// </summary>
/// <param name="func">Function whose address range is sliced.</param>
/// <param name="handle">Handle carrying the analysis result and the raw binary bytes.</param>
/// <returns>
/// At least one and at most 1000 entries with mnemonic <c>GHIDRA</c>; the first is tagged
/// <see cref="InstructionKind.Call"/>, the rest <see cref="InstructionKind.Unknown"/>.
/// </returns>
private static IEnumerable<DisassembledInstruction> DisassembleFunctionInstructions(
    GhidraFunction func,
    GhidraBinaryHandle handle)
{
    // Upper bound on placeholder entries emitted per function.
    const int MaxPlaceholders = 1000;

    var info = handle.Result.Metadata;

    // Assumed average instruction length (x86/x64 is roughly 3-4 bytes):
    // 4 bytes for 64-bit address spaces, 3 otherwise.
    var stride = info.AddressSize == 64 ? 4 : 3;
    var budget = Math.Min(MaxPlaceholders, Math.Max(1, func.Size / stride));

    var cursor = func.Address;
    var index = 0;
    while (index < budget)
    {
        var chunk = ExtractBytes(handle.Bytes, cursor, info.ImageBase, stride);

        var kind = InstructionKind.Unknown;
        if (index == 0)
        {
            kind = InstructionKind.Call;
        }

        yield return new DisassembledInstruction(
            Address: cursor,
            RawBytes: chunk,
            Mnemonic: "GHIDRA", // Placeholder - a real listing would carry actual mnemonics
            OperandsText: $"; function {func.Name} + 0x{cursor - func.Address:X}",
            Kind: kind,
            Operands: []);

        // Advance, and stop once the cursor has reached the end of the function.
        cursor += (ulong)stride;
        if (cursor >= func.Address + (ulong)func.Size)
        {
            yield break;
        }

        index++;
    }
}
|
|
|
|
/// <summary>
/// Copies up to <paramref name="count"/> bytes from <paramref name="binary"/>, starting at
/// the offset obtained by subtracting <paramref name="imageBase"/> from <paramref name="address"/>.
/// </summary>
/// <param name="binary">Raw binary contents.</param>
/// <param name="address">Address of the first byte to copy.</param>
/// <param name="imageBase">Address that corresponds to offset 0 of <paramref name="binary"/>.</param>
/// <param name="count">Maximum number of bytes to copy.</param>
/// <returns>
/// The requested bytes, truncated at the end of the buffer; empty when
/// <paramref name="count"/> is not positive or the address falls outside the buffer.
/// </returns>
private static ImmutableArray<byte> ExtractBytes(byte[] binary, ulong address, ulong imageBase, int count)
{
    // An address below the image base must not wrap around: the unsigned difference
    // could otherwise land inside the buffer and return unrelated bytes.
    if (count <= 0 || address < imageBase)
    {
        return [];
    }

    var offset = address - imageBase;
    if (offset >= (ulong)binary.Length)
    {
        return [];
    }

    // offset < binary.Length here, so the narrowing cast cannot lose information.
    var start = (int)offset;
    var length = Math.Min(count, binary.Length - start);

    // Single copy straight from the source array.
    return ImmutableArray.Create(binary, start, length);
}
|
|
|
|
/// <summary>
/// Finds the name of the first memory block whose half-open range
/// <c>[Start, End)</c> contains <paramref name="address"/>.
/// </summary>
/// <param name="blocks">Memory blocks to search, in order.</param>
/// <param name="address">Address to locate.</param>
/// <returns>The containing block's name, or <see langword="null"/> when no block contains the address.</returns>
private static string? DetermineSection(ImmutableArray<GhidraMemoryBlock> blocks, ulong address)
{
    for (var index = 0; index < blocks.Length; index++)
    {
        var candidate = blocks[index];
        var outside = address < candidate.Start || address >= candidate.End;
        if (!outside)
        {
            return candidate.Name;
        }
    }

    return null;
}
|
|
|
|
/// <summary>
/// Returns the Ghidra handle attached to <paramref name="binary"/>.
/// </summary>
/// <param name="binary">Binary whose <c>Handle</c> is inspected.</param>
/// <exception cref="ArgumentException">The handle is missing or is not a <see cref="GhidraBinaryHandle"/>.</exception>
private static GhidraBinaryHandle GetHandle(BinaryInfo binary) =>
    binary.Handle as GhidraBinaryHandle
        ?? throw new ArgumentException("Invalid binary handle - not a Ghidra handle", nameof(binary));
|
|
|
|
/// <summary>
/// Maps a format name from the Ghidra metadata to a <see cref="BinaryFormat"/>.
/// </summary>
/// <param name="ghidraFormat">Format name; matched case-insensitively against a fixed list.</param>
/// <returns>The matching format, or <see cref="BinaryFormat.Unknown"/> for unrecognised names.</returns>
private static BinaryFormat MapFormat(string ghidraFormat)
{
    // Upper-case once with the invariant culture so the labels below can be literals.
    switch (ghidraFormat.ToUpperInvariant())
    {
        case "ELF":
        case "ELF32":
        case "ELF64":
            return BinaryFormat.ELF;

        case "PE":
        case "PE32":
        case "PE64":
        case "COFF":
            return BinaryFormat.PE;

        case "MACHO":
        case "MACH-O":
        case "MACHO32":
        case "MACHO64":
            return BinaryFormat.MachO;

        case "WASM":
        case "WEBASSEMBLY":
            return BinaryFormat.WASM;

        case "RAW":
        case "BINARY":
            return BinaryFormat.Raw;

        default:
            return BinaryFormat.Unknown;
    }
}
|
|
|
|
/// <summary>
/// Maps an architecture name or language identifier from the Ghidra metadata to a
/// <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="ghidraArch">
/// Architecture name or <c>processor:endian:size:variant</c> identifier; matched case-insensitively.
/// </param>
/// <param name="addressSize">Address size in bits, used to disambiguate prefix matches.</param>
/// <returns>The matching architecture, or <see cref="CpuArchitecture.Unknown"/> when nothing matches.</returns>
/// <remarks>
/// Each family lists exact names first and then falls back to a prefix match. Every
/// identifier produced by <c>MapToGhidraProcessor</c> maps back to the architecture it
/// was produced from, including the <c>PowerPC</c>, <c>Wasm</c> and <c>EVM</c> spellings.
/// </remarks>
private static CpuArchitecture MapArchitecture(string ghidraArch, int addressSize)
{
    var arch = ghidraArch.ToUpperInvariant();
    return arch switch
    {
        // Intel x86/x64
        "X86" or "X86:LE:32:DEFAULT" => CpuArchitecture.X86,
        "X86-64" or "X86:LE:64:DEFAULT" or "AMD64" => CpuArchitecture.X86_64,
        var x when x.StartsWith("X86", StringComparison.Ordinal) && addressSize == 32 => CpuArchitecture.X86,
        var x when x.StartsWith("X86", StringComparison.Ordinal) => CpuArchitecture.X86_64,

        // ARM
        "ARM" or "ARM:LE:32:V7" or "ARM:LE:32:V8" or "ARMV7" => CpuArchitecture.ARM32,
        "AARCH64" or "ARM:LE:64:V8A" or "ARM64" => CpuArchitecture.ARM64,
        var a when a.StartsWith("ARM", StringComparison.Ordinal) && addressSize == 32 => CpuArchitecture.ARM32,
        var a when a.StartsWith("ARM", StringComparison.Ordinal) || a.StartsWith("AARCH", StringComparison.Ordinal) => CpuArchitecture.ARM64,

        // MIPS
        "MIPS" or "MIPS:BE:32:DEFAULT" or "MIPS:LE:32:DEFAULT" => CpuArchitecture.MIPS32,
        "MIPS64" or "MIPS:BE:64:DEFAULT" or "MIPS:LE:64:DEFAULT" => CpuArchitecture.MIPS64,
        var m when m.StartsWith("MIPS", StringComparison.Ordinal) && addressSize == 64 => CpuArchitecture.MIPS64,
        var m when m.StartsWith("MIPS", StringComparison.Ordinal) => CpuArchitecture.MIPS32,

        // RISC-V
        "RISCV" or "RISCV:LE:64:RV64" or "RISCV64" => CpuArchitecture.RISCV64,
        var r when r.StartsWith("RISCV", StringComparison.Ordinal) => CpuArchitecture.RISCV64,

        // PowerPC - the 64-bit check covers both the "PPC" and "POWERPC" spellings so that
        // identifiers such as "PowerPC:BE:64:default" are not reported as 32-bit.
        "PPC" or "POWERPC" or "PPC:BE:32:DEFAULT" => CpuArchitecture.PPC32,
        "PPC64" or "POWERPC64" or "PPC:BE:64:DEFAULT" => CpuArchitecture.PPC64,
        var p when (p.StartsWith("PPC", StringComparison.Ordinal) || p.StartsWith("POWERPC", StringComparison.Ordinal))
            && addressSize == 64 => CpuArchitecture.PPC64,
        var p when p.StartsWith("PPC", StringComparison.Ordinal) || p.StartsWith("POWERPC", StringComparison.Ordinal) => CpuArchitecture.PPC32,

        // SPARC
        "SPARC" or "SPARC:BE:32:DEFAULT" => CpuArchitecture.SPARC,
        var s when s.StartsWith("SPARC", StringComparison.Ordinal) => CpuArchitecture.SPARC,

        // SuperH
        "SH4" or "SUPERH" or "SH:LE:32:SH4" => CpuArchitecture.SH4,
        var s when s.StartsWith("SH", StringComparison.Ordinal) || s.StartsWith("SUPERH", StringComparison.Ordinal) => CpuArchitecture.SH4,

        // AVR
        "AVR" or "AVR8:LE:16:DEFAULT" => CpuArchitecture.AVR,
        var a when a.StartsWith("AVR", StringComparison.Ordinal) => CpuArchitecture.AVR,

        // WASM - prefix match so full identifiers such as "Wasm:LE:32:default" are recognised
        "WASM" or "WEBASSEMBLY" => CpuArchitecture.WASM,
        var w when w.StartsWith("WASM", StringComparison.Ordinal) || w.StartsWith("WEBASSEMBLY", StringComparison.Ordinal) => CpuArchitecture.WASM,

        // EVM (Ethereum) - prefix match so "EVM:BE:256:default" is recognised
        "EVM" => CpuArchitecture.EVM,
        var e when e.StartsWith("EVM", StringComparison.Ordinal) => CpuArchitecture.EVM,

        _ => CpuArchitecture.Unknown
    };
}
|
|
|
|
/// <summary>
/// Returns the processor identifier string passed to Ghidra as a hint for the given architecture.
/// </summary>
/// <param name="arch">Architecture to translate.</param>
/// <returns>
/// A <c>processor:endian:size:variant</c> string, or <see langword="null"/> when the
/// architecture has no entry in the table.
/// </returns>
private static string? MapToGhidraProcessor(CpuArchitecture arch)
{
    switch (arch)
    {
        case CpuArchitecture.X86:
            return "x86:LE:32:default";
        case CpuArchitecture.X86_64:
            return "x86:LE:64:default";
        case CpuArchitecture.ARM32:
            return "ARM:LE:32:v7";
        case CpuArchitecture.ARM64:
            return "AARCH64:LE:64:v8A";
        case CpuArchitecture.MIPS32:
            return "MIPS:BE:32:default";
        case CpuArchitecture.MIPS64:
            return "MIPS:BE:64:default";
        case CpuArchitecture.RISCV64:
            return "RISCV:LE:64:RV64IC";
        case CpuArchitecture.PPC32:
            return "PowerPC:BE:32:default";
        case CpuArchitecture.PPC64:
            return "PowerPC:BE:64:default";
        case CpuArchitecture.SPARC:
            return "sparc:BE:32:default";
        case CpuArchitecture.SH4:
            return "SuperH4:LE:32:default";
        case CpuArchitecture.AVR:
            return "avr8:LE:16:default";
        case CpuArchitecture.WASM:
            return "Wasm:LE:32:default";
        case CpuArchitecture.EVM:
            return "EVM:BE:256:default";
        default:
            return null;
    }
}
|
|
|
|
/// <summary>
/// Chooses an ABI label from the container format alone.
/// </summary>
/// <param name="format">Detected binary format.</param>
/// <returns>
/// <c>"gnu"</c> for ELF, <c>"msvc"</c> for PE, <c>"darwin"</c> for Mach-O, otherwise
/// <see langword="null"/>.
/// </returns>
private static string? DetectAbi(BinaryFormat format)
{
    if (format == BinaryFormat.ELF)
    {
        return "gnu";
    }

    if (format == BinaryFormat.PE)
    {
        return "msvc";
    }

    if (format == BinaryFormat.MachO)
    {
        return "darwin";
    }

    return null;
}
|
|
|
|
/// <summary>
/// Deletes the file at <paramref name="path"/> if it exists. Never throws.
/// </summary>
/// <param name="path">Path of the file to remove.</param>
private static void TryDeleteFile(string path)
{
    try
    {
        if (!File.Exists(path))
        {
            return;
        }

        File.Delete(path);
    }
    catch (Exception)
    {
        // Best-effort cleanup: a leftover temporary file must not fail the caller.
    }
}
|
|
|
|
#endregion
|
|
|
|
/// <summary>
/// Marks the plugin as disposed. Calling it more than once has no further effect.
/// </summary>
public void Dispose()
{
    // Only the flag changes here; it is what the ObjectDisposedException guards test.
    if (!_disposed)
    {
        _disposed = true;
    }
}
|
|
}
|
|
|
|
/// <summary>
/// Opaque state the plugin attaches to a loaded binary: the Ghidra analysis result
/// together with the bytes that were analysed.
/// </summary>
/// <param name="Result">Analysis result produced for the binary.</param>
/// <param name="Bytes">Raw contents of the binary that was analysed.</param>
internal sealed record GhidraBinaryHandle(GhidraAnalysisResult Result, byte[] Bytes);
|