// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.

using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.Disassembly;

namespace StellaOps.BinaryIndex.Ghidra;

/// <summary>
/// Ghidra-based disassembly plugin providing broad architecture support as a fallback backend.
/// Ghidra is used for complex cases where B2R2 has limited coverage, supports 20+ architectures,
/// and provides mature decompilation, Version Tracking, and BSim capabilities.
/// </summary>
/// <remarks>
/// This plugin has lower priority than B2R2 since Ghidra requires external process invocation
/// (Java-based headless analysis) which is slower than native .NET disassembly. It serves as
/// the fallback when B2R2 returns low-confidence results or for architectures B2R2 handles poorly.
/// </remarks>
public sealed class GhidraDisassemblyPlugin : IDisassemblyPlugin, IDisposable
{
    /// <summary>
    /// Plugin identifier.
    /// </summary>
    public const string PluginId = "stellaops.disasm.ghidra";

    // Upper bound on the number of placeholder instructions synthesized per function.
    private const int MaxSyntheticInstructions = 1000;

    private static readonly DisassemblyCapabilities s_capabilities = new()
    {
        PluginId = PluginId,
        Name = "Ghidra Disassembler",
        Version = "11.x", // Ghidra 11.x
        SupportedArchitectures =
        [
            // All architectures supported by both B2R2 and Ghidra
            CpuArchitecture.X86,
            CpuArchitecture.X86_64,
            CpuArchitecture.ARM32,
            CpuArchitecture.ARM64,
            CpuArchitecture.MIPS32,
            CpuArchitecture.MIPS64,
            CpuArchitecture.RISCV64,
            CpuArchitecture.PPC32,
            CpuArchitecture.PPC64, // Ghidra supports PPC64 better than B2R2
            CpuArchitecture.SPARC,
            CpuArchitecture.SH4,
            CpuArchitecture.AVR,
            // Additional architectures Ghidra supports
            CpuArchitecture.WASM
        ],
        SupportedFormats =
        [
            BinaryFormat.ELF,
            BinaryFormat.PE,
            BinaryFormat.MachO,
            BinaryFormat.WASM,
            BinaryFormat.Raw
        ],
        SupportsLifting = true, // P-Code lifting
        SupportsCfgRecovery = true, // Full CFG recovery and decompilation
        Priority = 25 // Lower than B2R2 (50) - used as fallback
    };

    private readonly IGhidraService _ghidraService;
    private readonly GhidraOptions _options;
    private readonly ILogger<GhidraDisassemblyPlugin> _logger;
    private readonly TimeProvider _timeProvider;
    private bool _disposed;

    /// <summary>
    /// Creates a new Ghidra disassembly plugin.
    /// </summary>
    /// <param name="ghidraService">The Ghidra analysis service.</param>
    /// <param name="options">Ghidra options.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="timeProvider">Time provider for timestamps.</param>
    /// <exception cref="ArgumentNullException">Any argument (or the options value) is null.</exception>
    public GhidraDisassemblyPlugin(
        IGhidraService ghidraService,
        IOptions<GhidraOptions> options,
        ILogger<GhidraDisassemblyPlugin> logger,
        TimeProvider timeProvider)
    {
        _ghidraService = ghidraService ?? throw new ArgumentNullException(nameof(ghidraService));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <inheritdoc />
    public DisassemblyCapabilities Capabilities => s_capabilities;

    /// <inheritdoc />
    public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ObjectDisposedException.ThrowIf(_disposed, this);

        // Copy stream to memory for analysis. ToArray() yields a buffer owned by this
        // call, so it is handed to the core loader directly instead of being copied again.
        using var memStream = new MemoryStream();
        stream.CopyTo(memStream);
        return LoadBinaryCore(memStream.ToArray(), archHint, formatHint);
    }

    /// <inheritdoc />
    public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // The span cannot be retained, so take a private copy for the binary handle.
        return LoadBinaryCore(bytes.ToArray(), archHint, formatHint);
    }

    /// <inheritdoc />
    public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
    {
        // Validate eagerly: iterator bodies do not run until first enumeration.
        ArgumentNullException.ThrowIfNull(binary);
        ObjectDisposedException.ThrowIf(_disposed, this);
        return Enumerate(GetHandle(binary));

        static IEnumerable<CodeRegion> Enumerate(GhidraBinaryHandle handle)
        {
            // Extract code regions from Ghidra memory blocks
            foreach (var block in handle.Result.MemoryBlocks)
            {
                if (!block.IsExecutable)
                {
                    continue;
                }

                yield return new CodeRegion(
                    Name: block.Name,
                    VirtualAddress: block.Start,
                    // Image-relative offset; computed as block start minus image base.
                    FileOffset: block.Start - handle.Result.Metadata.ImageBase,
                    Size: (ulong)block.Size,
                    IsExecutable: block.IsExecutable,
                    IsReadable: true, // Executable sections are readable
                    IsWritable: block.IsWritable);
            }
        }
    }

    /// <inheritdoc />
    public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
    {
        // Validate eagerly: iterator bodies do not run until first enumeration.
        ArgumentNullException.ThrowIfNull(binary);
        ObjectDisposedException.ThrowIf(_disposed, this);
        return Enumerate(GetHandle(binary));

        static IEnumerable<SymbolInfo> Enumerate(GhidraBinaryHandle handle)
        {
            var blocks = handle.Result.MemoryBlocks;

            // Map functions to symbols
            foreach (var func in handle.Result.Functions)
            {
                var binding = func.IsExternal ? SymbolBinding.Global : SymbolBinding.Local;
                yield return new SymbolInfo(
                    Name: func.Name,
                    Address: func.Address,
                    Size: (ulong)func.Size,
                    Type: SymbolType.Function,
                    Binding: binding,
                    Section: DetermineSection(blocks, func.Address));
            }

            // Also include exports as symbols
            foreach (var export in handle.Result.Exports)
            {
                yield return new SymbolInfo(
                    Name: export.Name,
                    Address: export.Address,
                    Size: 0, // Unknown size for exports
                    Type: SymbolType.Function,
                    Binding: SymbolBinding.Global,
                    Section: DetermineSection(blocks, export.Address));
            }
        }
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
    {
        // Validate eagerly: iterator bodies do not run until first enumeration.
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(region);
        ObjectDisposedException.ThrowIf(_disposed, this);
        return EnumerateRegionInstructions(GetHandle(binary), region);
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length)
    {
        // Wrap the raw address range in an ad-hoc region and reuse the region overload,
        // which performs all argument validation.
        var region = new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }

    /// <inheritdoc />
    public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
    {
        // Validate eagerly: iterator bodies do not run until first enumeration.
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(symbol);
        ObjectDisposedException.ThrowIf(_disposed, this);
        return EnumerateSymbolInstructions(GetHandle(binary), symbol);
    }

    /// <summary>
    /// Disposes the plugin and releases resources.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
    }

    #region Private Methods

    /// <summary>
    /// Runs Ghidra analysis over <paramref name="byteArray"/> and maps the result to a <see cref="BinaryInfo"/>.
    /// The array is stored in the returned handle, so callers must pass a buffer they own.
    /// </summary>
    private BinaryInfo LoadBinaryCore(byte[] byteArray, CpuArchitecture? archHint, BinaryFormat? formatHint)
    {
        _logger.LogDebug("Loading binary with Ghidra plugin (size: {Size} bytes)", byteArray.Length);

        // Run Ghidra analysis synchronously for IDisassemblyPlugin contract.
        // RunGhidraAnalysisAsync uses ConfigureAwait(false) so blocking here cannot
        // deadlock on a captured synchronization context.
        var result = RunGhidraAnalysisAsync(byteArray, archHint, formatHint, CancellationToken.None)
            .GetAwaiter()
            .GetResult();

        // Map Ghidra metadata to BinaryInfo
        var format = MapFormat(result.Metadata.Format);
        var architecture = MapArchitecture(result.Metadata.Architecture, result.Metadata.AddressSize);
        var endianness = result.Metadata.Endianness.Equals("little", StringComparison.OrdinalIgnoreCase)
            ? Endianness.Little
            : Endianness.Big;
        var abi = DetectAbi(format);

        _logger.LogInformation(
            "Loaded binary with Ghidra: Format={Format}, Architecture={Architecture}, Processor={Processor}",
            format,
            architecture,
            result.Metadata.Processor);

        var metadata = new Dictionary<string, object>
        {
            ["size"] = byteArray.Length,
            ["ghidra_processor"] = result.Metadata.Processor,
            ["ghidra_version"] = result.Metadata.GhidraVersion,
            ["analysis_duration_ms"] = result.Metadata.AnalysisDuration.TotalMilliseconds,
            ["function_count"] = result.Functions.Length,
            ["import_count"] = result.Imports.Length,
            ["export_count"] = result.Exports.Length
        };

        if (result.Metadata.Compiler is not null)
        {
            metadata["compiler"] = result.Metadata.Compiler;
        }

        return new BinaryInfo(
            Format: format,
            Architecture: architecture,
            Bitness: result.Metadata.AddressSize,
            Endianness: endianness,
            Abi: abi,
            EntryPoint: result.Metadata.EntryPoint,
            BuildId: result.BinaryHash,
            Metadata: metadata,
            Handle: new GhidraBinaryHandle(result, byteArray));
    }

    /// <summary>
    /// Yields the synthesized instructions of every function whose entry address lies in
    /// <paramref name="region"/>, filtered to addresses inside the region.
    /// </summary>
    private IEnumerable<DisassembledInstruction> EnumerateRegionInstructions(GhidraBinaryHandle handle, CodeRegion region)
    {
        var regionEnd = region.VirtualAddress + region.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            region.Name,
            region.VirtualAddress,
            regionEnd);

        // Find functions within the region and return their instructions
        foreach (var func in handle.Result.Functions)
        {
            if (func.Address < region.VirtualAddress || func.Address >= regionEnd)
            {
                continue;
            }

            foreach (var instr in DisassembleFunctionInstructions(func, handle))
            {
                if (instr.Address >= region.VirtualAddress && instr.Address < regionEnd)
                {
                    yield return instr;
                }
            }
        }
    }

    /// <summary>
    /// Yields the synthesized instructions of the first function that matches
    /// <paramref name="symbol"/> by address or by exact name; yields nothing (and logs a
    /// warning) when no function matches.
    /// </summary>
    private IEnumerable<DisassembledInstruction> EnumerateSymbolInstructions(GhidraBinaryHandle handle, SymbolInfo symbol)
    {
        // Find the function matching the symbol
        var func = handle.Result.Functions.FirstOrDefault(f =>
            f.Address == symbol.Address || f.Name.Equals(symbol.Name, StringComparison.Ordinal));

        if (func is null)
        {
            _logger.LogWarning(
                "Function not found for symbol {Name} at 0x{Address:X}",
                symbol.Name,
                symbol.Address);
            yield break;
        }

        foreach (var instr in DisassembleFunctionInstructions(func, handle))
        {
            yield return instr;
        }
    }

    /// <summary>
    /// Writes <paramref name="bytes"/> to a uniquely named temp file under the configured work
    /// directory, submits it to the Ghidra service, and removes the temp file afterwards.
    /// </summary>
    /// <remarks>
    /// <paramref name="formatHint"/> is currently not forwarded to Ghidra; only the
    /// architecture hint is translated into a processor hint.
    /// </remarks>
    private async Task<GhidraAnalysisResult> RunGhidraAnalysisAsync(
        byte[] bytes,
        CpuArchitecture? archHint,
        BinaryFormat? formatHint,
        CancellationToken ct)
    {
        // Write bytes to temp file
        var tempPath = Path.Combine(
            _options.WorkDir,
            $"disasm_{_timeProvider.GetUtcNow():yyyyMMddHHmmssfff}_{Guid.NewGuid():N}.bin");

        try
        {
            Directory.CreateDirectory(Path.GetDirectoryName(tempPath)!);

            // ConfigureAwait(false): the public entry point blocks on this task, so the
            // continuation must not require the caller's synchronization context.
            await File.WriteAllBytesAsync(tempPath, bytes, ct).ConfigureAwait(false);

            var options = new GhidraAnalysisOptions
            {
                RunFullAnalysis = true,
                ExtractStrings = false, // Not needed for disassembly
                ExtractFunctions = true,
                ExtractDecompilation = false, // Can be expensive
                TimeoutSeconds = _options.DefaultTimeoutSeconds
            };

            // Add architecture hint if provided
            if (archHint.HasValue)
            {
                options = options with { ProcessorHint = MapToGhidraProcessor(archHint.Value) };
            }

            // The stream is scoped to the try block, so it is closed before the finally
            // clause attempts to delete the file.
            using var stream = File.OpenRead(tempPath);
            return await _ghidraService.AnalyzeAsync(stream, options, ct).ConfigureAwait(false);
        }
        finally
        {
            TryDeleteFile(tempPath);
        }
    }

    /// <summary>
    /// Produces placeholder instructions for a function. No real decoding takes place:
    /// entries are spaced at a fixed assumed instruction size and carry the mnemonic "GHIDRA".
    /// </summary>
    private static IEnumerable<DisassembledInstruction> DisassembleFunctionInstructions(
        GhidraFunction func,
        GhidraBinaryHandle handle)
    {
        // Ghidra full analysis provides function boundaries but not individual instructions.
        // A full implementation would require running a Ghidra script to export instructions;
        // for now we synthesize placeholder entries from the raw bytes.

        // Approximate instruction count from function size and an average instruction size
        // (x86/x64 average instruction size is ~3-4 bytes).
        var avgInstructionSize = handle.Result.Metadata.AddressSize == 64 ? 4 : 3;
        var estimatedInstructions = Math.Max(1, func.Size / avgInstructionSize);
        var functionEnd = func.Address + (ulong)func.Size;

        var address = func.Address;
        for (var i = 0; i < estimatedInstructions && i < MaxSyntheticInstructions; i++)
        {
            // Without actual Ghidra instruction export, we create placeholder entries.
            // Real implementation would parse Ghidra's instruction listing output.
            var rawBytes = ExtractBytes(handle.Bytes, address, handle.Result.Metadata.ImageBase, avgInstructionSize);

            yield return new DisassembledInstruction(
                Address: address,
                RawBytes: rawBytes,
                Mnemonic: "GHIDRA", // Placeholder - real impl would have actual mnemonics
                OperandsText: $"; function {func.Name} + 0x{address - func.Address:X}",
                Kind: i == 0 ? InstructionKind.Call : InstructionKind.Unknown,
                Operands: []);

            address += (ulong)avgInstructionSize;
            if (address >= functionEnd)
            {
                break;
            }
        }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes from <paramref name="binary"/> at the
    /// image-relative offset of <paramref name="address"/>.
    /// </summary>
    /// <returns>
    /// The bytes available at that offset (possibly fewer than requested), or an empty array
    /// when the address lies outside the buffer or nothing was requested.
    /// </returns>
    private static ImmutableArray<byte> ExtractBytes(byte[] binary, ulong address, ulong imageBase, int count)
    {
        // Explicit guard instead of relying on unsigned wrap-around of the subtraction below.
        if (count <= 0 || address < imageBase)
        {
            return [];
        }

        var offset = address - imageBase;
        if (offset >= (ulong)binary.Length)
        {
            return [];
        }

        var start = (int)offset;
        var available = Math.Min(count, binary.Length - start);
        return ImmutableArray.Create(binary, start, available);
    }

    /// <summary>
    /// Returns the name of the first memory block whose [Start, End) range contains
    /// <paramref name="address"/>, or <c>null</c> when no block contains it.
    /// </summary>
    private static string? DetermineSection(ImmutableArray<GhidraMemoryBlock> blocks, ulong address)
    {
        foreach (var block in blocks)
        {
            if (address >= block.Start && address < block.End)
            {
                return block.Name;
            }
        }

        return null;
    }

    /// <summary>
    /// Unwraps the Ghidra handle stored in <paramref name="binary"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The handle was not produced by this plugin.</exception>
    private static GhidraBinaryHandle GetHandle(BinaryInfo binary)
    {
        if (binary.Handle is not GhidraBinaryHandle handle)
        {
            throw new ArgumentException("Invalid binary handle - not a Ghidra handle", nameof(binary));
        }

        return handle;
    }

    /// <summary>
    /// Maps a Ghidra format name (case-insensitive) to a <see cref="BinaryFormat"/>;
    /// unrecognized names map to <see cref="BinaryFormat.Unknown"/>.
    /// </summary>
    private static BinaryFormat MapFormat(string ghidraFormat)
    {
        return ghidraFormat.ToUpperInvariant() switch
        {
            "ELF" or "ELF32" or "ELF64" => BinaryFormat.ELF,
            "PE" or "PE32" or "PE64" or "COFF" => BinaryFormat.PE,
            "MACHO" or "MACH-O" or "MACHO32" or "MACHO64" => BinaryFormat.MachO,
            "WASM" or "WEBASSEMBLY" => BinaryFormat.WASM,
            "RAW" or "BINARY" => BinaryFormat.Raw,
            _ => BinaryFormat.Unknown
        };
    }

    /// <summary>
    /// Maps a Ghidra architecture / language identifier (case-insensitive) to a
    /// <see cref="CpuArchitecture"/>. Exact names are tried first, then family prefixes
    /// disambiguated by <paramref name="addressSize"/>.
    /// </summary>
    private static CpuArchitecture MapArchitecture(string ghidraArch, int addressSize)
    {
        var arch = ghidraArch.ToUpperInvariant();
        return arch switch
        {
            // Intel x86/x64
            "X86" or "X86:LE:32:DEFAULT" => CpuArchitecture.X86,
            "X86-64" or "X86:LE:64:DEFAULT" or "AMD64" => CpuArchitecture.X86_64,
            var x when x.StartsWith("X86", StringComparison.Ordinal) && addressSize == 32 => CpuArchitecture.X86,
            var x when x.StartsWith("X86", StringComparison.Ordinal) => CpuArchitecture.X86_64,

            // ARM
            "ARM" or "ARM:LE:32:V7" or "ARM:LE:32:V8" or "ARMV7" => CpuArchitecture.ARM32,
            "AARCH64" or "ARM:LE:64:V8A" or "ARM64" => CpuArchitecture.ARM64,
            var a when a.StartsWith("ARM", StringComparison.Ordinal) && addressSize == 32 => CpuArchitecture.ARM32,
            var a when a.StartsWith("ARM", StringComparison.Ordinal) || a.StartsWith("AARCH", StringComparison.Ordinal) => CpuArchitecture.ARM64,

            // MIPS
            "MIPS" or "MIPS:BE:32:DEFAULT" or "MIPS:LE:32:DEFAULT" => CpuArchitecture.MIPS32,
            "MIPS64" or "MIPS:BE:64:DEFAULT" or "MIPS:LE:64:DEFAULT" => CpuArchitecture.MIPS64,
            var m when m.StartsWith("MIPS", StringComparison.Ordinal) && addressSize == 64 => CpuArchitecture.MIPS64,
            var m when m.StartsWith("MIPS", StringComparison.Ordinal) => CpuArchitecture.MIPS32,

            // RISC-V
            "RISCV" or "RISCV:LE:64:RV64" or "RISCV64" => CpuArchitecture.RISCV64,
            var r when r.StartsWith("RISCV", StringComparison.Ordinal) => CpuArchitecture.RISCV64,

            // PowerPC
            "PPC" or "POWERPC" or "PPC:BE:32:DEFAULT" or "POWERPC:BE:32:DEFAULT" => CpuArchitecture.PPC32,
            "PPC64" or "POWERPC64" or "PPC:BE:64:DEFAULT" or "POWERPC:BE:64:DEFAULT" => CpuArchitecture.PPC64,
            // Both spellings must be checked for 64-bit: MapToGhidraProcessor emits
            // "PowerPC:...", which previously fell through to PPC32.
            var p when IsPowerPc(p) && addressSize == 64 => CpuArchitecture.PPC64,
            var p when IsPowerPc(p) => CpuArchitecture.PPC32,

            // SPARC
            "SPARC" or "SPARC:BE:32:DEFAULT" => CpuArchitecture.SPARC,
            var s when s.StartsWith("SPARC", StringComparison.Ordinal) => CpuArchitecture.SPARC,

            // SuperH
            "SH4" or "SUPERH" or "SH:LE:32:SH4" => CpuArchitecture.SH4,
            var s when s.StartsWith("SH", StringComparison.Ordinal) || s.StartsWith("SUPERH", StringComparison.Ordinal) => CpuArchitecture.SH4,

            // AVR
            "AVR" or "AVR8:LE:16:DEFAULT" => CpuArchitecture.AVR,
            var a when a.StartsWith("AVR", StringComparison.Ordinal) => CpuArchitecture.AVR,

            // WASM (prefix arm covers the "Wasm:LE:32:default" id emitted by MapToGhidraProcessor)
            "WASM" or "WEBASSEMBLY" => CpuArchitecture.WASM,
            var w when w.StartsWith("WASM", StringComparison.Ordinal) => CpuArchitecture.WASM,

            // EVM (Ethereum) (prefix arm covers "EVM:BE:256:default")
            "EVM" => CpuArchitecture.EVM,
            var e when e.StartsWith("EVM", StringComparison.Ordinal) => CpuArchitecture.EVM,

            _ => CpuArchitecture.Unknown
        };

        static bool IsPowerPc(string upper) =>
            upper.StartsWith("PPC", StringComparison.Ordinal) ||
            upper.StartsWith("POWERPC", StringComparison.Ordinal);
    }

    /// <summary>
    /// Maps a <see cref="CpuArchitecture"/> to the Ghidra language id used as processor hint,
    /// or <c>null</c> when no mapping is defined.
    /// </summary>
    private static string? MapToGhidraProcessor(CpuArchitecture arch)
    {
        return arch switch
        {
            CpuArchitecture.X86 => "x86:LE:32:default",
            CpuArchitecture.X86_64 => "x86:LE:64:default",
            CpuArchitecture.ARM32 => "ARM:LE:32:v7",
            CpuArchitecture.ARM64 => "AARCH64:LE:64:v8A",
            CpuArchitecture.MIPS32 => "MIPS:BE:32:default",
            CpuArchitecture.MIPS64 => "MIPS:BE:64:default",
            CpuArchitecture.RISCV64 => "RISCV:LE:64:RV64IC",
            CpuArchitecture.PPC32 => "PowerPC:BE:32:default",
            CpuArchitecture.PPC64 => "PowerPC:BE:64:default",
            CpuArchitecture.SPARC => "sparc:BE:32:default",
            CpuArchitecture.SH4 => "SuperH4:LE:32:default",
            CpuArchitecture.AVR => "avr8:LE:16:default",
            CpuArchitecture.WASM => "Wasm:LE:32:default",
            CpuArchitecture.EVM => "EVM:BE:256:default",
            _ => null
        };
    }

    /// <summary>
    /// Returns a default ABI label for the given binary format, or <c>null</c> when none applies.
    /// </summary>
    private static string? DetectAbi(BinaryFormat format)
    {
        return format switch
        {
            BinaryFormat.ELF => "gnu",
            BinaryFormat.PE => "msvc",
            BinaryFormat.MachO => "darwin",
            _ => null
        };
    }

    /// <summary>
    /// Best-effort removal of a temp file. Never throws; failures are logged at debug level
    /// instead of being silently discarded.
    /// </summary>
    private void TryDeleteFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex)
        {
            // Cleanup runs from a finally block; it must not mask the analysis outcome.
            _logger.LogDebug(ex, "Failed to delete temporary file {Path}", path);
        }
    }

    #endregion
}

/// <summary>
/// Internal handle for Ghidra-analyzed binaries.
/// </summary>
/// <param name="Result">The Ghidra analysis result.</param>
/// <param name="Bytes">The original binary bytes.</param>
internal sealed record GhidraBinaryHandle(
    GhidraAnalysisResult Result,
    byte[] Bytes);