// -----------------------------------------------------------------------------
// IDecompilerAdapter.cs
// Sprint: SPRINT_20260119_006 ML Embeddings Corpus
// Task: MLEM-004 - Decompiled Code Extraction
// Description: Interface for decompiler integration.
// -----------------------------------------------------------------------------

namespace StellaOps.BinaryIndex.ML.Training;

/// <summary>
/// Adapter for decompiler integration. Exposes decompilation of a named library
/// function or of raw function bytes to C-like code, plus normalization of the
/// resulting code for ML input.
/// </summary>
public interface IDecompilerAdapter
{
    /// <summary>
    /// Decompiles a function, identified by library, version and function name, to C-like code.
    /// </summary>
    /// <param name="libraryName">Library name.</param>
    /// <param name="version">Library version.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A task whose result is the decompiled code. The result is declared nullable;
    /// when <see langword="null"/> is produced is left to the implementation.
    /// </returns>
    Task<string?> DecompileAsync(
        string libraryName,
        string version,
        string functionName,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Decompiles raw bytes to C-like code.
    /// </summary>
    /// <param name="bytes">Function bytes.</param>
    /// <param name="architecture">Target architecture.</param>
    /// <param name="options">
    /// Decompilation options. Optional; <see langword="null"/> when omitted.
    /// How a missing value is treated is left to the implementation.
    /// </param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A task whose result is the decompiled code. The result is declared nullable;
    /// when <see langword="null"/> is produced is left to the implementation.
    /// </returns>
    Task<string?> DecompileBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string architecture,
        DecompilationOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Normalizes decompiled code for ML input.
    /// </summary>
    /// <param name="code">Raw decompiled code.</param>
    /// <param name="options">
    /// Normalization options. Optional; <see langword="null"/> when omitted.
    /// How a missing value is treated is left to the implementation.
    /// </param>
    /// <returns>Normalized code.</returns>
    string Normalize(string code, NormalizationOptions? options = null);
}

/// <summary>
/// Options for decompilation. Immutable after construction; use a
/// <c>with</c> expression or an object initializer to override individual values.
/// </summary>
public sealed record DecompilationOptions
{
    /// <summary>
    /// Gets the decompiler to use. Defaults to <see cref="DecompilerType.Ghidra"/>.
    /// </summary>
    public DecompilerType Decompiler { get; init; } = DecompilerType.Ghidra;

    /// <summary>
    /// Gets a value indicating whether to simplify the output.
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool Simplify { get; init; } = true;

    /// <summary>
    /// Gets the timeout for decompilation. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Gets a shared instance in which every option has its default value.
    /// </summary>
    public static DecompilationOptions Default { get; } = new();
}

|
/// <summary>
/// Available decompilers.
/// </summary>
public enum DecompilerType
{
    /// <summary>
    /// Ghidra decompiler. First member, so it is the enum's zero value.
    /// </summary>
    Ghidra,

    /// <summary>
    /// RetDec decompiler.
    /// </summary>
    RetDec,

    /// <summary>
    /// Hex-Rays decompiler (IDA Pro).
    /// </summary>
    HexRays
}

/// <summary>
/// Options for code normalization. Immutable after construction; use a
/// <c>with</c> expression or an object initializer to override individual values.
/// </summary>
public sealed record NormalizationOptions
{
    /// <summary>
    /// Gets a value indicating whether to strip comments.
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool StripComments { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether to normalize variable names.
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool NormalizeVariables { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether to normalize whitespace.
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool NormalizeWhitespace { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether to remove type casts.
    /// Defaults to <see langword="false"/>.
    /// </summary>
    public bool RemoveTypeCasts { get; init; } = false;

    /// <summary>
    /// Gets the maximum length. Defaults to 2048.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably the cap on the normalized output; the unit
    /// (characters vs. tokens) is not stated in this file - confirm against implementations.
    /// </remarks>
    public int MaxLength { get; init; } = 2048;

    /// <summary>
    /// Gets a shared instance in which every option has its default value.
    /// </summary>
    public static NormalizationOptions Default { get; } = new();
}