Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.ML/Training/IDecompilerAdapter.cs
2026-01-20 00:45:38 +02:00

134 lines
3.8 KiB
C#

// -----------------------------------------------------------------------------
// IDecompilerAdapter.cs
// Sprint: SPRINT_20260119_006 ML Embeddings Corpus
// Task: MLEM-004 - Decompiled Code Extraction
// Description: Interface for decompiler integration.
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.ML.Training;
/// <summary>
/// Abstraction over a decompiler backend: turns functions or raw machine code
/// into C-like source text and prepares that text for use as ML input.
/// </summary>
public interface IDecompilerAdapter
{
    /// <summary>
    /// Decompiles a function, identified by library, version and function name,
    /// to C-like code.
    /// </summary>
    /// <param name="libraryName">Name of the library that contains the function.</param>
    /// <param name="version">Version of the library.</param>
    /// <param name="functionName">Name of the function to decompile.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// A task that yields the decompiled code. The result is nullable; the
    /// conditions under which <c>null</c> is produced are left to the
    /// implementation (presumably when no code could be produced - confirm
    /// against implementations).
    /// </returns>
    Task<string?> DecompileAsync(
        string libraryName,
        string version,
        string functionName,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Decompiles the raw bytes of a function to C-like code.
    /// </summary>
    /// <param name="bytes">Bytes of the function to decompile.</param>
    /// <param name="architecture">
    /// Target architecture of <paramref name="bytes"/>. The accepted identifiers
    /// are not specified by this interface.
    /// </param>
    /// <param name="options">
    /// Decompilation options; may be <c>null</c>. How a <c>null</c> value is
    /// treated is up to the implementation (presumably falls back to
    /// <see cref="DecompilationOptions.Default"/> - confirm).
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// A task that yields the decompiled code. The result is nullable; the
    /// conditions under which <c>null</c> is produced are left to the
    /// implementation.
    /// </returns>
    Task<string?> DecompileBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string architecture,
        DecompilationOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Normalizes decompiled code so it can be used as ML input.
    /// </summary>
    /// <param name="code">Raw decompiled code.</param>
    /// <param name="options">
    /// Normalization options; may be <c>null</c>. How a <c>null</c> value is
    /// treated is up to the implementation (presumably falls back to
    /// <see cref="NormalizationOptions.Default"/> - confirm).
    /// </param>
    /// <returns>The normalized code.</returns>
    string Normalize(string code, NormalizationOptions? options = null);
}
/// <summary>
/// Immutable settings that control a decompilation request.
/// </summary>
public sealed record DecompilationOptions
{
    /// <summary>
    /// Gets the decompiler backend to use. Defaults to
    /// <see cref="DecompilerType.Ghidra"/>.
    /// </summary>
    public DecompilerType Decompiler { get; init; } = DecompilerType.Ghidra;

    /// <summary>
    /// Gets a value indicating whether the output should be simplified.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool Simplify { get; init; } = true;

    /// <summary>
    /// Gets the timeout for decompilation. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Gets a shared instance in which every property has its default value.
    /// </summary>
    public static DecompilationOptions Default { get; } = new DecompilationOptions();
}
/// <summary>
/// Decompiler backends that can be selected. Numeric values are assigned
/// explicitly in declaration order, starting at zero.
/// </summary>
public enum DecompilerType
{
    /// <summary>
    /// Ghidra decompiler. Has the value zero, so it is also
    /// <c>default(DecompilerType)</c>.
    /// </summary>
    Ghidra = 0,

    /// <summary>
    /// RetDec decompiler.
    /// </summary>
    RetDec = 1,

    /// <summary>
    /// Hex-Rays decompiler (IDA Pro).
    /// </summary>
    HexRays = 2,
}
/// <summary>
/// Immutable settings that control how decompiled code is normalized.
/// </summary>
public sealed record NormalizationOptions
{
    /// <summary>
    /// Gets a value indicating whether comments are stripped.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool StripComments { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether variable names are normalized.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool NormalizeVariables { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether whitespace is normalized.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool NormalizeWhitespace { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether type casts are removed.
    /// Defaults to <c>false</c> (the language default for <see cref="bool"/>).
    /// </summary>
    public bool RemoveTypeCasts { get; init; }

    /// <summary>
    /// Gets the maximum length. Defaults to 2048.
    /// NOTE(review): the unit (characters vs. tokens) and what the limit is
    /// applied to are not specified here - confirm against implementations.
    /// </summary>
    public int MaxLength { get; init; } = 2048;

    /// <summary>
    /// Gets a shared instance in which every property has its default value.
    /// </summary>
    public static NormalizationOptions Default { get; } = new NormalizationOptions();
}