229 lines
6.7 KiB
C#
229 lines
6.7 KiB
C#
using StellaOps.BinaryIndex.Semantic;
|
|
|
|
namespace StellaOps.BinaryIndex.Builders;
|
|
|
|
/// <summary>
/// Contract for components that derive function-level fingerprints from binaries.
/// Several hashing strategies are combined so that matching stays robust.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Fingerprints the functions of the binary file located at <paramref name="binaryPath"/>.
    /// </summary>
    /// <param name="binaryPath">Path to the binary file to analyze.</param>
    /// <param name="options">Extraction settings; optional, may be <see langword="null"/>.</param>
    /// <param name="ct">Token that can be used to cancel the operation.</param>
    /// <returns>A task producing the list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(
        string binaryPath,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Fingerprints the functions of a binary whose contents are already held in memory.
    /// </summary>
    /// <param name="binaryData">The raw contents of the binary file.</param>
    /// <param name="options">Extraction settings; optional, may be <see langword="null"/>.</param>
    /// <param name="ct">Token that can be used to cancel the operation.</param>
    /// <returns>A task producing the list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(
        ReadOnlyMemory<byte> binaryData,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// The binary formats this extractor supports.
    /// </summary>
    IReadOnlyList<string> SupportedFormats { get; }
}
|
|
|
|
/// <summary>
/// Fingerprint of one function inside a binary.
/// Carries several independent hashes so the function can be matched robustly across versions.
/// </summary>
/// <remarks>
/// The compiler-generated record equality compares the <c>byte[]</c> and list members by
/// reference, not by content; two instances holding equal-but-distinct arrays are not equal.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Name of the function: either its symbol name or a name synthesized from its offset.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Position of the function, expressed as an offset within the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Length of the function in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash over the basic block structure (the opcode sequence with operands ignored).
    /// Intended to remain more stable when recompilation changes addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash over the shape of the control flow graph.
    /// Reflects branch patterns independently of the branch target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash over the strings referenced by the function.
    /// Helps identify functions by the specific error messages or constants they use.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Single fingerprint hash merging all algorithms; <see langword="null"/> when not set.
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// Functions that this function calls; <see langword="null"/> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// Functions that call this function; <see langword="null"/> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// <see langword="true"/> when the function is an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// <see langword="true"/> when debug information is available for the function.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Path of the source file, when debug information is available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Line number in the source file, when debug information is available.
    /// </summary>
    public int? SourceLine { get; init; }

    /// <summary>
    /// Semantic fingerprint used for enhanced similarity comparison.
    /// Based on IR-level analysis to be resilient to compiler optimizations.
    /// </summary>
    public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
}
|
|
|
|
/// <summary>
/// Settings that control how function fingerprints are extracted.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Include internal/static (non-exported) functions. Defaults to <see langword="false"/>.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; }

    /// <summary>
    /// Build the call graph (callees/callers). Defaults to <see langword="true"/>.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, that is included. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Largest function size, in bytes, that is included; 0 means no limit. Defaults to 0.
    /// </summary>
    public int MaxFunctionSize { get; init; }

    /// <summary>
    /// Regex selecting the function names to include; <see langword="null"/> means all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex selecting the function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Compute the combined hash. Defaults to <see langword="true"/>.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Extract debug information (source file/line). Defaults to <see langword="false"/>.
    /// </summary>
    public bool ExtractDebugInfo { get; init; }
}
|
|
|
|
/// <summary>
/// Describes how a single function differs between two versions of a binary.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Name of the function the change applies to.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// The kind of change that was detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint taken from the vulnerable version; <see langword="null"/> for an Added change.
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint taken from the patched version; <see langword="null"/> for a Removed change.
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// Similarity between the two versions, in the range 0.0-1.0, for Modified changes.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// The hash algorithms whose results differed between the versions.
    /// </summary>
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}
|
|
|
|
/// <summary>
/// Kinds of change a function can undergo between two versions.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// The function is new in the patched version.
    /// </summary>
    Added = 0,

    /// <summary>
    /// The function was modified (its fingerprint changed).
    /// </summary>
    Modified = 1,

    /// <summary>
    /// The function no longer exists in the patched version.
    /// </summary>
    Removed = 2,

    /// <summary>
    /// The function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged = 3
}
|