Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Builders/IFunctionFingerprintExtractor.cs
StellaOps Bot 37e11918e0 save progress
2026-01-06 09:42:20 +02:00

229 lines
6.7 KiB
C#

using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Contract for components that produce function-level fingerprints from binaries.
/// Multiple hashing strategies are used so that matching stays robust.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Extracts the function fingerprints of the binary file at <paramref name="binaryPath"/>.
    /// </summary>
    /// <param name="binaryPath">Path to the binary file.</param>
    /// <param name="options">Extraction options; optional, <c>null</c> when omitted.</param>
    /// <param name="ct">Token used to request cancellation.</param>
    /// <returns>The list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(string binaryPath, ExtractionOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Extracts function fingerprints from a binary whose contents are already held in memory.
    /// </summary>
    /// <param name="binaryData">Contents of the binary file.</param>
    /// <param name="options">Extraction options; optional, <c>null</c> when omitted.</param>
    /// <param name="ct">Token used to request cancellation.</param>
    /// <returns>The list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(ReadOnlyMemory<byte> binaryData, ExtractionOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Binary formats this extractor supports.
    /// </summary>
    IReadOnlyList<string> SupportedFormats { get; }
}
/// <summary>
/// Fingerprint of one function inside a binary.
/// Carries several independent hashes so functions can be matched robustly across versions.
/// </summary>
/// <remarks>
/// The hash members are <c>byte[]</c>. The equality synthesized for records compares
/// array-typed members by reference, so two instances holding equal bytes in different
/// array objects do not compare equal; compare the hash contents explicitly when needed.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Name of the function: either its symbol name or a name synthesized from its offset.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Where the function starts, as an offset within the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Function size, in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash over the basic block structure (the opcode sequence with operands ignored).
    /// More stable than a raw byte hash when recompilation shifts addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash over the structure of the control flow graph.
    /// Reflects branch patterns independently of the branch target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash over the strings the function references.
    /// Helps identify functions that use particular error messages or constants.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Combined fingerprint hash that merges all algorithms; optional.
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// Functions that this function calls; optional.
    /// </summary>
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// Functions that call this function; optional.
    /// </summary>
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// True when the function is an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// True when debug information is available for the function.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Path of the source file, when debug information is available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Line number in the source file, when debug information is available.
    /// </summary>
    public int? SourceLine { get; init; }

    /// <summary>
    /// Semantic fingerprint used for enhanced similarity comparison.
    /// Based on IR-level analysis so that it is resilient to compiler optimizations.
    /// </summary>
    public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
}
/// <summary>
/// Settings that control how function fingerprints are extracted.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Include internal/static (non-exported) functions. Defaults to <c>false</c>.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; }

    /// <summary>
    /// Build the call graph (callees/callers). Defaults to <c>true</c>.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, that is included. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Largest function size, in bytes, that is included; 0 means no limit. Defaults to 0.
    /// </summary>
    public int MaxFunctionSize { get; init; }

    /// <summary>
    /// Regex selecting the function names to include; <c>null</c> means all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex selecting the function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Compute the combined hash. Defaults to <c>true</c>.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Extract debug information (source file/line). Defaults to <c>false</c>.
    /// </summary>
    public bool ExtractDebugInfo { get; init; }
}
/// <summary>
/// Describes how a single function differs between two versions of a binary.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Name of the function.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Kind of change that was detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint taken from the vulnerable version; <c>null</c> when the change is <c>Added</c>.
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint taken from the patched version; <c>null</c> when the change is <c>Removed</c>.
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// For <c>Modified</c> changes, the similarity between the two versions, from 0.0 to 1.0.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// The hash algorithms whose results differed.
    /// </summary>
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}
/// <summary>
/// Kinds of change a function can undergo between two versions.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// The function is new in the patched version.
    /// </summary>
    Added = 0,

    /// <summary>
    /// The function was modified (its fingerprint changed).
    /// </summary>
    Modified = 1,

    /// <summary>
    /// The function no longer exists in the patched version.
    /// </summary>
    Removed = 2,

    /// <summary>
    /// The function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged = 3,
}