using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Extracts function-level fingerprints from binary files.
/// Uses multiple hashing strategies for robust matching.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Extracts function fingerprints from a binary file.
    /// </summary>
    /// <param name="binaryPath">Path to the binary file.</param>
    /// <param name="options">Extraction options; optional, defaults to <c>null</c>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>List of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(
        string binaryPath,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Extracts function fingerprints from binary data in memory.
    /// </summary>
    /// <param name="binaryData">Binary file contents.</param>
    /// <param name="options">Extraction options; optional, defaults to <c>null</c>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>List of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(
        ReadOnlyMemory<byte> binaryData,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Gets supported binary formats for this extractor.
    /// </summary>
    IReadOnlyList<string> SupportedFormats { get; }
}
/// <summary>
/// Fingerprint data for a single function in a binary.
/// Uses multiple hash algorithms for robust cross-version matching.
/// </summary>
/// <remarks>
/// The compiler-generated record equality compares the <c>byte[]</c> hash
/// members and the list members by reference, not by content. Two instances
/// holding equal hash bytes in different arrays are therefore not equal;
/// compare the hash contents explicitly when content equality is needed.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Function name (symbol name or synthesized from offset).
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Offset of the function within the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Size of the function in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash of the basic block structure (opcode sequence, ignoring operands).
    /// More stable across recompilation with different addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash of the control flow graph structure.
    /// Captures branch patterns regardless of target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash of string references in the function.
    /// Useful for identifying functions that use specific error messages or constants.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Combined fingerprint hash (all algorithms merged).
    /// <c>null</c> when not set.
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// List of functions called by this function.
    /// <c>null</c> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// List of functions that call this function.
    /// <c>null</c> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// Whether this is an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// Whether this function has debug information available.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Source file path if debug info available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Source line number if debug info available.
    /// </summary>
    public int? SourceLine { get; init; }

    /// <summary>
    /// Semantic fingerprint for enhanced similarity comparison.
    /// Uses IR-level analysis for resilience to compiler optimizations.
    /// </summary>
    public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
}
/// <summary>
/// Settings that control which functions are fingerprinted and how much
/// information is gathered for each of them.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// When <c>true</c>, internal/static (non-exported) functions are included.
    /// Defaults to <c>false</c>.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; }

    /// <summary>
    /// When <c>true</c>, the call graph (callees/callers) is built.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, that is included. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Largest function size, in bytes, that is included.
    /// 0 means no limit, which is the default.
    /// </summary>
    public int MaxFunctionSize { get; init; }

    /// <summary>
    /// Regex selecting the function names to include.
    /// <c>null</c> (the default) includes all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex selecting the function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// When <c>true</c>, the combined hash is computed.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// When <c>true</c>, debug information (source file/line) is extracted.
    /// Defaults to <c>false</c>.
    /// </summary>
    public bool ExtractDebugInfo { get; init; }
}
/// <summary>
/// Represents a change to a function between two binary versions.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Function name.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Type of change detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint from the vulnerable version (null if Added).
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint from the patched version (null if Removed).
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// Similarity score between versions (0.0-1.0) for Modified changes.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// Which hash algorithms showed differences.
    /// <c>null</c> when not set.
    /// </summary>
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}
/// <summary>
/// Classifies how a function differs between two versions of a binary.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// The function is new in the patched version.
    /// </summary>
    Added = 0,

    /// <summary>
    /// The function was modified (its fingerprint changed).
    /// </summary>
    Modified = 1,

    /// <summary>
    /// The function no longer exists in the patched version.
    /// </summary>
    Removed = 2,

    /// <summary>
    /// The function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged = 3
}