using StellaOps.BinaryIndex.Semantic;

namespace StellaOps.BinaryIndex.Builders;

/// <summary>
/// Extracts function-level fingerprints from binary files.
/// Uses multiple hashing strategies for robust matching.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Extracts function fingerprints from a binary file.
    /// </summary>
    /// <param name="binaryPath">Path to the binary file.</param>
    /// <param name="options">Extraction options.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>List of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(
        string binaryPath,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Extracts function fingerprints from binary data in memory.
    /// </summary>
    /// <param name="binaryData">Binary file contents.</param>
    /// <param name="options">Extraction options.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>List of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(
        ReadOnlyMemory<byte> binaryData,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Gets supported binary formats for this extractor.
    /// </summary>
    // NOTE(review): element type reconstructed as string - confirm against the original declaration.
    IReadOnlyList<string> SupportedFormats { get; }
}

/// <summary>
/// Fingerprint data for a single function in a binary.
/// Uses multiple hash algorithms for robust cross-version matching.
/// </summary>
/// <remarks>
/// Record equality compares each member with its default equality comparer, so the
/// <c>byte[]</c> hash properties take part in equality by reference, not by content.
/// Compare hash contents explicitly when matching fingerprints.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Function name (symbol name or synthesized from offset).
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Offset of the function within the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Size of the function in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash of the basic block structure (opcode sequence, ignoring operands).
    /// More stable across recompilation with different addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash of the control flow graph structure.
    /// Captures branch patterns regardless of target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash of string references in the function.
    /// Useful for identifying functions that use specific error messages or constants.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Combined fingerprint hash (all algorithms merged).
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// List of functions called by this function.
    /// </summary>
    // NOTE(review): element type reconstructed as string - confirm against the original declaration.
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// List of functions that call this function.
    /// </summary>
    // NOTE(review): element type reconstructed as string - confirm against the original declaration.
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// Whether this is an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// Whether this function has debug information available.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Source file path if debug info available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Source line number if debug info available.
    /// </summary>
    public int? SourceLine { get; init; }

    /// <summary>
    /// Semantic fingerprint for enhanced similarity comparison.
    /// Uses IR-level analysis for resilience to compiler optimizations.
    /// </summary>
    public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
}

/// <summary>
/// Options for function fingerprint extraction.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Whether to include internal/static functions (not exported).
    /// </summary>
    public bool IncludeInternalFunctions { get; init; } = false;

    /// <summary>
    /// Whether to build the call graph (callees/callers).
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Minimum function size in bytes to include.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Maximum function size in bytes to include. 0 = no limit.
    /// </summary>
    public int MaxFunctionSize { get; init; } = 0;

    /// <summary>
    /// Regex filter for function names to include. Null = all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex filter for function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Whether to compute the combined hash.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Whether to extract debug information (source file/line).
    /// </summary>
    public bool ExtractDebugInfo { get; init; } = false;
}

/// <summary>
/// Represents a change to a function between two binary versions.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Function name.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Type of change detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint from the vulnerable version (null if Added).
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint from the patched version (null if Removed).
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// Similarity score between versions (0.0-1.0) for Modified changes.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// Which hash algorithms showed differences.
    /// </summary>
    // NOTE(review): element type reconstructed as string - confirm against the original declaration.
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}

/// <summary>
/// Type of change to a function between versions.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// Function was added in the patched version.
    /// </summary>
    Added,

    /// <summary>
    /// Function was modified (fingerprint changed).
    /// </summary>
    Modified,

    /// <summary>
    /// Function was removed in the patched version.
    /// </summary>
    Removed,

    /// <summary>
    /// Function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged
}