Fix build and code structure improvements. New but essential UI functionality. CI improvements. Documentation improvements. AI module improvements.
This commit is contained in:
@@ -0,0 +1,220 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Extracts function-level fingerprints from binary files.
/// Uses multiple hashing strategies for robust matching.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Extracts function fingerprints from a binary file.
    /// </summary>
    /// <param name="binaryPath">Path to the binary file.</param>
    /// <param name="options">Extraction options; optional, defaults to <see langword="null"/>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A task that produces the list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(
        string binaryPath,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Extracts function fingerprints from binary data held in memory.
    /// </summary>
    /// <param name="binaryData">Binary file contents.</param>
    /// <param name="options">Extraction options; optional, defaults to <see langword="null"/>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A task that produces the list of function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(
        ReadOnlyMemory<byte> binaryData,
        ExtractionOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Gets the binary formats supported by this extractor.
    /// </summary>
    IReadOnlyList<string> SupportedFormats { get; }
}
/// <summary>
/// Fingerprint data for a single function in a binary.
/// Uses multiple hash algorithms for robust cross-version matching.
/// </summary>
/// <remarks>
/// The compiler-generated record equality compares the <c>byte[]</c> hash
/// properties and the list properties by reference, not by content. Compare
/// hash contents explicitly (for example with <c>SequenceEqual</c>) when
/// matching fingerprints.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Function name (symbol name or synthesized from offset).
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Offset of the function within the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Size of the function in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash of the basic block structure (opcode sequence, ignoring operands).
    /// More stable across recompilation with different addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash of the control flow graph structure.
    /// Captures branch patterns regardless of target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash of string references in the function.
    /// Useful for identifying functions that use specific error messages or constants.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Combined fingerprint hash (all algorithms merged); <see langword="null"/> when not set.
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// List of functions called by this function; <see langword="null"/> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// List of functions that call this function; <see langword="null"/> when not set.
    /// </summary>
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// Whether this is an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// Whether this function has debug information available.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Source file path if debug info available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Source line number if debug info available.
    /// </summary>
    public int? SourceLine { get; init; }
}
/// <summary>
/// Options for function fingerprint extraction.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Whether to include internal/static functions (not exported).
    /// Defaults to <see langword="false"/>.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; } = false;

    /// <summary>
    /// Whether to build the call graph (callees/callers).
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Minimum function size in bytes to include. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Maximum function size in bytes to include. 0 = no limit (the default).
    /// </summary>
    public int MaxFunctionSize { get; init; } = 0;

    /// <summary>
    /// Regex filter for function names to include. Null = all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex filter for function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Whether to compute the combined hash.
    /// Defaults to <see langword="true"/>.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Whether to extract debug information (source file/line).
    /// Defaults to <see langword="false"/>.
    /// </summary>
    public bool ExtractDebugInfo { get; init; } = false;
}
/// <summary>
/// Represents a change to a function between two binary versions.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Function name.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Type of change detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint from the vulnerable version (null if Added).
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint from the patched version (null if Removed).
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// Similarity score between versions (0.0-1.0) for Modified changes.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// Which hash algorithms showed differences.
    /// </summary>
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}
/// <summary>
/// Type of change to a function between versions.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// Function was added in the patched version.
    /// </summary>
    Added,

    /// <summary>
    /// Function was modified (fingerprint changed).
    /// </summary>
    Modified,

    /// <summary>
    /// Function was removed in the patched version.
    /// </summary>
    Removed,

    /// <summary>
    /// Function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged
}
Reference in New Issue
Block a user