Sprints completed: - SPRINT_20260110_012_* (golden set diff layer - 10 sprints) - SPRINT_20260110_013_* (advisory chat - 4 sprints) Build fixes applied: - Fix namespace conflicts with Microsoft.Extensions.Options.Options.Create - Fix VexDecisionReachabilityIntegrationTests API drift (major rewrite) - Fix VexSchemaValidationTests FluentAssertions method name - Fix FixChainGateIntegrationTests ambiguous type references - Fix AdvisoryAI test files required properties and namespace aliases - Add stub types for CveMappingController (ICveSymbolMappingService) - Fix VerdictBuilderService static context issue Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
286 lines
7.1 KiB
C#
286 lines
7.1 KiB
C#
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
|
|
|
|
using System.Collections.Immutable;
|
|
|
|
namespace StellaOps.BinaryIndex.Analysis;
|
|
|
|
/// <summary>
/// Collection of fingerprints describing a single function at multiple levels
/// (basic blocks, CFG structure, string references, semantic embedding, constants, calls).
/// </summary>
public sealed record FunctionFingerprint
{
    /// <summary>Name of the function, either the raw symbol or its demangled form.</summary>
    public required string FunctionName { get; init; }

    /// <summary>Address of the function inside the binary.</summary>
    public required ulong Address { get; init; }

    /// <summary>Function size, expressed in bytes.</summary>
    public ulong Size { get; init; }

    /// <summary>Per-block instruction hashes, one entry for each basic block.</summary>
    public required ImmutableArray<BasicBlockHash> BasicBlockHashes { get; init; }

    /// <summary>Structural hash of the control-flow graph (Weisfeiler-Lehman on the block graph).</summary>
    public required string CfgHash { get; init; }

    /// <summary>Hashes of referenced strings (sorted, normalized). Empty when not supplied.</summary>
    public ImmutableArray<string> StringRefHashes { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Optional semantic embedding (KSG + Weisfeiler-Lehman); <c>null</c> when absent.</summary>
    public SemanticEmbedding? SemanticEmbedding { get; init; }

    /// <summary>Constants pulled out of the function's instructions. Empty when not supplied.</summary>
    public ImmutableArray<ExtractedConstant> Constants { get; init; } = ImmutableArray<ExtractedConstant>.Empty;

    /// <summary>Functions that this function calls. Empty when not supplied.</summary>
    public ImmutableArray<string> CallTargets { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Target architecture (x86_64, aarch64, etc.); <c>null</c> when not recorded.</summary>
    public string? Architecture { get; init; }
}
|
|
|
|
/// <summary>
/// Hash record for one basic block of a function.
/// </summary>
public sealed record BasicBlockHash
{
    /// <summary>Identifier of the block (e.g., "bb0", "bb1").</summary>
    public required string BlockId { get; init; }

    /// <summary>Address at which the block starts.</summary>
    public required ulong StartAddress { get; init; }

    /// <summary>Address at which the block ends.</summary>
    public ulong EndAddress { get; init; }

    /// <summary>Normalized instruction hash covering the opcode sequence only.</summary>
    public required string OpcodeHash { get; init; }

    /// <summary>Instruction hash that also includes operands.</summary>
    public required string FullHash { get; init; }

    /// <summary>Count of instructions contained in the block.</summary>
    public int InstructionCount { get; init; }

    /// <summary>Blocks reached by outgoing edges. Empty when not supplied.</summary>
    public ImmutableArray<string> Successors { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Blocks with incoming edges to this block. Empty when not supplied.</summary>
    public ImmutableArray<string> Predecessors { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>
    /// Kind of block (entry, exit, branch, loop, etc.).
    /// Defaults to <see cref="BasicBlockType.Normal"/>.
    /// </summary>
    public BasicBlockType BlockType { get; init; } = BasicBlockType.Normal;
}
|
|
|
|
/// <summary>
/// Classification of a basic block.
/// </summary>
public enum BasicBlockType
{
    /// <summary>
    /// Normal block.
    /// </summary>
    Normal = 0,

    /// <summary>
    /// Function entry block.
    /// </summary>
    Entry = 1,

    /// <summary>
    /// Function exit/return block.
    /// </summary>
    Exit = 2,

    /// <summary>
    /// Conditional branch block.
    /// </summary>
    ConditionalBranch = 3,

    /// <summary>
    /// Unconditional jump block.
    /// </summary>
    UnconditionalJump = 4,

    /// <summary>
    /// Loop header block.
    /// </summary>
    LoopHeader = 5,

    /// <summary>
    /// Loop body block.
    /// </summary>
    LoopBody = 6,

    /// <summary>
    /// Switch/indirect jump block.
    /// </summary>
    Switch = 7,

    /// <summary>
    /// Exception handler block.
    /// </summary>
    ExceptionHandler = 8,
}
|
|
|
|
/// <summary>
/// Semantic embedding using KSG (Knowledge Semantic Graph).
/// </summary>
/// <remarks>
/// <see cref="Vector"/> is a plain array, so the compiler-generated record equality
/// compares it by reference, not element by element.
/// </remarks>
public sealed record SemanticEmbedding
{
    /// <summary>
    /// Embedding vector (dimension depends on model).
    /// </summary>
    public required float[] Vector { get; init; }

    /// <summary>
    /// Model version used for embedding.
    /// </summary>
    public required string ModelVersion { get; init; }

    /// <summary>
    /// Embedding dimension, i.e. the length of <see cref="Vector"/>.
    /// </summary>
    public int Dimension => Vector.Length;

    /// <summary>
    /// Similarity threshold for matching. Defaults to 0.85.
    /// </summary>
    public float SimilarityThreshold { get; init; } = 0.85f;

    /// <summary>
    /// Computes cosine similarity with another embedding.
    /// </summary>
    /// <param name="other">Embedding to compare against.</param>
    /// <returns>
    /// The cosine of the angle between the two vectors, clamped to [-1, 1].
    /// Returns 0 when the dimensions differ or when the similarity is undefined
    /// (a zero-magnitude vector, or NaN/infinite components).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is <c>null</c>.</exception>
    public float CosineSimilarity(SemanticEmbedding other)
    {
        ArgumentNullException.ThrowIfNull(other);

        var left = Vector;
        var right = other.Vector;

        if (left.Length != right.Length)
        {
            return 0f;
        }

        // Accumulate in double: squaring float components overflows to Infinity above
        // ~1.8e19 (yielding NaN) and underflows to 0 below ~3.7e-23 (yielding a bogus 0).
        var dotProduct = 0d;
        var normA = 0d;
        var normB = 0d;

        for (var i = 0; i < left.Length; i++)
        {
            double a = left[i];
            double b = right[i];

            dotProduct += a * b;
            normA += a * a;
            normB += b * b;
        }

        var denominator = Math.Sqrt(normA) * Math.Sqrt(normB);

        // Rejects zero magnitude, NaN (comparison is false) and Infinity.
        if (!(denominator > 0d) || !double.IsFinite(denominator))
        {
            return 0f;
        }

        // Rounding can push the ratio marginally past +/-1; clamp so callers
        // always receive a valid cosine.
        return (float)Math.Clamp(dotProduct / denominator, -1d, 1d);
    }
}
|
|
|
|
/// <summary>
/// Constant value recovered from the instructions of a binary.
/// </summary>
public sealed record ExtractedConstant
{
    /// <summary>The value rendered as a hex string (e.g., "0x1000").</summary>
    public required string Value { get; init; }

    /// <summary>The value as a number, when it could be parsed; otherwise <c>null</c>.</summary>
    public long? NumericValue { get; init; }

    /// <summary>Address at which the constant was found.</summary>
    public required ulong Address { get; init; }

    /// <summary>Width in bytes (1, 2, 4, 8). Defaults to 4.</summary>
    public int Size { get; init; } = 4;

    /// <summary>Where the constant came from (instruction type or data section), if known.</summary>
    public string? Context { get; init; }

    /// <summary>
    /// Indicates that the constant is likely meaningful rather than a small immediate.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool IsMeaningful { get; init; } = true;
}
|
|
|
|
/// <summary>
/// Edge of the control-flow graph connecting two basic blocks.
/// </summary>
public sealed record CfgEdge
{
    /// <summary>ID of the block the edge leaves.</summary>
    public required string SourceBlockId { get; init; }

    /// <summary>ID of the block the edge enters.</summary>
    public required string TargetBlockId { get; init; }

    /// <summary>
    /// Kind of edge (fall-through, conditional-true, conditional-false, jump).
    /// Defaults to <see cref="CfgEdgeType.FallThrough"/>.
    /// </summary>
    public CfgEdgeType EdgeType { get; init; } = CfgEdgeType.FallThrough;

    /// <summary>Condition expression for conditional edges; <c>null</c> when not set.</summary>
    public string? Condition { get; init; }
}
|
|
|
|
/// <summary>
/// Classification of a control-flow graph edge.
/// </summary>
public enum CfgEdgeType
{
    /// <summary>
    /// Fall-through to next block.
    /// </summary>
    FallThrough = 0,

    /// <summary>
    /// Conditional true branch.
    /// </summary>
    ConditionalTrue = 1,

    /// <summary>
    /// Conditional false branch.
    /// </summary>
    ConditionalFalse = 2,

    /// <summary>
    /// Unconditional jump.
    /// </summary>
    UnconditionalJump = 3,

    /// <summary>
    /// Call edge.
    /// </summary>
    Call = 4,

    /// <summary>
    /// Return edge.
    /// </summary>
    Return = 5,

    /// <summary>
    /// Switch/indirect edge.
    /// </summary>
    Switch = 6,
}
|