Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Analysis/Models/FingerprintModels.cs
master 7f7eb8b228 Complete batch 012 (golden set diff) and 013 (advisory chat), fix build errors
Sprints completed:
- SPRINT_20260110_012_* (golden set diff layer - 10 sprints)
- SPRINT_20260110_013_* (advisory chat - 4 sprints)

Build fixes applied:
- Fix namespace conflicts with Microsoft.Extensions.Options.Options.Create
- Fix VexDecisionReachabilityIntegrationTests API drift (major rewrite)
- Fix VexSchemaValidationTests FluentAssertions method name
- Fix FixChainGateIntegrationTests ambiguous type references
- Fix AdvisoryAI test files required properties and namespace aliases
- Add stub types for CveMappingController (ICveSymbolMappingService)
- Fix VerdictBuilderService static context issue

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 10:09:07 +02:00

286 lines
7.1 KiB
C#

// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Analysis;
/// <summary>
/// Groups the fingerprints computed for a single function at several levels:
/// per-block hashes, a CFG hash, string-reference hashes, an optional semantic
/// embedding, extracted constants and call targets.
/// </summary>
public sealed record FunctionFingerprint
{
    /// <summary>Name of the function, either the raw symbol or its demangled form.</summary>
    public required string FunctionName { get; init; }

    /// <summary>Address of the function inside the binary.</summary>
    public required ulong Address { get; init; }

    /// <summary>Function size, in bytes.</summary>
    public ulong Size { get; init; }

    /// <summary>One instruction-hash entry per basic block of the function.</summary>
    public required ImmutableArray<BasicBlockHash> BasicBlockHashes { get; init; }

    /// <summary>Structural hash of the control-flow graph (Weisfeiler-Lehman over the block graph).</summary>
    public required string CfgHash { get; init; }

    /// <summary>Hashes of referenced strings (sorted, normalized). Empty unless set.</summary>
    public ImmutableArray<string> StringRefHashes { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Optional semantic embedding (KSG + Weisfeiler-Lehman); <c>null</c> when not set.</summary>
    public SemanticEmbedding? SemanticEmbedding { get; init; }

    /// <summary>Constants pulled out of the function's instructions. Empty unless set.</summary>
    public ImmutableArray<ExtractedConstant> Constants { get; init; } = ImmutableArray<ExtractedConstant>.Empty;

    /// <summary>Functions that this function calls. Empty unless set.</summary>
    public ImmutableArray<string> CallTargets { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Target architecture (x86_64, aarch64, etc.); <c>null</c> when not set.</summary>
    public string? Architecture { get; init; }
}
/// <summary>
/// Hash record describing one basic block of a function.
/// </summary>
public sealed record BasicBlockHash
{
    /// <summary>Identifier of the block, e.g. "bb0" or "bb1".</summary>
    public required string BlockId { get; init; }

    /// <summary>Address at which the block starts.</summary>
    public required ulong StartAddress { get; init; }

    /// <summary>Address at which the block ends.</summary>
    public ulong EndAddress { get; init; }

    /// <summary>Normalized hash over the opcode sequence only (operands excluded).</summary>
    public required string OpcodeHash { get; init; }

    /// <summary>Hash over the full instructions, operands included.</summary>
    public required string FullHash { get; init; }

    /// <summary>How many instructions the block contains.</summary>
    public int InstructionCount { get; init; }

    /// <summary>Blocks reached by outgoing edges. Empty unless set.</summary>
    public ImmutableArray<string> Successors { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Blocks with incoming edges to this block. Empty unless set.</summary>
    public ImmutableArray<string> Predecessors { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>
    /// Classification of the block (entry, exit, branch, loop, etc.).
    /// Defaults to <see cref="BasicBlockType.Normal"/>.
    /// </summary>
    public BasicBlockType BlockType { get; init; } = BasicBlockType.Normal;
}
/// <summary>
/// Classification of a basic block. The numeric values are spelled out and
/// match the implicit declaration order.
/// </summary>
public enum BasicBlockType
{
    /// <summary>Ordinary block with no special role.</summary>
    Normal = 0,

    /// <summary>Block at which the function is entered.</summary>
    Entry = 1,

    /// <summary>Block that exits or returns from the function.</summary>
    Exit = 2,

    /// <summary>Block ending in a conditional branch.</summary>
    ConditionalBranch = 3,

    /// <summary>Block ending in an unconditional jump.</summary>
    UnconditionalJump = 4,

    /// <summary>Header block of a loop.</summary>
    LoopHeader = 5,

    /// <summary>Block belonging to a loop body.</summary>
    LoopBody = 6,

    /// <summary>Block ending in a switch or indirect jump.</summary>
    Switch = 7,

    /// <summary>Block that handles an exception.</summary>
    ExceptionHandler = 8
}
/// <summary>
/// Semantic embedding using KSG (Knowledge Semantic Graph).
/// </summary>
/// <remarks>
/// <see cref="Vector"/> is a plain array, so the compiler-generated record
/// equality compares it by reference, not element by element. Use
/// <see cref="CosineSimilarity"/> to compare the contents of two embeddings.
/// </remarks>
public sealed record SemanticEmbedding
{
    /// <summary>
    /// Embedding vector (dimension depends on model).
    /// </summary>
    public required float[] Vector { get; init; }

    /// <summary>
    /// Model version used for embedding.
    /// </summary>
    public required string ModelVersion { get; init; }

    /// <summary>
    /// Embedding dimension, i.e. the length of <see cref="Vector"/>.
    /// </summary>
    public int Dimension => Vector.Length;

    /// <summary>
    /// Similarity threshold for matching. Defaults to 0.85.
    /// </summary>
    public float SimilarityThreshold { get; init; } = 0.85f;

    /// <summary>
    /// Computes cosine similarity with another embedding.
    /// </summary>
    /// <param name="other">Embedding to compare against.</param>
    /// <returns>
    /// The cosine of the angle between the two vectors, limited to the range
    /// [-1, 1]. Returns 0 when the two vectors differ in length or when either
    /// vector has zero magnitude.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is <c>null</c>.</exception>
    public float CosineSimilarity(SemanticEmbedding other)
    {
        ArgumentNullException.ThrowIfNull(other);

        var left = Vector;
        var right = other.Vector;
        if (left.Length != right.Length)
        {
            return 0f;
        }

        // Accumulate in double precision: squaring a float component can overflow
        // to infinity (giving NaN) or underflow to zero (making a non-zero vector
        // look empty), and single-precision sums lose accuracy as the dimension grows.
        var dot = 0d;
        var normLeft = 0d;
        var normRight = 0d;
        for (var i = 0; i < left.Length; i++)
        {
            double a = left[i];
            double b = right[i];
            dot += a * b;
            normLeft += a * a;
            normRight += b * b;
        }

        var denominator = Math.Sqrt(normLeft) * Math.Sqrt(normRight);

        // Written as a negated comparison so a NaN denominator also yields 0.
        if (!(denominator > 0d))
        {
            return 0f;
        }

        // Rounding can push the quotient marginally outside [-1, 1]; clamp so the
        // result is always a valid cosine.
        return (float)Math.Clamp(dot / denominator, -1d, 1d);
    }
}
/// <summary>
/// Describes one constant that was extracted from binary instructions.
/// </summary>
public sealed record ExtractedConstant
{
    /// <summary>The constant rendered as a hex string, e.g. "0x1000".</summary>
    public required string Value { get; init; }

    /// <summary>The constant as a number when it could be parsed; otherwise <c>null</c>.</summary>
    public long? NumericValue { get; init; }

    /// <summary>Address at which the constant was found.</summary>
    public required ulong Address { get; init; }

    /// <summary>Width of the constant in bytes (1, 2, 4, 8). Defaults to 4.</summary>
    public int Size { get; init; } = 4;

    /// <summary>Where the constant came from (instruction type or data section); <c>null</c> when not set.</summary>
    public string? Context { get; init; }

    /// <summary>
    /// Flags the constant as likely meaningful rather than a small immediate.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool IsMeaningful { get; init; } = true;
}
/// <summary>
/// A directed control-flow-graph edge connecting two basic blocks.
/// </summary>
public sealed record CfgEdge
{
    /// <summary>ID of the block the edge leaves from.</summary>
    public required string SourceBlockId { get; init; }

    /// <summary>ID of the block the edge leads to.</summary>
    public required string TargetBlockId { get; init; }

    /// <summary>
    /// Kind of edge (fall-through, conditional-true, conditional-false, jump).
    /// Defaults to <see cref="CfgEdgeType.FallThrough"/>.
    /// </summary>
    public CfgEdgeType EdgeType { get; init; } = CfgEdgeType.FallThrough;

    /// <summary>Condition expression for conditional edges; <c>null</c> when not set.</summary>
    public string? Condition { get; init; }
}
/// <summary>
/// Kinds of control-flow-graph edge. The numeric values are spelled out and
/// match the implicit declaration order.
/// </summary>
public enum CfgEdgeType
{
    /// <summary>Execution falls through to the next block.</summary>
    FallThrough = 0,

    /// <summary>Branch taken when a condition is true.</summary>
    ConditionalTrue = 1,

    /// <summary>Branch taken when a condition is false.</summary>
    ConditionalFalse = 2,

    /// <summary>Jump taken unconditionally.</summary>
    UnconditionalJump = 3,

    /// <summary>Edge representing a call.</summary>
    Call = 4,

    /// <summary>Edge representing a return.</summary>
    Return = 5,

    /// <summary>Edge from a switch or indirect jump.</summary>
    Switch = 6
}