Refactor code structure and optimize performance across multiple modules

StellaOps Bot
2025-12-26 20:03:22 +02:00
parent c786faae84
commit b4fc66feb6
3353 changed files with 88254 additions and 1590657 deletions


@@ -0,0 +1,56 @@
// -----------------------------------------------------------------------------
// BinaryCacheOptions.cs
// Sprint: SPRINT_20251226_014_BINIDX
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.Cache;
/// <summary>
/// Configuration options for binary vulnerability cache layer.
/// </summary>
public sealed class BinaryCacheOptions
{
/// <summary>
/// Valkey key prefix for binary cache entries.
/// Default: "stellaops:binary:"
/// </summary>
public string KeyPrefix { get; init; } = "stellaops:binary:";
/// <summary>
/// TTL for identity lookups.
/// Default: 1 hour
/// </summary>
public TimeSpan IdentityTtl { get; init; } = TimeSpan.FromHours(1);
/// <summary>
/// TTL for fix status lookups.
/// Default: 1 hour
/// </summary>
public TimeSpan FixStatusTtl { get; init; } = TimeSpan.FromHours(1);
/// <summary>
/// TTL for fingerprint lookups.
/// Default: 30 minutes (shorter due to potential corpus updates)
/// </summary>
public TimeSpan FingerprintTtl { get; init; } = TimeSpan.FromMinutes(30);
/// <summary>
/// Maximum TTL for any cache entry.
/// Default: 24 hours
/// </summary>
public TimeSpan MaxTtl { get; init; } = TimeSpan.FromHours(24);
/// <summary>
/// Whether to use sliding expiration.
/// Default: false (absolute expiration)
/// </summary>
public bool SlidingExpiration { get; init; } = false;
/// <summary>
/// Target cache hit rate.
/// Used for monitoring and alerting.
/// Default: 0.80 (80%)
/// </summary>
public double TargetHitRate { get; init; } = 0.80;
}
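
For orientation, a minimal sketch of overriding these defaults via the explicit-options overload defined in BinaryCacheServiceExtensions below. The TimeSpan and hit-rate values are illustrative, not recommendations, and `services` is assumed to be the host's IServiceCollection.

    // Illustrative only: override selected defaults in code.
    services.AddBinaryIndexCaching(options =>
    {
        options.IdentityTtl = TimeSpan.FromHours(2);
        options.FingerprintTtl = TimeSpan.FromMinutes(15);
        options.TargetHitRate = 0.85;
    });

Settings not assigned here keep the defaults documented above.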


@@ -0,0 +1,52 @@
// -----------------------------------------------------------------------------
// BinaryCacheServiceExtensions.cs
// Sprint: SPRINT_20251226_014_BINIDX
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.BinaryIndex.Core.Services;
namespace StellaOps.BinaryIndex.Cache;
/// <summary>
/// Extension methods for registering binary cache services.
/// </summary>
public static class BinaryCacheServiceExtensions
{
/// <summary>
/// Adds binary cache layer to the service collection.
/// Decorates the existing <see cref="IBinaryVulnerabilityService"/> with caching.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="configuration">Configuration for cache options.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddBinaryIndexCaching(
this IServiceCollection services,
IConfiguration configuration)
{
// Bind options
services.Configure<BinaryCacheOptions>(
configuration.GetSection("BinaryIndex:Cache"));
// Decorate the existing service with caching
services.Decorate<IBinaryVulnerabilityService, CachedBinaryVulnerabilityService>();
return services;
}
/// <summary>
/// Adds binary cache layer with explicit options.
/// </summary>
public static IServiceCollection AddBinaryIndexCaching(
this IServiceCollection services,
Action<BinaryCacheOptions> configureOptions)
{
services.Configure(configureOptions);
services.Decorate<IBinaryVulnerabilityService, CachedBinaryVulnerabilityService>();
return services;
}
}
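
A hedged composition-root sketch showing how the configuration overload above might be wired in a host. It assumes an IConnectionMultiplexer and a concrete IBinaryVulnerabilityService implementation (the name BinaryVulnerabilityService and the "Valkey:ConnectionString" key are placeholders) are registered separately, and that Decorate resolves to Scrutor or an equivalent decoration helper referenced elsewhere in the solution; AddBinaryIndexCaching itself registers neither.

    var builder = WebApplication.CreateBuilder(args);

    // The underlying lookup service and the Valkey/Redis connection must exist
    // before the caching decorator is layered on top of them.
    builder.Services.AddSingleton<IBinaryVulnerabilityService, BinaryVulnerabilityService>();
    builder.Services.AddSingleton<IConnectionMultiplexer>(_ =>
        ConnectionMultiplexer.Connect(builder.Configuration["Valkey:ConnectionString"]!));

    // Options bind from the "BinaryIndex:Cache" configuration section.
    builder.Services.AddBinaryIndexCaching(builder.Configuration);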


@@ -0,0 +1,431 @@
// -----------------------------------------------------------------------------
// CachedBinaryVulnerabilityService.cs
// Sprint: SPRINT_20251226_014_BINIDX
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using System.Diagnostics;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StackExchange.Redis;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.Cache;
/// <summary>
/// Caching decorator for <see cref="IBinaryVulnerabilityService"/>.
/// Implements read-through caching with Valkey/Redis for binary vulnerability lookups.
/// Target: > 80% cache hit rate for repeat scans.
/// </summary>
public sealed class CachedBinaryVulnerabilityService : IBinaryVulnerabilityService, IAsyncDisposable
{
private readonly IBinaryVulnerabilityService _inner;
private readonly IConnectionMultiplexer _connectionMultiplexer;
private readonly BinaryCacheOptions _options;
private readonly ILogger<CachedBinaryVulnerabilityService> _logger;
private readonly JsonSerializerOptions _jsonOptions;
private readonly SemaphoreSlim _connectionLock = new(1, 1);
private IDatabase? _database;
public CachedBinaryVulnerabilityService(
IBinaryVulnerabilityService inner,
IConnectionMultiplexer connectionMultiplexer,
IOptions<BinaryCacheOptions> options,
ILogger<CachedBinaryVulnerabilityService> logger)
{
_inner = inner ?? throw new ArgumentNullException(nameof(inner));
_connectionMultiplexer = connectionMultiplexer ?? throw new ArgumentNullException(nameof(connectionMultiplexer));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_jsonOptions = new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false
};
}
/// <inheritdoc />
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
BinaryIdentity identity,
LookupOptions? options = null,
CancellationToken ct = default)
{
var cacheKey = BuildIdentityKey(identity, options);
var sw = Stopwatch.StartNew();
// Try cache first
var (hit, cached) = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
if (hit && !cached.IsDefault)
{
sw.Stop();
_logger.LogDebug(
"Cache hit for identity {BinaryKey} in {ElapsedMs}ms",
identity.BinaryKey,
sw.Elapsed.TotalMilliseconds);
return cached;
}
// Cache miss - call inner service
var result = await _inner.LookupByIdentityAsync(identity, options, ct).ConfigureAwait(false);
sw.Stop();
// Store in cache
await SetCacheAsync(cacheKey, result, _options.IdentityTtl, ct).ConfigureAwait(false);
_logger.LogDebug(
"Cache miss for identity {BinaryKey}, fetched in {ElapsedMs}ms",
identity.BinaryKey,
sw.Elapsed.TotalMilliseconds);
return result;
}
/// <inheritdoc />
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
IEnumerable<BinaryIdentity> identities,
LookupOptions? options = null,
CancellationToken ct = default)
{
var identityList = identities.ToList();
if (identityList.Count == 0)
{
return ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>.Empty;
}
var sw = Stopwatch.StartNew();
var db = await GetDatabaseAsync().ConfigureAwait(false);
// Build cache keys
var cacheKeys = identityList
.Select(i => (Identity: i, Key: BuildIdentityKey(i, options)))
.ToList();
// Batch get from cache
var redisKeys = cacheKeys.Select(k => (RedisKey)k.Key).ToArray();
var cachedValues = await db.StringGetAsync(redisKeys).ConfigureAwait(false);
var results = new Dictionary<string, ImmutableArray<BinaryVulnMatch>>();
var misses = new List<BinaryIdentity>();
for (int i = 0; i < cacheKeys.Count; i++)
{
var (identity, key) = cacheKeys[i];
var value = cachedValues[i];
if (!value.IsNullOrEmpty)
{
try
{
var matches = JsonSerializer.Deserialize<ImmutableArray<BinaryVulnMatch>>(
(string)value!, _jsonOptions);
if (!matches.IsDefault)
{
results[identity.BinaryKey] = matches;
continue;
}
}
catch
{
// Deserialization failed, treat as miss
}
}
misses.Add(identity);
}
_logger.LogDebug(
"Batch lookup: {Hits} cache hits, {Misses} cache misses",
results.Count,
misses.Count);
// Fetch misses from inner service
if (misses.Count > 0)
{
var fetchedResults = await _inner.LookupBatchAsync(misses, options, ct).ConfigureAwait(false);
// Store fetched results in cache
var batch = db.CreateBatch();
var tasks = new List<Task>();
foreach (var (binaryKey, matches) in fetchedResults)
{
results[binaryKey] = matches;
var identity = misses.First(i => i.BinaryKey == binaryKey);
var cacheKey = BuildIdentityKey(identity, options);
var value = JsonSerializer.Serialize(matches, _jsonOptions);
tasks.Add(batch.StringSetAsync(cacheKey, value, _options.IdentityTtl));
}
batch.Execute();
await Task.WhenAll(tasks).ConfigureAwait(false);
}
sw.Stop();
_logger.LogDebug(
"Batch lookup completed in {ElapsedMs}ms: {Total} total, {Hits} hits, {Misses} misses",
sw.Elapsed.TotalMilliseconds,
identityList.Count,
identityList.Count - misses.Count,
misses.Count);
return results.ToImmutableDictionary();
}
/// <inheritdoc />
public async Task<FixStatusResult?> GetFixStatusAsync(
string distro,
string release,
string sourcePkg,
string cveId,
CancellationToken ct = default)
{
var cacheKey = BuildFixStatusKey(distro, release, sourcePkg, cveId);
var sw = Stopwatch.StartNew();
// Try cache first
var (hit, cached) = await GetFromCacheAsync<FixStatusResult?>(cacheKey, ct).ConfigureAwait(false);
if (hit)
{
sw.Stop();
_logger.LogDebug(
"Cache hit for fix status {Distro}:{SourcePkg}:{CveId} in {ElapsedMs}ms",
distro, sourcePkg, cveId, sw.Elapsed.TotalMilliseconds);
return cached;
}
// Cache miss
var result = await _inner.GetFixStatusAsync(distro, release, sourcePkg, cveId, ct).ConfigureAwait(false);
sw.Stop();
// Store in cache
await SetCacheAsync(cacheKey, result, _options.FixStatusTtl, ct).ConfigureAwait(false);
return result;
}
/// <inheritdoc />
public async Task<ImmutableDictionary<string, FixStatusResult>> GetFixStatusBatchAsync(
string distro,
string release,
string sourcePkg,
IEnumerable<string> cveIds,
CancellationToken ct = default)
{
var cveList = cveIds.ToList();
if (cveList.Count == 0)
{
return ImmutableDictionary<string, FixStatusResult>.Empty;
}
var db = await GetDatabaseAsync().ConfigureAwait(false);
// Build cache keys
var cacheKeys = cveList
.Select(cve => (CveId: cve, Key: BuildFixStatusKey(distro, release, sourcePkg, cve)))
.ToList();
// Batch get from cache
var redisKeys = cacheKeys.Select(k => (RedisKey)k.Key).ToArray();
var cachedValues = await db.StringGetAsync(redisKeys).ConfigureAwait(false);
var results = new Dictionary<string, FixStatusResult>();
var misses = new List<string>();
for (int i = 0; i < cacheKeys.Count; i++)
{
var (cveId, key) = cacheKeys[i];
var value = cachedValues[i];
if (!value.IsNullOrEmpty)
{
try
{
var status = JsonSerializer.Deserialize<FixStatusResult>((string)value!, _jsonOptions);
if (status is not null)
{
results[cveId] = status;
continue;
}
}
catch
{
// Deserialization failed
}
}
misses.Add(cveId);
}
// Fetch misses from inner service
if (misses.Count > 0)
{
var fetchedResults = await _inner.GetFixStatusBatchAsync(distro, release, sourcePkg, misses, ct)
.ConfigureAwait(false);
var batch = db.CreateBatch();
var tasks = new List<Task>();
foreach (var (cveId, status) in fetchedResults)
{
results[cveId] = status;
var cacheKey = BuildFixStatusKey(distro, release, sourcePkg, cveId);
var serialized = JsonSerializer.Serialize(status, _jsonOptions);
tasks.Add(batch.StringSetAsync(cacheKey, serialized, _options.FixStatusTtl));
}
batch.Execute();
await Task.WhenAll(tasks).ConfigureAwait(false);
}
return results.ToImmutableDictionary();
}
/// <inheritdoc />
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
byte[] fingerprint,
FingerprintLookupOptions? options = null,
CancellationToken ct = default)
{
var cacheKey = BuildFingerprintKey(fingerprint, options);
var sw = Stopwatch.StartNew();
// Try cache first
var (hit, cached) = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
if (hit && !cached.IsDefault)
{
sw.Stop();
_logger.LogDebug("Cache hit for fingerprint in {ElapsedMs}ms", sw.Elapsed.TotalMilliseconds);
return cached;
}
// Cache miss
var result = await _inner.LookupByFingerprintAsync(fingerprint, options, ct).ConfigureAwait(false);
sw.Stop();
// Store in cache (shorter TTL for fingerprints as they may change)
await SetCacheAsync(cacheKey, result, _options.FingerprintTtl, ct).ConfigureAwait(false);
return result;
}
/// <inheritdoc />
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
FingerprintLookupOptions? options = null,
CancellationToken ct = default)
{
// For fingerprint batch, delegate directly to inner service
// Fingerprint lookups are less common and more expensive to cache
return await _inner.LookupByFingerprintBatchAsync(fingerprints, options, ct).ConfigureAwait(false);
}
/// <summary>
/// Invalidate all cache entries for a specific distro/release combination.
/// Called when a new corpus update is published.
/// </summary>
public async Task InvalidateDistroAsync(string distro, string release, CancellationToken ct = default)
{
try
{
var db = await GetDatabaseAsync().ConfigureAwait(false);
var server = _connectionMultiplexer.GetServer(_connectionMultiplexer.GetEndPoints().First());
var pattern = $"{_options.KeyPrefix}fix:{distro}:{release}:*";
var keys = server.Keys(pattern: pattern).ToArray();
if (keys.Length > 0)
{
var deleted = await db.KeyDeleteAsync(keys).ConfigureAwait(false);
_logger.LogInformation(
"Invalidated {Count} cache entries for {Distro}:{Release}",
deleted, distro, release);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Error invalidating cache for {Distro}:{Release}", distro, release);
}
}
private string BuildIdentityKey(BinaryIdentity identity, LookupOptions? options)
{
var tenant = options?.TenantId ?? "default";
return $"{_options.KeyPrefix}id:{tenant}:{identity.BinaryKey}";
}
private string BuildFixStatusKey(string distro, string release, string sourcePkg, string cveId)
{
return $"{_options.KeyPrefix}fix:{distro}:{release}:{sourcePkg}:{cveId}";
}
private string BuildFingerprintKey(byte[] fingerprint, FingerprintLookupOptions? options)
{
var hash = Convert.ToHexString(fingerprint).ToLowerInvariant();
var algo = options?.Algorithm ?? "combined";
return $"{_options.KeyPrefix}fp:{algo}:{hash[..Math.Min(32, hash.Length)]}";
}
private async Task<(bool Found, T? Value)> GetFromCacheAsync<T>(string key, CancellationToken ct)
{
try
{
var db = await GetDatabaseAsync().ConfigureAwait(false);
var value = await db.StringGetAsync(key).ConfigureAwait(false);
if (value.IsNullOrEmpty)
{
return (false, default);
}
return (true, JsonSerializer.Deserialize<T>((string)value!, _jsonOptions));
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Error getting cache entry for key {Key}", key);
return (false, default);
}
}
private async Task SetCacheAsync<T>(string key, T value, TimeSpan ttl, CancellationToken ct)
{
try
{
var db = await GetDatabaseAsync().ConfigureAwait(false);
var serialized = JsonSerializer.Serialize(value, _jsonOptions);
await db.StringSetAsync(key, serialized, ttl).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Error setting cache entry for key {Key}", key);
}
}
private async Task<IDatabase> GetDatabaseAsync()
{
if (_database is not null)
return _database;
await _connectionLock.WaitAsync().ConfigureAwait(false);
try
{
_database ??= _connectionMultiplexer.GetDatabase();
return _database;
}
finally
{
_connectionLock.Release();
}
}
public ValueTask DisposeAsync()
{
_connectionLock.Dispose();
return ValueTask.CompletedTask;
}
}
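
For reference, the key shapes the decorator writes with the default prefix, plus a hedged sketch of triggering invalidation after a corpus publish. The handler type is hypothetical; because Decorate registers the decorator only as IBinaryVulnerabilityService, obtaining the concrete CachedBinaryVulnerabilityService instance would need an extra registration or a dedicated invalidation interface in practice.

    // Key layout (default prefix "stellaops:binary:"):
    //   identity    -> stellaops:binary:id:{tenant}:{binaryKey}
    //   fix status  -> stellaops:binary:fix:{distro}:{release}:{sourcePkg}:{cveId}
    //   fingerprint -> stellaops:binary:fp:{algorithm}:{first 32 hex chars of the fingerprint}

    // Hypothetical corpus-update hook:
    public sealed class CorpusUpdateHandler
    {
        private readonly CachedBinaryVulnerabilityService _cache;

        public CorpusUpdateHandler(CachedBinaryVulnerabilityService cache) => _cache = cache;

        public Task OnCorpusPublishedAsync(string distro, string release, CancellationToken ct = default) =>
            _cache.InvalidateDistroAsync(distro, release, ct);
    }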


@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<RootNamespace>StellaOps.BinaryIndex.Cache</RootNamespace>
<AssemblyName>StellaOps.BinaryIndex.Cache</AssemblyName>
<Description>Valkey/Redis cache layer for BinaryIndex vulnerability lookups</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="StackExchange.Redis" Version="2.8.37" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
</ItemGroup>
</Project>


@@ -0,0 +1,40 @@
namespace StellaOps.BinaryIndex.Core.Models;
/// <summary>
/// Fix state enumeration.
/// </summary>
public enum FixState
{
/// <summary>CVE is fixed in this version</summary>
Fixed,
/// <summary>CVE affects this package</summary>
Vulnerable,
/// <summary>CVE does not affect this package</summary>
NotAffected,
/// <summary>Fix won't be applied (e.g., EOL version)</summary>
Wontfix,
/// <summary>Unknown status</summary>
Unknown
}
/// <summary>
/// Method used to identify the fix.
/// </summary>
public enum FixMethod
{
/// <summary>From official security feed (OVAL, DSA, etc.)</summary>
SecurityFeed,
/// <summary>Parsed from Debian/Ubuntu changelog</summary>
Changelog,
/// <summary>Extracted from patch header (DEP-3)</summary>
PatchHeader,
/// <summary>Matched against upstream patch database</summary>
UpstreamPatchMatch
}
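
To make the intended use of FixState concrete, a hedged sketch of mapping feed status labels onto the enum; the literal strings are illustrative and do not come from any parser in this commit.

    public static class FixStateMapper
    {
        // Illustrative label set only; real feeds use their own vocabularies.
        public static FixState FromFeedLabel(string label) => label.ToLowerInvariant() switch
        {
            "fixed" or "resolved" or "released" => FixState.Fixed,
            "affected" or "open" or "vulnerable" => FixState.Vulnerable,
            "not-affected" or "not_affected" => FixState.NotAffected,
            "wontfix" or "ignored" or "end-of-life" => FixState.Wontfix,
            _ => FixState.Unknown
        };
    }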


@@ -1,6 +1,5 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.Core.Services;
@@ -52,6 +51,51 @@ public interface IBinaryVulnerabilityService
string sourcePkg,
IEnumerable<string> cveIds,
CancellationToken ct = default);
/// <summary>
/// Look up vulnerabilities by binary fingerprint.
/// Used for fingerprint-based matching when Build-ID is unavailable.
/// </summary>
/// <param name="fingerprint">Fingerprint bytes to match.</param>
/// <param name="options">Matching options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of vulnerability matches.</returns>
Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
byte[] fingerprint,
FingerprintLookupOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Batch fingerprint lookup for scan performance.
/// </summary>
Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
FingerprintLookupOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Options for fingerprint-based lookup.
/// </summary>
public sealed record FingerprintLookupOptions
{
/// <summary>Minimum similarity threshold (0.0-1.0). Default 0.95.</summary>
public decimal MinSimilarity { get; init; } = 0.95m;
/// <summary>Maximum candidates to evaluate. Default 100.</summary>
public int MaxCandidates { get; init; } = 100;
/// <summary>Architecture filter. Null means any.</summary>
public string? Architecture { get; init; }
/// <summary>Check fix index for matched CVEs.</summary>
public bool CheckFixIndex { get; init; } = true;
/// <summary>Distro hint for fix status lookup.</summary>
public string? DistroHint { get; init; }
/// <summary>Release hint for fix status lookup.</summary>
public string? ReleaseHint { get; init; }
}
public sealed record LookupOptions


@@ -0,0 +1,306 @@
// -----------------------------------------------------------------------------
// BasicBlockFingerprintGenerator.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-06 — Implement BasicBlockFingerprintGenerator
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
/// <summary>
/// Generates fingerprints based on basic block hashing.
///
/// Algorithm:
/// 1. Disassemble function to basic blocks
/// 2. Normalize instructions (remove absolute addresses)
/// 3. Hash each basic block
/// 4. Combine block hashes with topology info
///
/// Produces a 16-byte fingerprint.
/// </summary>
public sealed class BasicBlockFingerprintGenerator : IVulnFingerprintGenerator
{
private readonly ILogger<BasicBlockFingerprintGenerator> _logger;
public BasicBlockFingerprintGenerator(ILogger<BasicBlockFingerprintGenerator> logger)
{
_logger = logger;
}
public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.BasicBlock;
public bool CanProcess(FingerprintInput input)
{
// Require at least 16 bytes of binary data
return input.BinaryData.Length >= 16;
}
public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
ct.ThrowIfCancellationRequested();
_logger.LogDebug(
"Generating basic block fingerprint for {Component}/{CveId} ({Size} bytes)",
input.Component,
input.CveId,
input.BinaryData.Length);
// Step 1: Identify basic blocks (simplified - real impl would use disassembler)
var blocks = IdentifyBasicBlocks(input.BinaryData, input.Architecture);
// Step 2: Normalize each block
var normalizedBlocks = blocks.Select(b => NormalizeBlock(b, input.Architecture)).ToList();
// Step 3: Hash each block
var blockHashes = normalizedBlocks.Select(HashBlock).ToList();
// Step 4: Combine with topology
var fingerprint = CombineBlockHashes(blockHashes);
var fingerprintId = Convert.ToHexString(fingerprint).ToLowerInvariant();
_logger.LogDebug(
"Generated fingerprint {FingerprintId} with {BlockCount} blocks",
fingerprintId,
blocks.Count);
return Task.FromResult(new FingerprintOutput
{
Hash = fingerprint,
FingerprintId = fingerprintId,
Algorithm = FingerprintAlgorithm.BasicBlock,
Confidence = CalculateConfidence(blocks.Count, input.BinaryData.Length),
Metadata = new FingerprintMetadata
{
BasicBlockCount = blocks.Count,
FunctionSize = input.BinaryData.Length
}
});
}
/// <summary>
/// Identifies basic blocks in the binary data.
/// A basic block ends at: jump, call, return, or conditional branch.
/// </summary>
private List<byte[]> IdentifyBasicBlocks(byte[] binaryData, string architecture)
{
var blocks = new List<byte[]>();
var currentBlockStart = 0;
// Simplified heuristic: split on common instruction boundaries
// Real implementation would use a proper disassembler (Capstone, etc.)
for (var i = 0; i < binaryData.Length; i++)
{
if (IsBlockTerminator(binaryData, i, architecture))
{
var blockSize = i - currentBlockStart + GetInstructionLength(binaryData, i, architecture);
if (blockSize > 0 && currentBlockStart + blockSize <= binaryData.Length)
{
var block = new byte[blockSize];
Array.Copy(binaryData, currentBlockStart, block, 0, blockSize);
blocks.Add(block);
currentBlockStart = i + GetInstructionLength(binaryData, i, architecture);
i = currentBlockStart - 1;
}
}
}
// Add final block if any remaining
if (currentBlockStart < binaryData.Length)
{
var finalBlock = new byte[binaryData.Length - currentBlockStart];
Array.Copy(binaryData, currentBlockStart, finalBlock, 0, finalBlock.Length);
blocks.Add(finalBlock);
}
// Ensure at least one block
if (blocks.Count == 0)
{
blocks.Add(binaryData);
}
return blocks;
}
/// <summary>
/// Checks if the byte at position i is a block terminator instruction.
/// </summary>
private static bool IsBlockTerminator(byte[] data, int i, string architecture)
{
if (i >= data.Length) return false;
return architecture.ToLowerInvariant() switch
{
"x86_64" or "x64" or "amd64" => IsX64BlockTerminator(data, i),
"aarch64" or "arm64" => IsArm64BlockTerminator(data, i),
_ => false
};
}
private static bool IsX64BlockTerminator(byte[] data, int i)
{
// Common x64 terminators:
// C3 = ret
// E8 = call (near)
// E9 = jmp (near)
// 0F 8x = conditional jumps
// EB = jmp (short)
// 7x = short conditional jumps
var b = data[i];
return b switch
{
0xC3 => true, // ret
0xE8 => true, // call
0xE9 => true, // jmp near
0xEB => true, // jmp short
>= 0x70 and <= 0x7F => true, // short conditional jumps
_ => i + 1 < data.Length && data[i] == 0x0F && data[i + 1] >= 0x80 && data[i + 1] <= 0x8F
};
}
private static bool IsArm64BlockTerminator(byte[] data, int i)
{
// ARM64 instructions are 4 bytes
if (i + 3 >= data.Length) return false;
// Check for branch instructions (simplified)
// Real impl would decode the instruction properly
var opcode = (uint)(data[i + 3] & 0xFC);
return opcode switch
{
0x14 => true, // B (branch)
0x54 => true, // B.cond
0x94 => true, // BL (branch with link)
0xD4 => true, // RET (when full decode matches)
_ => false
};
}
private static int GetInstructionLength(byte[] data, int i, string architecture)
{
// Simplified instruction length calculation
return architecture.ToLowerInvariant() switch
{
"x86_64" or "x64" or "amd64" => GetX64InstructionLength(data, i),
"aarch64" or "arm64" => 4, // ARM64 has fixed 4-byte instructions
_ => 1
};
}
private static int GetX64InstructionLength(byte[] data, int i)
{
// Very simplified - real impl would use instruction decoder
if (i >= data.Length) return 1;
var b = data[i];
return b switch
{
0xC3 => 1, // ret
0xEB => 2, // jmp short
>= 0x70 and <= 0x7F => 2, // short conditional
0xE8 => 5, // call near
0xE9 => 5, // jmp near
0x0F when i + 1 < data.Length => 6, // 0F xx = 2 byte opcode + 4 byte offset
_ => 1
};
}
/// <summary>
/// Normalizes a basic block by removing absolute addresses.
/// </summary>
private byte[] NormalizeBlock(byte[] block, string architecture)
{
var normalized = new byte[block.Length];
Array.Copy(block, normalized, block.Length);
// Zero out immediate address operands (simplified)
// Real implementation would parse instructions and identify address operands
return architecture.ToLowerInvariant() switch
{
"x86_64" or "x64" or "amd64" => NormalizeX64Block(normalized),
"aarch64" or "arm64" => NormalizeArm64Block(normalized),
_ => normalized
};
}
private static byte[] NormalizeX64Block(byte[] block)
{
// Zero out likely address operands (4-byte and 8-byte immediates)
// This is a heuristic - real impl would parse properly
for (var i = 0; i < block.Length; i++)
{
// After call/jmp instructions, zero the offset
if (block[i] == 0xE8 || block[i] == 0xE9)
{
for (var j = 1; j <= 4 && i + j < block.Length; j++)
{
block[i + j] = 0;
}
i += 4;
}
}
return block;
}
private static byte[] NormalizeArm64Block(byte[] block)
{
// ARM64: zero out immediate fields in branch instructions
for (var i = 0; i + 3 < block.Length; i += 4)
{
var opcode = block[i + 3] & 0xFC;
if (opcode is 0x14 or 0x94) // B or BL
{
// Zero immediate field (bits 0-25)
block[i] = 0;
block[i + 1] = 0;
block[i + 2] = 0;
block[i + 3] = (byte)(block[i + 3] & 0xFC);
}
}
return block;
}
private static byte[] HashBlock(byte[] block)
{
// Use truncated SHA-256 for each block
var hash = SHA256.HashData(block);
var truncated = new byte[8];
Array.Copy(hash, truncated, 8);
return truncated;
}
/// <summary>
/// Combines block hashes with topological ordering to produce final fingerprint.
/// </summary>
private static byte[] CombineBlockHashes(List<byte[]> blockHashes)
{
// Combine all block hashes into one fingerprint
using var ms = new MemoryStream();
// Add block count as prefix
ms.Write(BitConverter.GetBytes(blockHashes.Count));
// Add each block hash
foreach (var hash in blockHashes)
{
ms.Write(hash);
}
// Final hash and truncate to 16 bytes
var combined = SHA256.HashData(ms.ToArray());
var fingerprint = new byte[16];
Array.Copy(combined, fingerprint, 16);
return fingerprint;
}
private static decimal CalculateConfidence(int blockCount, int size)
{
// Higher confidence for more blocks and larger functions
if (blockCount < 2 || size < 32) return 0.5m;
if (blockCount < 5 || size < 100) return 0.7m;
if (blockCount < 10 || size < 500) return 0.85m;
return 0.95m;
}
}
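
A minimal usage sketch for the generator above. FingerprintInput is defined in IVulnFingerprintGenerator.cs later in this commit; the byte slice, logger factory, and CVE/component values are placeholders.

    var generator = new BasicBlockFingerprintGenerator(
        LoggerFactory.Create(_ => { }).CreateLogger<BasicBlockFingerprintGenerator>());

    var input = new FingerprintInput
    {
        BinaryData = functionBytes,      // extracted function bytes (placeholder variable)
        Architecture = "x86_64",
        CveId = "CVE-0000-0000",         // placeholder identifier
        Component = "example-component"
    };

    if (generator.CanProcess(input))
    {
        var output = await generator.GenerateAsync(input, ct);
        // output.Hash is 16 bytes; output.FingerprintId is its lowercase hex encoding.
    }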


@@ -0,0 +1,182 @@
// -----------------------------------------------------------------------------
// CombinedFingerprintGenerator.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-09 — Implement CombinedFingerprintGenerator (ensemble)
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
/// <summary>
/// Combines multiple fingerprinting algorithms into an ensemble fingerprint.
/// Uses weighted combination of BasicBlock, ControlFlowGraph, and StringRefs.
///
/// This provides the most robust fingerprint by combining structural and
/// semantic features from all algorithms.
///
/// Produces a fixed 48-byte fingerprint: a 32-byte digest over the combined inputs
/// (basic block + CFG + optional string refs) followed by the raw 16-byte basic-block
/// hash, which is appended for quick prefix lookups.
/// </summary>
public sealed class CombinedFingerprintGenerator : IVulnFingerprintGenerator
{
private readonly ILogger<CombinedFingerprintGenerator> _logger;
private readonly BasicBlockFingerprintGenerator _basicBlockGen;
private readonly ControlFlowGraphFingerprintGenerator _cfgGen;
private readonly StringRefsFingerprintGenerator _stringRefsGen;
public CombinedFingerprintGenerator(
ILogger<CombinedFingerprintGenerator> logger,
BasicBlockFingerprintGenerator basicBlockGen,
ControlFlowGraphFingerprintGenerator cfgGen,
StringRefsFingerprintGenerator stringRefsGen)
{
_logger = logger;
_basicBlockGen = basicBlockGen;
_cfgGen = cfgGen;
_stringRefsGen = stringRefsGen;
}
public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.Combined;
public bool CanProcess(FingerprintInput input)
{
// Require at least basic block and CFG to work
return _basicBlockGen.CanProcess(input) && _cfgGen.CanProcess(input);
}
public async Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
ct.ThrowIfCancellationRequested();
_logger.LogDebug(
"Generating combined fingerprint for {Component}/{CveId} ({Size} bytes)",
input.Component,
input.CveId,
input.BinaryData.Length);
// Generate all component fingerprints
var basicBlockTask = _basicBlockGen.GenerateAsync(input, ct);
var cfgTask = _cfgGen.GenerateAsync(input, ct);
FingerprintOutput? stringRefsOutput = null;
if (_stringRefsGen.CanProcess(input))
{
stringRefsOutput = await _stringRefsGen.GenerateAsync(input, ct);
}
var basicBlockOutput = await basicBlockTask;
var cfgOutput = await cfgTask;
// Combine fingerprints
var combined = CombineFingerprints(basicBlockOutput, cfgOutput, stringRefsOutput);
var fingerprintId = Convert.ToHexString(combined).ToLowerInvariant();
// Combine metadata
var metadata = CombineMetadata(basicBlockOutput.Metadata, cfgOutput.Metadata, stringRefsOutput?.Metadata);
// Calculate combined confidence (weighted average)
var confidence = CalculateCombinedConfidence(basicBlockOutput, cfgOutput, stringRefsOutput);
_logger.LogDebug(
"Generated combined fingerprint {FingerprintId} with confidence {Confidence:P0}",
fingerprintId,
confidence);
return new FingerprintOutput
{
Hash = combined,
FingerprintId = fingerprintId,
Algorithm = FingerprintAlgorithm.Combined,
Confidence = confidence,
Metadata = metadata
};
}
private static byte[] CombineFingerprints(
FingerprintOutput basicBlock,
FingerprintOutput cfg,
FingerprintOutput? stringRefs)
{
using var ms = new MemoryStream();
// Version byte (for future compatibility)
ms.WriteByte(0x01);
// Basic block fingerprint (16 bytes)
ms.Write(basicBlock.Hash);
// CFG fingerprint (32 bytes)
ms.Write(cfg.Hash);
// String refs fingerprint if available (16 bytes)
if (stringRefs != null)
{
ms.WriteByte(0x01); // Marker: has string refs
ms.Write(stringRefs.Hash);
}
else
{
ms.WriteByte(0x00); // Marker: no string refs
}
// Final hash to fixed size (48 bytes)
var combined = SHA256.HashData(ms.ToArray());
var result = new byte[48];
Array.Copy(combined, result, 32);
// Add original basic block hash for quick lookup (16 bytes)
Array.Copy(basicBlock.Hash, 0, result, 32, 16);
return result;
}
private static FingerprintMetadata CombineMetadata(
FingerprintMetadata? basicBlock,
FingerprintMetadata? cfg,
FingerprintMetadata? stringRefs)
{
return new FingerprintMetadata
{
BasicBlockCount = basicBlock?.BasicBlockCount ?? cfg?.BasicBlockCount,
EdgeCount = cfg?.EdgeCount,
CyclomaticComplexity = cfg?.CyclomaticComplexity,
StringRefCount = stringRefs?.StringRefCount,
InstructionCount = basicBlock?.InstructionCount,
FunctionSize = basicBlock?.FunctionSize ?? cfg?.FunctionSize ?? stringRefs?.FunctionSize
};
}
private static decimal CalculateCombinedConfidence(
FingerprintOutput basicBlock,
FingerprintOutput cfg,
FingerprintOutput? stringRefs)
{
// Weighted average: CFG (40%), BasicBlock (35%), StringRefs (25%)
const decimal cfgWeight = 0.40m;
const decimal basicBlockWeight = 0.35m;
const decimal stringRefsWeight = 0.25m;
var totalWeight = cfgWeight + basicBlockWeight;
var weightedSum = (cfg.Confidence * cfgWeight) + (basicBlock.Confidence * basicBlockWeight);
if (stringRefs != null)
{
totalWeight += stringRefsWeight;
weightedSum += stringRefs.Confidence * stringRefsWeight;
}
var combined = weightedSum / totalWeight;
// Boost confidence if all algorithms agree (all > 0.7)
if (basicBlock.Confidence >= 0.7m && cfg.Confidence >= 0.7m &&
(stringRefs == null || stringRefs.Confidence >= 0.7m))
{
combined = Math.Min(1.0m, combined * 1.1m);
}
return combined;
}
}
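
To make the weighting concrete with illustrative numbers: for confidences of 0.85 (basic block), 0.90 (CFG), and 0.80 (string refs), the weighted sum is 0.90 x 0.40 + 0.85 x 0.35 + 0.80 x 0.25 = 0.8575 over a total weight of 1.00; because all three are at least 0.7, the 1.1x agreement boost lifts the combined confidence to roughly 0.943 (capped at 1.0).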


@@ -0,0 +1,432 @@
// -----------------------------------------------------------------------------
// ControlFlowGraphFingerprintGenerator.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-07 — Implement ControlFlowGraphFingerprintGenerator
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
/// <summary>
/// Generates fingerprints based on control flow graph structure.
///
/// Algorithm:
/// 1. Build CFG from disassembly
/// 2. Extract graph properties (node count, edge count, cyclomatic complexity)
/// 3. Compute structural hash (adjacency matrix or graph kernel)
///
/// Resilient to instruction reordering, captures loop and branch structure.
/// Produces a 32-byte fingerprint.
/// </summary>
public sealed class ControlFlowGraphFingerprintGenerator : IVulnFingerprintGenerator
{
private readonly ILogger<ControlFlowGraphFingerprintGenerator> _logger;
public ControlFlowGraphFingerprintGenerator(ILogger<ControlFlowGraphFingerprintGenerator> logger)
{
_logger = logger;
}
public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.ControlFlowGraph;
public bool CanProcess(FingerprintInput input)
{
// Require at least 32 bytes of binary data for meaningful CFG
return input.BinaryData.Length >= 32;
}
public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
ct.ThrowIfCancellationRequested();
_logger.LogDebug(
"Generating CFG fingerprint for {Component}/{CveId} ({Size} bytes)",
input.Component,
input.CveId,
input.BinaryData.Length);
// Step 1: Build control flow graph
var cfg = BuildCfg(input.BinaryData, input.Architecture);
// Step 2: Extract graph properties
var properties = ExtractGraphProperties(cfg);
// Step 3: Compute structural hash
var fingerprint = ComputeStructuralHash(cfg, properties);
var fingerprintId = Convert.ToHexString(fingerprint).ToLowerInvariant();
_logger.LogDebug(
"Generated CFG fingerprint {FingerprintId}: {NodeCount} nodes, {EdgeCount} edges, CC={CC}",
fingerprintId,
properties.NodeCount,
properties.EdgeCount,
properties.CyclomaticComplexity);
return Task.FromResult(new FingerprintOutput
{
Hash = fingerprint,
FingerprintId = fingerprintId,
Algorithm = FingerprintAlgorithm.ControlFlowGraph,
Confidence = CalculateConfidence(properties),
Metadata = new FingerprintMetadata
{
BasicBlockCount = properties.NodeCount,
EdgeCount = properties.EdgeCount,
CyclomaticComplexity = properties.CyclomaticComplexity,
FunctionSize = input.BinaryData.Length
}
});
}
/// <summary>
/// Represents a node in the control flow graph.
/// </summary>
private sealed class CfgNode
{
public int Id { get; init; }
public int StartOffset { get; init; }
public int EndOffset { get; init; }
public List<int> Successors { get; } = [];
public List<int> Predecessors { get; } = [];
public CfgNodeType Type { get; init; }
}
private enum CfgNodeType
{
Entry,
Exit,
Basic,
Conditional,
Call,
Loop
}
private sealed class Cfg
{
public List<CfgNode> Nodes { get; } = [];
public int EntryNode { get; set; }
public List<int> ExitNodes { get; } = [];
}
private sealed class CfgProperties
{
public int NodeCount { get; init; }
public int EdgeCount { get; init; }
public int CyclomaticComplexity { get; init; }
public int[] DegreeSequence { get; init; } = [];
public int MaxDepth { get; init; }
public int LoopCount { get; init; }
}
private Cfg BuildCfg(byte[] binaryData, string architecture)
{
var cfg = new Cfg();
var currentNodeStart = 0;
var nodeId = 0;
var pendingEdges = new List<(int from, int targetOffset)>();
// Entry node
cfg.EntryNode = 0;
for (var i = 0; i < binaryData.Length;)
{
var (isTerminator, type, branchTarget) = AnalyzeInstruction(binaryData, i, architecture);
if (isTerminator)
{
var node = new CfgNode
{
Id = nodeId,
StartOffset = currentNodeStart,
EndOffset = i,
Type = type
};
cfg.Nodes.Add(node);
if (type == CfgNodeType.Exit)
{
cfg.ExitNodes.Add(nodeId);
}
else
{
// Fall-through to next node
var instrLen = GetInstructionLength(binaryData, i, architecture);
if (i + instrLen < binaryData.Length && type != CfgNodeType.Exit)
{
pendingEdges.Add((nodeId, i + instrLen));
}
// Branch target
if (branchTarget >= 0 && branchTarget < binaryData.Length)
{
pendingEdges.Add((nodeId, branchTarget));
}
}
nodeId++;
i += GetInstructionLength(binaryData, i, architecture);
currentNodeStart = i;
}
else
{
i++;
}
}
// Add final node if any remaining code
if (currentNodeStart < binaryData.Length)
{
cfg.Nodes.Add(new CfgNode
{
Id = nodeId,
StartOffset = currentNodeStart,
EndOffset = binaryData.Length - 1,
Type = CfgNodeType.Exit
});
cfg.ExitNodes.Add(nodeId);
}
// Ensure at least one node
if (cfg.Nodes.Count == 0)
{
cfg.Nodes.Add(new CfgNode
{
Id = 0,
StartOffset = 0,
EndOffset = binaryData.Length - 1,
Type = CfgNodeType.Basic
});
}
// Resolve pending edges
foreach (var (from, targetOffset) in pendingEdges)
{
var targetNode = cfg.Nodes.FirstOrDefault(n =>
n.StartOffset <= targetOffset && targetOffset <= n.EndOffset);
if (targetNode != null && from < cfg.Nodes.Count)
{
cfg.Nodes[from].Successors.Add(targetNode.Id);
targetNode.Predecessors.Add(from);
}
}
return cfg;
}
private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeInstruction(
byte[] data, int i, string architecture)
{
if (i >= data.Length) return (false, CfgNodeType.Basic, -1);
return architecture.ToLowerInvariant() switch
{
"x86_64" or "x64" or "amd64" => AnalyzeX64Instruction(data, i),
"aarch64" or "arm64" => AnalyzeArm64Instruction(data, i),
_ => (false, CfgNodeType.Basic, -1)
};
}
private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeX64Instruction(byte[] data, int i)
{
var b = data[i];
return b switch
{
0xC3 => (true, CfgNodeType.Exit, -1), // ret
0xE8 => (true, CfgNodeType.Call, GetX64BranchTarget(data, i, 5)), // call
0xE9 => (true, CfgNodeType.Basic, GetX64BranchTarget(data, i, 5)), // jmp
0xEB => (true, CfgNodeType.Basic, GetX64ShortBranchTarget(data, i)), // jmp short
>= 0x70 and <= 0x7F => (true, CfgNodeType.Conditional, GetX64ShortBranchTarget(data, i)), // Jcc short
0x0F when i + 1 < data.Length && data[i + 1] >= 0x80 && data[i + 1] <= 0x8F =>
(true, CfgNodeType.Conditional, GetX64BranchTarget(data, i, 6)), // Jcc near
_ => (false, CfgNodeType.Basic, -1)
};
}
private static int GetX64BranchTarget(byte[] data, int i, int instrLen)
{
if (i + instrLen > data.Length) return -1;
var offset = BitConverter.ToInt32(data, i + instrLen - 4);
return i + instrLen + offset;
}
private static int GetX64ShortBranchTarget(byte[] data, int i)
{
if (i + 2 > data.Length) return -1;
var offset = (sbyte)data[i + 1];
return i + 2 + offset;
}
private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeArm64Instruction(byte[] data, int i)
{
if (i + 4 > data.Length) return (false, CfgNodeType.Basic, -1);
var opcode = (uint)(data[i + 3] & 0xFC);
return opcode switch
{
0x14 => (true, CfgNodeType.Basic, GetArm64BranchTarget(data, i)), // B
0x54 => (true, CfgNodeType.Conditional, GetArm64BranchTarget(data, i)), // B.cond
0x94 => (true, CfgNodeType.Call, GetArm64BranchTarget(data, i)), // BL
0xD4 when data[i + 3] == 0xD6 && (data[i + 2] & 0x1F) == 0x1F =>
(true, CfgNodeType.Exit, -1), // RET
_ => (false, CfgNodeType.Basic, -1)
};
}
private static int GetArm64BranchTarget(byte[] data, int i)
{
if (i + 4 > data.Length) return -1;
// imm26 occupies bits 25:0; bytes are little-endian, with the top two bits in data[i + 3]
var imm = ((data[i + 3] & 0x03) << 24) | (data[i + 2] << 16) | (data[i + 1] << 8) | data[i];
// Sign extend 26-bit immediate
if ((imm & 0x02000000) != 0) imm |= unchecked((int)0xFC000000);
return i + (imm << 2);
}
private static int GetInstructionLength(byte[] data, int i, string architecture)
{
return architecture.ToLowerInvariant() switch
{
"x86_64" or "x64" or "amd64" => GetX64InstructionLength(data, i),
"aarch64" or "arm64" => 4,
_ => 1
};
}
private static int GetX64InstructionLength(byte[] data, int i)
{
if (i >= data.Length) return 1;
var b = data[i];
return b switch
{
0xC3 => 1,
0xEB or (>= 0x70 and <= 0x7F) => 2,
0xE8 or 0xE9 => 5,
0x0F => 6,
_ => 1
};
}
private static CfgProperties ExtractGraphProperties(Cfg cfg)
{
var edgeCount = cfg.Nodes.Sum(n => n.Successors.Count);
// Cyclomatic complexity = E - N + 2P (P=1 for single component)
var cyclomaticComplexity = edgeCount - cfg.Nodes.Count + 2;
// Degree sequence (in-degree + out-degree for each node)
var degreeSequence = cfg.Nodes
.Select(n => n.Predecessors.Count + n.Successors.Count)
.OrderDescending()
.ToArray();
// Estimate loop count (nodes with back edges)
var loopCount = CountBackEdges(cfg);
// Max depth via BFS from entry
var maxDepth = ComputeMaxDepth(cfg);
return new CfgProperties
{
NodeCount = cfg.Nodes.Count,
EdgeCount = edgeCount,
CyclomaticComplexity = Math.Max(1, cyclomaticComplexity),
DegreeSequence = degreeSequence,
MaxDepth = maxDepth,
LoopCount = loopCount
};
}
private static int CountBackEdges(Cfg cfg)
{
// Simple heuristic: count edges pointing to earlier nodes
return cfg.Nodes.Sum(n => n.Successors.Count(s => s <= n.Id));
}
private static int ComputeMaxDepth(Cfg cfg)
{
if (cfg.Nodes.Count == 0) return 0;
var visited = new HashSet<int>();
var queue = new Queue<(int nodeId, int depth)>();
queue.Enqueue((cfg.EntryNode, 0));
var maxDepth = 0;
while (queue.Count > 0)
{
var (nodeId, depth) = queue.Dequeue();
if (!visited.Add(nodeId)) continue;
maxDepth = Math.Max(maxDepth, depth);
if (nodeId < cfg.Nodes.Count)
{
foreach (var succ in cfg.Nodes[nodeId].Successors)
{
if (!visited.Contains(succ))
{
queue.Enqueue((succ, depth + 1));
}
}
}
}
return maxDepth;
}
private static byte[] ComputeStructuralHash(Cfg cfg, CfgProperties props)
{
using var ms = new MemoryStream();
using var writer = new BinaryWriter(ms);
// Encode graph properties
writer.Write(props.NodeCount);
writer.Write(props.EdgeCount);
writer.Write(props.CyclomaticComplexity);
writer.Write(props.MaxDepth);
writer.Write(props.LoopCount);
// Encode degree sequence (truncated)
var degreeSeq = props.DegreeSequence.Take(16).ToArray();
writer.Write(degreeSeq.Length);
foreach (var d in degreeSeq)
{
writer.Write(d);
}
// Encode node types histogram
var typeHistogram = cfg.Nodes
.GroupBy(n => n.Type)
.ToDictionary(g => g.Key, g => g.Count());
foreach (var type in Enum.GetValues<CfgNodeType>())
{
writer.Write(typeHistogram.GetValueOrDefault(type, 0));
}
// Simplified adjacency encoding (first 64 edges)
var edges = cfg.Nodes
.SelectMany(n => n.Successors.Select(s => (n.Id, s)))
.Take(64)
.ToList();
writer.Write(edges.Count);
foreach (var (from, to) in edges)
{
writer.Write((ushort)from);
writer.Write((ushort)to);
}
// Hash to 32 bytes
return SHA256.HashData(ms.ToArray());
}
private static decimal CalculateConfidence(CfgProperties props)
{
// Higher confidence for more complex graphs
if (props.NodeCount < 3) return 0.5m;
if (props.CyclomaticComplexity < 3) return 0.6m;
if (props.NodeCount < 10 && props.EdgeCount < 15) return 0.75m;
if (props.LoopCount > 0) return 0.9m;
return 0.85m;
}
}
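
As a sanity check on the complexity formula above: an if/else diamond with 4 nodes and 4 edges scores E - N + 2 = 2, while a straight-line function with a single node and no edges bottoms out at max(1, 0 - 1 + 2) = 1, matching the Math.Max(1, ...) floor in ExtractGraphProperties.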


@@ -0,0 +1,113 @@
// -----------------------------------------------------------------------------
// IVulnFingerprintGenerator.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-05 — Design IVulnFingerprintGenerator interface
// -----------------------------------------------------------------------------
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
/// <summary>
/// Input data for fingerprint generation.
/// </summary>
public sealed record FingerprintInput
{
/// <summary>Raw binary data of the function or code section.</summary>
public required byte[] BinaryData { get; init; }
/// <summary>Target architecture (e.g., "x86_64", "aarch64").</summary>
public required string Architecture { get; init; }
/// <summary>Function name if known.</summary>
public string? FunctionName { get; init; }
/// <summary>Base address for disassembly normalization.</summary>
public ulong BaseAddress { get; init; }
/// <summary>CVE identifier this fingerprint is for.</summary>
public required string CveId { get; init; }
/// <summary>Component name (e.g., "openssl").</summary>
public required string Component { get; init; }
/// <summary>Source file path if known.</summary>
public string? SourceFile { get; init; }
/// <summary>Source line number if known.</summary>
public int? SourceLine { get; init; }
/// <summary>Package URL if known.</summary>
public string? Purl { get; init; }
}
/// <summary>
/// Output from fingerprint generation.
/// </summary>
public sealed record FingerprintOutput
{
/// <summary>Fingerprint hash bytes.</summary>
public required byte[] Hash { get; init; }
/// <summary>Unique fingerprint identifier (hex-encoded).</summary>
public required string FingerprintId { get; init; }
/// <summary>Algorithm used for generation.</summary>
public required FingerprintAlgorithm Algorithm { get; init; }
/// <summary>Generation confidence score (0.0-1.0).</summary>
public decimal Confidence { get; init; } = 1.0m;
/// <summary>Additional metadata from generation.</summary>
public FingerprintMetadata? Metadata { get; init; }
}
/// <summary>
/// Additional metadata extracted during fingerprint generation.
/// </summary>
public sealed record FingerprintMetadata
{
/// <summary>Number of basic blocks in the function.</summary>
public int? BasicBlockCount { get; init; }
/// <summary>Number of edges in the control flow graph.</summary>
public int? EdgeCount { get; init; }
/// <summary>Cyclomatic complexity.</summary>
public int? CyclomaticComplexity { get; init; }
/// <summary>Number of string references.</summary>
public int? StringRefCount { get; init; }
/// <summary>Instruction count.</summary>
public int? InstructionCount { get; init; }
/// <summary>Function size in bytes.</summary>
public int? FunctionSize { get; init; }
}
/// <summary>
/// Interface for vulnerability fingerprint generators.
/// </summary>
public interface IVulnFingerprintGenerator
{
/// <summary>
/// Algorithm type produced by this generator.
/// </summary>
FingerprintAlgorithm Algorithm { get; }
/// <summary>
/// Generates a fingerprint from the given input.
/// </summary>
/// <param name="input">Input data for fingerprint generation.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Generated fingerprint output.</returns>
Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default);
/// <summary>
/// Checks if this generator can process the given input.
/// </summary>
/// <param name="input">Input to validate.</param>
/// <returns>True if the generator can process this input.</returns>
bool CanProcess(FingerprintInput input);
}
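
A hedged sketch of how a caller might fan an input out across whatever generators are registered; the dispatcher type is hypothetical and not part of this commit.

    public sealed class FingerprintDispatcher
    {
        private readonly IReadOnlyList<IVulnFingerprintGenerator> _generators;

        public FingerprintDispatcher(IEnumerable<IVulnFingerprintGenerator> generators) =>
            _generators = generators.ToList();

        // Runs every generator that accepts the input and collects the outputs.
        public async Task<IReadOnlyList<FingerprintOutput>> GenerateAllAsync(
            FingerprintInput input, CancellationToken ct = default)
        {
            var outputs = new List<FingerprintOutput>();
            foreach (var generator in _generators.Where(g => g.CanProcess(input)))
            {
                outputs.Add(await generator.GenerateAsync(input, ct));
            }
            return outputs;
        }
    }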


@@ -0,0 +1,281 @@
// -----------------------------------------------------------------------------
// StringRefsFingerprintGenerator.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-08 — Implement StringRefsFingerprintGenerator
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
/// <summary>
/// Generates fingerprints based on string references in code.
///
/// Algorithm:
/// 1. Extract string constants referenced by function
/// 2. Hash string content (normalized)
/// 3. Include reference order/pattern
///
/// Useful for error message patterns and version strings.
/// Produces a 16-byte fingerprint.
/// </summary>
public sealed class StringRefsFingerprintGenerator : IVulnFingerprintGenerator
{
private readonly ILogger<StringRefsFingerprintGenerator> _logger;
public StringRefsFingerprintGenerator(ILogger<StringRefsFingerprintGenerator> logger)
{
_logger = logger;
}
public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.StringRefs;
public bool CanProcess(FingerprintInput input)
{
// Only process if we can find at least one string reference
var strings = ExtractStringReferences(input.BinaryData);
return strings.Count >= 1;
}
public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
ct.ThrowIfCancellationRequested();
_logger.LogDebug(
"Generating string refs fingerprint for {Component}/{CveId} ({Size} bytes)",
input.Component,
input.CveId,
input.BinaryData.Length);
// Step 1: Extract string references
var strings = ExtractStringReferences(input.BinaryData);
// Step 2: Normalize strings
var normalized = strings.Select(NormalizeString).ToList();
// Step 3: Hash with order preserved
var fingerprint = HashStringReferences(normalized);
var fingerprintId = Convert.ToHexString(fingerprint).ToLowerInvariant();
_logger.LogDebug(
"Generated string refs fingerprint {FingerprintId} with {StringCount} strings",
fingerprintId,
strings.Count);
return Task.FromResult(new FingerprintOutput
{
Hash = fingerprint,
FingerprintId = fingerprintId,
Algorithm = FingerprintAlgorithm.StringRefs,
Confidence = CalculateConfidence(strings),
Metadata = new FingerprintMetadata
{
StringRefCount = strings.Count,
FunctionSize = input.BinaryData.Length
}
});
}
/// <summary>
/// Extracts ASCII/UTF-8 string references from binary data.
/// </summary>
private List<ExtractedString> ExtractStringReferences(byte[] binaryData)
{
var strings = new List<ExtractedString>();
var minLength = 4;
var currentString = new StringBuilder();
var currentOffset = 0;
for (var i = 0; i < binaryData.Length; i++)
{
var b = binaryData[i];
if (IsPrintableAscii(b))
{
if (currentString.Length == 0)
{
currentOffset = i;
}
currentString.Append((char)b);
}
else if (b == 0 && currentString.Length >= minLength)
{
// Null terminator ends a valid string
strings.Add(new ExtractedString
{
Value = currentString.ToString(),
Offset = currentOffset,
Type = ClassifyString(currentString.ToString())
});
currentString.Clear();
}
else
{
if (currentString.Length >= minLength)
{
strings.Add(new ExtractedString
{
Value = currentString.ToString(),
Offset = currentOffset,
Type = ClassifyString(currentString.ToString())
});
}
currentString.Clear();
}
}
// Handle trailing string
if (currentString.Length >= minLength)
{
strings.Add(new ExtractedString
{
Value = currentString.ToString(),
Offset = currentOffset,
Type = ClassifyString(currentString.ToString())
});
}
// Filter out likely false positives
return strings
.Where(s => IsLikelyValidString(s.Value))
.ToList();
}
private static bool IsPrintableAscii(byte b)
{
return b >= 0x20 && b < 0x7F;
}
private static bool IsLikelyValidString(string s)
{
// Filter out noise
if (s.Length < 4) return false;
if (s.All(c => c == s[0])) return false; // Repeated characters
if (s.Count(char.IsLetter) < s.Length / 3) return false; // Too few letters
// Must have some word-like patterns
return s.Any(char.IsLetter);
}
private sealed record ExtractedString
{
public required string Value { get; init; }
public int Offset { get; init; }
public StringType Type { get; init; }
}
private enum StringType
{
General,
Error,
Version,
Path,
Url,
Format
}
private static StringType ClassifyString(string s)
{
var lower = s.ToLowerInvariant();
if (lower.Contains("error") || lower.Contains("fail") || lower.Contains("invalid"))
return StringType.Error;
if (lower.Contains("version") || s.Any(char.IsDigit) && s.Contains('.'))
return StringType.Version;
if (s.Contains('/') && (s.StartsWith('/') || s.Contains("/usr") || s.Contains("/etc")))
return StringType.Path;
if (lower.Contains("http://") || lower.Contains("https://"))
return StringType.Url;
if (s.Contains('%') && s.Any(c => c is 'd' or 's' or 'x' or 'f'))
return StringType.Format;
return StringType.General;
}
private static string NormalizeString(ExtractedString s)
{
var value = s.Value;
// Remove common variable parts
value = RemoveVersionNumbers(value);
value = RemoveTimestamps(value);
value = RemoveHexAddresses(value);
// Normalize whitespace
value = NormalizeWhitespace(value);
return value.ToLowerInvariant();
}
private static string RemoveVersionNumbers(string s)
{
// Replace version patterns like 1.2.3 with placeholder
return System.Text.RegularExpressions.Regex.Replace(
s, @"\d+\.\d+(\.\d+)?", "<VER>");
}
private static string RemoveTimestamps(string s)
{
// Replace ISO timestamps
return System.Text.RegularExpressions.Regex.Replace(
s, @"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2})?", "<TS>");
}
private static string RemoveHexAddresses(string s)
{
// Replace hex addresses like 0x1234abcd
return System.Text.RegularExpressions.Regex.Replace(
s, @"0x[0-9a-fA-F]+", "<ADDR>");
}
private static string NormalizeWhitespace(string s)
{
return System.Text.RegularExpressions.Regex.Replace(s.Trim(), @"\s+", " ");
}
private static byte[] HashStringReferences(List<string> strings)
{
using var ms = new MemoryStream();
using var writer = new BinaryWriter(ms);
// Encode string count
writer.Write(strings.Count);
// Hash each string individually, then combine
foreach (var s in strings)
{
var stringBytes = Encoding.UTF8.GetBytes(s);
var stringHash = SHA256.HashData(stringBytes);
writer.Write(stringHash, 0, 8); // First 8 bytes of each hash
}
// Final hash and truncate to 16 bytes
var combined = SHA256.HashData(ms.ToArray());
var fingerprint = new byte[16];
Array.Copy(combined, fingerprint, 16);
return fingerprint;
}
private static decimal CalculateConfidence(List<ExtractedString> strings)
{
if (strings.Count == 0) return 0.1m;
if (strings.Count == 1) return 0.4m;
// Higher confidence for error messages and format strings
var hasError = strings.Any(s => s.Type == StringType.Error);
var hasFormat = strings.Any(s => s.Type == StringType.Format);
if (hasError && strings.Count >= 3) return 0.9m;
if (hasFormat && strings.Count >= 2) return 0.8m;
if (strings.Count >= 5) return 0.75m;
return 0.6m;
}
}
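
To illustrate the normalization above on a made-up message: "OpenSSL 1.1.1k: handshake failed at 0x7f3a9c20 on 2021-03-25T12:00:00" becomes "openssl <ver>k: handshake failed at <addr> on <ts>" after the version, address, and timestamp substitutions and lower-casing, so rebuilds that differ only in those variable parts still hash identically.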


@@ -0,0 +1,308 @@
// -----------------------------------------------------------------------------
// FingerprintMatcher.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-13 — Implement similarity matching with configurable threshold
// -----------------------------------------------------------------------------
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Matching;
/// <summary>
/// Implementation of fingerprint matching using multiple similarity metrics.
/// </summary>
public sealed class FingerprintMatcher : IFingerprintMatcher
{
private readonly ILogger<FingerprintMatcher> _logger;
private readonly IFingerprintRepository _repository;
public FingerprintMatcher(
ILogger<FingerprintMatcher> logger,
IFingerprintRepository repository)
{
_logger = logger;
_repository = repository;
}
public async Task<FingerprintMatchResult> MatchAsync(
byte[] fingerprint,
MatchOptions? options = null,
CancellationToken ct = default)
{
options ??= new MatchOptions();
var sw = Stopwatch.StartNew();
_logger.LogDebug(
"Matching fingerprint ({Size} bytes) with threshold {Threshold:P0}",
fingerprint.Length,
options.MinSimilarity);
// Determine algorithm from fingerprint size
var algorithm = InferAlgorithm(fingerprint);
// Get candidate fingerprints from repository
var candidates = await _repository.SearchByHashAsync(
fingerprint,
algorithm,
options.Architecture ?? "",
ct);
if (candidates.Length == 0)
{
_logger.LogDebug("No candidates found for fingerprint");
return new FingerprintMatchResult
{
IsMatch = false,
Similarity = 0,
Confidence = 0,
Details = new MatchDetails
{
MatchingAlgorithm = algorithm,
CandidatesEvaluated = 0,
MatchTimeMs = sw.ElapsedMilliseconds
}
};
}
// Apply filters
var filteredCandidates = candidates
.Where(c => !options.RequireValidated || c.Validated)
.Take(options.MaxCandidates)
.ToList();
// Find best match
VulnFingerprint? bestMatch = null;
var bestSimilarity = 0m;
MatchDetails? bestDetails = null;
foreach (var candidate in filteredCandidates)
{
var similarity = CalculateSimilarity(fingerprint, candidate.FingerprintHash, algorithm);
if (similarity > bestSimilarity)
{
bestSimilarity = similarity;
bestMatch = candidate;
bestDetails = new MatchDetails
{
MatchingAlgorithm = algorithm,
CandidatesEvaluated = filteredCandidates.Count,
MatchTimeMs = sw.ElapsedMilliseconds
};
// Add algorithm-specific similarity scores
if (algorithm == FingerprintAlgorithm.Combined && fingerprint.Length >= 48)
{
bestDetails = bestDetails with
{
BasicBlockSimilarity = CalculateBasicBlockSimilarity(fingerprint, candidate.FingerprintHash),
CfgSimilarity = CalculateCfgSimilarity(fingerprint, candidate.FingerprintHash)
};
}
}
}
var isMatch = bestSimilarity >= options.MinSimilarity;
_logger.LogDebug(
"Match result: {IsMatch}, similarity={Similarity:P2}, candidates={Candidates}",
isMatch,
bestSimilarity,
filteredCandidates.Count);
return new FingerprintMatchResult
{
IsMatch = isMatch,
Similarity = bestSimilarity,
MatchedFingerprint = isMatch ? bestMatch : null,
Confidence = isMatch ? CalculateMatchConfidence(bestSimilarity, bestMatch) : 0,
Details = bestDetails
};
}
public async Task<IReadOnlyList<FingerprintMatchResult>> MatchBatchAsync(
IEnumerable<byte[]> fingerprints,
MatchOptions? options = null,
CancellationToken ct = default)
{
var results = new List<FingerprintMatchResult>();
foreach (var fingerprint in fingerprints)
{
ct.ThrowIfCancellationRequested();
var result = await MatchAsync(fingerprint, options, ct);
results.Add(result);
}
return results;
}
public decimal CalculateSimilarity(byte[] fingerprint1, byte[] fingerprint2, FingerprintAlgorithm algorithm)
{
if (fingerprint1.Length != fingerprint2.Length)
{
// Handle mismatched sizes by comparing common prefix
var minLen = Math.Min(fingerprint1.Length, fingerprint2.Length);
var fp1 = fingerprint1.AsSpan(0, minLen);
var fp2 = fingerprint2.AsSpan(0, minLen);
return CalculateHashSimilarity(fp1, fp2);
}
return algorithm switch
{
FingerprintAlgorithm.BasicBlock => CalculateBasicBlockSimilarity(fingerprint1, fingerprint2),
FingerprintAlgorithm.ControlFlowGraph => CalculateCfgSimilarity(fingerprint1, fingerprint2),
FingerprintAlgorithm.StringRefs => CalculateStringRefsSimilarity(fingerprint1, fingerprint2),
FingerprintAlgorithm.Combined => CalculateCombinedSimilarity(fingerprint1, fingerprint2),
_ => CalculateHashSimilarity(fingerprint1, fingerprint2)
};
}
private static FingerprintAlgorithm InferAlgorithm(byte[] fingerprint)
{
return fingerprint.Length switch
{
16 => FingerprintAlgorithm.BasicBlock, // Could also be StringRefs
32 => FingerprintAlgorithm.ControlFlowGraph,
48 => FingerprintAlgorithm.Combined,
_ => FingerprintAlgorithm.BasicBlock
};
}
/// <summary>
/// Calculates similarity for basic-block fingerprints using normalized Hamming distance.
/// </summary>
private static decimal CalculateBasicBlockSimilarity(byte[] fp1, byte[] fp2)
{
// Use Hamming distance normalized to similarity
var minLen = Math.Min(fp1.Length, Math.Min(fp2.Length, 16));
var hammingDistance = 0;
var totalBits = minLen * 8;
for (var i = 0; i < minLen; i++)
{
var xor = (byte)(fp1[i] ^ fp2[i]);
hammingDistance += BitCount(xor);
}
return 1m - ((decimal)hammingDistance / totalBits);
}
/// <summary>
/// Calculates similarity for CFG fingerprints using structural comparison.
/// </summary>
private static decimal CalculateCfgSimilarity(byte[] fp1, byte[] fp2)
{
if (fp1.Length < 32 || fp2.Length < 32) return 0m;
// For CFG, compare structural properties (first 20 bytes) more heavily
var structuralSimilarity = CalculateHashSimilarity(
fp1.AsSpan(0, 20),
fp2.AsSpan(0, 20));
// Compare remaining bytes (adjacency encoding)
var adjacencySimilarity = CalculateHashSimilarity(
fp1.AsSpan(20, 12),
fp2.AsSpan(20, 12));
// Weight structural properties more heavily
return (structuralSimilarity * 0.7m) + (adjacencySimilarity * 0.3m);
}
/// <summary>
/// Calculates similarity for string reference fingerprints.
/// </summary>
private static decimal CalculateStringRefsSimilarity(byte[] fp1, byte[] fp2)
{
// String refs use direct hash comparison
return CalculateHashSimilarity(fp1, fp2);
}
/// <summary>
/// Calculates similarity for combined fingerprints.
/// </summary>
private static decimal CalculateCombinedSimilarity(byte[] fp1, byte[] fp2)
{
// Layout assumed: version byte (1) + basic block (16) + CFG (32) = 49 bytes
if (fp1.Length < 49 || fp2.Length < 49) return 0m;
// Skip version byte
var offset = 1;
// Basic block (16 bytes)
var bbSim = CalculateHashSimilarity(
fp1.AsSpan(offset, 16),
fp2.AsSpan(offset, 16));
offset += 16;
// CFG (32 bytes)
var cfgSim = CalculateHashSimilarity(
fp1.AsSpan(offset, 32),
fp2.AsSpan(offset, 32));
// Equal-weight combination of CFG and basic-block similarity
return (cfgSim * 0.5m) + (bbSim * 0.5m);
}
/// <summary>
/// General hash similarity using normalized Hamming distance.
/// </summary>
private static decimal CalculateHashSimilarity(ReadOnlySpan<byte> hash1, ReadOnlySpan<byte> hash2)
{
if (hash1.Length != hash2.Length) return 0m;
if (hash1.Length == 0) return 1m;
var matchingBytes = 0;
for (var i = 0; i < hash1.Length; i++)
{
if (hash1[i] == hash2[i]) matchingBytes++;
}
var exactMatchScore = (decimal)matchingBytes / hash1.Length;
// Also consider bit-level similarity for near matches
var hammingDistance = 0;
for (var i = 0; i < hash1.Length; i++)
{
hammingDistance += BitCount((byte)(hash1[i] ^ hash2[i]));
}
var bitSimilarity = 1m - ((decimal)hammingDistance / (hash1.Length * 8));
// Combine: exact match is important, but bit similarity catches near matches
return (exactMatchScore * 0.4m) + (bitSimilarity * 0.6m);
}
private static int BitCount(byte b)
{
var count = 0;
while (b != 0)
{
count += b & 1;
b >>= 1;
}
return count;
}
private static decimal CalculateMatchConfidence(decimal similarity, VulnFingerprint? fingerprint)
{
if (fingerprint == null) return 0m;
var baseConfidence = similarity;
// Boost confidence if fingerprint is validated
if (fingerprint.Validated)
{
baseConfidence = Math.Min(1m, baseConfidence * 1.1m);
}
// Consider fingerprint's own confidence
if (fingerprint.Confidence.HasValue)
{
baseConfidence = (baseConfidence + fingerprint.Confidence.Value) / 2;
}
return baseConfidence;
}
}
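
As a sanity check on the blended hash similarity above (0.4 exact-byte weight, 0.6 bit weight, mirroring CalculateHashSimilarity), a standalone sketch: two 16-byte fingerprints differing by a single bit score 15/16 = 0.9375 on byte matches and 1 − 1/128 ≈ 0.9922 on bits, blending to roughly 0.97, comfortably above the default 0.95 threshold.

using System;
using System.Linq;
using System.Numerics;

byte[] a = new byte[16];
byte[] b = new byte[16];
b[0] = 0x01;                                                                   // flip a single bit

int matchingBytes = a.Zip(b, (x, y) => x == y ? 1 : 0).Sum();                  // 15
int hamming = a.Zip(b, (x, y) => BitOperations.PopCount((uint)(x ^ y))).Sum(); // 1

decimal exact = (decimal)matchingBytes / a.Length;          // 0.9375
decimal bits = 1m - (decimal)hamming / (a.Length * 8);      // 0.9921875
Console.WriteLine((exact * 0.4m) + (bits * 0.6m));          // ≈ 0.97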

View File

@@ -0,0 +1,106 @@
// -----------------------------------------------------------------------------
// IFingerprintMatcher.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-12 — Implement IFingerprintMatcher interface
// -----------------------------------------------------------------------------
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints.Matching;
/// <summary>
/// Result of a fingerprint matching operation.
/// </summary>
public sealed record FingerprintMatchResult
{
/// <summary>Whether a match was found.</summary>
public bool IsMatch { get; init; }
/// <summary>Similarity score (0.0-1.0).</summary>
public decimal Similarity { get; init; }
/// <summary>Matched fingerprint if found.</summary>
public VulnFingerprint? MatchedFingerprint { get; init; }
/// <summary>Match confidence score.</summary>
public decimal Confidence { get; init; }
/// <summary>Additional match details.</summary>
public MatchDetails? Details { get; init; }
}
/// <summary>
/// Details about how a match was determined.
/// </summary>
public sealed record MatchDetails
{
/// <summary>Which algorithm found the match.</summary>
public FingerprintAlgorithm MatchingAlgorithm { get; init; }
/// <summary>Basic block similarity if applicable.</summary>
public decimal? BasicBlockSimilarity { get; init; }
/// <summary>CFG similarity if applicable.</summary>
public decimal? CfgSimilarity { get; init; }
/// <summary>String refs similarity if applicable.</summary>
public decimal? StringRefsSimilarity { get; init; }
/// <summary>Number of candidate fingerprints evaluated.</summary>
public int CandidatesEvaluated { get; init; }
/// <summary>Time taken for matching in milliseconds.</summary>
public long MatchTimeMs { get; init; }
}
/// <summary>
/// Options for fingerprint matching.
/// </summary>
public sealed record MatchOptions
{
/// <summary>Minimum similarity threshold (0.0-1.0). Default 0.95.</summary>
public decimal MinSimilarity { get; init; } = 0.95m;
/// <summary>Maximum candidates to evaluate. Default 100.</summary>
public int MaxCandidates { get; init; } = 100;
/// <summary>Algorithms to use for matching. Null means all.</summary>
public FingerprintAlgorithm[]? Algorithms { get; init; }
/// <summary>Whether to require validation of matched fingerprint.</summary>
public bool RequireValidated { get; init; }
/// <summary>Architecture filter. Null means any.</summary>
public string? Architecture { get; init; }
}
/// <summary>
/// Interface for fingerprint matching operations.
/// </summary>
public interface IFingerprintMatcher
{
/// <summary>
/// Matches a fingerprint against the vulnerability database.
/// </summary>
/// <param name="fingerprint">Fingerprint to match.</param>
/// <param name="options">Matching options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Match result.</returns>
Task<FingerprintMatchResult> MatchAsync(
byte[] fingerprint,
MatchOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Matches multiple fingerprints in batch.
/// </summary>
Task<IReadOnlyList<FingerprintMatchResult>> MatchBatchAsync(
IEnumerable<byte[]> fingerprints,
MatchOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Calculates similarity between two fingerprints.
/// </summary>
decimal CalculateSimilarity(byte[] fingerprint1, byte[] fingerprint2, FingerprintAlgorithm algorithm);
}
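
A hedged consumer sketch for the interface above; the wrapping class and the 0.97 threshold are invented for illustration, while IFingerprintMatcher, MatchOptions, and FingerprintMatchResult come from this file.

using System.Threading;
using System.Threading.Tasks;
using StellaOps.BinaryIndex.Fingerprints.Matching;

public sealed class ExampleFingerprintCheck
{
    private readonly IFingerprintMatcher _matcher;

    public ExampleFingerprintCheck(IFingerprintMatcher matcher) => _matcher = matcher;

    // Match a single function fingerprint with a stricter-than-default threshold.
    public Task<FingerprintMatchResult> CheckAsync(byte[] fingerprint, CancellationToken ct) =>
        _matcher.MatchAsync(
            fingerprint,
            new MatchOptions
            {
                MinSimilarity = 0.97m,      // tighter than the 0.95 default
                MaxCandidates = 50,
                RequireValidated = true,    // only validated corpus fingerprints
                Architecture = "x86_64"
            },
            ct);
}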

View File

@@ -0,0 +1,390 @@
// -----------------------------------------------------------------------------
// ReferenceBuildPipeline.cs
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
// Task: FPRINT-10 — Create reference build generation pipeline
// Task: FPRINT-11 — Implement vulnerable/fixed binary pair builder
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Generators;
using StellaOps.BinaryIndex.Fingerprints.Models;
using StellaOps.BinaryIndex.Fingerprints.Storage;
namespace StellaOps.BinaryIndex.Fingerprints.Pipeline;
/// <summary>
/// Request for building reference binaries.
/// </summary>
public sealed record ReferenceBuildRequest
{
/// <summary>CVE identifier.</summary>
public required string CveId { get; init; }
/// <summary>Component name (e.g., "openssl").</summary>
public required string Component { get; init; }
/// <summary>Git repository URL.</summary>
public required string RepoUrl { get; init; }
/// <summary>Vulnerable commit or tag.</summary>
public required string VulnerableRef { get; init; }
/// <summary>Fixed commit or tag.</summary>
public required string FixedRef { get; init; }
/// <summary>Target architectures.</summary>
public string[] Architectures { get; init; } = ["x86_64"];
/// <summary>Build command template.</summary>
public string? BuildCommand { get; init; }
/// <summary>Function names to fingerprint (optional).</summary>
public string[]? TargetFunctions { get; init; }
}
/// <summary>
/// Result of a reference build pipeline run.
/// </summary>
public sealed record ReferenceBuildResult
{
/// <summary>Whether the pipeline succeeded.</summary>
public bool Success { get; init; }
/// <summary>Error message if failed.</summary>
public string? Error { get; init; }
/// <summary>Generated fingerprints.</summary>
public VulnFingerprint[] Fingerprints { get; init; } = [];
/// <summary>Storage path for vulnerable build.</summary>
public string? VulnBuildPath { get; init; }
/// <summary>Storage path for fixed build.</summary>
public string? FixedBuildPath { get; init; }
/// <summary>Build log.</summary>
public string? BuildLog { get; init; }
}
/// <summary>
/// Represents a built binary artifact.
/// </summary>
public sealed record BuildArtifact
{
/// <summary>Path within build output.</summary>
public required string Path { get; init; }
/// <summary>Binary content.</summary>
public required byte[] Content { get; init; }
/// <summary>Target architecture.</summary>
public required string Architecture { get; init; }
/// <summary>Whether this is the vulnerable or fixed version.</summary>
public required bool IsVulnerable { get; init; }
}
/// <summary>
/// Represents a function extracted from a binary for fingerprinting.
/// </summary>
public sealed record ExtractedFunction
{
/// <summary>Function name.</summary>
public required string Name { get; init; }
/// <summary>Function binary data.</summary>
public required byte[] Data { get; init; }
/// <summary>Start offset in original binary.</summary>
public long Offset { get; init; }
/// <summary>Size in bytes.</summary>
public int Size { get; init; }
/// <summary>Source file if known.</summary>
public string? SourceFile { get; init; }
/// <summary>Source line if known.</summary>
public int? SourceLine { get; init; }
}
/// <summary>
/// Pipeline for generating reference builds and extracting vulnerability fingerprints.
/// </summary>
public sealed class ReferenceBuildPipeline
{
private readonly ILogger<ReferenceBuildPipeline> _logger;
private readonly IFingerprintBlobStorage _storage;
private readonly IFingerprintRepository _repository;
private readonly CombinedFingerprintGenerator _fingerprintGenerator;
public ReferenceBuildPipeline(
ILogger<ReferenceBuildPipeline> logger,
IFingerprintBlobStorage storage,
IFingerprintRepository repository,
CombinedFingerprintGenerator fingerprintGenerator)
{
_logger = logger;
_storage = storage;
_repository = repository;
_fingerprintGenerator = fingerprintGenerator;
}
/// <summary>
/// Executes the full reference build pipeline.
/// </summary>
public async Task<ReferenceBuildResult> ExecuteAsync(
ReferenceBuildRequest request,
CancellationToken ct = default)
{
_logger.LogInformation(
"Starting reference build pipeline for {CveId} ({Component})",
request.CveId,
request.Component);
try
{
// Step 1: Clone and build vulnerable version
var vulnArtifacts = await BuildVersionAsync(request, isVulnerable: true, ct);
if (vulnArtifacts.Count == 0)
{
return new ReferenceBuildResult
{
Success = false,
Error = "Failed to build vulnerable version"
};
}
// Step 2: Clone and build fixed version
var fixedArtifacts = await BuildVersionAsync(request, isVulnerable: false, ct);
if (fixedArtifacts.Count == 0)
{
return new ReferenceBuildResult
{
Success = false,
Error = "Failed to build fixed version"
};
}
// Step 3: Extract functions from both versions
var vulnFunctions = await ExtractFunctionsAsync(vulnArtifacts, request.TargetFunctions, ct);
var fixedFunctions = await ExtractFunctionsAsync(fixedArtifacts, request.TargetFunctions, ct);
// Step 4: Find differential fingerprints (what changed)
var fingerprints = await GenerateDifferentialFingerprintsAsync(
request,
vulnFunctions,
fixedFunctions,
ct);
// Step 5: Store reference builds
var vulnBuildPath = await StoreReferenceBuildAsync(request.CveId, vulnArtifacts, "vulnerable", ct);
var fixedBuildPath = await StoreReferenceBuildAsync(request.CveId, fixedArtifacts, "fixed", ct);
// Step 6: Store fingerprints to repository
foreach (var fp in fingerprints)
{
await _repository.CreateAsync(fp, ct);
}
_logger.LogInformation(
"Pipeline complete for {CveId}: generated {Count} fingerprints",
request.CveId,
fingerprints.Length);
return new ReferenceBuildResult
{
Success = true,
Fingerprints = fingerprints,
VulnBuildPath = vulnBuildPath,
FixedBuildPath = fixedBuildPath
};
}
catch (Exception ex)
{
_logger.LogError(ex, "Pipeline failed for {CveId}", request.CveId);
return new ReferenceBuildResult
{
Success = false,
Error = ex.Message
};
}
}
/// <summary>
/// Builds a specific version (vulnerable or fixed).
/// </summary>
private async Task<List<BuildArtifact>> BuildVersionAsync(
ReferenceBuildRequest request,
bool isVulnerable,
CancellationToken ct)
{
var version = isVulnerable ? request.VulnerableRef : request.FixedRef;
_logger.LogDebug(
"Building {Type} version at {Ref}",
isVulnerable ? "vulnerable" : "fixed",
version);
// NOTE: Actual implementation would:
// 1. Clone repo to sandboxed environment
// 2. Checkout the specific ref
// 3. Run build command
// 4. Extract built binaries
//
// This is a placeholder that returns empty for now.
// Production implementation would use containers or VMs for sandboxing.
await Task.CompletedTask;
// Placeholder: return empty list
// Real impl would return built artifacts
return [];
}
/// <summary>
/// Extracts functions from build artifacts.
/// </summary>
private async Task<List<ExtractedFunction>> ExtractFunctionsAsync(
List<BuildArtifact> artifacts,
string[]? targetFunctions,
CancellationToken ct)
{
var functions = new List<ExtractedFunction>();
foreach (var artifact in artifacts)
{
ct.ThrowIfCancellationRequested();
// NOTE: Real implementation would:
// 1. Parse ELF/PE headers
// 2. Find symbol table
// 3. Extract function boundaries
// 4. Extract code bytes for each function
//
// This is a placeholder.
_logger.LogDebug(
"Extracting functions from {Path} ({Size} bytes)",
artifact.Path,
artifact.Content.Length);
// Placeholder: would use ELF parser
}
await Task.CompletedTask;
return functions;
}
/// <summary>
/// Generates differential fingerprints by comparing vulnerable and fixed versions.
/// </summary>
private async Task<VulnFingerprint[]> GenerateDifferentialFingerprintsAsync(
ReferenceBuildRequest request,
List<ExtractedFunction> vulnFunctions,
List<ExtractedFunction> fixedFunctions,
CancellationToken ct)
{
var fingerprints = new List<VulnFingerprint>();
// Find functions that changed between versions
var changedFunctions = FindChangedFunctions(vulnFunctions, fixedFunctions);
_logger.LogDebug(
"Found {Count} changed functions between vulnerable and fixed",
changedFunctions.Count);
foreach (var (vulnFunc, fixedFunc) in changedFunctions)
{
ct.ThrowIfCancellationRequested();
// Generate fingerprint for the vulnerable version
var input = new FingerprintInput
{
BinaryData = vulnFunc.Data,
Architecture = "x86_64", // Would come from artifact
FunctionName = vulnFunc.Name,
CveId = request.CveId,
Component = request.Component,
SourceFile = vulnFunc.SourceFile,
SourceLine = vulnFunc.SourceLine
};
if (!_fingerprintGenerator.CanProcess(input))
{
_logger.LogDebug("Skipping function {Name}: too small", vulnFunc.Name);
continue;
}
var output = await _fingerprintGenerator.GenerateAsync(input, ct);
fingerprints.Add(new VulnFingerprint
{
Id = Guid.NewGuid(),
CveId = request.CveId,
Component = request.Component,
Algorithm = output.Algorithm,
FingerprintId = output.FingerprintId,
FingerprintHash = output.Hash,
Architecture = "x86_64",
FunctionName = vulnFunc.Name,
SourceFile = vulnFunc.SourceFile,
SourceLine = vulnFunc.SourceLine,
Confidence = output.Confidence,
VulnBuildRef = request.VulnerableRef,
FixedBuildRef = request.FixedRef,
IndexedAt = DateTimeOffset.UtcNow
});
}
return fingerprints.ToArray();
}
/// <summary>
/// Finds functions that changed between vulnerable and fixed versions.
/// </summary>
private static List<(ExtractedFunction vuln, ExtractedFunction? fix)> FindChangedFunctions(
List<ExtractedFunction> vulnFunctions,
List<ExtractedFunction> fixedFunctions)
{
var results = new List<(ExtractedFunction, ExtractedFunction?)>();
foreach (var vuln in vulnFunctions)
{
var fix = fixedFunctions.FirstOrDefault(f => f.Name == vuln.Name);
// Include if function exists in vuln but not in fixed (deleted)
// or if the function data changed
if (fix == null || !vuln.Data.SequenceEqual(fix.Data))
{
results.Add((vuln, fix));
}
}
return results;
}
/// <summary>
/// Stores reference build artifacts to blob storage.
/// </summary>
private async Task<string> StoreReferenceBuildAsync(
string cveId,
List<BuildArtifact> artifacts,
string buildType,
CancellationToken ct)
{
// NOTE: Real implementation would:
// 1. Create tar archive of all artifacts
// 2. Compress with zstd
// 3. Store to blob storage
// Placeholder: just log
_logger.LogDebug(
"Storing {Count} artifacts for {CveId}/{BuildType}",
artifacts.Count,
cveId,
buildType);
var storagePath = await _storage.StoreReferenceBuildAsync(cveId, buildType, [], ct);
return storagePath;
}
}
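
A hedged usage sketch for the pipeline above; the CVE id, repository URL, refs, and function name are placeholder values, and `pipeline` is assumed to be a ReferenceBuildPipeline resolved from DI.

using System;
using System.Threading;
using StellaOps.BinaryIndex.Fingerprints.Pipeline;

// Placeholder request values; not real advisory data.
var request = new ReferenceBuildRequest
{
    CveId = "CVE-0000-00000",
    Component = "examplelib",
    RepoUrl = "https://example.invalid/examplelib.git",
    VulnerableRef = "v1.0.0",
    FixedRef = "v1.0.1",
    Architectures = ["x86_64"],
    TargetFunctions = ["parse_header"]
};

// `pipeline` is assumed to be a ReferenceBuildPipeline obtained from the service provider.
var result = await pipeline.ExecuteAsync(request, CancellationToken.None);
Console.WriteLine(result.Success
    ? $"Generated {result.Fingerprints.Length} fingerprints"
    : $"Reference build failed: {result.Error}");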

View File

@@ -1,3 +1,5 @@
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.FixIndex.Models;
/// <summary>
@@ -39,45 +41,6 @@ public sealed record FixEvidence
public DateTimeOffset CreatedAt { get; init; }
}
/// <summary>
/// Fix state enumeration.
/// </summary>
public enum FixState
{
/// <summary>CVE is fixed in this version</summary>
Fixed,
/// <summary>CVE affects this package</summary>
Vulnerable,
/// <summary>CVE does not affect this package</summary>
NotAffected,
/// <summary>Fix won't be applied (e.g., EOL version)</summary>
Wontfix,
/// <summary>Unknown status</summary>
Unknown
}
/// <summary>
/// Method used to identify the fix.
/// </summary>
public enum FixMethod
{
/// <summary>From official security feed (OVAL, DSA, etc.)</summary>
SecurityFeed,
/// <summary>Parsed from Debian/Ubuntu changelog</summary>
Changelog,
/// <summary>Extracted from patch header (DEP-3)</summary>
PatchHeader,
/// <summary>Matched against upstream patch database</summary>
UpstreamPatchMatch
}
/// <summary>
/// Base class for evidence payloads.
/// </summary>

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Parsers;

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Parsers;

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Parsers;

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Parsers;

View File

@@ -1,3 +1,4 @@
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Repositories;

View File

@@ -0,0 +1,116 @@
-- -----------------------------------------------------------------------------
-- 20251226_AddFingerprintTables.sql
-- Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
-- Task: FPRINT-01 — Create vulnerable_fingerprints table schema
-- Task: FPRINT-02 — Create fingerprint_matches table for match results
-- -----------------------------------------------------------------------------
-- Fingerprint tables for vulnerability detection independent of package metadata
BEGIN;
-- Table: vulnerable_fingerprints
-- Stores function-level vulnerability fingerprints
CREATE TABLE IF NOT EXISTS binaries.vulnerable_fingerprints (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL DEFAULT binaries_app.require_current_tenant(),
cve_id TEXT NOT NULL,
component TEXT NOT NULL,
purl TEXT,
algorithm TEXT NOT NULL CHECK (algorithm IN ('basic_block', 'cfg', 'string_refs', 'combined')),
fingerprint_id TEXT NOT NULL,
fingerprint_hash BYTEA NOT NULL,
architecture TEXT NOT NULL,
function_name TEXT,
source_file TEXT,
source_line INT,
similarity_threshold DECIMAL(3,2) DEFAULT 0.95 CHECK (similarity_threshold BETWEEN 0 AND 1),
confidence DECIMAL(3,2) CHECK (confidence IS NULL OR confidence BETWEEN 0 AND 1),
validated BOOLEAN DEFAULT false,
validation_stats JSONB,
vuln_build_ref TEXT,
fixed_build_ref TEXT,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
UNIQUE (tenant_id, fingerprint_id)
);
-- Indexes for efficient lookups
CREATE INDEX IF NOT EXISTS idx_fingerprint_cve
ON binaries.vulnerable_fingerprints (tenant_id, cve_id);
CREATE INDEX IF NOT EXISTS idx_fingerprint_component
ON binaries.vulnerable_fingerprints (tenant_id, component);
CREATE INDEX IF NOT EXISTS idx_fingerprint_algorithm
ON binaries.vulnerable_fingerprints (tenant_id, algorithm, architecture);
CREATE INDEX IF NOT EXISTS idx_fingerprint_hash
ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);
CREATE INDEX IF NOT EXISTS idx_fingerprint_validated
ON binaries.vulnerable_fingerprints (tenant_id, validated)
WHERE validated = true;
-- Enable Row-Level Security
ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;
CREATE POLICY fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
USING (tenant_id = binaries_app.require_current_tenant());
-- Table: fingerprint_matches
-- Stores results of fingerprint matching operations
CREATE TABLE IF NOT EXISTS binaries.fingerprint_matches (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL DEFAULT binaries_app.require_current_tenant(),
scan_id UUID NOT NULL,
match_type TEXT NOT NULL CHECK (match_type IN ('fingerprint', 'build_id', 'hash_exact')),
binary_key TEXT NOT NULL,
vulnerable_purl TEXT NOT NULL,
vulnerable_version TEXT NOT NULL,
matched_fingerprint_id UUID REFERENCES binaries.vulnerable_fingerprints(id),
matched_function TEXT,
similarity DECIMAL(3,2) CHECK (similarity IS NULL OR similarity BETWEEN 0 AND 1),
advisory_ids TEXT[],
reachability_status TEXT CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),
matched_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Indexes for fingerprint_matches
CREATE INDEX IF NOT EXISTS idx_match_scan
ON binaries.fingerprint_matches (tenant_id, scan_id);
CREATE INDEX IF NOT EXISTS idx_match_fingerprint
ON binaries.fingerprint_matches (matched_fingerprint_id);
CREATE INDEX IF NOT EXISTS idx_match_binary
ON binaries.fingerprint_matches (tenant_id, binary_key);
CREATE INDEX IF NOT EXISTS idx_match_reachability
ON binaries.fingerprint_matches (tenant_id, reachability_status);
-- Enable Row-Level Security
ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;
CREATE POLICY matches_tenant_isolation ON binaries.fingerprint_matches
USING (tenant_id = binaries_app.require_current_tenant());
-- Add comments
COMMENT ON TABLE binaries.vulnerable_fingerprints IS
'Function-level vulnerability fingerprints for detecting vulnerable code independent of package metadata';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.algorithm IS
'Fingerprinting algorithm: basic_block, cfg (control flow graph), string_refs, or combined (ensemble)';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.fingerprint_hash IS
'Binary fingerprint data (16-48 bytes depending on algorithm)';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.validation_stats IS
'JSON object with tp, fp, tn, fn counts from validation corpus';
COMMENT ON TABLE binaries.fingerprint_matches IS
'Results of fingerprint matching operations during scans';
COMMENT ON COLUMN binaries.fingerprint_matches.similarity IS
'Similarity score (0.0-1.0) for fingerprint matches';
COMMIT;
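
For orientation, a hedged C# read-path sketch (Npgsql, as used elsewhere in this commit) against the new tables; the connection string and scan id are placeholders, and the tenant session context required by the RLS policies is assumed to be set up beforehand.

using System;
using Npgsql;

// Placeholder connection string and scan id; tenant context for RLS is assumed already set.
await using var conn = new NpgsqlConnection("Host=localhost;Database=stellaops");
await conn.OpenAsync();

const string sql = """
    SELECT m.binary_key, f.cve_id, m.similarity
    FROM binaries.fingerprint_matches m
    JOIN binaries.vulnerable_fingerprints f ON f.id = m.matched_fingerprint_id
    WHERE m.scan_id = @scanId
    ORDER BY m.similarity DESC NULLS LAST
    """;

await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("scanId", Guid.Empty);   // placeholder scan id

await using var reader = await cmd.ExecuteReaderAsync();
while (await reader.ReadAsync())
{
    var similarity = reader.IsDBNull(2) ? "n/a" : reader.GetDecimal(2).ToString("0.00");
    Console.WriteLine($"{reader.GetString(0)} -> {reader.GetString(1)} (similarity {similarity})");
}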

View File

@@ -1,6 +1,7 @@
using System.Text.Json;
using Npgsql;
using NpgsqlTypes;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
using StellaOps.BinaryIndex.FixIndex.Repositories;
@@ -11,16 +12,16 @@ namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// </summary>
public sealed class FixIndexRepository : IFixIndexRepository
{
private readonly BinaryIndexDataSource _dataSource;
private readonly BinaryIndexDbContext _dbContext;
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
public FixIndexRepository(BinaryIndexDataSource dataSource)
public FixIndexRepository(BinaryIndexDbContext dbContext)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
}
/// <inheritdoc />
@@ -39,7 +40,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
AND source_pkg = @sourcePkg AND cve_id = @cveId
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("distro", distro);
cmd.Parameters.AddWithValue("release", release);
@@ -70,7 +71,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
ORDER BY cve_id
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("distro", distro);
cmd.Parameters.AddWithValue("release", release);
@@ -99,7 +100,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
ORDER BY distro, release, source_pkg
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("cveId", cveId);
@@ -145,7 +146,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
method, confidence, evidence_id, snapshot_id, indexed_at, updated_at
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("distro", evidence.Distro);
cmd.Parameters.AddWithValue("release", evidence.Release);
@@ -192,7 +193,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
RETURNING id
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("evidenceType", evidenceType);
cmd.Parameters.AddWithValue("sourceFile", (object?)sourceFile ?? DBNull.Value);
@@ -215,7 +216,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
WHERE id = @id
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("id", evidenceId);
@@ -253,7 +254,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
SELECT (SELECT COUNT(*) FROM deleted_index) + (SELECT COUNT(*) FROM deleted_evidence)
""";
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
await using var cmd = new NpgsqlCommand(sql, conn);
cmd.Parameters.AddWithValue("snapshotId", snapshotId);

View File

@@ -1,10 +1,12 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.BinaryIndex.FixIndex.Repositories;
using StellaOps.BinaryIndex.Fingerprints.Matching;
using StellaOps.BinaryIndex.Persistence.Repositories;
namespace StellaOps.BinaryIndex.Core.Services;
namespace StellaOps.BinaryIndex.Persistence.Services;
/// <summary>
/// Implementation of binary vulnerability lookup service.
@@ -13,16 +15,19 @@ public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
{
private readonly IBinaryVulnAssertionRepository _assertionRepo;
private readonly IFixIndexRepository? _fixIndexRepo;
private readonly IFingerprintMatcher? _fingerprintMatcher;
private readonly ILogger<BinaryVulnerabilityService> _logger;
public BinaryVulnerabilityService(
IBinaryVulnAssertionRepository assertionRepo,
ILogger<BinaryVulnerabilityService> logger,
IFixIndexRepository? fixIndexRepo = null)
IFixIndexRepository? fixIndexRepo = null,
IFingerprintMatcher? fingerprintMatcher = null)
{
_assertionRepo = assertionRepo;
_logger = logger;
_fixIndexRepo = fixIndexRepo;
_fingerprintMatcher = fingerprintMatcher;
}
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
@@ -133,4 +138,64 @@ public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
"fingerprint_match" => MatchMethod.FingerprintMatch,
_ => MatchMethod.RangeMatch
};
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
byte[] fingerprint,
FingerprintLookupOptions? options = null,
CancellationToken ct = default)
{
if (_fingerprintMatcher is null)
{
_logger.LogWarning("Fingerprint matcher not configured, cannot perform fingerprint lookup");
return ImmutableArray<BinaryVulnMatch>.Empty;
}
options ??= new FingerprintLookupOptions();
var matches = new List<BinaryVulnMatch>();
var matchOptions = new MatchOptions
{
MinSimilarity = options.MinSimilarity,
MaxCandidates = options.MaxCandidates,
Architecture = options.Architecture
};
var result = await _fingerprintMatcher.MatchAsync(fingerprint, matchOptions, ct).ConfigureAwait(false);
if (result.IsMatch && result.MatchedFingerprint is not null)
{
var fp = result.MatchedFingerprint;
matches.Add(new BinaryVulnMatch
{
CveId = fp.CveId,
VulnerablePurl = fp.Purl ?? $"pkg:generic/{fp.Component}",
Method = MatchMethod.FingerprintMatch,
Confidence = result.Confidence,
Evidence = new MatchEvidence
{
Similarity = result.Similarity,
MatchedFunction = fp.FunctionName
}
});
}
_logger.LogDebug("Fingerprint lookup found {Count} matches", matches.Count);
return matches.ToImmutableArray();
}
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
FingerprintLookupOptions? options = null,
CancellationToken ct = default)
{
var results = new Dictionary<string, ImmutableArray<BinaryVulnMatch>>();
foreach (var (key, fingerprint) in fingerprints)
{
var matches = await LookupByFingerprintAsync(fingerprint, options, ct).ConfigureAwait(false);
results[key] = matches;
}
return results.ToImmutableDictionary();
}
}
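
A hedged caller sketch for the new fingerprint lookup path; `vulnService` is assumed to be an injected IBinaryVulnerabilityService, the fingerprint bytes are a placeholder, and FingerprintLookupOptions is assumed to expose init-style properties matching those read above.

using System;
using System.Threading;

// `vulnService` is assumed to be an injected IBinaryVulnerabilityService instance.
var options = new FingerprintLookupOptions
{
    MinSimilarity = 0.95m,
    MaxCandidates = 100,
    Architecture = "x86_64"
};

byte[] fingerprint = new byte[48];   // placeholder combined fingerprint bytes
var matches = await vulnService.LookupByFingerprintAsync(fingerprint, options, CancellationToken.None);
foreach (var match in matches)
{
    Console.WriteLine($"{match.CveId} via {match.Method} (confidence {match.Confidence:P0})");
}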

View File

@@ -16,6 +16,7 @@
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.FixIndex\StellaOps.BinaryIndex.FixIndex.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Fingerprints\StellaOps.BinaryIndex.Fingerprints.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>