Refactor code structure and optimize performance across multiple modules
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// BinaryCacheOptions.cs
|
||||
// Sprint: SPRINT_20251226_014_BINIDX
|
||||
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace StellaOps.BinaryIndex.Cache;
|
||||
|
||||
/// <summary>
/// Tunable settings for the binary vulnerability cache layer.
/// All properties are init-only, so an instance is fixed once constructed or bound.
/// </summary>
public sealed class BinaryCacheOptions
{
    /// <summary>
    /// Prefix prepended to every Valkey key that belongs to the binary cache.
    /// Defaults to <c>stellaops:binary:</c>.
    /// </summary>
    public string KeyPrefix { get; init; } = "stellaops:binary:";

    /// <summary>
    /// Time-to-live applied to cached identity lookups. Defaults to one hour.
    /// </summary>
    public TimeSpan IdentityTtl { get; init; } = new TimeSpan(1, 0, 0);

    /// <summary>
    /// Time-to-live applied to cached fix status lookups. Defaults to one hour.
    /// </summary>
    public TimeSpan FixStatusTtl { get; init; } = new TimeSpan(1, 0, 0);

    /// <summary>
    /// Time-to-live applied to cached fingerprint lookups.
    /// Defaults to 30 minutes; kept shorter because corpus updates may change the result.
    /// </summary>
    public TimeSpan FingerprintTtl { get; init; } = new TimeSpan(0, 30, 0);

    /// <summary>
    /// Upper bound for the TTL of any cache entry. Defaults to 24 hours.
    /// </summary>
    public TimeSpan MaxTtl { get; init; } = TimeSpan.FromDays(1);

    /// <summary>
    /// <c>true</c> to request sliding expiration; <c>false</c> (the default) for absolute expiration.
    /// </summary>
    public bool SlidingExpiration { get; init; }

    /// <summary>
    /// Cache hit rate the deployment aims for, used for monitoring and alerting.
    /// Expressed as a fraction; defaults to 0.80 (80%).
    /// </summary>
    public double TargetHitRate { get; init; } = 0.8;
}
|
||||
@@ -0,0 +1,52 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// BinaryCacheServiceExtensions.cs
|
||||
// Sprint: SPRINT_20251226_014_BINIDX
|
||||
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Cache;
|
||||
|
||||
/// <summary>
/// Extension methods for registering binary cache services.
/// </summary>
public static class BinaryCacheServiceExtensions
{
    /// <summary>
    /// Adds the binary cache layer to the service collection, binding
    /// <see cref="BinaryCacheOptions"/> from the <c>BinaryIndex:Cache</c> configuration section
    /// and decorating the existing <see cref="IBinaryVulnerabilityService"/> with caching.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">Configuration root that contains the <c>BinaryIndex:Cache</c> section.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexCaching(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        // Fail fast with a named parameter instead of a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.Configure<BinaryCacheOptions>(
            configuration.GetSection("BinaryIndex:Cache"));

        return DecorateWithCache(services);
    }

    /// <summary>
    /// Adds the binary cache layer with options supplied by a delegate, and decorates the
    /// existing <see cref="IBinaryVulnerabilityService"/> with caching.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configureOptions">Callback that populates <see cref="BinaryCacheOptions"/>.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configureOptions"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexCaching(
        this IServiceCollection services,
        Action<BinaryCacheOptions> configureOptions)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configureOptions);

        services.Configure(configureOptions);

        return DecorateWithCache(services);
    }

    /// <summary>
    /// Wraps the registered <see cref="IBinaryVulnerabilityService"/> in
    /// <see cref="CachedBinaryVulnerabilityService"/>; shared by both overloads.
    /// </summary>
    private static IServiceCollection DecorateWithCache(IServiceCollection services)
    {
        services.Decorate<IBinaryVulnerabilityService, CachedBinaryVulnerabilityService>();
        return services;
    }
}
|
||||
@@ -0,0 +1,431 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CachedBinaryVulnerabilityService.cs
|
||||
// Sprint: SPRINT_20251226_014_BINIDX
|
||||
// Task: SCANINT-21 - Add Valkey cache layer for hot lookups
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Cache;
|
||||
|
||||
/// <summary>
|
||||
/// Caching decorator for <see cref="IBinaryVulnerabilityService"/>.
|
||||
/// Implements read-through caching with Valkey/Redis for binary vulnerability lookups.
|
||||
/// Target: > 80% cache hit rate for repeat scans.
|
||||
/// </summary>
|
||||
public sealed class CachedBinaryVulnerabilityService : IBinaryVulnerabilityService, IAsyncDisposable
|
||||
{
|
||||
private readonly IBinaryVulnerabilityService _inner;
|
||||
private readonly IConnectionMultiplexer _connectionMultiplexer;
|
||||
private readonly BinaryCacheOptions _options;
|
||||
private readonly ILogger<CachedBinaryVulnerabilityService> _logger;
|
||||
private readonly JsonSerializerOptions _jsonOptions;
|
||||
private readonly SemaphoreSlim _connectionLock = new(1, 1);
|
||||
private IDatabase? _database;
|
||||
|
||||
/// <summary>
/// Creates the caching decorator around <paramref name="inner"/>.
/// </summary>
/// <param name="inner">Service that is consulted on a cache miss.</param>
/// <param name="connectionMultiplexer">Valkey/Redis connection used for cache storage.</param>
/// <param name="options">Cache options; the wrapped value must not be <c>null</c>.</param>
/// <param name="logger">Logger for cache diagnostics.</param>
/// <exception cref="ArgumentNullException">Any argument (or the options value) is <c>null</c>.</exception>
public CachedBinaryVulnerabilityService(
    IBinaryVulnerabilityService inner,
    IConnectionMultiplexer connectionMultiplexer,
    IOptions<BinaryCacheOptions> options,
    ILogger<CachedBinaryVulnerabilityService> logger)
{
    // Validation runs in parameter order so the first missing dependency is the one reported.
    if (inner is null)
    {
        throw new ArgumentNullException(nameof(inner));
    }

    if (connectionMultiplexer is null)
    {
        throw new ArgumentNullException(nameof(connectionMultiplexer));
    }

    var resolvedOptions = options?.Value;
    if (resolvedOptions is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _inner = inner;
    _connectionMultiplexer = connectionMultiplexer;
    _options = resolvedOptions;
    _logger = logger;

    // Compact camelCase JSON is the wire format for every cached payload.
    var serializerOptions = new JsonSerializerOptions();
    serializerOptions.PropertyNamingPolicy = JsonNamingPolicy.CamelCase;
    serializerOptions.WriteIndented = false;
    _jsonOptions = serializerOptions;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Read-through: the cache is consulted first; on a miss the inner service is called and its
/// result is stored with <see cref="BinaryCacheOptions.IdentityTtl"/>.
/// </remarks>
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
    BinaryIdentity identity,
    LookupOptions? options = null,
    CancellationToken ct = default)
{
    var cacheKey = BuildIdentityKey(identity, options);
    var sw = Stopwatch.StartNew();

    // GetFromCacheAsync<T> yields default(T) on a miss or cache error. ImmutableArray<T> is a
    // struct, so "no value" is the uninitialized array (IsDefault) - there is no HasValue/Value
    // to inspect as there would be on Nullable<T>.
    var cached = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
    if (!cached.IsDefault)
    {
        sw.Stop();
        _logger.LogDebug(
            "Cache hit for identity {BinaryKey} in {ElapsedMs}ms",
            identity.BinaryKey,
            sw.Elapsed.TotalMilliseconds);
        return cached;
    }

    // Cache miss - call inner service
    var result = await _inner.LookupByIdentityAsync(identity, options, ct).ConfigureAwait(false);
    sw.Stop();

    // Store in cache (SetCacheAsync logs and swallows cache failures)
    await SetCacheAsync(cacheKey, result, _options.IdentityTtl, ct).ConfigureAwait(false);

    _logger.LogDebug(
        "Cache miss for identity {BinaryKey}, fetched in {ElapsedMs}ms",
        identity.BinaryKey,
        sw.Elapsed.TotalMilliseconds);

    return result;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads all identity keys from the cache in one multi-get, asks the inner service only for the
/// entries that were not served from the cache, and writes those back in a single batch.
/// Cache failures are logged and treated as misses so the lookup itself still succeeds.
/// </remarks>
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
    IEnumerable<BinaryIdentity> identities,
    LookupOptions? options = null,
    CancellationToken ct = default)
{
    var identityList = identities.ToList();
    if (identityList.Count == 0)
    {
        return ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>.Empty;
    }

    var sw = Stopwatch.StartNew();

    // Build cache keys
    var cacheKeys = identityList
        .Select(item => (Identity: item, Key: BuildIdentityKey(item, options)))
        .ToList();

    // Batch get from cache. A failure here must not fail the lookup: the single-item path
    // degrades to the inner service on cache errors, and the batch path does the same.
    IDatabase? db = null;
    RedisValue[]? cachedValues = null;
    try
    {
        db = await GetDatabaseAsync().ConfigureAwait(false);
        var redisKeys = cacheKeys.Select(k => (RedisKey)k.Key).ToArray();
        cachedValues = await db.StringGetAsync(redisKeys).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(
            ex,
            "Error reading {Count} identity cache entries; falling back to inner service",
            cacheKeys.Count);
    }

    var results = new Dictionary<string, ImmutableArray<BinaryVulnMatch>>();
    var misses = new List<BinaryIdentity>();

    for (var index = 0; index < cacheKeys.Count; index++)
    {
        var (identity, key) = cacheKeys[index];
        var value = cachedValues is null ? RedisValue.Null : cachedValues[index];

        if (!value.IsNullOrEmpty)
        {
            try
            {
                var matches = JsonSerializer.Deserialize<ImmutableArray<BinaryVulnMatch>>(
                    (string)value!, _jsonOptions);

                // A cached JSON "null" deserializes to the uninitialized array; treat it as a miss
                // rather than handing callers an array that throws on enumeration.
                if (!matches.IsDefault)
                {
                    results[identity.BinaryKey] = matches;
                    continue;
                }
            }
            catch (Exception ex)
            {
                // Deserialization failed, treat as miss (the entry is overwritten below).
                _logger.LogWarning(ex, "Discarding unreadable cache entry for key {Key}", key);
            }
        }

        misses.Add(identity);
    }

    // Captured before fetched results are merged in, so the final log reports real cache hits.
    var hits = results.Count;

    _logger.LogDebug(
        "Batch lookup: {Hits} cache hits, {Misses} cache misses",
        hits,
        misses.Count);

    // Fetch misses from inner service
    if (misses.Count > 0)
    {
        var fetchedResults = await _inner.LookupBatchAsync(misses, options, ct).ConfigureAwait(false);

        foreach (var (binaryKey, matches) in fetchedResults)
        {
            results[binaryKey] = matches;
        }

        if (db is not null && fetchedResults.Count > 0)
        {
            // Index misses once instead of a linear search per fetched result; the first
            // identity wins for duplicate keys, as with the previous First() lookup.
            var missesByKey = new Dictionary<string, BinaryIdentity>(misses.Count);
            foreach (var miss in misses)
            {
                if (miss.BinaryKey is not null)
                {
                    missesByKey.TryAdd(miss.BinaryKey, miss);
                }
            }

            // Never store an entry for longer than the configured maximum.
            var ttl = _options.MaxTtl > TimeSpan.Zero && _options.IdentityTtl > _options.MaxTtl
                ? _options.MaxTtl
                : _options.IdentityTtl;

            try
            {
                var batch = db.CreateBatch();
                var tasks = new List<Task>(fetchedResults.Count);

                foreach (var (binaryKey, matches) in fetchedResults)
                {
                    // A key the inner service invented cannot be mapped back to an identity;
                    // return it to the caller but skip caching it.
                    if (!missesByKey.TryGetValue(binaryKey, out var identity))
                    {
                        continue;
                    }

                    var cacheKey = BuildIdentityKey(identity, options);
                    var payload = JsonSerializer.Serialize(matches, _jsonOptions);
                    tasks.Add(batch.StringSetAsync(cacheKey, payload, ttl));
                }

                batch.Execute();
                await Task.WhenAll(tasks).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(
                    ex,
                    "Error writing {Count} identity cache entries",
                    fetchedResults.Count);
            }
        }
    }

    sw.Stop();
    _logger.LogDebug(
        "Batch lookup completed in {ElapsedMs}ms: {Total} total, {Hits} hits, {Misses} misses",
        sw.Elapsed.TotalMilliseconds,
        identityList.Count,
        hits,
        misses.Count);

    return results.ToImmutableDictionary();
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Read-through: the cache is consulted first; on a miss the inner service is called and a
/// non-null result is stored with <see cref="BinaryCacheOptions.FixStatusTtl"/>.
/// </remarks>
public async Task<FixStatusResult?> GetFixStatusAsync(
    string distro,
    string release,
    string sourcePkg,
    string cveId,
    CancellationToken ct = default)
{
    var cacheKey = BuildFixStatusKey(distro, release, sourcePkg, cveId);
    var sw = Stopwatch.StartNew();

    // Try cache first. A miss (or cache error) comes back as null; the null pattern is the
    // correct "has a value" test here - the result has no HasValue/Value members.
    var cached = await GetFromCacheAsync<FixStatusResult?>(cacheKey, ct).ConfigureAwait(false);
    if (cached is not null)
    {
        sw.Stop();
        _logger.LogDebug(
            "Cache hit for fix status {Distro}:{SourcePkg}:{CveId} in {ElapsedMs}ms",
            distro, sourcePkg, cveId, sw.Elapsed.TotalMilliseconds);
        return cached;
    }

    // Cache miss
    var result = await _inner.GetFixStatusAsync(distro, release, sourcePkg, cveId, ct).ConfigureAwait(false);
    sw.Stop();

    // Store in cache. A null result would be serialized as JSON "null", which reads back as a
    // miss anyway, so writing it only costs a round trip.
    if (result is not null)
    {
        await SetCacheAsync(cacheKey, result, _options.FixStatusTtl, ct).ConfigureAwait(false);
    }

    _logger.LogDebug(
        "Cache miss for fix status {Distro}:{SourcePkg}:{CveId}, fetched in {ElapsedMs}ms",
        distro, sourcePkg, cveId, sw.Elapsed.TotalMilliseconds);

    return result;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads all fix-status keys from the cache in one multi-get, asks the inner service only for
/// the CVEs that were not served from the cache, and writes those back in a single batch.
/// Cache failures are logged and treated as misses so the lookup itself still succeeds.
/// </remarks>
public async Task<ImmutableDictionary<string, FixStatusResult>> GetFixStatusBatchAsync(
    string distro,
    string release,
    string sourcePkg,
    IEnumerable<string> cveIds,
    CancellationToken ct = default)
{
    var cveList = cveIds.ToList();
    if (cveList.Count == 0)
    {
        return ImmutableDictionary<string, FixStatusResult>.Empty;
    }

    // Build cache keys
    var cacheKeys = cveList
        .Select(cve => (CveId: cve, Key: BuildFixStatusKey(distro, release, sourcePkg, cve)))
        .ToList();

    // Batch get from cache; on failure every CVE is treated as a miss, matching the
    // best-effort behavior of the single-item path.
    IDatabase? db = null;
    RedisValue[]? cachedValues = null;
    try
    {
        db = await GetDatabaseAsync().ConfigureAwait(false);
        var redisKeys = cacheKeys.Select(k => (RedisKey)k.Key).ToArray();
        cachedValues = await db.StringGetAsync(redisKeys).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(
            ex,
            "Error reading {Count} fix status cache entries; falling back to inner service",
            cacheKeys.Count);
    }

    var results = new Dictionary<string, FixStatusResult>();
    var misses = new List<string>();

    for (var index = 0; index < cacheKeys.Count; index++)
    {
        var (cachedCveId, key) = cacheKeys[index];
        var value = cachedValues is null ? RedisValue.Null : cachedValues[index];

        if (!value.IsNullOrEmpty)
        {
            try
            {
                var status = JsonSerializer.Deserialize<FixStatusResult>((string)value!, _jsonOptions);
                if (status is not null)
                {
                    results[cachedCveId] = status;
                    continue;
                }
            }
            catch (Exception ex)
            {
                // Deserialization failed; fall through and refetch this CVE.
                _logger.LogWarning(ex, "Discarding unreadable cache entry for key {Key}", key);
            }
        }

        misses.Add(cachedCveId);
    }

    // Fetch misses from inner service
    if (misses.Count > 0)
    {
        var fetchedResults = await _inner.GetFixStatusBatchAsync(distro, release, sourcePkg, misses, ct)
            .ConfigureAwait(false);

        foreach (var (fetchedCveId, status) in fetchedResults)
        {
            results[fetchedCveId] = status;
        }

        if (db is not null && fetchedResults.Count > 0)
        {
            // Never store an entry for longer than the configured maximum.
            var ttl = _options.MaxTtl > TimeSpan.Zero && _options.FixStatusTtl > _options.MaxTtl
                ? _options.MaxTtl
                : _options.FixStatusTtl;

            try
            {
                var batch = db.CreateBatch();
                var tasks = new List<Task>(fetchedResults.Count);

                foreach (var (fetchedCveId, status) in fetchedResults)
                {
                    var cacheKey = BuildFixStatusKey(distro, release, sourcePkg, fetchedCveId);
                    var serialized = JsonSerializer.Serialize(status, _jsonOptions);
                    tasks.Add(batch.StringSetAsync(cacheKey, serialized, ttl));
                }

                batch.Execute();
                await Task.WhenAll(tasks).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(
                    ex,
                    "Error writing {Count} fix status cache entries",
                    fetchedResults.Count);
            }
        }
    }

    return results.ToImmutableDictionary();
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Read-through: the cache is consulted first; on a miss the inner service is called and its
/// result is stored with <see cref="BinaryCacheOptions.FingerprintTtl"/>.
/// </remarks>
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
    byte[] fingerprint,
    FingerprintLookupOptions? options = null,
    CancellationToken ct = default)
{
    var cacheKey = BuildFingerprintKey(fingerprint, options);
    var sw = Stopwatch.StartNew();

    // Try cache first. ImmutableArray<T> is a struct, so a miss is the uninitialized array
    // (IsDefault), not null - there is no HasValue/Value to read.
    var cached = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
    if (!cached.IsDefault)
    {
        sw.Stop();
        _logger.LogDebug("Cache hit for fingerprint in {ElapsedMs}ms", sw.Elapsed.TotalMilliseconds);
        return cached;
    }

    // Cache miss
    var result = await _inner.LookupByFingerprintAsync(fingerprint, options, ct).ConfigureAwait(false);
    sw.Stop();

    // Store in cache (shorter TTL for fingerprints as they may change)
    await SetCacheAsync(cacheKey, result, _options.FingerprintTtl, ct).ConfigureAwait(false);

    _logger.LogDebug("Cache miss for fingerprint, fetched in {ElapsedMs}ms", sw.Elapsed.TotalMilliseconds);

    return result;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Pass-through: batch fingerprint lookups bypass the cache and go straight to the inner
/// service, because they are less common and more expensive to cache.
/// </remarks>
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
    IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
    FingerprintLookupOptions? options = null,
    CancellationToken ct = default)
{
    var uncachedLookup = _inner.LookupByFingerprintBatchAsync(fingerprints, options, ct);
    var matchesByKey = await uncachedLookup.ConfigureAwait(false);
    return matchesByKey;
}
|
||||
|
||||
/// <summary>
/// Invalidates the cached fix-status entries for a specific distro/release combination.
/// Called when a new corpus update is published.
/// </summary>
/// <remarks>
/// Only keys under <c>{KeyPrefix}fix:{distro}:{release}:</c> are removed; identity and
/// fingerprint keys do not carry a distro/release and are left to expire by TTL.
/// Failures are logged and swallowed; cancellation is propagated.
/// </remarks>
/// <param name="distro">Distro whose entries are removed.</param>
/// <param name="release">Release whose entries are removed.</param>
/// <param name="ct">Cancels the key scan.</param>
public async Task InvalidateDistroAsync(string distro, string release, CancellationToken ct = default)
{
    const int PageSize = 250;

    try
    {
        var db = await GetDatabaseAsync().ConfigureAwait(false);

        // Escape glob metacharacters so a distro/release such as "a*" cannot widen the match.
        var pattern = $"{EscapeGlob(_options.KeyPrefix)}fix:{EscapeGlob(distro)}:{EscapeGlob(release)}:*";
        long deleted = 0;

        // Scan every connected primary, not just the first endpoint, so that multi-node
        // deployments are fully invalidated. Replicas are skipped: they mirror a primary.
        foreach (var endpoint in _connectionMultiplexer.GetEndPoints())
        {
            var server = _connectionMultiplexer.GetServer(endpoint);
            if (!server.IsConnected || server.IsReplica)
            {
                continue;
            }

            var pending = new List<Task<bool>>(PageSize);

            // KeysAsync streams the keyspace in pages instead of materializing every key.
            await foreach (var key in server.KeysAsync(db.Database, pattern, PageSize)
                .WithCancellation(ct)
                .ConfigureAwait(false))
            {
                // Single-key deletes are pipelined; they avoid cross-slot errors that a
                // multi-key delete can raise on clustered servers.
                pending.Add(db.KeyDeleteAsync(key));
                if (pending.Count >= PageSize)
                {
                    deleted += await CountDeletedAsync(pending).ConfigureAwait(false);
                    pending.Clear();
                }
            }

            if (pending.Count > 0)
            {
                deleted += await CountDeletedAsync(pending).ConfigureAwait(false);
            }
        }

        if (deleted > 0)
        {
            _logger.LogInformation(
                "Invalidated {Count} cache entries for {Distro}:{Release}",
                deleted, distro, release);
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        _logger.LogError(ex, "Error invalidating cache for {Distro}:{Release}", distro, release);
    }

    // Awaits a page of pipelined deletes and returns how many keys were actually removed.
    static async Task<long> CountDeletedAsync(List<Task<bool>> deletes)
    {
        var outcomes = await Task.WhenAll(deletes).ConfigureAwait(false);
        return outcomes.Count(removed => removed);
    }

    // Backslash-escapes the characters that are special in Redis glob patterns.
    static string EscapeGlob(string? text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        var escaped = new System.Text.StringBuilder(text.Length);
        foreach (var ch in text)
        {
            if (ch is '*' or '?' or '[' or ']' or '\\')
            {
                escaped.Append('\\');
            }

            escaped.Append(ch);
        }

        return escaped.ToString();
    }
}
|
||||
|
||||
/// <summary>
/// Builds the cache key for an identity lookup: <c>{KeyPrefix}id:{tenant}:{BinaryKey}</c>.
/// The tenant segment is <c>default</c> when no options or no tenant id is supplied.
/// </summary>
private string BuildIdentityKey(BinaryIdentity identity, LookupOptions? options)
    => $"{_options.KeyPrefix}id:{(options?.TenantId ?? "default")}:{identity.BinaryKey}";
|
||||
|
||||
/// <summary>
/// Builds the cache key for a fix status lookup:
/// <c>{KeyPrefix}fix:{distro}:{release}:{sourcePkg}:{cveId}</c>.
/// </summary>
private string BuildFixStatusKey(string distro, string release, string sourcePkg, string cveId)
{
    var scope = string.Join(':', distro, release, sourcePkg, cveId);
    return _options.KeyPrefix + "fix:" + scope;
}
|
||||
|
||||
/// <summary>
/// Builds the cache key for a fingerprint lookup:
/// <c>{KeyPrefix}fp:{variant}:{fingerprint-hex}</c>.
/// </summary>
/// <remarks>
/// <para>
/// The full fingerprint is used (lower-case hex). Truncating it would let two different
/// fingerprints that share a prefix read each other's cached matches.
/// </para>
/// <para>
/// <c>variant</c> is <c>default</c> when <paramref name="options"/> is <c>null</c>; otherwise it
/// is a short digest of every <see cref="FingerprintLookupOptions"/> property, so lookups made
/// with different thresholds, filters or hints never share an entry.
/// </para>
/// </remarks>
/// <param name="fingerprint">Fingerprint bytes being looked up.</param>
/// <param name="options">Lookup options, or <c>null</c> for the caller's defaults.</param>
private string BuildFingerprintKey(byte[] fingerprint, FingerprintLookupOptions? options)
{
    var hash = Convert.ToHexString(fingerprint).ToLowerInvariant();

    var variant = "default";
    if (options is not null)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // The unit separator cannot be confused with ordinary text in the hint fields;
        // "\0" marks a null so that null and empty hints stay distinguishable.
        var descriptor = string.Join(
            '\u001f',
            options.MinSimilarity.ToString(invariant),
            options.MaxCandidates.ToString(invariant),
            options.Architecture ?? "\0",
            options.CheckFixIndex ? "1" : "0",
            options.DistroHint ?? "\0",
            options.ReleaseHint ?? "\0");

        var digest = System.Security.Cryptography.SHA256.HashData(
            System.Text.Encoding.UTF8.GetBytes(descriptor));

        // 8 bytes keep the key short while making accidental collisions between option sets negligible.
        variant = Convert.ToHexString(digest, 0, 8).ToLowerInvariant();
    }

    return $"{_options.KeyPrefix}fp:{variant}:{hash}";
}
|
||||
|
||||
/// <summary>
/// Reads and deserializes a single cache entry.
/// </summary>
/// <typeparam name="T">Type the stored JSON is deserialized to.</typeparam>
/// <param name="key">Full cache key.</param>
/// <param name="ct">Accepted for signature symmetry; not consulted by this method.</param>
/// <returns>
/// The deserialized value, or <c>default</c> when the key is absent, empty, or any error occurs
/// (errors are logged as warnings and never propagated).
/// </returns>
private async Task<T?> GetFromCacheAsync<T>(string key, CancellationToken ct)
{
    try
    {
        var database = await GetDatabaseAsync().ConfigureAwait(false);
        var raw = await database.StringGetAsync(key).ConfigureAwait(false);

        return raw.IsNullOrEmpty
            ? default
            : JsonSerializer.Deserialize<T>((string)raw!, _jsonOptions);
    }
    catch (Exception failure)
    {
        _logger.LogWarning(failure, "Error getting cache entry for key {Key}", key);
        return default;
    }
}
|
||||
|
||||
/// <summary>
/// Serializes <paramref name="value"/> to JSON and stores it under <paramref name="key"/>.
/// </summary>
/// <remarks>
/// The requested TTL is capped at <see cref="BinaryCacheOptions.MaxTtl"/> when that bound is
/// positive. A resulting TTL of zero or less skips the write, since the server would reject
/// the expiry anyway. Errors are logged as warnings and never propagated.
/// </remarks>
/// <typeparam name="T">Type of the value being cached.</typeparam>
/// <param name="key">Full cache key.</param>
/// <param name="value">Value to serialize.</param>
/// <param name="ttl">Requested time-to-live for the entry.</param>
/// <param name="ct">Accepted for signature symmetry; not consulted by this method.</param>
private async Task SetCacheAsync<T>(string key, T value, TimeSpan ttl, CancellationToken ct)
{
    var maxTtl = _options.MaxTtl;
    var effectiveTtl = maxTtl > TimeSpan.Zero && ttl > maxTtl ? maxTtl : ttl;

    if (effectiveTtl <= TimeSpan.Zero)
    {
        // Nothing could be stored with this expiry; avoid a round trip that is certain to fail.
        return;
    }

    try
    {
        var db = await GetDatabaseAsync().ConfigureAwait(false);
        var serialized = JsonSerializer.Serialize(value, _jsonOptions);

        await db.StringSetAsync(key, serialized, effectiveTtl).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Error setting cache entry for key {Key}", key);
    }
}
|
||||
|
||||
/// <summary>
/// Returns the cached <see cref="IDatabase"/> handle, obtaining it from the connection
/// multiplexer on first use. The semaphore serializes that first assignment.
/// </summary>
private async Task<IDatabase> GetDatabaseAsync()
{
    // Fast path: the handle has already been resolved.
    if (_database is { } ready)
    {
        return ready;
    }

    await _connectionLock.WaitAsync().ConfigureAwait(false);
    try
    {
        // Re-check under the lock; another caller may have resolved it while we waited.
        if (_database is null)
        {
            _database = _connectionMultiplexer.GetDatabase();
        }

        return _database;
    }
    finally
    {
        _connectionLock.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Releases the semaphore owned by this decorator. The connection multiplexer and the inner
/// service are not disposed here.
/// </summary>
/// <returns>An already-completed <see cref="ValueTask"/>; disposal is synchronous.</returns>
public ValueTask DisposeAsync()
{
    _connectionLock.Dispose();

    // There is no asynchronous work, so skip the async state machine and the no-op await.
    return ValueTask.CompletedTask;
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.BinaryIndex.Cache</RootNamespace>
|
||||
<AssemblyName>StellaOps.BinaryIndex.Cache</AssemblyName>
|
||||
<Description>Valkey/Redis cache layer for BinaryIndex vulnerability lookups</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="StackExchange.Redis" Version="2.8.37" />
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,40 @@
|
||||
namespace StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
/// <summary>
/// Fix state of a CVE with respect to a package.
/// </summary>
public enum FixState
{
    /// <summary>The CVE is fixed in this version.</summary>
    Fixed = 0,

    /// <summary>The CVE affects this package.</summary>
    Vulnerable = 1,

    /// <summary>The CVE does not affect this package.</summary>
    NotAffected = 2,

    /// <summary>The fix will not be applied (e.g., EOL version).</summary>
    Wontfix = 3,

    /// <summary>The status is unknown.</summary>
    Unknown = 4
}
|
||||
|
||||
/// <summary>
/// Source of evidence that was used to identify a fix.
/// </summary>
public enum FixMethod
{
    /// <summary>Taken from an official security feed (OVAL, DSA, etc.).</summary>
    SecurityFeed = 0,

    /// <summary>Parsed from a Debian/Ubuntu changelog.</summary>
    Changelog = 1,

    /// <summary>Extracted from a patch header (DEP-3).</summary>
    PatchHeader = 2,

    /// <summary>Matched against the upstream patch database.</summary>
    UpstreamPatchMatch = 3
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
@@ -52,6 +51,51 @@ public interface IBinaryVulnerabilityService
|
||||
string sourcePkg,
|
||||
IEnumerable<string> cveIds,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Look up vulnerabilities by binary fingerprint.
|
||||
/// Used for fingerprint-based matching when Build-ID is unavailable.
|
||||
/// </summary>
|
||||
/// <param name="fingerprint">Fingerprint bytes to match.</param>
|
||||
/// <param name="options">Matching options.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>List of vulnerability matches.</returns>
|
||||
Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
|
||||
byte[] fingerprint,
|
||||
FingerprintLookupOptions? options = null,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Batch fingerprint lookup for scan performance.
|
||||
/// </summary>
|
||||
Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
|
||||
IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
|
||||
FingerprintLookupOptions? options = null,
|
||||
CancellationToken ct = default);
|
||||
}
|
||||
|
||||
/// <summary>
/// Settings that control a fingerprint-based vulnerability lookup.
/// </summary>
public sealed record FingerprintLookupOptions
{
    /// <summary>
    /// Minimum similarity threshold, in the range 0.0-1.0. Defaults to 0.95.
    /// </summary>
    public decimal MinSimilarity { get; init; } = 0.95M;

    /// <summary>
    /// Maximum number of candidates to evaluate. Defaults to 100.
    /// </summary>
    public int MaxCandidates { get; init; } = 100;

    /// <summary>
    /// Architecture filter; <c>null</c> means any architecture.
    /// </summary>
    public string? Architecture { get; init; }

    /// <summary>
    /// Whether to check the fix index for matched CVEs. Defaults to <c>true</c>.
    /// </summary>
    public bool CheckFixIndex { get; init; } = true;

    /// <summary>
    /// Distro hint for the fix status lookup.
    /// </summary>
    public string? DistroHint { get; init; }

    /// <summary>
    /// Release hint for the fix status lookup.
    /// </summary>
    public string? ReleaseHint { get; init; }
}
|
||||
|
||||
public sealed record LookupOptions
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// BasicBlockFingerprintGenerator.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-06 — Implement BasicBlockFingerprintGenerator
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
|
||||
/// <summary>
|
||||
/// Generates fingerprints based on basic block hashing.
|
||||
///
|
||||
/// Algorithm:
|
||||
/// 1. Disassemble function to basic blocks
|
||||
/// 2. Normalize instructions (remove absolute addresses)
|
||||
/// 3. Hash each basic block
|
||||
/// 4. Combine block hashes with topology info
|
||||
///
|
||||
/// Produces a 16-byte fingerprint.
|
||||
/// </summary>
|
||||
public sealed class BasicBlockFingerprintGenerator : IVulnFingerprintGenerator
|
||||
{
|
||||
private readonly ILogger<BasicBlockFingerprintGenerator> _logger;
|
||||
|
||||
/// <summary>
/// Creates the generator.
/// </summary>
/// <param name="logger">Logger that receives the generator's debug output.</param>
public BasicBlockFingerprintGenerator(ILogger<BasicBlockFingerprintGenerator> logger)
    => _logger = logger;
|
||||
|
||||
/// <summary>Algorithm implemented by this generator; always <see cref="FingerprintAlgorithm.BasicBlock"/>.</summary>
public FingerprintAlgorithm Algorithm
{
    get { return FingerprintAlgorithm.BasicBlock; }
}
|
||||
|
||||
/// <summary>
/// Reports whether the input carries enough binary data to fingerprint.
/// </summary>
/// <param name="input">Candidate input.</param>
/// <returns><c>true</c> when the binary data is at least 16 bytes long.</returns>
public bool CanProcess(FingerprintInput input)
{
    const int MinimumBinaryLength = 16;

    var availableBytes = input.BinaryData.Length;
    return availableBytes >= MinimumBinaryLength;
}
|
||||
|
||||
/// <summary>
/// Produces a basic-block fingerprint for the supplied binary data.
/// </summary>
/// <remarks>
/// The work is synchronous; the result is wrapped in a completed task. Steps: split the data
/// into blocks, normalize and hash each block, then combine the block hashes.
/// </remarks>
/// <param name="input">Binary data plus component, CVE and architecture metadata.</param>
/// <param name="ct">Checked once, before any work starts.</param>
/// <returns>The fingerprint, its hex id, a confidence value and block metadata.</returns>
public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Generating basic block fingerprint for {Component}/{CveId} ({Size} bytes)",
        input.Component,
        input.CveId,
        input.BinaryData.Length);

    // Step 1: split into basic blocks (heuristic; no real disassembler involved).
    var blocks = IdentifyBasicBlocks(input.BinaryData, input.Architecture);

    // Steps 2 and 3: normalize each block, then hash the normalized bytes.
    var blockHashes = new List<byte[]>(blocks.Count);
    foreach (var block in blocks)
    {
        var normalizedBlock = NormalizeBlock(block, input.Architecture);
        blockHashes.Add(HashBlock(normalizedBlock));
    }

    // Step 4: fold the per-block hashes into the final fingerprint.
    var fingerprint = CombineBlockHashes(blockHashes);
    var fingerprintId = Convert.ToHexString(fingerprint).ToLowerInvariant();

    _logger.LogDebug(
        "Generated fingerprint {FingerprintId} with {BlockCount} blocks",
        fingerprintId,
        blocks.Count);

    var output = new FingerprintOutput
    {
        Hash = fingerprint,
        FingerprintId = fingerprintId,
        Algorithm = FingerprintAlgorithm.BasicBlock,
        Confidence = CalculateConfidence(blocks.Count, input.BinaryData.Length),
        Metadata = new FingerprintMetadata
        {
            BasicBlockCount = blocks.Count,
            FunctionSize = input.BinaryData.Length
        }
    };

    return Task.FromResult(output);
}
|
||||
|
||||
/// <summary>
/// Splits the binary data into basic blocks.
/// A basic block ends at: jump, call, return, or conditional branch.
/// </summary>
/// <remarks>
/// This is a simplified heuristic, not a disassembler. For ARM64 the scan advances one
/// 4-byte instruction at a time so terminators are only recognised on instruction boundaries
/// and every block starts aligned; other architectures are scanned byte by byte.
/// </remarks>
/// <param name="binaryData">Raw bytes to split.</param>
/// <param name="architecture">Architecture name, matched case-insensitively.</param>
/// <returns>
/// The blocks in order. Never empty: if nothing was produced, the whole input is returned as
/// a single block.
/// </returns>
private List<byte[]> IdentifyBasicBlocks(byte[] binaryData, string architecture)
{
    var blocks = new List<byte[]>();
    var currentBlockStart = 0;

    // ARM64 instructions are fixed-width. Probing at every byte offset would match the
    // terminator pattern against bytes that straddle two instructions and leave all later
    // blocks misaligned, which in turn breaks the 4-byte stride used when normalizing.
    var step = architecture.ToLowerInvariant() is "aarch64" or "arm64" ? 4 : 1;

    var i = 0;
    while (i < binaryData.Length)
    {
        if (IsBlockTerminator(binaryData, i, architecture))
        {
            // Computed once; the block ends with the terminating instruction itself.
            var blockEnd = i + GetInstructionLength(binaryData, i, architecture);

            // A terminator whose operands run past the end of the data is ignored here and
            // its bytes end up in the final block.
            if (blockEnd > currentBlockStart && blockEnd <= binaryData.Length)
            {
                blocks.Add(binaryData[currentBlockStart..blockEnd]);
                currentBlockStart = blockEnd;
                i = blockEnd;
                continue;
            }
        }

        i += step;
    }

    // Add final block if any remaining
    if (currentBlockStart < binaryData.Length)
    {
        blocks.Add(binaryData[currentBlockStart..]);
    }

    // Ensure at least one block
    if (blocks.Count == 0)
    {
        blocks.Add(binaryData);
    }

    return blocks;
}
|
||||
|
||||
/// <summary>
/// Tells whether the instruction starting at <paramref name="i"/> ends a basic block.
/// </summary>
/// <param name="data">Binary data being scanned.</param>
/// <param name="i">Offset to inspect; offsets at or past the end yield <c>false</c>.</param>
/// <param name="architecture">Architecture name, matched case-insensitively.</param>
/// <returns><c>false</c> for unrecognised architectures.</returns>
private static bool IsBlockTerminator(byte[] data, int i, string architecture)
{
    if (i >= data.Length)
    {
        return false;
    }

    var arch = architecture.ToLowerInvariant();

    if (arch is "x86_64" or "x64" or "amd64")
    {
        return IsX64BlockTerminator(data, i);
    }

    if (arch is "aarch64" or "arm64")
    {
        return IsArm64BlockTerminator(data, i);
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Heuristic x64 check: does the byte at <paramref name="i"/> look like the opcode of a
/// ret, call, jmp or conditional jump?
/// </summary>
/// <remarks>
/// Recognised opcodes: C3 (ret), E8 (call near), E9 (jmp near), EB (jmp short),
/// 70-7F (short conditional jumps) and the two-byte form 0F 80-8F (near conditional jumps).
/// </remarks>
private static bool IsX64BlockTerminator(byte[] data, int i)
{
    var opcode = data[i];

    // Single-byte opcodes.
    if (opcode is 0xC3 or 0xE8 or 0xE9 or 0xEB)
    {
        return true;
    }

    if (opcode is >= 0x70 and <= 0x7F)
    {
        return true;
    }

    // Two-byte conditional jump: needs the 0F escape and a second byte to inspect.
    if (opcode != 0x0F || i + 1 >= data.Length)
    {
        return false;
    }

    return data[i + 1] is >= 0x80 and <= 0x8F;
}
|
||||
|
||||
/// <summary>
/// Heuristic ARM64 check on the 4-byte instruction starting at <paramref name="i"/>.
/// </summary>
/// <remarks>
/// Only the top six bits of the fourth byte are inspected. The accepted values are the ones
/// the original heuristic labels B (0x14), B.cond (0x54), BL (0x94) and RET (0xD4); this is
/// not a full decode, so other instructions sharing those bits also match.
/// </remarks>
/// <returns><c>false</c> when fewer than four bytes remain at <paramref name="i"/>.</returns>
private static bool IsArm64BlockTerminator(byte[] data, int i)
{
    // ARM64 instructions are 4 bytes; a truncated tail cannot be classified.
    var hasFullInstruction = i + 3 < data.Length;
    if (!hasFullInstruction)
    {
        return false;
    }

    var topBits = data[i + 3] & 0xFC;
    return topBits is 0x14 or 0x54 or 0x94 or 0xD4;
}
|
||||
|
||||
/// <summary>
/// Estimates the length in bytes of the instruction at <paramref name="i"/>.
/// </summary>
/// <param name="data">Binary data being scanned.</param>
/// <param name="i">Offset of the instruction.</param>
/// <param name="architecture">Architecture name, matched case-insensitively.</param>
/// <returns>
/// An x64 estimate for x64 names, 4 for ARM64 names (fixed-width instructions), and 1 for
/// anything else.
/// </returns>
private static int GetInstructionLength(byte[] data, int i, string architecture)
{
    var arch = architecture.ToLowerInvariant();

    if (arch is "x86_64" or "x64" or "amd64")
    {
        return GetX64InstructionLength(data, i);
    }

    if (arch is "aarch64" or "arm64")
    {
        return 4;
    }

    return 1;
}
|
||||
|
||||
/// <summary>
/// Very rough x64 instruction length, keyed on the first opcode byte only.
/// </summary>
/// <returns>
/// 1 for ret (C3); 2 for short jumps (EB, 70-7F); 5 for near call/jmp (E8, E9); 6 for a 0F
/// escape that has a following byte (2-byte opcode + 4-byte offset); 1 for everything else,
/// including offsets at or past the end of <paramref name="data"/>.
/// </returns>
private static int GetX64InstructionLength(byte[] data, int i)
{
    if (i >= data.Length)
    {
        return 1;
    }

    var opcode = data[i];

    if (opcode == 0xC3)
    {
        return 1;
    }

    if (opcode == 0xEB || (opcode >= 0x70 && opcode <= 0x7F))
    {
        return 2;
    }

    if (opcode == 0xE8 || opcode == 0xE9)
    {
        return 5;
    }

    if (opcode == 0x0F && i + 1 < data.Length)
    {
        return 6;
    }

    return 1;
}
|
||||
|
||||
/// <summary>
/// Returns a normalized copy of a basic block with branch targets zeroed out.
/// </summary>
/// <remarks>
/// The input array is never modified: a copy is made first and the architecture-specific
/// normalizer works on that copy. Unrecognised architectures get the plain copy back.
/// </remarks>
/// <param name="block">Bytes of one basic block.</param>
/// <param name="architecture">Architecture name, matched case-insensitively.</param>
private byte[] NormalizeBlock(byte[] block, string architecture)
{
    var copy = (byte[])block.Clone();
    var arch = architecture.ToLowerInvariant();

    if (arch is "x86_64" or "x64" or "amd64")
    {
        return NormalizeX64Block(copy);
    }

    if (arch is "aarch64" or "arm64")
    {
        return NormalizeArm64Block(copy);
    }

    return copy;
}
|
||||
|
||||
/// <summary>
/// Zeroes, in place, the four bytes following every 0xE8 (call) or 0xE9 (jmp)
/// byte, then resumes scanning after those four bytes.
/// </summary>
/// <param name="block">Buffer to normalize; it is modified and returned.</param>
/// <returns>The same array instance that was passed in.</returns>
/// <remarks>
/// Heuristic only: bytes are not decoded as instructions, so an 0xE8/0xE9 value
/// appearing as data is treated like an opcode.
/// </remarks>
private static byte[] NormalizeX64Block(byte[] block)
{
    var pos = 0;
    while (pos < block.Length)
    {
        if (block[pos] != 0xE8 && block[pos] != 0xE9)
        {
            pos++;
            continue;
        }

        // Clear up to four operand bytes, clamped to the end of the buffer.
        var operandStart = pos + 1;
        var operandLength = Math.Min(4, block.Length - operandStart);
        if (operandLength > 0)
        {
            Array.Clear(block, operandStart, operandLength);
        }

        // Skip the opcode and its (now zeroed) operand.
        pos += 5;
    }

    return block;
}
|
||||
|
||||
/// <summary>
/// Zeroes, in place, the 26-bit immediate of every B and BL instruction found
/// at 4-byte-aligned offsets.
/// </summary>
/// <param name="block">Buffer to normalize; it is modified and returned.</param>
/// <returns>The same array instance that was passed in.</returns>
private static byte[] NormalizeArm64Block(byte[] block)
{
    const int instructionSize = 4;

    // Trailing bytes that do not form a whole instruction are left alone.
    var wholeInstructions = block.Length / instructionSize;

    for (var n = 0; n < wholeInstructions; n++)
    {
        var start = n * instructionSize;
        var topIndex = start + 3;
        var majorOpcode = block[topIndex] & 0xFC;

        if (majorOpcode != 0x14 && majorOpcode != 0x94)
        {
            continue; // not B / BL
        }

        // The immediate occupies bits 0-25: three low bytes plus the two low
        // bits of the top byte.
        Array.Clear(block, start, 3);
        block[topIndex] = (byte)majorOpcode;
    }

    return block;
}
|
||||
|
||||
/// <summary>
/// Hashes a block with SHA-256 and keeps the first 8 bytes of the digest.
/// </summary>
/// <param name="block">Bytes to hash.</param>
/// <returns>A new 8-byte array.</returns>
private static byte[] HashBlock(byte[] block)
{
    const int truncatedLength = 8;
    return SHA256.HashData(block).AsSpan(0, truncatedLength).ToArray();
}
|
||||
|
||||
/// <summary>
/// Combines per-block hashes, in list order, into the final 16-byte fingerprint.
/// </summary>
/// <param name="blockHashes">Block hashes in the order they should be combined.</param>
/// <returns>
/// The first 16 bytes of SHA-256 over a 4-byte little-endian block count
/// followed by every block hash.
/// </returns>
/// <remarks>
/// No reordering is performed here; the result depends on the order of
/// <paramref name="blockHashes"/>.
/// </remarks>
private static byte[] CombineBlockHashes(List<byte[]> blockHashes)
{
    using var buffer = new MemoryStream();

    // Block count prefix. Written explicitly as little-endian so the fingerprint
    // is identical on every host; BitConverter.GetBytes follows the machine's
    // byte order and would yield a different digest on big-endian platforms.
    Span<byte> countPrefix = stackalloc byte[sizeof(int)];
    System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(countPrefix, blockHashes.Count);
    buffer.Write(countPrefix);

    foreach (var blockHash in blockHashes)
    {
        buffer.Write(blockHash);
    }

    // Final hash, truncated to 16 bytes.
    return SHA256.HashData(buffer.ToArray()).AsSpan(0, 16).ToArray();
}
|
||||
|
||||
/// <summary>
/// Maps block count and function size to a confidence score; either value
/// falling below a tier's threshold caps the score at that tier.
/// </summary>
/// <param name="blockCount">Number of basic blocks found.</param>
/// <param name="size">Function size in bytes.</param>
/// <returns>0.5, 0.7, 0.85 or 0.95.</returns>
private static decimal CalculateConfidence(int blockCount, int size)
{
    // Tiers are evaluated top to bottom; the first match wins.
    return (blockCount, size) switch
    {
        (< 2, _) or (_, < 32) => 0.5m,
        (< 5, _) or (_, < 100) => 0.7m,
        (< 10, _) or (_, < 500) => 0.85m,
        _ => 0.95m,
    };
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CombinedFingerprintGenerator.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-09 — Implement CombinedFingerprintGenerator (ensemble)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
|
||||
/// <summary>
/// Combines multiple fingerprinting algorithms into an ensemble fingerprint
/// built from the BasicBlock, ControlFlowGraph and (when available) StringRefs
/// generators.
///
/// Produces a 48-byte fingerprint:
/// bytes 0-31 are the SHA-256 digest over a version byte, the basic-block hash,
/// the CFG hash, a string-refs presence marker and, if present, the string-refs
/// hash; bytes 32-47 are the first 16 bytes of the basic-block hash, kept in
/// the clear for quick lookup.
/// </summary>
public sealed class CombinedFingerprintGenerator : IVulnFingerprintGenerator
{
    private readonly ILogger<CombinedFingerprintGenerator> _logger;
    private readonly BasicBlockFingerprintGenerator _basicBlockGen;
    private readonly ControlFlowGraphFingerprintGenerator _cfgGen;
    private readonly StringRefsFingerprintGenerator _stringRefsGen;

    /// <summary>
    /// Creates the ensemble generator from its component generators.
    /// </summary>
    public CombinedFingerprintGenerator(
        ILogger<CombinedFingerprintGenerator> logger,
        BasicBlockFingerprintGenerator basicBlockGen,
        ControlFlowGraphFingerprintGenerator cfgGen,
        StringRefsFingerprintGenerator stringRefsGen)
    {
        _logger = logger;
        _basicBlockGen = basicBlockGen;
        _cfgGen = cfgGen;
        _stringRefsGen = stringRefsGen;
    }

    /// <inheritdoc />
    public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.Combined;

    /// <summary>
    /// Returns true when both the basic-block and CFG generators accept the
    /// input; string references are optional.
    /// </summary>
    public bool CanProcess(FingerprintInput input)
    {
        return _basicBlockGen.CanProcess(input) && _cfgGen.CanProcess(input);
    }

    /// <summary>
    /// Runs the component generators and merges their hashes, metadata and
    /// confidence into a single combined fingerprint.
    /// </summary>
    /// <param name="input">Function bytes and identifying information.</param>
    /// <param name="ct">Cancellation token, forwarded to every component generator.</param>
    /// <returns>The combined 48-byte fingerprint with merged metadata.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public async Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(input);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Generating combined fingerprint for {Component}/{CveId} ({Size} bytes)",
            input.Component,
            input.CveId,
            input.BinaryData.Length);

        // Start every component generator, then await them together so that a
        // failure in one never leaves another task unobserved.
        var basicBlockTask = _basicBlockGen.GenerateAsync(input, ct);
        var cfgTask = _cfgGen.GenerateAsync(input, ct);
        var stringRefsTask = _stringRefsGen.CanProcess(input)
            ? _stringRefsGen.GenerateAsync(input, ct)
            : null;

        var pending = new List<Task> { basicBlockTask, cfgTask };
        if (stringRefsTask is not null)
        {
            pending.Add(stringRefsTask);
        }

        await Task.WhenAll(pending);

        // All tasks have completed successfully here; these awaits only unwrap results.
        var basicBlockOutput = await basicBlockTask;
        var cfgOutput = await cfgTask;
        var stringRefsOutput = stringRefsTask is null ? null : await stringRefsTask;

        var combined = CombineFingerprints(basicBlockOutput, cfgOutput, stringRefsOutput);
        var fingerprintId = Convert.ToHexString(combined).ToLowerInvariant();
        var metadata = CombineMetadata(basicBlockOutput.Metadata, cfgOutput.Metadata, stringRefsOutput?.Metadata);
        var confidence = CalculateCombinedConfidence(basicBlockOutput, cfgOutput, stringRefsOutput);

        _logger.LogDebug(
            "Generated combined fingerprint {FingerprintId} with confidence {Confidence:P0}",
            fingerprintId,
            confidence);

        return new FingerprintOutput
        {
            Hash = combined,
            FingerprintId = fingerprintId,
            Algorithm = FingerprintAlgorithm.Combined,
            Confidence = confidence,
            Metadata = metadata
        };
    }

    /// <summary>
    /// Builds the 48-byte combined hash from the component hashes.
    /// </summary>
    /// <remarks>
    /// Throws if the basic-block hash is shorter than 16 bytes, because its
    /// first 16 bytes are copied verbatim into the tail of the result.
    /// </remarks>
    private static byte[] CombineFingerprints(
        FingerprintOutput basicBlock,
        FingerprintOutput cfg,
        FingerprintOutput? stringRefs)
    {
        using var ms = new MemoryStream();

        // Version byte (for future compatibility)
        ms.WriteByte(0x01);

        ms.Write(basicBlock.Hash);
        ms.Write(cfg.Hash);

        // Presence marker keeps "no string refs" distinct from any real hash.
        if (stringRefs != null)
        {
            ms.WriteByte(0x01);
            ms.Write(stringRefs.Hash);
        }
        else
        {
            ms.WriteByte(0x00);
        }

        // 32-byte digest of everything above ...
        var digest = SHA256.HashData(ms.ToArray());
        var result = new byte[48];
        Array.Copy(digest, result, 32);

        // ... followed by the raw basic-block hash for quick lookup (16 bytes).
        Array.Copy(basicBlock.Hash, 0, result, 32, 16);

        return result;
    }

    /// <summary>
    /// Merges component metadata, taking each field from the generator that
    /// produces it and falling back to the others where shown.
    /// </summary>
    private static FingerprintMetadata CombineMetadata(
        FingerprintMetadata? basicBlock,
        FingerprintMetadata? cfg,
        FingerprintMetadata? stringRefs)
    {
        return new FingerprintMetadata
        {
            BasicBlockCount = basicBlock?.BasicBlockCount ?? cfg?.BasicBlockCount,
            EdgeCount = cfg?.EdgeCount,
            CyclomaticComplexity = cfg?.CyclomaticComplexity,
            StringRefCount = stringRefs?.StringRefCount,
            InstructionCount = basicBlock?.InstructionCount,
            FunctionSize = basicBlock?.FunctionSize ?? cfg?.FunctionSize ?? stringRefs?.FunctionSize
        };
    }

    /// <summary>
    /// Computes the weighted-average confidence of the component fingerprints,
    /// boosted by 10% (capped at 1.0) when every component scores at least 0.7.
    /// </summary>
    private static decimal CalculateCombinedConfidence(
        FingerprintOutput basicBlock,
        FingerprintOutput cfg,
        FingerprintOutput? stringRefs)
    {
        // Weights: CFG (40%), BasicBlock (35%), StringRefs (25%).
        // When string refs are absent the remaining weights are renormalized.
        const decimal cfgWeight = 0.40m;
        const decimal basicBlockWeight = 0.35m;
        const decimal stringRefsWeight = 0.25m;

        var totalWeight = cfgWeight + basicBlockWeight;
        var weightedSum = (cfg.Confidence * cfgWeight) + (basicBlock.Confidence * basicBlockWeight);

        if (stringRefs != null)
        {
            totalWeight += stringRefsWeight;
            weightedSum += stringRefs.Confidence * stringRefsWeight;
        }

        var combined = weightedSum / totalWeight;

        // Boost confidence if all available algorithms agree (all >= 0.7).
        if (basicBlock.Confidence >= 0.7m && cfg.Confidence >= 0.7m &&
            (stringRefs == null || stringRefs.Confidence >= 0.7m))
        {
            combined = Math.Min(1.0m, combined * 1.1m);
        }

        return combined;
    }
}
|
||||
@@ -0,0 +1,432 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ControlFlowGraphFingerprintGenerator.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-07 — Implement ControlFlowGraphFingerprintGenerator
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
|
||||
/// <summary>
/// Generates fingerprints based on control flow graph structure.
///
/// Algorithm:
/// 1. Build an approximate CFG by scanning the code for branch, call and return opcodes
/// 2. Extract graph properties (node count, edge count, cyclomatic complexity, depth, loops)
/// 3. Hash those properties together with a bounded adjacency list
///
/// Intended to be resilient to instruction reordering while capturing loop and
/// branch structure. Produces a 32-byte fingerprint.
/// </summary>
public sealed class ControlFlowGraphFingerprintGenerator : IVulnFingerprintGenerator
{
    private readonly ILogger<ControlFlowGraphFingerprintGenerator> _logger;

    public ControlFlowGraphFingerprintGenerator(ILogger<ControlFlowGraphFingerprintGenerator> logger)
    {
        _logger = logger;
    }

    /// <inheritdoc />
    public FingerprintAlgorithm Algorithm => FingerprintAlgorithm.ControlFlowGraph;

    /// <summary>
    /// Requires at least 32 bytes of binary data for a meaningful CFG.
    /// </summary>
    public bool CanProcess(FingerprintInput input)
    {
        return input.BinaryData.Length >= 32;
    }

    /// <summary>
    /// Builds the CFG for the input bytes and hashes its structure.
    /// </summary>
    /// <param name="input">Function bytes, architecture and identifying information.</param>
    /// <param name="ct">Cancellation token; checked once before work starts.</param>
    /// <returns>A completed task holding the 32-byte fingerprint and graph metadata.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(input);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Generating CFG fingerprint for {Component}/{CveId} ({Size} bytes)",
            input.Component,
            input.CveId,
            input.BinaryData.Length);

        // Step 1: Build control flow graph
        var cfg = BuildCfg(input.BinaryData, input.Architecture);

        // Step 2: Extract graph properties
        var properties = ExtractGraphProperties(cfg);

        // Step 3: Compute structural hash
        var fingerprint = ComputeStructuralHash(cfg, properties);
        var fingerprintId = Convert.ToHexString(fingerprint).ToLowerInvariant();

        _logger.LogDebug(
            "Generated CFG fingerprint {FingerprintId}: {NodeCount} nodes, {EdgeCount} edges, CC={CC}",
            fingerprintId,
            properties.NodeCount,
            properties.EdgeCount,
            properties.CyclomaticComplexity);

        return Task.FromResult(new FingerprintOutput
        {
            Hash = fingerprint,
            FingerprintId = fingerprintId,
            Algorithm = FingerprintAlgorithm.ControlFlowGraph,
            Confidence = CalculateConfidence(properties),
            Metadata = new FingerprintMetadata
            {
                BasicBlockCount = properties.NodeCount,
                EdgeCount = properties.EdgeCount,
                CyclomaticComplexity = properties.CyclomaticComplexity,
                FunctionSize = input.BinaryData.Length
            }
        });
    }

    /// <summary>
    /// Represents a node in the control flow graph. <see cref="Id"/> equals the
    /// node's index in <see cref="Cfg.Nodes"/>.
    /// </summary>
    private sealed class CfgNode
    {
        public int Id { get; init; }

        /// <summary>Offset of the first byte of the block.</summary>
        public int StartOffset { get; init; }

        /// <summary>Offset of the first byte of the block's terminating instruction.</summary>
        public int EndOffset { get; init; }

        public List<int> Successors { get; } = [];
        public List<int> Predecessors { get; } = [];
        public CfgNodeType Type { get; init; }
    }

    /// <summary>
    /// Node classification. The declaration order is part of the fingerprint
    /// (the type histogram is written in this order), so do not reorder.
    /// </summary>
    private enum CfgNodeType
    {
        Entry,
        Exit,
        Basic,
        Conditional,
        Call,
        Loop
    }

    /// <summary>Instruction set resolved once per function from the architecture name.</summary>
    private enum InstructionSet
    {
        Unknown,
        X64,
        Arm64
    }

    private sealed class Cfg
    {
        public List<CfgNode> Nodes { get; } = [];
        public int EntryNode { get; set; }
        public List<int> ExitNodes { get; } = [];
    }

    private sealed class CfgProperties
    {
        public int NodeCount { get; init; }
        public int EdgeCount { get; init; }
        public int CyclomaticComplexity { get; init; }
        public int[] DegreeSequence { get; init; } = [];
        public int MaxDepth { get; init; }
        public int LoopCount { get; init; }
    }

    /// <summary>
    /// Scans the code, cutting a node at every recognised control-flow
    /// instruction, then resolves fall-through and branch edges by offset.
    /// </summary>
    private Cfg BuildCfg(byte[] binaryData, string architecture)
    {
        // Resolve the architecture once instead of re-parsing the name per byte.
        var isa = ResolveInstructionSet(architecture);

        var cfg = new Cfg { EntryNode = 0 };
        var pendingEdges = new List<(int Source, int TargetOffset)>();
        var currentNodeStart = 0;
        var nodeId = 0;
        var i = 0;

        while (i < binaryData.Length)
        {
            var (isTerminator, type, branchTarget) = AnalyzeInstruction(binaryData, i, isa);
            var instrLen = GetInstructionLength(binaryData, i, isa);

            if (!isTerminator)
            {
                // Advance by the instruction length so fixed-width ISAs (ARM64)
                // stay aligned; for x64 this is a single byte.
                i += instrLen;
                continue;
            }

            cfg.Nodes.Add(new CfgNode
            {
                Id = nodeId,
                StartOffset = currentNodeStart,
                EndOffset = i,
                Type = type
            });

            if (type == CfgNodeType.Exit)
            {
                cfg.ExitNodes.Add(nodeId);
            }
            else
            {
                // Only conditional branches and calls continue at the next
                // instruction; an unconditional jump (terminator of type Basic)
                // never falls through.
                var fallsThrough = type is CfgNodeType.Conditional or CfgNodeType.Call;
                if (fallsThrough && i + instrLen < binaryData.Length)
                {
                    pendingEdges.Add((nodeId, i + instrLen));
                }

                // Branch target, when it lands inside this function.
                if (branchTarget >= 0 && branchTarget < binaryData.Length)
                {
                    pendingEdges.Add((nodeId, branchTarget));
                }
            }

            nodeId++;
            i += instrLen;
            currentNodeStart = i;
        }

        // Add final node if any code remains after the last terminator.
        if (currentNodeStart < binaryData.Length)
        {
            cfg.Nodes.Add(new CfgNode
            {
                Id = nodeId,
                StartOffset = currentNodeStart,
                EndOffset = binaryData.Length - 1,
                Type = CfgNodeType.Exit
            });
            cfg.ExitNodes.Add(nodeId);
        }

        // Ensure at least one node.
        if (cfg.Nodes.Count == 0)
        {
            cfg.Nodes.Add(new CfgNode
            {
                Id = 0,
                StartOffset = 0,
                EndOffset = binaryData.Length - 1,
                Type = CfgNodeType.Basic
            });
        }

        // Resolve pending edges: map each target offset to the node containing it.
        foreach (var (source, targetOffset) in pendingEdges)
        {
            var targetNode = cfg.Nodes.FirstOrDefault(n =>
                n.StartOffset <= targetOffset && targetOffset <= n.EndOffset);
            if (targetNode != null && source < cfg.Nodes.Count)
            {
                cfg.Nodes[source].Successors.Add(targetNode.Id);
                targetNode.Predecessors.Add(source);
            }
        }

        return cfg;
    }

    /// <summary>Maps an architecture name (case-insensitive) to an instruction set.</summary>
    private static InstructionSet ResolveInstructionSet(string architecture)
    {
        return architecture.ToLowerInvariant() switch
        {
            "x86_64" or "x64" or "amd64" => InstructionSet.X64,
            "aarch64" or "arm64" => InstructionSet.Arm64,
            _ => InstructionSet.Unknown
        };
    }

    /// <summary>
    /// Classifies the instruction at <paramref name="i"/>. branchTarget is -1
    /// when the instruction has no statically known target.
    /// </summary>
    private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeInstruction(
        byte[] data, int i, InstructionSet isa)
    {
        if (i >= data.Length) return (false, CfgNodeType.Basic, -1);

        return isa switch
        {
            InstructionSet.X64 => AnalyzeX64Instruction(data, i),
            InstructionSet.Arm64 => AnalyzeArm64Instruction(data, i),
            _ => (false, CfgNodeType.Basic, -1)
        };
    }

    private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeX64Instruction(byte[] data, int i)
    {
        var b = data[i];
        return b switch
        {
            0xC3 => (true, CfgNodeType.Exit, -1), // ret
            0xE8 => (true, CfgNodeType.Call, GetX64BranchTarget(data, i, 5)), // call
            0xE9 => (true, CfgNodeType.Basic, GetX64BranchTarget(data, i, 5)), // jmp
            0xEB => (true, CfgNodeType.Basic, GetX64ShortBranchTarget(data, i)), // jmp short
            >= 0x70 and <= 0x7F => (true, CfgNodeType.Conditional, GetX64ShortBranchTarget(data, i)), // Jcc short
            0x0F when i + 1 < data.Length && data[i + 1] >= 0x80 && data[i + 1] <= 0x8F =>
                (true, CfgNodeType.Conditional, GetX64BranchTarget(data, i, 6)), // Jcc near
            _ => (false, CfgNodeType.Basic, -1)
        };
    }

    /// <summary>
    /// Decodes the rel32 operand that ends an instruction of length
    /// <paramref name="instrLen"/> and returns the absolute target offset, or
    /// -1 when the instruction is truncated or the target is not a valid
    /// non-negative offset.
    /// </summary>
    private static int GetX64BranchTarget(byte[] data, int i, int instrLen)
    {
        if (i + instrLen > data.Length) return -1;

        // x86 immediates are little-endian regardless of the host byte order.
        var rel32 = System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(
            data.AsSpan(i + instrLen - 4, 4));

        // Compute in 64 bits so an extreme displacement cannot overflow.
        var target = (long)i + instrLen + rel32;
        return target >= 0 && target <= int.MaxValue ? (int)target : -1;
    }

    private static int GetX64ShortBranchTarget(byte[] data, int i)
    {
        if (i + 2 > data.Length) return -1;
        var offset = (sbyte)data[i + 1];
        return i + 2 + offset;
    }

    private static (bool isTerminator, CfgNodeType type, int branchTarget) AnalyzeArm64Instruction(byte[] data, int i)
    {
        if (i + 4 > data.Length) return (false, CfgNodeType.Basic, -1);

        // Little-endian encoding: data[i + 3] holds instruction bits 31..24.
        var topByte = data[i + 3];

        switch (topByte & 0xFC)
        {
            case 0x14: // B
                return (true, CfgNodeType.Basic, GetArm64BranchTarget(data, i));

            case 0x54: // B.cond (19-bit immediate, not 26-bit)
                return (true, CfgNodeType.Conditional, GetArm64ConditionalBranchTarget(data, i));

            case 0x94: // BL
                return (true, CfgNodeType.Call, GetArm64BranchTarget(data, i));

            case 0xD4 when topByte == 0xD6 && (data[i + 2] & 0x1F) == 0x1F:
            {
                // Unconditional branch (register). Bits 23..21 select the
                // operation: 0 = BR, 1 = BLR, 2 = RET.
                var operation = data[i + 2] >> 5;

                // BLR is an indirect call: the target is unknown but execution
                // resumes at the next instruction. BR and RET leave the
                // function's statically known flow.
                return operation == 1
                    ? (true, CfgNodeType.Call, -1)
                    : (true, CfgNodeType.Exit, -1);
            }

            default:
                return (false, CfgNodeType.Basic, -1);
        }
    }

    /// <summary>Reads one 32-bit little-endian ARM64 instruction word.</summary>
    private static uint ReadArm64Instruction(byte[] data, int i)
    {
        return System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(i, 4));
    }

    /// <summary>
    /// Target offset of a B/BL instruction: i + sign-extended imm26 (bits 25..0) * 4.
    /// </summary>
    private static int GetArm64BranchTarget(byte[] data, int i)
    {
        if (i + 4 > data.Length) return -1;

        var instruction = ReadArm64Instruction(data, i);

        // Move imm26 to the top of the word, then arithmetic-shift back so the
        // sign is extended and two low zero bits remain (the "* 4").
        var byteOffset = unchecked((int)(instruction << 6)) >> 4;
        return i + byteOffset;
    }

    /// <summary>
    /// Target offset of a B.cond instruction: i + sign-extended imm19 (bits 23..5) * 4.
    /// </summary>
    private static int GetArm64ConditionalBranchTarget(byte[] data, int i)
    {
        if (i + 4 > data.Length) return -1;

        var instruction = ReadArm64Instruction(data, i);

        // Move imm19 to the top of the word and sign-extend it.
        var imm19 = unchecked((int)(instruction << 8)) >> 13;
        return i + (imm19 * 4);
    }

    private static int GetInstructionLength(byte[] data, int i, InstructionSet isa)
    {
        return isa switch
        {
            InstructionSet.X64 => GetX64InstructionLength(data, i),
            InstructionSet.Arm64 => 4,
            _ => 1
        };
    }

    /// <summary>
    /// Length of the recognised x64 control-flow instructions; 1 for everything
    /// else so that unrecognised bytes are scanned one at a time.
    /// </summary>
    private static int GetX64InstructionLength(byte[] data, int i)
    {
        if (i >= data.Length) return 1;
        var b = data[i];
        return b switch
        {
            0xC3 => 1,
            0xEB or (>= 0x70 and <= 0x7F) => 2,
            0xE8 or 0xE9 => 5,
            // Only 0F 80..0F 8F (Jcc near) is followed by a rel32 operand.
            0x0F when i + 1 < data.Length && data[i + 1] >= 0x80 && data[i + 1] <= 0x8F => 6,
            _ => 1
        };
    }

    private static CfgProperties ExtractGraphProperties(Cfg cfg)
    {
        var edgeCount = cfg.Nodes.Sum(n => n.Successors.Count);

        // Cyclomatic complexity = E - N + 2P (P=1 for single component)
        var cyclomaticComplexity = edgeCount - cfg.Nodes.Count + 2;

        // Degree sequence (in-degree + out-degree for each node), largest first
        var degreeSequence = cfg.Nodes
            .Select(n => n.Predecessors.Count + n.Successors.Count)
            .OrderDescending()
            .ToArray();

        // Estimate loop count (edges pointing to the same or an earlier node)
        var loopCount = CountBackEdges(cfg);

        // Max depth via BFS from entry
        var maxDepth = ComputeMaxDepth(cfg);

        return new CfgProperties
        {
            NodeCount = cfg.Nodes.Count,
            EdgeCount = edgeCount,
            CyclomaticComplexity = Math.Max(1, cyclomaticComplexity),
            DegreeSequence = degreeSequence,
            MaxDepth = maxDepth,
            LoopCount = loopCount
        };
    }

    private static int CountBackEdges(Cfg cfg)
    {
        // Simple heuristic: count edges whose target id is not greater than the
        // source id (self-loops included).
        return cfg.Nodes.Sum(n => n.Successors.Count(s => s <= n.Id));
    }

    private static int ComputeMaxDepth(Cfg cfg)
    {
        if (cfg.Nodes.Count == 0) return 0;

        var visited = new HashSet<int>();
        var queue = new Queue<(int nodeId, int depth)>();
        queue.Enqueue((cfg.EntryNode, 0));
        var maxDepth = 0;

        while (queue.Count > 0)
        {
            var (nodeId, depth) = queue.Dequeue();
            if (!visited.Add(nodeId)) continue;
            maxDepth = Math.Max(maxDepth, depth);

            if (nodeId < cfg.Nodes.Count)
            {
                foreach (var succ in cfg.Nodes[nodeId].Successors)
                {
                    if (!visited.Contains(succ))
                    {
                        queue.Enqueue((succ, depth + 1));
                    }
                }
            }
        }

        return maxDepth;
    }

    /// <summary>
    /// Serializes the graph properties, the 16 largest degrees, the node-type
    /// histogram and the first 64 edges, then hashes the result with SHA-256.
    /// </summary>
    private static byte[] ComputeStructuralHash(Cfg cfg, CfgProperties props)
    {
        using var ms = new MemoryStream();
        using var writer = new BinaryWriter(ms);

        // Encode graph properties
        writer.Write(props.NodeCount);
        writer.Write(props.EdgeCount);
        writer.Write(props.CyclomaticComplexity);
        writer.Write(props.MaxDepth);
        writer.Write(props.LoopCount);

        // Encode degree sequence (truncated)
        var degreeSeq = props.DegreeSequence.Take(16).ToArray();
        writer.Write(degreeSeq.Length);
        foreach (var d in degreeSeq)
        {
            writer.Write(d);
        }

        // Encode node types histogram
        var typeHistogram = cfg.Nodes
            .GroupBy(n => n.Type)
            .ToDictionary(g => g.Key, g => g.Count());
        foreach (var type in Enum.GetValues<CfgNodeType>())
        {
            writer.Write(typeHistogram.GetValueOrDefault(type, 0));
        }

        // Simplified adjacency encoding (first 64 edges); ids are written as
        // 16-bit values, so ids above 65535 wrap.
        var edges = cfg.Nodes
            .SelectMany(n => n.Successors.Select(s => (n.Id, s)))
            .Take(64)
            .ToList();
        writer.Write(edges.Count);
        foreach (var (source, target) in edges)
        {
            writer.Write((ushort)source);
            writer.Write((ushort)target);
        }

        // Make sure everything has reached the stream before it is read back.
        writer.Flush();

        // Hash to 32 bytes
        return SHA256.HashData(ms.ToArray());
    }

    private static decimal CalculateConfidence(CfgProperties props)
    {
        // Higher confidence for more complex graphs
        if (props.NodeCount < 3) return 0.5m;
        if (props.CyclomaticComplexity < 3) return 0.6m;
        if (props.NodeCount < 10 && props.EdgeCount < 15) return 0.75m;
        if (props.LoopCount > 0) return 0.9m;
        return 0.85m;
    }
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IVulnFingerprintGenerator.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-05 — Design IVulnFingerprintGenerator interface
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
|
||||
/// <summary>
/// Immutable description of one function (or code section) to fingerprint,
/// together with the vulnerability and component it belongs to.
/// </summary>
/// <remarks>
/// Record equality compares <see cref="BinaryData"/> by array reference, not
/// by content.
/// </remarks>
public sealed record FingerprintInput
{
    /// <summary>
    /// Raw machine code of the function or code section.
    /// </summary>
    public required byte[] BinaryData { get; init; }

    /// <summary>
    /// Target architecture name, for example "x86_64" or "aarch64".
    /// </summary>
    public required string Architecture { get; init; }

    /// <summary>
    /// Name of the function, when known.
    /// </summary>
    public string? FunctionName { get; init; }

    /// <summary>
    /// Base address used for disassembly normalization.
    /// </summary>
    public ulong BaseAddress { get; init; }

    /// <summary>
    /// Identifier of the CVE this fingerprint is generated for.
    /// </summary>
    public required string CveId { get; init; }

    /// <summary>
    /// Component name, for example "openssl".
    /// </summary>
    public required string Component { get; init; }

    /// <summary>
    /// Path of the source file, when known.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Line number within the source file, when known.
    /// </summary>
    public int? SourceLine { get; init; }

    /// <summary>
    /// Package URL of the component, when known.
    /// </summary>
    public string? Purl { get; init; }
}
|
||||
|
||||
/// <summary>
/// Result produced by a fingerprint generator.
/// </summary>
/// <remarks>
/// Record equality compares <see cref="Hash"/> by array reference, not by
/// content; compare <see cref="FingerprintId"/> for value semantics.
/// </remarks>
public sealed record FingerprintOutput
{
    /// <summary>
    /// Raw bytes of the fingerprint hash.
    /// </summary>
    public required byte[] Hash { get; init; }

    /// <summary>
    /// Hex-encoded identifier of the fingerprint.
    /// </summary>
    public required string FingerprintId { get; init; }

    /// <summary>
    /// Algorithm that produced this fingerprint.
    /// </summary>
    public required FingerprintAlgorithm Algorithm { get; init; }

    /// <summary>
    /// Generation confidence score in the range 0.0-1.0; defaults to 1.0.
    /// </summary>
    public decimal Confidence { get; init; } = 1.0m;

    /// <summary>
    /// Optional extra measurements gathered during generation.
    /// </summary>
    public FingerprintMetadata? Metadata { get; init; }
}
|
||||
|
||||
/// <summary>
/// Optional measurements collected while generating a fingerprint. Every
/// member is null when the producing generator does not compute it.
/// </summary>
public sealed record FingerprintMetadata
{
    /// <summary>
    /// Number of basic blocks in the function.
    /// </summary>
    public int? BasicBlockCount { get; init; }

    /// <summary>
    /// Number of edges in the control flow graph.
    /// </summary>
    public int? EdgeCount { get; init; }

    /// <summary>
    /// Cyclomatic complexity of the control flow graph.
    /// </summary>
    public int? CyclomaticComplexity { get; init; }

    /// <summary>
    /// Number of string references found.
    /// </summary>
    public int? StringRefCount { get; init; }

    /// <summary>
    /// Number of instructions.
    /// </summary>
    public int? InstructionCount { get; init; }

    /// <summary>
    /// Size of the function in bytes.
    /// </summary>
    public int? FunctionSize { get; init; }
}
|
||||
|
||||
/// <summary>
/// Contract implemented by every vulnerability fingerprint generator.
/// </summary>
public interface IVulnFingerprintGenerator
{
    /// <summary>
    /// Gets the algorithm type this generator produces.
    /// </summary>
    FingerprintAlgorithm Algorithm { get; }

    /// <summary>
    /// Generates a fingerprint for the supplied input.
    /// </summary>
    /// <param name="input">Data to fingerprint.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A task that yields the generated fingerprint.</returns>
    Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default);

    /// <summary>
    /// Determines whether this generator is able to process the supplied input.
    /// </summary>
    /// <param name="input">Input to validate.</param>
    /// <returns><c>true</c> when the input can be processed; otherwise <c>false</c>.</returns>
    bool CanProcess(FingerprintInput input);
}
|
||||
@@ -0,0 +1,281 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// StringRefsFingerprintGenerator.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-08 — Implement StringRefsFingerprintGenerator
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
|
||||
/// <summary>
|
||||
/// Generates fingerprints based on string references in code.
|
||||
///
|
||||
/// Algorithm:
|
||||
/// 1. Extract string constants referenced by function
|
||||
/// 2. Hash string content (normalized)
|
||||
/// 3. Include reference order/pattern
|
||||
///
|
||||
/// Useful for error message patterns and version strings.
|
||||
/// Produces a 16-byte fingerprint.
|
||||
/// </summary>
|
||||
public sealed class StringRefsFingerprintGenerator : IVulnFingerprintGenerator
|
||||
{
|
||||
private readonly ILogger<StringRefsFingerprintGenerator> _logger;
|
||||
|
||||
/// <summary>
/// Creates the generator with the logger used for debug diagnostics.
/// </summary>
/// <param name="logger">Logger stored for later use by the generator.</param>
public StringRefsFingerprintGenerator(ILogger<StringRefsFingerprintGenerator> logger)
    => _logger = logger;
|
||||
|
||||
/// <summary>
/// Gets the algorithm type produced by this generator (string references).
/// </summary>
public FingerprintAlgorithm Algorithm
{
    get { return FingerprintAlgorithm.StringRefs; }
}
|
||||
|
||||
/// <summary>
/// Returns true when at least one string reference can be extracted from the
/// input's binary data.
/// </summary>
/// <param name="input">Input to validate.</param>
/// <remarks>
/// Runs the full extraction pass over the data to answer the question.
/// </remarks>
public bool CanProcess(FingerprintInput input)
    => ExtractStringReferences(input.BinaryData).Count > 0;
|
||||
|
||||
/// <summary>
/// Extracts the string references from the input, normalizes them and hashes
/// them in extraction order.
/// </summary>
/// <param name="input">Function bytes and identifying information.</param>
/// <param name="ct">Cancellation token; checked once before work starts.</param>
/// <returns>A completed task holding the fingerprint and string-count metadata.</returns>
public Task<FingerprintOutput> GenerateAsync(FingerprintInput input, CancellationToken ct = default)
{
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Generating string refs fingerprint for {Component}/{CveId} ({Size} bytes)",
        input.Component,
        input.CveId,
        input.BinaryData.Length);

    // Extract, normalize, then hash with the original order preserved.
    var extracted = ExtractStringReferences(input.BinaryData);
    var hash = HashStringReferences(extracted.Select(NormalizeString).ToList());
    var id = Convert.ToHexString(hash).ToLowerInvariant();

    _logger.LogDebug(
        "Generated string refs fingerprint {FingerprintId} with {StringCount} strings",
        id,
        extracted.Count);

    var output = new FingerprintOutput
    {
        Hash = hash,
        FingerprintId = id,
        Algorithm = FingerprintAlgorithm.StringRefs,
        Confidence = CalculateConfidence(extracted),
        Metadata = new FingerprintMetadata
        {
            StringRefCount = extracted.Count,
            FunctionSize = input.BinaryData.Length
        }
    };

    return Task.FromResult(output);
}
|
||||
|
||||
/// <summary>
/// Collects runs of at least four printable ASCII bytes from the binary data,
/// classifies each one and drops those that fail the plausibility filter.
/// </summary>
/// <param name="binaryData">Bytes to scan.</param>
/// <returns>The surviving strings in the order they occur, with their start offsets.</returns>
private List<ExtractedString> ExtractStringReferences(byte[] binaryData)
{
    const int minLength = 4;

    var candidates = new List<ExtractedString>();
    var run = new StringBuilder();
    var runStart = 0;

    // Records the current run if it is long enough, then starts over. A NUL
    // terminator and any other non-printable byte end a run the same way.
    void Flush()
    {
        if (run.Length >= minLength)
        {
            var text = run.ToString();
            candidates.Add(new ExtractedString
            {
                Value = text,
                Offset = runStart,
                Type = ClassifyString(text)
            });
        }

        run.Clear();
    }

    for (var index = 0; index < binaryData.Length; index++)
    {
        var current = binaryData[index];

        if (!IsPrintableAscii(current))
        {
            Flush();
            continue;
        }

        if (run.Length == 0)
        {
            runStart = index;
        }

        run.Append((char)current);
    }

    // Handle a run that extends to the end of the data.
    Flush();

    // Filter out likely false positives.
    return candidates
        .Where(candidate => IsLikelyValidString(candidate.Value))
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Tells whether a byte is a printable ASCII character, i.e. in the range 0x20 (space) to 0x7E (tilde).
/// </summary>
/// <param name="b">Byte to test.</param>
/// <returns><c>true</c> for 0x20-0x7E inclusive; otherwise <c>false</c>.</returns>
private static bool IsPrintableAscii(byte b) => b is >= 0x20 and <= 0x7E;
|
||||
|
||||
/// <summary>
/// Heuristic noise filter for extracted strings.
/// </summary>
/// <remarks>
/// A string is rejected when it is shorter than four characters, when it consists of a single
/// repeated character, or when fewer than one third (integer division) of its characters are letters.
/// </remarks>
/// <param name="s">Candidate string.</param>
/// <returns><c>true</c> when the string passes all three checks.</returns>
private static bool IsLikelyValidString(string s)
{
    if (s.Length < 4)
    {
        return false;
    }

    // Single pass: track both "all characters identical" and the number of letters.
    var first = s[0];
    var allSame = true;
    var letterCount = 0;

    foreach (var c in s)
    {
        if (c != first)
        {
            allSame = false;
        }

        if (char.IsLetter(c))
        {
            letterCount++;
        }
    }

    if (allSame)
    {
        return false; // Repeated characters
    }

    // Length >= 4 makes the threshold at least 1, so passing this check already
    // guarantees the string contains a letter; no separate "any letter" test is needed.
    return letterCount >= s.Length / 3;
}
|
||||
|
||||
/// <summary>
/// A printable string found in the scanned buffer, with its position and heuristic category.
/// </summary>
private sealed record ExtractedString
{
    /// <summary>
    /// The string text as extracted.
    /// </summary>
    public required string Value { get; init; }

    /// <summary>
    /// Index of the string's first byte within the scanned buffer.
    /// </summary>
    public int Offset { get; init; } = 0;

    /// <summary>
    /// Category assigned to the string.
    /// </summary>
    public StringType Type { get; init; } = StringType.General;
}
|
||||
|
||||
/// <summary>
/// Heuristic category of an extracted string.
/// </summary>
private enum StringType
{
    /// <summary>No more specific category applied.</summary>
    General = 0,

    /// <summary>Looks like an error or failure message.</summary>
    Error = 1,

    /// <summary>Looks like version information.</summary>
    Version = 2,

    /// <summary>Looks like a file system path.</summary>
    Path = 3,

    /// <summary>Looks like an HTTP(S) URL.</summary>
    Url = 4,

    /// <summary>Looks like a printf-style format string.</summary>
    Format = 5
}
|
||||
|
||||
/// <summary>
/// Assigns a heuristic category to an extracted string.
/// </summary>
/// <remarks>
/// Checks run in a fixed priority order (error, version, path, URL, format) and the first one
/// that applies wins, so a string containing a digit and a dot is classified as
/// <see cref="StringType.Version"/> even if it also looks like a path, URL or format string.
/// </remarks>
/// <param name="s">String to classify.</param>
/// <returns>The first matching category, or <see cref="StringType.General"/> when none applies.</returns>
private static StringType ClassifyString(string s)
{
    var folded = s.ToLowerInvariant();

    var mentionsFailure =
        folded.Contains("error") || folded.Contains("fail") || folded.Contains("invalid");
    if (mentionsFailure)
    {
        return StringType.Error;
    }

    var looksLikeVersion =
        folded.Contains("version") || (s.Contains('.') && s.Any(char.IsDigit));
    if (looksLikeVersion)
    {
        return StringType.Version;
    }

    // Each alternative already implies that the string contains '/'.
    var looksLikePath = s.StartsWith('/') || s.Contains("/usr") || s.Contains("/etc");
    if (looksLikePath)
    {
        return StringType.Path;
    }

    var looksLikeUrl = folded.Contains("http://") || folded.Contains("https://");
    if (looksLikeUrl)
    {
        return StringType.Url;
    }

    // A '%' anywhere plus one of the letters d/s/x/f anywhere (not necessarily adjacent).
    var looksLikeFormat = s.Contains('%') && s.IndexOfAny(new[] { 'd', 's', 'x', 'f' }) >= 0;
    return looksLikeFormat ? StringType.Format : StringType.General;
}
|
||||
|
||||
/// <summary>
/// Produces the canonical form of an extracted string used for hashing.
/// </summary>
/// <remarks>
/// Version numbers, timestamps and hex addresses are replaced by placeholders (in that order),
/// whitespace is collapsed, and the result is lower-cased with the invariant culture.
/// </remarks>
/// <param name="s">Extracted string whose <c>Value</c> is normalized.</param>
/// <returns>The normalized, lower-cased text.</returns>
private static string NormalizeString(ExtractedString s)
{
    // Strip the variable parts first, innermost call runs first.
    var withoutVariableParts =
        RemoveHexAddresses(RemoveTimestamps(RemoveVersionNumbers(s.Value)));

    return NormalizeWhitespace(withoutVariableParts).ToLowerInvariant();
}
|
||||
|
||||
/// <summary>
/// Matches dotted version numbers with two or three numeric parts, e.g. 1.2 or 1.2.3.
/// Built once and reused, instead of going through the static pattern cache on every call.
/// </summary>
private static readonly System.Text.RegularExpressions.Regex s_versionNumberPattern =
    new(@"\d+\.\d+(\.\d+)?", System.Text.RegularExpressions.RegexOptions.Compiled);

/// <summary>
/// Replaces version patterns such as 1.2.3 with the placeholder <c>&lt;VER&gt;</c>.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with every version pattern replaced.</returns>
private static string RemoveVersionNumbers(string s)
{
    return s_versionNumberPattern.Replace(s, "<VER>");
}
|
||||
|
||||
/// <summary>
/// Matches ISO-style dates (yyyy-MM-dd) with an optional <c>THH:mm:ss</c> time part.
/// Built once and reused, instead of going through the static pattern cache on every call.
/// </summary>
private static readonly System.Text.RegularExpressions.Regex s_isoTimestampPattern =
    new(@"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2})?", System.Text.RegularExpressions.RegexOptions.Compiled);

/// <summary>
/// Replaces ISO timestamps with the placeholder <c>&lt;TS&gt;</c>.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with every timestamp replaced.</returns>
private static string RemoveTimestamps(string s)
{
    return s_isoTimestampPattern.Replace(s, "<TS>");
}
|
||||
|
||||
/// <summary>
/// Matches <c>0x</c>-prefixed hexadecimal literals such as 0x1234abcd.
/// Built once and reused, instead of going through the static pattern cache on every call.
/// </summary>
private static readonly System.Text.RegularExpressions.Regex s_hexAddressPattern =
    new(@"0x[0-9a-fA-F]+", System.Text.RegularExpressions.RegexOptions.Compiled);

/// <summary>
/// Replaces hex addresses such as 0x1234abcd with the placeholder <c>&lt;ADDR&gt;</c>.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with every hex address replaced.</returns>
private static string RemoveHexAddresses(string s)
{
    return s_hexAddressPattern.Replace(s, "<ADDR>");
}
|
||||
|
||||
/// <summary>
/// Matches one or more consecutive whitespace characters.
/// Built once and reused, instead of going through the static pattern cache on every call.
/// </summary>
private static readonly System.Text.RegularExpressions.Regex s_whitespaceRunPattern =
    new(@"\s+", System.Text.RegularExpressions.RegexOptions.Compiled);

/// <summary>
/// Trims the text and collapses every internal whitespace run to a single space.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The trimmed text with single-space separators.</returns>
private static string NormalizeWhitespace(string s)
{
    return s_whitespaceRunPattern.Replace(s.Trim(), " ");
}
|
||||
|
||||
/// <summary>
/// Hashes an ordered list of normalized strings into a 16-byte fingerprint.
/// </summary>
/// <remarks>
/// The hashed payload is the string count as a little-endian 32-bit integer followed by the
/// first 8 bytes of each string's SHA-256 (UTF-8 encoded), in list order. The fingerprint is
/// the first 16 bytes of the SHA-256 of that payload, so reordering the strings changes the result.
/// </remarks>
/// <param name="strings">Normalized strings, in the order they should be hashed.</param>
/// <returns>A 16-byte fingerprint.</returns>
private static byte[] HashStringReferences(List<string> strings)
{
    const int PerStringBytes = 8;
    const int FingerprintBytes = 16;

    // Payload layout: [count:int32 LE][hash0[0..8)][hash1[0..8)]...
    var payload = new byte[sizeof(int) + (strings.Count * PerStringBytes)];
    System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(payload, strings.Count);

    var cursor = sizeof(int);
    foreach (var text in strings)
    {
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(text));
        digest.AsSpan(0, PerStringBytes).CopyTo(payload.AsSpan(cursor));
        cursor += PerStringBytes;
    }

    // Final hash, truncated to the fingerprint size.
    return SHA256.HashData(payload).AsSpan(0, FingerprintBytes).ToArray();
}
|
||||
|
||||
/// <summary>
/// Derives a confidence score from how many strings were extracted and which categories they fall into.
/// </summary>
/// <remarks>
/// 0 strings: 0.1; 1 string: 0.4; an error string with at least 3 strings: 0.9; a format string
/// (with at least 2 strings): 0.8; at least 5 strings: 0.75; anything else: 0.6.
/// </remarks>
/// <param name="strings">Extracted strings.</param>
/// <returns>The confidence score.</returns>
private static decimal CalculateConfidence(List<ExtractedString> strings)
{
    var total = strings.Count;
    if (total <= 1)
    {
        return total == 0 ? 0.1m : 0.4m;
    }

    // Error messages and format strings are treated as stronger signals.
    var containsError = false;
    var containsFormat = false;
    foreach (var entry in strings)
    {
        if (entry.Type == StringType.Error)
        {
            containsError = true;
        }
        else if (entry.Type == StringType.Format)
        {
            containsFormat = true;
        }
    }

    // Arms are evaluated top to bottom, mirroring the priority of the rules above.
    return (containsError, containsFormat, total) switch
    {
        (true, _, >= 3) => 0.9m,
        (_, true, >= 2) => 0.8m,
        (_, _, >= 5) => 0.75m,
        _ => 0.6m
    };
}
|
||||
}
|
||||
@@ -0,0 +1,308 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// FingerprintMatcher.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-13 — Implement similarity matching with configurable threshold
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Matching;
|
||||
|
||||
/// <summary>
/// Implementation of fingerprint matching using multiple similarity metrics.
/// </summary>
/// <remarks>
/// The algorithm used for a lookup is inferred from the fingerprint length (16 bytes: basic
/// block, 32 bytes: control flow graph, 48 bytes: combined). Candidates are fetched from the
/// repository, optionally filtered, scored, and the best-scoring candidate is reported as a
/// match when its similarity reaches <see cref="MatchOptions.MinSimilarity"/>.
/// </remarks>
public sealed class FingerprintMatcher : IFingerprintMatcher
{
    private const int BasicBlockLength = 16;
    private const int CfgLength = 32;
    private const int CfgStructuralLength = 20;
    private const int CombinedLength = BasicBlockLength + CfgLength;

    private readonly ILogger<FingerprintMatcher> _logger;
    private readonly IFingerprintRepository _repository;

    /// <summary>
    /// Creates a matcher that reads candidate fingerprints from <paramref name="repository"/>.
    /// </summary>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <param name="repository">Source of candidate fingerprints.</param>
    public FingerprintMatcher(
        ILogger<FingerprintMatcher> logger,
        IFingerprintRepository repository)
    {
        _logger = logger;
        _repository = repository;
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="fingerprint"/> is null.</exception>
    public async Task<FingerprintMatchResult> MatchAsync(
        byte[] fingerprint,
        MatchOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);

        options ??= new MatchOptions();
        var sw = Stopwatch.StartNew();

        _logger.LogDebug(
            "Matching fingerprint ({Size} bytes) with threshold {Threshold:P0}",
            fingerprint.Length,
            options.MinSimilarity);

        // Determine algorithm from fingerprint size
        var algorithm = InferAlgorithm(fingerprint);

        // Get candidate fingerprints from repository
        var candidates = await _repository.SearchByHashAsync(
            fingerprint,
            algorithm,
            options.Architecture ?? "",
            ct);

        if (candidates.Length == 0)
        {
            _logger.LogDebug("No candidates found for fingerprint");
            return new FingerprintMatchResult
            {
                IsMatch = false,
                Similarity = 0,
                Confidence = 0,
                Details = new MatchDetails
                {
                    MatchingAlgorithm = algorithm,
                    CandidatesEvaluated = 0,
                    MatchTimeMs = sw.ElapsedMilliseconds
                }
            };
        }

        // Apply filters
        var filteredCandidates = candidates
            .Where(c => !options.RequireValidated || c.Validated)
            .Take(options.MaxCandidates)
            .ToList();

        // Find best match (first candidate wins on ties, as only strictly better scores replace it)
        VulnFingerprint? bestMatch = null;
        var bestSimilarity = 0m;

        foreach (var candidate in filteredCandidates)
        {
            var similarity = CalculateSimilarity(fingerprint, candidate.FingerprintHash, algorithm);
            if (similarity > bestSimilarity)
            {
                bestSimilarity = similarity;
                bestMatch = candidate;
            }
        }

        // Details are built once, after scoring, so they are always present and the
        // elapsed time covers the whole evaluation rather than the moment the best
        // candidate happened to be seen.
        var details = new MatchDetails
        {
            MatchingAlgorithm = algorithm,
            CandidatesEvaluated = filteredCandidates.Count,
            MatchTimeMs = sw.ElapsedMilliseconds
        };

        if (bestMatch is not null && algorithm == FingerprintAlgorithm.Combined)
        {
            details = WithCombinedComponents(details, fingerprint, bestMatch.FingerprintHash);
        }

        // A match needs an actual candidate; a threshold of zero must not turn "nothing scored" into a hit.
        var isMatch = bestMatch is not null && bestSimilarity >= options.MinSimilarity;

        _logger.LogDebug(
            "Match result: {IsMatch}, similarity={Similarity:P2}, candidates={Candidates}",
            isMatch,
            bestSimilarity,
            filteredCandidates.Count);

        return new FingerprintMatchResult
        {
            IsMatch = isMatch,
            Similarity = bestSimilarity,
            MatchedFingerprint = isMatch ? bestMatch : null,
            Confidence = isMatch ? CalculateMatchConfidence(bestSimilarity, bestMatch) : 0,
            Details = details
        };
    }

    /// <inheritdoc />
    /// <remarks>Fingerprints are matched one after another; results are in input order.</remarks>
    /// <exception cref="ArgumentNullException"><paramref name="fingerprints"/> is null.</exception>
    public async Task<IReadOnlyList<FingerprintMatchResult>> MatchBatchAsync(
        IEnumerable<byte[]> fingerprints,
        MatchOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprints);

        var results = new List<FingerprintMatchResult>();

        foreach (var fingerprint in fingerprints)
        {
            ct.ThrowIfCancellationRequested();
            var result = await MatchAsync(fingerprint, options, ct);
            results.Add(result);
        }

        return results;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Fingerprints of different lengths are compared over their common prefix with the generic
    /// hash similarity. An empty fingerprint carries no information and always scores 0.
    /// </remarks>
    /// <exception cref="ArgumentNullException">Either fingerprint is null.</exception>
    public decimal CalculateSimilarity(byte[] fingerprint1, byte[] fingerprint2, FingerprintAlgorithm algorithm)
    {
        ArgumentNullException.ThrowIfNull(fingerprint1);
        ArgumentNullException.ThrowIfNull(fingerprint2);

        // Without this guard an empty input would either divide by zero (basic block)
        // or be reported as a perfect match (empty common prefix).
        if (fingerprint1.Length == 0 || fingerprint2.Length == 0)
        {
            return 0m;
        }

        if (fingerprint1.Length != fingerprint2.Length)
        {
            // Handle mismatched sizes by comparing common prefix
            var minLen = Math.Min(fingerprint1.Length, fingerprint2.Length);
            return CalculateHashSimilarity(
                fingerprint1.AsSpan(0, minLen),
                fingerprint2.AsSpan(0, minLen));
        }

        return algorithm switch
        {
            FingerprintAlgorithm.BasicBlock => CalculateBasicBlockSimilarity(fingerprint1, fingerprint2),
            FingerprintAlgorithm.ControlFlowGraph => CalculateCfgSimilarity(fingerprint1, fingerprint2),
            FingerprintAlgorithm.StringRefs => CalculateStringRefsSimilarity(fingerprint1, fingerprint2),
            FingerprintAlgorithm.Combined => CalculateCombinedSimilarity(fingerprint1, fingerprint2),
            _ => CalculateHashSimilarity(fingerprint1, fingerprint2)
        };
    }

    /// <summary>
    /// Guesses the algorithm from the fingerprint length; unknown lengths fall back to basic block.
    /// </summary>
    private static FingerprintAlgorithm InferAlgorithm(byte[] fingerprint)
    {
        return fingerprint.Length switch
        {
            BasicBlockLength => FingerprintAlgorithm.BasicBlock, // Could also be StringRefs
            CfgLength => FingerprintAlgorithm.ControlFlowGraph,
            CombinedLength => FingerprintAlgorithm.Combined,
            _ => FingerprintAlgorithm.BasicBlock
        };
    }

    /// <summary>
    /// Offset of the basic-block component inside a combined fingerprint.
    /// </summary>
    /// <remarks>
    /// Fingerprints longer than 48 bytes are read with a leading version byte (offset 1).
    /// A fingerprint of exactly 48 bytes has no room for that byte next to the 16 + 32 byte
    /// components, so it is read from offset 0.
    /// NOTE(review): confirm both layouts against the combined fingerprint generator.
    /// </remarks>
    private static int CombinedPayloadOffset(int length) => length > CombinedLength ? 1 : 0;

    /// <summary>
    /// Adds per-component similarity scores for combined fingerprints, computed on the
    /// component slices. Returns <paramref name="details"/> unchanged when either side is too
    /// short to contain both components.
    /// </summary>
    private static MatchDetails WithCombinedComponents(MatchDetails details, byte[] fingerprint, byte[] candidateHash)
    {
        var shortest = Math.Min(fingerprint.Length, candidateHash.Length);
        if (shortest < CombinedLength)
        {
            return details;
        }

        var basicBlockStart = CombinedPayloadOffset(shortest);
        var cfgStart = basicBlockStart + BasicBlockLength;

        return details with
        {
            BasicBlockSimilarity = CalculateBasicBlockSimilarity(
                fingerprint.AsSpan(basicBlockStart, BasicBlockLength),
                candidateHash.AsSpan(basicBlockStart, BasicBlockLength)),
            CfgSimilarity = CalculateCfgSimilarity(
                fingerprint.AsSpan(cfgStart, CfgLength),
                candidateHash.AsSpan(cfgStart, CfgLength))
        };
    }

    /// <summary>
    /// Basic-block similarity: one minus the normalized Hamming distance over (at most) the first 16 bytes.
    /// </summary>
    private static decimal CalculateBasicBlockSimilarity(ReadOnlySpan<byte> fp1, ReadOnlySpan<byte> fp2)
    {
        var minLen = Math.Min(fp1.Length, Math.Min(fp2.Length, BasicBlockLength));
        if (minLen == 0)
        {
            return 0m;
        }

        var hammingDistance = 0;
        for (var i = 0; i < minLen; i++)
        {
            hammingDistance += BitCount((byte)(fp1[i] ^ fp2[i]));
        }

        return 1m - ((decimal)hammingDistance / (minLen * 8));
    }

    /// <summary>
    /// CFG similarity: weighted mix of the first 20 bytes (70%) and the following 12 bytes (30%).
    /// Returns 0 when either input is shorter than 32 bytes.
    /// </summary>
    private static decimal CalculateCfgSimilarity(ReadOnlySpan<byte> fp1, ReadOnlySpan<byte> fp2)
    {
        if (fp1.Length < CfgLength || fp2.Length < CfgLength)
        {
            return 0m;
        }

        // For CFG, compare structural properties (first 20 bytes) more heavily
        var structuralSimilarity = CalculateHashSimilarity(
            fp1.Slice(0, CfgStructuralLength),
            fp2.Slice(0, CfgStructuralLength));

        // Compare remaining bytes (adjacency encoding)
        var adjacencySimilarity = CalculateHashSimilarity(
            fp1.Slice(CfgStructuralLength, CfgLength - CfgStructuralLength),
            fp2.Slice(CfgStructuralLength, CfgLength - CfgStructuralLength));

        return (structuralSimilarity * 0.7m) + (adjacencySimilarity * 0.3m);
    }

    /// <summary>
    /// String-refs similarity: direct hash comparison.
    /// </summary>
    private static decimal CalculateStringRefsSimilarity(ReadOnlySpan<byte> fp1, ReadOnlySpan<byte> fp2)
    {
        return CalculateHashSimilarity(fp1, fp2);
    }

    /// <summary>
    /// Combined similarity: equal-weight mix of the basic-block (16 bytes) and CFG (32 bytes)
    /// components. Returns 0 when either input is shorter than 48 bytes.
    /// </summary>
    private static decimal CalculateCombinedSimilarity(ReadOnlySpan<byte> fp1, ReadOnlySpan<byte> fp2)
    {
        if (fp1.Length < CombinedLength || fp2.Length < CombinedLength)
        {
            return 0m;
        }

        // Previously the slices always started after a version byte, which runs past the end
        // of a 48-byte fingerprint; the offset is now chosen so both slices stay in range.
        var offset = CombinedPayloadOffset(Math.Min(fp1.Length, fp2.Length));

        // Basic block (16 bytes)
        var bbSim = CalculateHashSimilarity(
            fp1.Slice(offset, BasicBlockLength),
            fp2.Slice(offset, BasicBlockLength));
        offset += BasicBlockLength;

        // CFG (32 bytes)
        var cfgSim = CalculateHashSimilarity(
            fp1.Slice(offset, CfgLength),
            fp2.Slice(offset, CfgLength));

        // Weighted combination
        return (cfgSim * 0.5m) + (bbSim * 0.5m);
    }

    /// <summary>
    /// General hash similarity: 40% fraction of identical bytes plus 60% bit-level similarity
    /// (one minus the normalized Hamming distance). Inputs of different length score 0.
    /// </summary>
    private static decimal CalculateHashSimilarity(ReadOnlySpan<byte> hash1, ReadOnlySpan<byte> hash2)
    {
        if (hash1.Length != hash2.Length)
        {
            return 0m;
        }

        if (hash1.Length == 0)
        {
            return 1m;
        }

        var matchingBytes = 0;
        var hammingDistance = 0;

        for (var i = 0; i < hash1.Length; i++)
        {
            var xor = (byte)(hash1[i] ^ hash2[i]);
            if (xor == 0)
            {
                matchingBytes++;
            }
            else
            {
                hammingDistance += BitCount(xor);
            }
        }

        var exactMatchScore = (decimal)matchingBytes / hash1.Length;
        var bitSimilarity = 1m - ((decimal)hammingDistance / (hash1.Length * 8));

        // Combine: exact match is important, but bit similarity catches near matches
        return (exactMatchScore * 0.4m) + (bitSimilarity * 0.6m);
    }

    /// <summary>
    /// Number of set bits in a byte.
    /// </summary>
    private static int BitCount(byte b) => System.Numerics.BitOperations.PopCount(b);

    /// <summary>
    /// Derives the match confidence from the similarity: boosted by 10% (capped at 1) when the
    /// fingerprint is validated, then averaged with the fingerprint's own confidence if it has one.
    /// </summary>
    private static decimal CalculateMatchConfidence(decimal similarity, VulnFingerprint? fingerprint)
    {
        if (fingerprint == null)
        {
            return 0m;
        }

        var baseConfidence = similarity;

        // Boost confidence if fingerprint is validated
        if (fingerprint.Validated)
        {
            baseConfidence = Math.Min(1m, baseConfidence * 1.1m);
        }

        // Consider fingerprint's own confidence
        if (fingerprint.Confidence.HasValue)
        {
            baseConfidence = (baseConfidence + fingerprint.Confidence.Value) / 2;
        }

        return baseConfidence;
    }
}
|
||||
@@ -0,0 +1,106 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IFingerprintMatcher.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-12 — Implement IFingerprintMatcher interface
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Matching;
|
||||
|
||||
/// <summary>
/// Outcome of matching one fingerprint against the stored vulnerability fingerprints.
/// </summary>
public sealed record FingerprintMatchResult
{
    /// <summary>
    /// Whether a match was found.
    /// </summary>
    public bool IsMatch { get; init; } = false;

    /// <summary>
    /// Similarity score (0.0-1.0).
    /// </summary>
    public decimal Similarity { get; init; } = 0m;

    /// <summary>
    /// Matched fingerprint if found; otherwise null.
    /// </summary>
    public VulnFingerprint? MatchedFingerprint { get; init; } = null;

    /// <summary>
    /// Match confidence score.
    /// </summary>
    public decimal Confidence { get; init; } = 0m;

    /// <summary>
    /// Additional match details, when available.
    /// </summary>
    public MatchDetails? Details { get; init; } = null;
}
|
||||
|
||||
/// <summary>
/// Diagnostic information describing how a match result was produced.
/// </summary>
public sealed record MatchDetails
{
    /// <summary>
    /// Which algorithm found the match.
    /// </summary>
    public FingerprintAlgorithm MatchingAlgorithm { get; init; }

    /// <summary>
    /// Basic block similarity if applicable; otherwise null.
    /// </summary>
    public decimal? BasicBlockSimilarity { get; init; } = null;

    /// <summary>
    /// CFG similarity if applicable; otherwise null.
    /// </summary>
    public decimal? CfgSimilarity { get; init; } = null;

    /// <summary>
    /// String refs similarity if applicable; otherwise null.
    /// </summary>
    public decimal? StringRefsSimilarity { get; init; } = null;

    /// <summary>
    /// Number of candidate fingerprints evaluated.
    /// </summary>
    public int CandidatesEvaluated { get; init; } = 0;

    /// <summary>
    /// Time taken for matching in milliseconds.
    /// </summary>
    public long MatchTimeMs { get; init; } = 0L;
}
|
||||
|
||||
/// <summary>
/// Tunable settings for a fingerprint matching operation.
/// </summary>
public sealed record MatchOptions
{
    /// <summary>
    /// Minimum similarity threshold (0.0-1.0). Default 0.95.
    /// </summary>
    public decimal MinSimilarity { get; init; } = 0.95m;

    /// <summary>
    /// Maximum candidates to evaluate. Default 100.
    /// </summary>
    public int MaxCandidates { get; init; } = 100;

    /// <summary>
    /// Algorithms to use for matching. Null means all.
    /// </summary>
    public FingerprintAlgorithm[]? Algorithms { get; init; } = null;

    /// <summary>
    /// Whether to require validation of matched fingerprint. Default false.
    /// </summary>
    public bool RequireValidated { get; init; } = false;

    /// <summary>
    /// Architecture filter. Null means any.
    /// </summary>
    public string? Architecture { get; init; } = null;
}
|
||||
|
||||
/// <summary>
/// Contract for matching fingerprints against stored vulnerability fingerprints.
/// </summary>
public interface IFingerprintMatcher
{
    /// <summary>
    /// Matches a fingerprint against the vulnerability database.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to match.</param>
    /// <param name="options">Matching options; null selects the defaults.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Match result.</returns>
    Task<FingerprintMatchResult> MatchAsync(byte[] fingerprint, MatchOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Matches multiple fingerprints in batch.
    /// </summary>
    /// <param name="fingerprints">Fingerprints to match.</param>
    /// <param name="options">Matching options applied to every fingerprint; null selects the defaults.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Match results.</returns>
    Task<IReadOnlyList<FingerprintMatchResult>> MatchBatchAsync(IEnumerable<byte[]> fingerprints, MatchOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Calculates similarity between two fingerprints.
    /// </summary>
    /// <param name="fingerprint1">First fingerprint.</param>
    /// <param name="fingerprint2">Second fingerprint.</param>
    /// <param name="algorithm">Algorithm whose similarity metric should be applied.</param>
    /// <returns>Similarity score.</returns>
    decimal CalculateSimilarity(
        byte[] fingerprint1,
        byte[] fingerprint2,
        FingerprintAlgorithm algorithm);
}
|
||||
@@ -0,0 +1,390 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReferenceBuildPipeline.cs
|
||||
// Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
|
||||
// Task: FPRINT-10 — Create reference build generation pipeline
|
||||
// Task: FPRINT-11 — Implement vulnerable/fixed binary pair builder
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Generators;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Storage;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Pipeline;
|
||||
|
||||
/// <summary>
/// Describes which component, repository and refs to build in order to fingerprint a CVE.
/// </summary>
public sealed record ReferenceBuildRequest
{
    /// <summary>
    /// CVE identifier.
    /// </summary>
    public required string CveId { get; init; }

    /// <summary>
    /// Component name (e.g., "openssl").
    /// </summary>
    public required string Component { get; init; }

    /// <summary>
    /// Git repository URL.
    /// </summary>
    public required string RepoUrl { get; init; }

    /// <summary>
    /// Vulnerable commit or tag.
    /// </summary>
    public required string VulnerableRef { get; init; }

    /// <summary>
    /// Fixed commit or tag.
    /// </summary>
    public required string FixedRef { get; init; }

    /// <summary>
    /// Target architectures. Defaults to a single entry, "x86_64".
    /// </summary>
    public string[] Architectures { get; init; } = new[] { "x86_64" };

    /// <summary>
    /// Build command template, if any.
    /// </summary>
    public string? BuildCommand { get; init; } = null;

    /// <summary>
    /// Function names to fingerprint (optional).
    /// </summary>
    public string[]? TargetFunctions { get; init; } = null;
}
|
||||
|
||||
/// <summary>
/// Outcome of a reference build pipeline run.
/// </summary>
public sealed record ReferenceBuildResult
{
    /// <summary>
    /// Whether the pipeline succeeded.
    /// </summary>
    public bool Success { get; init; } = false;

    /// <summary>
    /// Error message if failed; otherwise null.
    /// </summary>
    public string? Error { get; init; } = null;

    /// <summary>
    /// Generated fingerprints. Empty by default.
    /// </summary>
    public VulnFingerprint[] Fingerprints { get; init; } = [];

    /// <summary>
    /// Storage path for vulnerable build.
    /// </summary>
    public string? VulnBuildPath { get; init; } = null;

    /// <summary>
    /// Storage path for fixed build.
    /// </summary>
    public string? FixedBuildPath { get; init; } = null;

    /// <summary>
    /// Build log.
    /// </summary>
    public string? BuildLog { get; init; } = null;
}
|
||||
|
||||
/// <summary>
/// A binary produced by a reference build.
/// </summary>
public sealed record BuildArtifact
{
    /// <summary>
    /// Path within build output.
    /// </summary>
    public required string Path { get; init; }

    /// <summary>
    /// Binary content.
    /// </summary>
    public required byte[] Content { get; init; }

    /// <summary>
    /// Target architecture.
    /// </summary>
    public required string Architecture { get; init; }

    /// <summary>
    /// True for the vulnerable version, false for the fixed version.
    /// </summary>
    public required bool IsVulnerable { get; init; }
}
|
||||
|
||||
/// <summary>
/// A single function carved out of a binary, ready to be fingerprinted.
/// </summary>
public sealed record ExtractedFunction
{
    /// <summary>
    /// Function name.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Function binary data.
    /// </summary>
    public required byte[] Data { get; init; }

    /// <summary>
    /// Start offset in original binary.
    /// </summary>
    public long Offset { get; init; } = 0L;

    /// <summary>
    /// Size in bytes.
    /// </summary>
    public int Size { get; init; } = 0;

    /// <summary>
    /// Source file if known; otherwise null.
    /// </summary>
    public string? SourceFile { get; init; } = null;

    /// <summary>
    /// Source line if known; otherwise null.
    /// </summary>
    public int? SourceLine { get; init; } = null;
}
|
||||
|
||||
/// <summary>
|
||||
/// Pipeline for generating reference builds and extracting vulnerability fingerprints.
|
||||
/// </summary>
|
||||
public sealed class ReferenceBuildPipeline
|
||||
{
|
||||
private readonly ILogger<ReferenceBuildPipeline> _logger;
|
||||
private readonly IFingerprintBlobStorage _storage;
|
||||
private readonly IFingerprintRepository _repository;
|
||||
private readonly CombinedFingerprintGenerator _fingerprintGenerator;
|
||||
|
||||
public ReferenceBuildPipeline(
|
||||
ILogger<ReferenceBuildPipeline> logger,
|
||||
IFingerprintBlobStorage storage,
|
||||
IFingerprintRepository repository,
|
||||
CombinedFingerprintGenerator fingerprintGenerator)
|
||||
{
|
||||
_logger = logger;
|
||||
_storage = storage;
|
||||
_repository = repository;
|
||||
_fingerprintGenerator = fingerprintGenerator;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Executes the full reference build pipeline.
|
||||
/// </summary>
|
||||
public async Task<ReferenceBuildResult> ExecuteAsync(
|
||||
ReferenceBuildRequest request,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Starting reference build pipeline for {CveId} ({Component})",
|
||||
request.CveId,
|
||||
request.Component);
|
||||
|
||||
try
|
||||
{
|
||||
// Step 1: Clone and build vulnerable version
|
||||
var vulnArtifacts = await BuildVersionAsync(request, isVulnerable: true, ct);
|
||||
if (vulnArtifacts.Count == 0)
|
||||
{
|
||||
return new ReferenceBuildResult
|
||||
{
|
||||
Success = false,
|
||||
Error = "Failed to build vulnerable version"
|
||||
};
|
||||
}
|
||||
|
||||
// Step 2: Clone and build fixed version
|
||||
var fixedArtifacts = await BuildVersionAsync(request, isVulnerable: false, ct);
|
||||
if (fixedArtifacts.Count == 0)
|
||||
{
|
||||
return new ReferenceBuildResult
|
||||
{
|
||||
Success = false,
|
||||
Error = "Failed to build fixed version"
|
||||
};
|
||||
}
|
||||
|
||||
// Step 3: Extract functions from both versions
|
||||
var vulnFunctions = await ExtractFunctionsAsync(vulnArtifacts, request.TargetFunctions, ct);
|
||||
var fixedFunctions = await ExtractFunctionsAsync(fixedArtifacts, request.TargetFunctions, ct);
|
||||
|
||||
// Step 4: Find differential fingerprints (what changed)
|
||||
var fingerprints = await GenerateDifferentialFingerprintsAsync(
|
||||
request,
|
||||
vulnFunctions,
|
||||
fixedFunctions,
|
||||
ct);
|
||||
|
||||
// Step 5: Store reference builds
|
||||
var vulnBuildPath = await StoreReferenceBuildAsync(request.CveId, vulnArtifacts, "vulnerable", ct);
|
||||
var fixedBuildPath = await StoreReferenceBuildAsync(request.CveId, fixedArtifacts, "fixed", ct);
|
||||
|
||||
// Step 6: Store fingerprints to repository
|
||||
foreach (var fp in fingerprints)
|
||||
{
|
||||
await _repository.CreateAsync(fp, ct);
|
||||
}
|
||||
|
||||
_logger.LogInformation(
|
||||
"Pipeline complete for {CveId}: generated {Count} fingerprints",
|
||||
request.CveId,
|
||||
fingerprints.Length);
|
||||
|
||||
return new ReferenceBuildResult
|
||||
{
|
||||
Success = true,
|
||||
Fingerprints = fingerprints,
|
||||
VulnBuildPath = vulnBuildPath,
|
||||
FixedBuildPath = fixedBuildPath
|
||||
};
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Pipeline failed for {CveId}", request.CveId);
|
||||
return new ReferenceBuildResult
|
||||
{
|
||||
Success = false,
|
||||
Error = ex.Message
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds a specific version (vulnerable or fixed).
|
||||
/// </summary>
|
||||
private async Task<List<BuildArtifact>> BuildVersionAsync(
|
||||
ReferenceBuildRequest request,
|
||||
bool isVulnerable,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var version = isVulnerable ? request.VulnerableRef : request.FixedRef;
|
||||
_logger.LogDebug(
|
||||
"Building {Type} version at {Ref}",
|
||||
isVulnerable ? "vulnerable" : "fixed",
|
||||
version);
|
||||
|
||||
// NOTE: Actual implementation would:
|
||||
// 1. Clone repo to sandboxed environment
|
||||
// 2. Checkout the specific ref
|
||||
// 3. Run build command
|
||||
// 4. Extract built binaries
|
||||
//
|
||||
// This is a placeholder that returns empty for now.
|
||||
// Production implementation would use containers or VMs for sandboxing.
|
||||
|
||||
await Task.CompletedTask;
|
||||
|
||||
// Placeholder: return empty list
|
||||
// Real impl would return built artifacts
|
||||
return [];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Extracts functions from build artifacts.
|
||||
/// </summary>
|
||||
private async Task<List<ExtractedFunction>> ExtractFunctionsAsync(
|
||||
List<BuildArtifact> artifacts,
|
||||
string[]? targetFunctions,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var functions = new List<ExtractedFunction>();
|
||||
|
||||
foreach (var artifact in artifacts)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
// NOTE: Real implementation would:
|
||||
// 1. Parse ELF/PE headers
|
||||
// 2. Find symbol table
|
||||
// 3. Extract function boundaries
|
||||
// 4. Extract code bytes for each function
|
||||
//
|
||||
// This is a placeholder.
|
||||
|
||||
_logger.LogDebug(
|
||||
"Extracting functions from {Path} ({Size} bytes)",
|
||||
artifact.Path,
|
||||
artifact.Content.Length);
|
||||
|
||||
// Placeholder: would use ELF parser
|
||||
}
|
||||
|
||||
await Task.CompletedTask;
|
||||
return functions;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates differential fingerprints by comparing vulnerable and fixed versions.
|
||||
/// </summary>
|
||||
private async Task<VulnFingerprint[]> GenerateDifferentialFingerprintsAsync(
|
||||
ReferenceBuildRequest request,
|
||||
List<ExtractedFunction> vulnFunctions,
|
||||
List<ExtractedFunction> fixedFunctions,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var fingerprints = new List<VulnFingerprint>();
|
||||
|
||||
// Find functions that changed between versions
|
||||
var changedFunctions = FindChangedFunctions(vulnFunctions, fixedFunctions);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Found {Count} changed functions between vulnerable and fixed",
|
||||
changedFunctions.Count);
|
||||
|
||||
foreach (var (vulnFunc, fixedFunc) in changedFunctions)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
// Generate fingerprint for the vulnerable version
|
||||
var input = new FingerprintInput
|
||||
{
|
||||
BinaryData = vulnFunc.Data,
|
||||
Architecture = "x86_64", // Would come from artifact
|
||||
FunctionName = vulnFunc.Name,
|
||||
CveId = request.CveId,
|
||||
Component = request.Component,
|
||||
SourceFile = vulnFunc.SourceFile,
|
||||
SourceLine = vulnFunc.SourceLine
|
||||
};
|
||||
|
||||
if (!_fingerprintGenerator.CanProcess(input))
|
||||
{
|
||||
_logger.LogDebug("Skipping function {Name}: too small", vulnFunc.Name);
|
||||
continue;
|
||||
}
|
||||
|
||||
var output = await _fingerprintGenerator.GenerateAsync(input, ct);
|
||||
|
||||
fingerprints.Add(new VulnFingerprint
|
||||
{
|
||||
Id = Guid.NewGuid(),
|
||||
CveId = request.CveId,
|
||||
Component = request.Component,
|
||||
Algorithm = output.Algorithm,
|
||||
FingerprintId = output.FingerprintId,
|
||||
FingerprintHash = output.Hash,
|
||||
Architecture = "x86_64",
|
||||
FunctionName = vulnFunc.Name,
|
||||
SourceFile = vulnFunc.SourceFile,
|
||||
SourceLine = vulnFunc.SourceLine,
|
||||
Confidence = output.Confidence,
|
||||
VulnBuildRef = request.VulnerableRef,
|
||||
FixedBuildRef = request.FixedRef,
|
||||
IndexedAt = DateTimeOffset.UtcNow
|
||||
});
|
||||
}
|
||||
|
||||
return fingerprints.ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Finds functions that changed between vulnerable and fixed versions.
/// </summary>
/// <param name="vulnFunctions">Functions taken from the vulnerable version.</param>
/// <param name="fixedFunctions">Functions taken from the fixed version.</param>
/// <returns>
/// One tuple per vulnerable function that either has no same-named function in
/// <paramref name="fixedFunctions"/> (the second element is then <c>null</c>) or whose
/// data differs from that of its same-named counterpart.
/// </returns>
private static List<(ExtractedFunction vuln, ExtractedFunction? fix)> FindChangedFunctions(
    List<ExtractedFunction> vulnFunctions,
    List<ExtractedFunction> fixedFunctions)
{
    // Index the fixed functions by name once instead of scanning the whole list
    // for every vulnerable function. ToLookup keeps source order inside each
    // group, so taking the first element selects the same function a linear
    // first-match scan would when a name occurs more than once.
    var fixedByName = fixedFunctions.ToLookup(f => f.Name);

    var results = new List<(ExtractedFunction, ExtractedFunction?)>();

    foreach (var vuln in vulnFunctions)
    {
        // Indexing a lookup with an unknown key yields an empty sequence,
        // so a missing counterpart shows up as null here.
        var fix = fixedByName[vuln.Name].FirstOrDefault();

        // Include if function exists in vuln but not in fixed (deleted)
        // or if the function data changed
        if (fix is null || !vuln.Data.SequenceEqual(fix.Data))
        {
            results.Add((vuln, fix));
        }
    }

    return results;
}
|
||||
|
||||
/// <summary>
/// Stores reference build artifacts to blob storage.
/// </summary>
/// <param name="cveId">CVE identifier forwarded to the storage call and written to the log.</param>
/// <param name="artifacts">Build artifacts; currently only their count is logged.</param>
/// <param name="buildType">Build type label forwarded to the storage call and written to the log.</param>
/// <param name="ct">Cancellation token forwarded to the storage call.</param>
/// <returns>The value returned by the storage call (the storage path).</returns>
private async Task<string> StoreReferenceBuildAsync(
    string cveId,
    List<BuildArtifact> artifacts,
    string buildType,
    CancellationToken ct)
{
    // NOTE: Real implementation would:
    // 1. Create tar archive of all artifacts
    // 2. Compress with zstd
    // 3. Store to blob storage

    // Placeholder: the artifacts are only counted for the log entry and an
    // empty payload is handed to the storage layer.
    var artifactCount = artifacts.Count;
    _logger.LogDebug("Storing {Count} artifacts for {CveId}/{BuildType}", artifactCount, cveId, buildType);

    return await _storage.StoreReferenceBuildAsync(cveId, buildType, [], ct);
}
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
/// <summary>
|
||||
@@ -39,45 +41,6 @@ public sealed record FixEvidence
|
||||
public DateTimeOffset CreatedAt { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fix state of a CVE for a package: fixed, vulnerable, not affected, won't-fix, or unknown.
|
||||
/// </summary>
|
||||
public enum FixState
|
||||
{
|
||||
/// <summary>CVE is fixed in this version</summary>
|
||||
Fixed,
|
||||
|
|
||||
/// <summary>CVE affects this package</summary>
|
||||
Vulnerable,
|
||||
|
|
||||
/// <summary>CVE does not affect this package</summary>
|
||||
NotAffected,
|
||||
|
|
||||
/// <summary>Fix won't be applied (e.g., EOL version)</summary>
|
||||
Wontfix,
|
||||
|
|
||||
/// <summary>Unknown status</summary>
|
||||
Unknown
|
||||
|
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Method used to identify the fix: a security feed, a changelog, a patch header, or an upstream patch match.
|
||||
/// </summary>
|
||||
public enum FixMethod
|
||||
{
|
||||
/// <summary>From official security feed (OVAL, DSA, etc.)</summary>
|
||||
SecurityFeed,
|
||||
|
|
||||
/// <summary>Parsed from Debian/Ubuntu changelog</summary>
|
||||
Changelog,
|
||||
|
|
||||
/// <summary>Extracted from patch header (DEP-3)</summary>
|
||||
PatchHeader,
|
||||
|
|
||||
/// <summary>Matched against upstream patch database</summary>
|
||||
UpstreamPatchMatch
|
||||
|
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Base class for evidence payloads.
|
||||
/// </summary>
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Repositories;
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
-- -----------------------------------------------------------------------------
-- 20251226_AddFingerprintTables.sql
-- Sprint: SPRINT_20251226_013_BINIDX_fingerprint_factory
-- Task: FPRINT-01 — Create vulnerable_fingerprints table schema
-- Task: FPRINT-02 — Create fingerprint_matches table for match results
-- -----------------------------------------------------------------------------

-- Fingerprint tables for vulnerability detection independent of package metadata.
-- Every statement is written to be re-runnable: tables and indexes use
-- IF NOT EXISTS, and policies are dropped before being (re)created.

BEGIN;

-- Table: vulnerable_fingerprints
-- Stores function-level vulnerability fingerprints
CREATE TABLE IF NOT EXISTS binaries.vulnerable_fingerprints (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL DEFAULT binaries_app.require_current_tenant(),
    cve_id TEXT NOT NULL,
    component TEXT NOT NULL,
    purl TEXT,
    algorithm TEXT NOT NULL CHECK (algorithm IN ('basic_block', 'cfg', 'string_refs', 'combined')),
    fingerprint_id TEXT NOT NULL,
    fingerprint_hash BYTEA NOT NULL,
    architecture TEXT NOT NULL,
    function_name TEXT,
    source_file TEXT,
    source_line INT,
    similarity_threshold DECIMAL(3,2) DEFAULT 0.95 CHECK (similarity_threshold BETWEEN 0 AND 1),
    confidence DECIMAL(3,2) CHECK (confidence IS NULL OR confidence BETWEEN 0 AND 1),
    validated BOOLEAN DEFAULT false,
    validation_stats JSONB,
    vuln_build_ref TEXT,
    fixed_build_ref TEXT,
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (tenant_id, fingerprint_id)
);

-- Indexes for efficient lookups
CREATE INDEX IF NOT EXISTS idx_fingerprint_cve
    ON binaries.vulnerable_fingerprints (tenant_id, cve_id);

CREATE INDEX IF NOT EXISTS idx_fingerprint_component
    ON binaries.vulnerable_fingerprints (tenant_id, component);

CREATE INDEX IF NOT EXISTS idx_fingerprint_algorithm
    ON binaries.vulnerable_fingerprints (tenant_id, algorithm, architecture);

CREATE INDEX IF NOT EXISTS idx_fingerprint_hash
    ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);

-- Partial index: only rows with validated = true are indexed
CREATE INDEX IF NOT EXISTS idx_fingerprint_validated
    ON binaries.vulnerable_fingerprints (tenant_id, validated)
    WHERE validated = true;

-- Enable Row-Level Security
ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;

-- CREATE POLICY has no IF NOT EXISTS form, so drop any existing policy first;
-- otherwise re-running this migration fails with "policy already exists".
DROP POLICY IF EXISTS fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints;
CREATE POLICY fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
    USING (tenant_id = binaries_app.require_current_tenant());

-- Table: fingerprint_matches
-- Stores results of fingerprint matching operations
CREATE TABLE IF NOT EXISTS binaries.fingerprint_matches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL DEFAULT binaries_app.require_current_tenant(),
    scan_id UUID NOT NULL,
    match_type TEXT NOT NULL CHECK (match_type IN ('fingerprint', 'build_id', 'hash_exact')),
    binary_key TEXT NOT NULL,
    vulnerable_purl TEXT NOT NULL,
    vulnerable_version TEXT NOT NULL,
    matched_fingerprint_id UUID REFERENCES binaries.vulnerable_fingerprints(id),
    matched_function TEXT,
    similarity DECIMAL(3,2) CHECK (similarity IS NULL OR similarity BETWEEN 0 AND 1),
    advisory_ids TEXT[],
    reachability_status TEXT CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),
    matched_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Indexes for fingerprint_matches
CREATE INDEX IF NOT EXISTS idx_match_scan
    ON binaries.fingerprint_matches (tenant_id, scan_id);

CREATE INDEX IF NOT EXISTS idx_match_fingerprint
    ON binaries.fingerprint_matches (matched_fingerprint_id);

CREATE INDEX IF NOT EXISTS idx_match_binary
    ON binaries.fingerprint_matches (tenant_id, binary_key);

CREATE INDEX IF NOT EXISTS idx_match_reachability
    ON binaries.fingerprint_matches (tenant_id, reachability_status);

-- Enable Row-Level Security
ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;

-- Same re-run safety as for fingerprints_tenant_isolation above.
DROP POLICY IF EXISTS matches_tenant_isolation ON binaries.fingerprint_matches;
CREATE POLICY matches_tenant_isolation ON binaries.fingerprint_matches
    USING (tenant_id = binaries_app.require_current_tenant());

-- Add comments
COMMENT ON TABLE binaries.vulnerable_fingerprints IS
    'Function-level vulnerability fingerprints for detecting vulnerable code independent of package metadata';

COMMENT ON COLUMN binaries.vulnerable_fingerprints.algorithm IS
    'Fingerprinting algorithm: basic_block, cfg (control flow graph), string_refs, or combined (ensemble)';

COMMENT ON COLUMN binaries.vulnerable_fingerprints.fingerprint_hash IS
    'Binary fingerprint data (16-48 bytes depending on algorithm)';

COMMENT ON COLUMN binaries.vulnerable_fingerprints.validation_stats IS
    'JSON object with tp, fp, tn, fn counts from validation corpus';

COMMENT ON TABLE binaries.fingerprint_matches IS
    'Results of fingerprint matching operations during scans';

COMMENT ON COLUMN binaries.fingerprint_matches.similarity IS
    'Similarity score (0.0-1.0) for fingerprint matches';

COMMIT;
|
||||
@@ -1,6 +1,7 @@
|
||||
using System.Text.Json;
|
||||
using Npgsql;
|
||||
using NpgsqlTypes;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Repositories;
|
||||
|
||||
@@ -11,16 +12,16 @@ namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
/// </summary>
|
||||
public sealed class FixIndexRepository : IFixIndexRepository
|
||||
{
|
||||
private readonly BinaryIndexDataSource _dataSource;
|
||||
private readonly BinaryIndexDbContext _dbContext;
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
|
||||
};
|
||||
|
||||
public FixIndexRepository(BinaryIndexDataSource dataSource)
|
||||
public FixIndexRepository(BinaryIndexDbContext dbContext)
|
||||
{
|
||||
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
|
||||
_dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -39,7 +40,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
AND source_pkg = @sourcePkg AND cve_id = @cveId
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("distro", distro);
|
||||
cmd.Parameters.AddWithValue("release", release);
|
||||
@@ -70,7 +71,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
ORDER BY cve_id
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("distro", distro);
|
||||
cmd.Parameters.AddWithValue("release", release);
|
||||
@@ -99,7 +100,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
ORDER BY distro, release, source_pkg
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("cveId", cveId);
|
||||
|
||||
@@ -145,7 +146,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
method, confidence, evidence_id, snapshot_id, indexed_at, updated_at
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("distro", evidence.Distro);
|
||||
cmd.Parameters.AddWithValue("release", evidence.Release);
|
||||
@@ -192,7 +193,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
RETURNING id
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("evidenceType", evidenceType);
|
||||
cmd.Parameters.AddWithValue("sourceFile", (object?)sourceFile ?? DBNull.Value);
|
||||
@@ -215,7 +216,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
WHERE id = @id
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("id", evidenceId);
|
||||
|
||||
@@ -253,7 +254,7 @@ public sealed class FixIndexRepository : IFixIndexRepository
|
||||
SELECT (SELECT COUNT(*) FROM deleted_index) + (SELECT COUNT(*) FROM deleted_evidence)
|
||||
""";
|
||||
|
||||
await using var conn = await _dataSource.OpenConnectionAsync(cancellationToken);
|
||||
await using var conn = await _dbContext.OpenConnectionAsync(cancellationToken);
|
||||
await using var cmd = new NpgsqlCommand(sql, conn);
|
||||
cmd.Parameters.AddWithValue("snapshotId", snapshotId);
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
using StellaOps.BinaryIndex.FixIndex.Repositories;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Matching;
|
||||
using StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
namespace StellaOps.BinaryIndex.Persistence.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of binary vulnerability lookup service.
|
||||
@@ -13,16 +15,19 @@ public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
|
||||
{
|
||||
private readonly IBinaryVulnAssertionRepository _assertionRepo;
|
||||
private readonly IFixIndexRepository? _fixIndexRepo;
|
||||
private readonly IFingerprintMatcher? _fingerprintMatcher;
|
||||
private readonly ILogger<BinaryVulnerabilityService> _logger;
|
||||
|
||||
public BinaryVulnerabilityService(
|
||||
IBinaryVulnAssertionRepository assertionRepo,
|
||||
ILogger<BinaryVulnerabilityService> logger,
|
||||
IFixIndexRepository? fixIndexRepo = null)
|
||||
IFixIndexRepository? fixIndexRepo = null,
|
||||
IFingerprintMatcher? fingerprintMatcher = null)
|
||||
{
|
||||
_assertionRepo = assertionRepo;
|
||||
_logger = logger;
|
||||
_fixIndexRepo = fixIndexRepo;
|
||||
_fingerprintMatcher = fingerprintMatcher;
|
||||
}
|
||||
|
||||
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
|
||||
@@ -133,4 +138,64 @@ public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
|
||||
"fingerprint_match" => MatchMethod.FingerprintMatch,
|
||||
_ => MatchMethod.RangeMatch
|
||||
};
|
||||
|
||||
/// <summary>
/// Matches a single fingerprint through the configured fingerprint matcher and
/// converts a positive result into a <see cref="BinaryVulnMatch"/>.
/// </summary>
/// <param name="fingerprint">Fingerprint bytes passed to the matcher as-is.</param>
/// <param name="options">Lookup options; a default-constructed instance is used when <c>null</c>.</param>
/// <param name="ct">Cancellation token forwarded to the matcher.</param>
/// <returns>
/// An empty array when no matcher is configured or the matcher reports no match;
/// otherwise an array with exactly one entry describing the matched fingerprint.
/// </returns>
public async Task<ImmutableArray<BinaryVulnMatch>> LookupByFingerprintAsync(
    byte[] fingerprint,
    FingerprintLookupOptions? options = null,
    CancellationToken ct = default)
{
    // The matcher is an optional dependency; without it there is nothing to query.
    if (_fingerprintMatcher is null)
    {
        _logger.LogWarning("Fingerprint matcher not configured, cannot perform fingerprint lookup");
        return ImmutableArray<BinaryVulnMatch>.Empty;
    }

    var effectiveOptions = options ?? new FingerprintLookupOptions();

    // Translate the service-level lookup options into the matcher's own option type.
    var matcherOptions = new MatchOptions
    {
        MinSimilarity = effectiveOptions.MinSimilarity,
        MaxCandidates = effectiveOptions.MaxCandidates,
        Architecture = effectiveOptions.Architecture
    };

    var outcome = await _fingerprintMatcher
        .MatchAsync(fingerprint, matcherOptions, ct)
        .ConfigureAwait(false);

    var found = new List<BinaryVulnMatch>();

    if (outcome.IsMatch && outcome.MatchedFingerprint is { } matched)
    {
        var evidence = new MatchEvidence
        {
            Similarity = outcome.Similarity,
            MatchedFunction = matched.FunctionName
        };

        found.Add(new BinaryVulnMatch
        {
            CveId = matched.CveId,
            // Fall back to a generic purl built from the component when the fingerprint carries none.
            VulnerablePurl = matched.Purl ?? $"pkg:generic/{matched.Component}",
            Method = MatchMethod.FingerprintMatch,
            Confidence = outcome.Confidence,
            Evidence = evidence
        });
    }

    _logger.LogDebug("Fingerprint lookup found {Count} matches", found.Count);
    return found.ToImmutableArray();
}
|
||||
|
||||
/// <summary>
/// Runs <see cref="LookupByFingerprintAsync"/> for each keyed fingerprint, one at a time,
/// and collects the results by key.
/// </summary>
/// <param name="fingerprints">
/// Key/fingerprint pairs to look up. When a key occurs more than once, the result of its
/// last occurrence replaces the earlier ones.
/// </param>
/// <param name="options">Lookup options forwarded unchanged to every single lookup.</param>
/// <param name="ct">Cancellation token checked before each lookup and forwarded to it.</param>
/// <returns>A dictionary mapping each key to the matches found for its fingerprint.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fingerprints"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled between lookups.</exception>
public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupByFingerprintBatchAsync(
    IEnumerable<(string Key, byte[] Fingerprint)> fingerprints,
    FingerprintLookupOptions? options = null,
    CancellationToken ct = default)
{
    // Fail with a descriptive argument error instead of a NullReferenceException from foreach.
    ArgumentNullException.ThrowIfNull(fingerprints);

    var results = new Dictionary<string, ImmutableArray<BinaryVulnMatch>>();

    foreach (var (key, fingerprint) in fingerprints)
    {
        // The single lookup returns early without touching the token when no
        // matcher is configured, so cancellation has to be observed here to
        // stop a large batch promptly.
        ct.ThrowIfCancellationRequested();

        results[key] = await LookupByFingerprintAsync(fingerprint, options, ct).ConfigureAwait(false);
    }

    return results.ToImmutableDictionary();
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.FixIndex\StellaOps.BinaryIndex.FixIndex.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Fingerprints\StellaOps.BinaryIndex.Fingerprints.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user