// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Collections.Concurrent;
using System.Collections.Immutable;
namespace StellaOps.Scanner.EntryTrace.Binary;
/// <summary>
/// Interface for an index of fingerprints enabling fast lookup.
/// </summary>
public interface IFingerprintIndex
{
    /// <summary>
    /// Adds a fingerprint to the index.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to add.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that completes when the add has been processed.</returns>
    Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Adds a fingerprint match to the index.
    /// </summary>
    /// <param name="match">The fingerprint match to add.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if added, false if duplicate.</returns>
    Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint and returns matching entries.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint with additional options.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="minSimilarity">Minimum similarity score a candidate must reach to be returned.</param>
    /// <param name="maxResults">Upper bound on the number of similarity matches returned.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up an exact fingerprint match.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The indexed entry for the fingerprint, or <c>null</c> when there is none.</returns>
    Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the number of fingerprints in the index.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Gets all packages indexed.
    /// </summary>
    ImmutableHashSet<string> IndexedPackages { get; }

    /// <summary>
    /// Clears the index.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task ClearAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets statistics about the index.
    /// </summary>
    /// <returns>A snapshot of the index statistics.</returns>
    FingerprintIndexStatistics GetStatistics();
}
/// <summary>
/// Statistics about a fingerprint index.
/// </summary>
/// <param name="TotalFingerprints">Total fingerprints in the index.</param>
/// <param name="TotalPackages">Total unique packages indexed.</param>
/// <param name="TotalVulnerabilities">Total vulnerability associations.</param>
/// <param name="IndexedAt">When the index was last updated.</param>
public sealed record FingerprintIndexStatistics(
int TotalFingerprints,
int TotalPackages,
int TotalVulnerabilities,
DateTimeOffset IndexedAt);
/// <summary>
/// Result of a fingerprint lookup.
/// </summary>
/// <param name="Fingerprint">The matched fingerprint.</param>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="SourcePackage">PURL of the source package.</param>
/// <param name="SourceVersion">Version of the source package.</param>
/// <param name="SourceFile">Source file path.</param>
/// <param name="SourceLine">Source line number.</param>
/// <param name="VulnerabilityIds">Associated vulnerability IDs.</param>
/// <param name="Similarity">Similarity score (0.0-1.0).</param>
/// <param name="MatchedAt">When the match was found.</param>
public sealed record FingerprintMatch(
    CodeFingerprint Fingerprint,
    string FunctionName,
    string SourcePackage,
    string? SourceVersion,
    string? SourceFile,
    int? SourceLine,
    ImmutableArray<string> VulnerabilityIds,
    float Similarity,
    DateTimeOffset MatchedAt)
{
    // Similarity at or above this value is treated as an exact match.
    private const float ExactMatchThreshold = 0.999f;

    // Similarity at or above this value is treated as high confidence.
    private const float HighConfidenceThreshold = 0.95f;

    /// <summary>
    /// Whether this is an exact match.
    /// </summary>
    public bool IsExactMatch => Similarity >= ExactMatchThreshold;

    /// <summary>
    /// Whether this is a high-confidence match.
    /// </summary>
    public bool IsHighConfidence => Similarity >= HighConfidenceThreshold;

    /// <summary>
    /// Whether this match has associated vulnerabilities.
    /// </summary>
    /// <remarks>
    /// Uses <c>IsDefaultOrEmpty</c> so that a record built with an uninitialized
    /// (<c>default</c>) array reports <c>false</c> instead of throwing.
    /// </remarks>
    public bool HasVulnerabilities => !VulnerabilityIds.IsDefaultOrEmpty;
}
/// <summary>
/// In-memory fingerprint index for fast lookups.
/// </summary>
/// <remarks>
/// Entries are keyed by fingerprint id for exact lookups and additionally grouped
/// by fingerprint algorithm so similarity searches only compare fingerprints
/// produced by the same algorithm.
/// </remarks>
public sealed class InMemoryFingerprintIndex : IFingerprintIndex
{
    // Defaults applied by the two-argument LookupAsync overload.
    private const float DefaultMinSimilarity = 0.95f;
    private const int DefaultMaxResults = 10;

    // Fingerprint id -> indexed entry.
    private readonly ConcurrentDictionary<string, FingerprintMatch> _exactIndex = new();

    // Algorithm -> entries for that algorithm. Each list is locked on itself while read or mutated.
    // NOTE(review): the key type is assumed to be FingerprintAlgorithm, i.e. the declared type of
    // CodeFingerprint.Algorithm; that declaration is not visible here - confirm.
    private readonly ConcurrentDictionary<FingerprintAlgorithm, List<FingerprintMatch>> _algorithmIndex = new();

    private readonly HashSet<string> _packages = new();

    // Guards _packages and _lastUpdated.
    private readonly object _packagesLock = new();

    private readonly TimeProvider _timeProvider;

    // DateTimeOffset is a multi-word struct, so it is only read and written under _packagesLock.
    private DateTimeOffset _lastUpdated;

    /// <summary>
    /// Creates a new in-memory fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public InMemoryFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _lastUpdated = _timeProvider.GetUtcNow();
    }

    /// <inheritdoc />
    public int Count => _exactIndex.Count;

    /// <inheritdoc />
    public ImmutableHashSet<string> IndexedPackages
    {
        get
        {
            lock (_packagesLock)
            {
                return _packages.ToImmutableHashSet();
            }
        }
    }

    /// <inheritdoc />
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);

        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray<string>.Empty,
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        // The Task<bool> from the other overload is itself a Task, so it is returned as-is
        // rather than wrapped in a continuation.
        return AddAsync(match, cancellationToken);
    }

    /// <inheritdoc />
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(match);
        cancellationToken.ThrowIfCancellationRequested();

        // The id-keyed dictionary decides whether this entry is new.
        if (!_exactIndex.TryAdd(match.Fingerprint.Id, match))
        {
            return Task.FromResult(false);
        }

        // Add to algorithm-specific index for similarity search.
        var bucket = _algorithmIndex.GetOrAdd(
            match.Fingerprint.Algorithm,
            static _ => new List<FingerprintMatch>());
        lock (bucket)
        {
            bucket.Add(match);
        }

        // Track the package and stamp the update time under the same lock.
        lock (_packagesLock)
        {
            _packages.Add(match.SourcePackage);
            _lastUpdated = _timeProvider.GetUtcNow();
        }

        return Task.FromResult(true);
    }

    /// <inheritdoc />
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);
        cancellationToken.ThrowIfCancellationRequested();

        _exactIndex.TryGetValue(fingerprint.Id, out var match);
        return Task.FromResult<FingerprintMatch?>(match);
    }

    /// <inheritdoc />
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => LookupAsync(fingerprint, DefaultMinSimilarity, DefaultMaxResults, cancellationToken);

    /// <inheritdoc />
    /// <remarks>
    /// An entry with the same fingerprint id short-circuits the search and is returned
    /// alone, exactly as stored. Otherwise entries with the same algorithm are scored
    /// with <c>ComputeSimilarity</c>, filtered by <paramref name="minSimilarity"/>,
    /// ordered by descending score and truncated to <paramref name="maxResults"/>;
    /// each returned entry carries the computed score in <c>Similarity</c>.
    /// </remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);
        cancellationToken.ThrowIfCancellationRequested();

        // First try exact match.
        if (_exactIndex.TryGetValue(fingerprint.Id, out var exactMatch))
        {
            return Task.FromResult(ImmutableArray.Create(exactMatch));
        }

        // Search for similar fingerprints produced by the same algorithm.
        if (!_algorithmIndex.TryGetValue(fingerprint.Algorithm, out var bucket))
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        var candidates = new List<(FingerprintMatch Match, float Similarity)>();
        lock (bucket)
        {
            foreach (var entry in bucket)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var similarity = fingerprint.ComputeSimilarity(entry.Fingerprint);
                if (similarity >= minSimilarity)
                {
                    candidates.Add((entry, similarity));
                }
            }
        }

        var ranked = candidates
            .OrderByDescending(c => c.Similarity)
            .Take(maxResults)
            .Select(c => c.Match with { Similarity = c.Similarity })
            .ToImmutableArray();

        return Task.FromResult(ranked);
    }

    /// <inheritdoc />
    public Task ClearAsync(CancellationToken cancellationToken = default)
    {
        _exactIndex.Clear();
        _algorithmIndex.Clear();

        lock (_packagesLock)
        {
            _packages.Clear();

            // Clearing changes the index, so the "last updated" stamp is refreshed too.
            _lastUpdated = _timeProvider.GetUtcNow();
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public FingerprintIndexStatistics GetStatistics()
    {
        // _exactIndex is a ConcurrentDictionary, so it is read without _packagesLock.
        // A default (uninitialized) id array counts as zero instead of throwing.
        var vulnerabilityCount = _exactIndex.Values
            .Sum(m => m.VulnerabilityIds.IsDefault ? 0 : m.VulnerabilityIds.Length);

        int packageCount;
        DateTimeOffset lastUpdated;
        lock (_packagesLock)
        {
            packageCount = _packages.Count;
            lastUpdated = _lastUpdated;
        }

        return new FingerprintIndexStatistics(
            TotalFingerprints: Count,
            TotalPackages: packageCount,
            TotalVulnerabilities: vulnerabilityCount,
            IndexedAt: lastUpdated);
    }
}
/// <summary>
/// Vulnerability-aware fingerprint index that tracks known-vulnerable functions.
/// </summary>
/// <remarks>
/// Wraps an <see cref="InMemoryFingerprintIndex"/> for storage and lookup, and keeps a
/// separate map from fingerprint id to <see cref="VulnerabilityInfo"/>.
/// </remarks>
public sealed class VulnerableFingerprintIndex : IFingerprintIndex
{
    // Minimum similarity used by CheckVulnerableAsync when searching for a match.
    private const float VulnerableMatchThreshold = 0.95f;

    private readonly InMemoryFingerprintIndex _baseIndex;
    private readonly TimeProvider _timeProvider;

    // Fingerprint id -> vulnerability details recorded by AddVulnerableAsync.
    private readonly ConcurrentDictionary<string, VulnerabilityInfo> _vulnerabilities = new();

    /// <summary>
    /// Creates a new vulnerability-aware fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public VulnerableFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _baseIndex = new InMemoryFingerprintIndex(_timeProvider);
    }

    /// <inheritdoc />
    public int Count => _baseIndex.Count;

    /// <inheritdoc />
    public ImmutableHashSet<string> IndexedPackages => _baseIndex.IndexedPackages;

    /// <summary>
    /// Adds a fingerprint with associated vulnerability information.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to add.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="vulnerabilityId">Identifier of the vulnerability to associate.</param>
    /// <param name="vulnerableVersions">Affected versions, stored as supplied.</param>
    /// <param name="severity">Severity of the vulnerability.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// True if the fingerprint was added. False if the underlying index rejected it as a
    /// duplicate, in which case the vulnerability information is NOT recorded.
    /// </returns>
    public async Task<bool> AddVulnerableAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string vulnerabilityId,
        string vulnerableVersions,
        VulnerabilitySeverity severity,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);

        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray.Create(vulnerabilityId),
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        var added = await _baseIndex.AddAsync(match, cancellationToken).ConfigureAwait(false);
        if (!added)
        {
            return false;
        }

        _vulnerabilities[fingerprint.Id] = new VulnerabilityInfo(
            vulnerabilityId,
            vulnerableVersions,
            severity);
        return true;
    }

    /// <inheritdoc />
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(match, cancellationToken);

    /// <inheritdoc />
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(fingerprint, sourcePackage, functionName, sourceFile, cancellationToken);

    /// <inheritdoc />
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupExactAsync(fingerprint, cancellationToken);

    /// <inheritdoc />
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, cancellationToken);

    /// <inheritdoc />
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, minSimilarity, maxResults, cancellationToken);

    /// <summary>
    /// Looks up vulnerability information for a fingerprint.
    /// </summary>
    /// <param name="fingerprintId">Id of the fingerprint to look up.</param>
    /// <returns>The recorded vulnerability information, or <c>null</c> when none is recorded.</returns>
    public VulnerabilityInfo? GetVulnerability(string fingerprintId)
        => _vulnerabilities.TryGetValue(fingerprintId, out var info) ? info : null;

    /// <summary>
    /// Checks if a fingerprint matches a known-vulnerable function.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to check.</param>
    /// <param name="functionOffset">Offset of the function, passed through to the result.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A match description when the single best lookup result has recorded vulnerability
    /// information; otherwise <c>null</c>. Only the top result is considered.
    /// </returns>
    public async Task<VulnerableFunctionMatch?> CheckVulnerableAsync(
        CodeFingerprint fingerprint,
        long functionOffset,
        CancellationToken cancellationToken = default)
    {
        var matches = await LookupAsync(fingerprint, VulnerableMatchThreshold, 1, cancellationToken)
            .ConfigureAwait(false);
        if (matches.IsEmpty)
        {
            return null;
        }

        var best = matches[0];
        var vulnInfo = GetVulnerability(best.Fingerprint.Id);
        if (vulnInfo is null)
        {
            return null;
        }

        // NOTE(review): FunctionName is passed as both the second and the sixth argument;
        // VulnerableFunctionMatch is declared elsewhere - confirm this matches its parameter list.
        return new VulnerableFunctionMatch(
            functionOffset,
            best.FunctionName,
            vulnInfo.VulnerabilityId,
            best.SourcePackage,
            vulnInfo.VulnerableVersions,
            best.FunctionName,
            best.Similarity,
            CorrelationEvidence.FingerprintMatch,
            vulnInfo.Severity);
    }

    /// <inheritdoc />
    public async Task ClearAsync(CancellationToken cancellationToken = default)
    {
        await _baseIndex.ClearAsync(cancellationToken).ConfigureAwait(false);
        _vulnerabilities.Clear();
    }

    /// <inheritdoc />
    public FingerprintIndexStatistics GetStatistics() => _baseIndex.GetStatistics();

    /// <summary>
    /// Vulnerability information associated with a fingerprint.
    /// </summary>
    /// <param name="VulnerabilityId">Identifier of the vulnerability.</param>
    /// <param name="VulnerableVersions">Affected versions, as supplied when the entry was added.</param>
    /// <param name="Severity">Severity of the vulnerability.</param>
    public sealed record VulnerabilityInfo(
        string VulnerabilityId,
        string VulnerableVersions,
        VulnerabilitySeverity Severity);
}