Files
git.stella-ops.org/src/Scanner/__Libraries/StellaOps.Scanner.EntryTrace/Binary/IFingerprintIndex.cs
2026-01-04 21:48:13 +02:00

474 lines
16 KiB
C#

// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Collections.Concurrent;
using System.Collections.Immutable;
namespace StellaOps.Scanner.EntryTrace.Binary;
/// <summary>
/// Contract for a searchable store of code fingerprints.
/// </summary>
public interface IFingerprintIndex
{
    /// <summary>
    /// Number of fingerprints currently held by the index.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// The set of packages that have at least one indexed fingerprint.
    /// </summary>
    ImmutableHashSet<string> IndexedPackages { get; }

    /// <summary>
    /// Registers a fingerprint together with the package and function it came from.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to register.</param>
    /// <param name="sourcePackage">PURL of the package the function belongs to.</param>
    /// <param name="functionName">Name of the fingerprinted function.</param>
    /// <param name="sourceFile">Path of the source file, when known.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task that completes once the add has been processed.</returns>
    Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Registers a pre-built fingerprint match.
    /// </summary>
    /// <param name="match">Match record to store.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when the entry was stored; <c>false</c> when it was a duplicate.</returns>
    Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default);

    /// <summary>
    /// Finds the entry whose fingerprint is an exact match.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to search for.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The stored match, or <c>null</c> when nothing matches exactly.</returns>
    Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Finds entries matching the fingerprint using the implementation's default search settings.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to search for.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching entries; empty when there are none.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Finds entries matching the fingerprint with caller-supplied search settings.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to search for.</param>
    /// <param name="minSimilarity">Lowest similarity score a candidate may have to be returned.</param>
    /// <param name="maxResults">Upper bound on the number of returned matches.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching entries; empty when there are none.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Removes every entry from the index.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task ClearAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Produces a summary of the index contents.
    /// </summary>
    /// <returns>Aggregate counters and the last-update timestamp.</returns>
    FingerprintIndexStatistics GetStatistics();
}
/// <summary>
/// Aggregate counters describing the contents of a fingerprint index.
/// </summary>
/// <param name="TotalFingerprints">How many fingerprints the index holds.</param>
/// <param name="TotalPackages">How many distinct packages are represented.</param>
/// <param name="TotalVulnerabilities">How many vulnerability associations are recorded.</param>
/// <param name="IndexedAt">Timestamp of the most recent index update.</param>
public sealed record FingerprintIndexStatistics(
    int TotalFingerprints,
    int TotalPackages,
    int TotalVulnerabilities,
    DateTimeOffset IndexedAt);
/// <summary>
/// Result of a fingerprint lookup; also the unit of storage inside an index.
/// </summary>
/// <param name="Fingerprint">The matched fingerprint.</param>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="SourcePackage">PURL of the source package.</param>
/// <param name="SourceVersion">Version of the source package.</param>
/// <param name="SourceFile">Source file path.</param>
/// <param name="SourceLine">Source line number.</param>
/// <param name="VulnerabilityIds">Associated vulnerability IDs.</param>
/// <param name="Similarity">Similarity score (0.0-1.0).</param>
/// <param name="MatchedAt">When the match was found.</param>
public sealed record FingerprintMatch(
    CodeFingerprint Fingerprint,
    string FunctionName,
    string SourcePackage,
    string? SourceVersion,
    string? SourceFile,
    int? SourceLine,
    ImmutableArray<string> VulnerabilityIds,
    float Similarity,
    DateTimeOffset MatchedAt)
{
    // Similarity at or above this value is treated as an exact match.
    private const float ExactMatchThreshold = 0.999f;

    // Similarity at or above this value is treated as high confidence.
    private const float HighConfidenceThreshold = 0.95f;

    /// <summary>
    /// Whether this is an exact match (similarity of at least 0.999).
    /// </summary>
    public bool IsExactMatch => Similarity >= ExactMatchThreshold;

    /// <summary>
    /// Whether this is a high-confidence match (similarity of at least 0.95).
    /// </summary>
    public bool IsHighConfidence => Similarity >= HighConfidenceThreshold;

    /// <summary>
    /// Whether this match has associated vulnerabilities.
    /// </summary>
    /// <remarks>
    /// A default (uninitialized) <see cref="ImmutableArray{T}"/> is reported as having no
    /// vulnerabilities instead of throwing, which is what <c>IsEmpty</c> would do.
    /// </remarks>
    public bool HasVulnerabilities => !VulnerabilityIds.IsDefaultOrEmpty;
}
/// <summary>
/// In-memory fingerprint index for fast lookups.
/// </summary>
/// <remarks>
/// Entries are keyed by fingerprint id for exact lookups and additionally grouped per
/// fingerprint algorithm so similarity searches only compare like with like.
/// </remarks>
public sealed class InMemoryFingerprintIndex : IFingerprintIndex
{
    // Search settings used by the LookupAsync overload that takes no explicit options.
    private const float DefaultMinSimilarity = 0.95f;
    private const int DefaultMaxResults = 10;

    private readonly ConcurrentDictionary<string, FingerprintMatch> _exactIndex = new();

    // Each per-algorithm list is guarded by locking the list instance itself.
    private readonly ConcurrentDictionary<FingerprintAlgorithm, List<FingerprintMatch>> _algorithmIndex = new();

    private readonly HashSet<string> _packages = new();

    // Guards both _packages and _lastUpdated.
    private readonly object _packagesLock = new();

    private readonly TimeProvider _timeProvider;

    // DateTimeOffset is wider than a machine word, so it is only read or written
    // under _packagesLock to rule out torn values.
    private DateTimeOffset _lastUpdated;

    /// <summary>
    /// Creates a new in-memory fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public InMemoryFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _lastUpdated = _timeProvider.GetUtcNow();
    }

    /// <inheritdoc/>
    public int Count => _exactIndex.Count;

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages
    {
        get
        {
            lock (_packagesLock)
            {
                return _packages.ToImmutableHashSet();
            }
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="match"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(match);
        cancellationToken.ThrowIfCancellationRequested();

        // The fingerprint id is the uniqueness key; a second entry with the same id is rejected.
        if (!_exactIndex.TryAdd(match.Fingerprint.Id, match))
        {
            return Task.FromResult(false);
        }

        // Add to algorithm-specific index for similarity search.
        var algorithmList = _algorithmIndex.GetOrAdd(
            match.Fingerprint.Algorithm,
            static _ => new List<FingerprintMatch>());
        lock (algorithmList)
        {
            algorithmList.Add(match);
        }

        // Track the package and stamp the update under the same lock.
        lock (_packagesLock)
        {
            _packages.Add(match.SourcePackage);
            _lastUpdated = _timeProvider.GetUtcNow();
        }

        return Task.FromResult(true);
    }

    /// <inheritdoc/>
    /// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // On a miss TryGetValue leaves the out value null, which is the "not found" result.
        _exactIndex.TryGetValue(fingerprint.Id, out var match);
        return Task.FromResult<FingerprintMatch?>(match);
    }

    /// <inheritdoc/>
    /// <remarks>Uses a minimum similarity of 0.95 and returns at most 10 results.</remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => LookupAsync(fingerprint, DefaultMinSimilarity, DefaultMaxResults, cancellationToken);

    /// <inheritdoc/>
    /// <remarks>
    /// When an entry with the same fingerprint id exists it is returned on its own, as stored,
    /// without consulting <paramref name="minSimilarity"/> or <paramref name="maxResults"/>.
    /// Otherwise entries of the same algorithm are scored and returned best-first, each carrying
    /// its computed similarity.
    /// </remarks>
    /// <exception cref="OperationCanceledException">The token was cancelled before or during the scan.</exception>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // First try exact match.
        if (_exactIndex.TryGetValue(fingerprint.Id, out var exactMatch))
        {
            return Task.FromResult(ImmutableArray.Create(exactMatch));
        }

        // Only fingerprints produced by the same algorithm are compared.
        if (!_algorithmIndex.TryGetValue(fingerprint.Algorithm, out var algorithmList))
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        var candidates = new List<(FingerprintMatch Match, float Similarity)>();
        lock (algorithmList)
        {
            foreach (var entry in algorithmList)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var similarity = fingerprint.ComputeSimilarity(entry.Fingerprint);
                if (similarity >= minSimilarity)
                {
                    candidates.Add((entry, similarity));
                }
            }
        }

        // Stored entries keep the similarity they were added with; results carry the computed one.
        var result = candidates
            .OrderByDescending(c => c.Similarity)
            .Take(maxResults)
            .Select(c => c.Match with { Similarity = c.Similarity })
            .ToImmutableArray();

        return Task.FromResult(result);
    }

    /// <inheritdoc/>
    /// <remarks>Clearing counts as an update, so the last-updated timestamp is refreshed.</remarks>
    public Task ClearAsync(CancellationToken cancellationToken = default)
    {
        _exactIndex.Clear();
        _algorithmIndex.Clear();

        lock (_packagesLock)
        {
            _packages.Clear();
            _lastUpdated = _timeProvider.GetUtcNow();
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    public FingerprintIndexStatistics GetStatistics()
    {
        // _exactIndex is a concurrent collection, so no external lock is needed to enumerate it.
        // A default VulnerabilityIds array contributes zero instead of throwing.
        var vulnerabilityCount = _exactIndex.Values.Sum(
            m => m.VulnerabilityIds.IsDefault ? 0 : m.VulnerabilityIds.Length);

        int packageCount;
        DateTimeOffset lastUpdated;
        lock (_packagesLock)
        {
            // Read the count directly rather than copying the whole set just to count it.
            packageCount = _packages.Count;
            lastUpdated = _lastUpdated;
        }

        return new FingerprintIndexStatistics(
            TotalFingerprints: Count,
            TotalPackages: packageCount,
            TotalVulnerabilities: vulnerabilityCount,
            IndexedAt: lastUpdated);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The entry is stored with no version, no line number, no vulnerability ids and a similarity
    /// of 1.0. Whether it was actually added or rejected as a duplicate is not reported by this overload.
    /// </remarks>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray<string>.Empty,
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        // Task<bool> is a Task, so it is returned directly; a no-op ContinueWith would run on
        // the ambient scheduler and hide any fault of the underlying add.
        return AddAsync(match, cancellationToken);
    }
}
/// <summary>
/// Fingerprint index that additionally remembers which fingerprints belong to
/// known-vulnerable functions. Storage and lookup are delegated to an inner
/// <see cref="InMemoryFingerprintIndex"/>.
/// </summary>
public sealed class VulnerableFingerprintIndex : IFingerprintIndex
{
    /// <summary>
    /// Vulnerability details recorded for a single fingerprint.
    /// </summary>
    public sealed record VulnerabilityInfo(
        string VulnerabilityId,
        string VulnerableVersions,
        VulnerabilitySeverity Severity);

    // Keyed by fingerprint id.
    private readonly ConcurrentDictionary<string, VulnerabilityInfo> _vulnerabilities = new();
    private readonly TimeProvider _timeProvider;
    private readonly InMemoryFingerprintIndex _baseIndex;

    /// <summary>
    /// Initializes an empty vulnerability-aware index.
    /// </summary>
    /// <param name="timeProvider">Clock used for timestamps; the system clock when omitted.</param>
    public VulnerableFingerprintIndex(TimeProvider? timeProvider = null)
    {
        var clock = timeProvider ?? TimeProvider.System;
        _timeProvider = clock;
        _baseIndex = new InMemoryFingerprintIndex(clock);
    }

    /// <inheritdoc/>
    public int Count
    {
        get { return _baseIndex.Count; }
    }

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages
    {
        get { return _baseIndex.IndexedPackages; }
    }

    /// <summary>
    /// Stores a fingerprint and, when it is newly added, its vulnerability details.
    /// </summary>
    /// <param name="fingerprint">Fingerprint of the vulnerable function.</param>
    /// <param name="sourcePackage">Package the function belongs to.</param>
    /// <param name="functionName">Name of the function.</param>
    /// <param name="vulnerabilityId">Identifier of the vulnerability.</param>
    /// <param name="vulnerableVersions">Affected versions, stored as given.</param>
    /// <param name="severity">Severity of the vulnerability.</param>
    /// <param name="sourceFile">Source file path, when known.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// <c>true</c> when the fingerprint was added; <c>false</c> when the inner index rejected it,
    /// in which case no vulnerability details are recorded.
    /// </returns>
    public async Task<bool> AddVulnerableAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string vulnerabilityId,
        string vulnerableVersions,
        VulnerabilitySeverity severity,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var entry = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray.Create(vulnerabilityId),
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        var wasStored = await _baseIndex.AddAsync(entry, cancellationToken);
        if (!wasStored)
        {
            return false;
        }

        _vulnerabilities[fingerprint.Id] = new VulnerabilityInfo(
            vulnerabilityId,
            vulnerableVersions,
            severity);
        return true;
    }

    /// <inheritdoc/>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
    {
        return _baseIndex.AddAsync(match, cancellationToken);
    }

    /// <inheritdoc/>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        return _baseIndex.AddAsync(fingerprint, sourcePackage, functionName, sourceFile, cancellationToken);
    }

    /// <inheritdoc/>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        return _baseIndex.LookupExactAsync(fingerprint, cancellationToken);
    }

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        return _baseIndex.LookupAsync(fingerprint, cancellationToken);
    }

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
    {
        return _baseIndex.LookupAsync(fingerprint, minSimilarity, maxResults, cancellationToken);
    }

    /// <summary>
    /// Returns the vulnerability details recorded for a fingerprint id.
    /// </summary>
    /// <param name="fingerprintId">Id of the fingerprint.</param>
    /// <returns>The recorded details, or <c>null</c> when none exist for that id.</returns>
    public VulnerabilityInfo? GetVulnerability(string fingerprintId)
    {
        if (_vulnerabilities.TryGetValue(fingerprintId, out var details))
        {
            return details;
        }

        return null;
    }

    /// <summary>
    /// Determines whether a fingerprint corresponds to a known-vulnerable function.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to test.</param>
    /// <param name="functionOffset">Offset of the function, passed through to the result.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// A match description when the single best lookup result (minimum similarity 0.95) has
    /// vulnerability details recorded; otherwise <c>null</c>.
    /// </returns>
    public async Task<VulnerableFunctionMatch?> CheckVulnerableAsync(
        CodeFingerprint fingerprint,
        long functionOffset,
        CancellationToken cancellationToken = default)
    {
        var candidates = await LookupAsync(fingerprint, 0.95f, 1, cancellationToken);
        if (candidates.IsEmpty)
        {
            return null;
        }

        var best = candidates[0];
        if (GetVulnerability(best.Fingerprint.Id) is not { } details)
        {
            return null;
        }

        // NOTE(review): FunctionName is supplied for two constructor parameters; the
        // VulnerableFunctionMatch parameter list is not visible here, so confirm that is intended.
        return new VulnerableFunctionMatch(
            functionOffset,
            best.FunctionName,
            details.VulnerabilityId,
            best.SourcePackage,
            details.VulnerableVersions,
            best.FunctionName,
            best.Similarity,
            CorrelationEvidence.FingerprintMatch,
            details.Severity);
    }

    /// <inheritdoc/>
    public async Task ClearAsync(CancellationToken cancellationToken = default)
    {
        // Empty the inner index first, then drop the vulnerability details.
        await _baseIndex.ClearAsync(cancellationToken);
        _vulnerabilities.Clear();
    }

    /// <inheritdoc/>
    public FingerprintIndexStatistics GetStatistics()
    {
        return _baseIndex.GetStatistics();
    }
}