// Licensed to StellaOps under the AGPL-3.0-or-later license.

using System.Collections.Concurrent;
|
|
using System.Collections.Immutable;
|
|
|
|
namespace StellaOps.Scanner.EntryTrace.Binary;
|
|
|
|
/// <summary>
/// Interface for an index of fingerprints enabling fast lookup.
/// </summary>
public interface IFingerprintIndex
{
    /// <summary>
    /// Adds a fingerprint to the index.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to add.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that completes when the add operation has finished.</returns>
    Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Adds a fingerprint match to the index.
    /// </summary>
    /// <param name="match">The fingerprint match to add.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if added, false if duplicate.</returns>
    Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint and returns matching entries.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint with additional options.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="minSimilarity">Minimum similarity score a candidate must reach to be returned.</param>
    /// <param name="maxResults">Maximum number of matches to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up an exact fingerprint match.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The indexed entry for the fingerprint, or <c>null</c> when there is none.</returns>
    Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the number of fingerprints in the index.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Gets all packages indexed.
    /// </summary>
    ImmutableHashSet<string> IndexedPackages { get; }

    /// <summary>
    /// Clears the index.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that completes when the index has been cleared.</returns>
    Task ClearAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets statistics about the index.
    /// </summary>
    /// <returns>A snapshot of the index statistics.</returns>
    FingerprintIndexStatistics GetStatistics();
}
|
|
|
|
/// <summary>
/// Statistics about a fingerprint index.
/// </summary>
/// <param name="TotalFingerprints">Total fingerprints in the index.</param>
/// <param name="TotalPackages">Total unique packages indexed.</param>
/// <param name="TotalVulnerabilities">Total vulnerability associations.</param>
/// <param name="IndexedAt">When the index was last updated.</param>
public sealed record FingerprintIndexStatistics(
    int TotalFingerprints,
    int TotalPackages,
    int TotalVulnerabilities,
    DateTimeOffset IndexedAt);
|
|
|
|
/// <summary>
/// Result of a fingerprint lookup.
/// </summary>
/// <param name="Fingerprint">The matched fingerprint.</param>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="SourcePackage">PURL of the source package.</param>
/// <param name="SourceVersion">Version of the source package.</param>
/// <param name="SourceFile">Source file path.</param>
/// <param name="SourceLine">Source line number.</param>
/// <param name="VulnerabilityIds">Associated vulnerability IDs.</param>
/// <param name="Similarity">Similarity score (0.0-1.0).</param>
/// <param name="MatchedAt">When the match was found.</param>
public sealed record FingerprintMatch(
    CodeFingerprint Fingerprint,
    string FunctionName,
    string SourcePackage,
    string? SourceVersion,
    string? SourceFile,
    int? SourceLine,
    ImmutableArray<string> VulnerabilityIds,
    float Similarity,
    DateTimeOffset MatchedAt)
{
    // Similarity at or above this value is treated as an exact match; the small
    // slack below 1.0 tolerates floating-point rounding in the similarity score.
    private const float ExactMatchThreshold = 0.999f;

    // Similarity at or above this value is treated as a high-confidence match.
    private const float HighConfidenceThreshold = 0.95f;

    /// <summary>
    /// Whether this is an exact match.
    /// </summary>
    public bool IsExactMatch => Similarity >= ExactMatchThreshold;

    /// <summary>
    /// Whether this is a high-confidence match.
    /// </summary>
    public bool IsHighConfidence => Similarity >= HighConfidenceThreshold;

    /// <summary>
    /// Whether this match has associated vulnerabilities.
    /// </summary>
    /// <remarks>
    /// A default (uninitialized) <see cref="VulnerabilityIds"/> array is treated as
    /// having no vulnerabilities instead of throwing.
    /// </remarks>
    public bool HasVulnerabilities => !VulnerabilityIds.IsDefaultOrEmpty;
}
|
|
|
|
/// <summary>
/// In-memory fingerprint index for fast lookups.
/// </summary>
/// <remarks>
/// Entries are keyed by fingerprint ID for exact lookups and are additionally grouped
/// by fingerprint algorithm, so a similarity search only compares the query against
/// entries produced by the same algorithm.
/// </remarks>
public sealed class InMemoryFingerprintIndex : IFingerprintIndex
{
    // Defaults applied by the two-argument LookupAsync overload.
    private const float DefaultMinSimilarity = 0.95f;
    private const int DefaultMaxResults = 10;

    private readonly ConcurrentDictionary<string, FingerprintMatch> _exactIndex = new();
    private readonly ConcurrentDictionary<FingerprintAlgorithm, List<FingerprintMatch>> _algorithmIndex = new();
    private readonly HashSet<string> _packages = new();

    // Guards _packages and _lastUpdated. DateTimeOffset is a multi-word struct, so
    // unsynchronized reads could otherwise observe a torn value.
    private readonly object _stateLock = new();
    private readonly TimeProvider _timeProvider;
    private DateTimeOffset _lastUpdated;

    /// <summary>
    /// Creates a new in-memory fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public InMemoryFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _lastUpdated = _timeProvider.GetUtcNow();
    }

    /// <inheritdoc/>
    public int Count => _exactIndex.Count;

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages
    {
        get
        {
            lock (_stateLock)
            {
                return _packages.ToImmutableHashSet();
            }
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="match"/> is <c>null</c>.</exception>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(match);
        cancellationToken.ThrowIfCancellationRequested();

        // The fingerprint ID is the uniqueness key: a second entry with the same ID is rejected.
        if (!_exactIndex.TryAdd(match.Fingerprint.Id, match))
        {
            return Task.FromResult(false);
        }

        // Add to the algorithm-specific list used for similarity search.
        var algorithmList = _algorithmIndex.GetOrAdd(
            match.Fingerprint.Algorithm,
            _ => new List<FingerprintMatch>());

        // List<T> is not thread-safe; the list instance itself serves as its lock.
        lock (algorithmList)
        {
            algorithmList.Add(match);
        }

        var now = _timeProvider.GetUtcNow();
        lock (_stateLock)
        {
            _packages.Add(match.SourcePackage);
            _lastUpdated = now;
        }

        return Task.FromResult(true);
    }

    /// <inheritdoc/>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        return Task.FromResult<FingerprintMatch?>(
            _exactIndex.TryGetValue(fingerprint.Id, out var match) ? match : null);
    }

    /// <inheritdoc/>
    /// <remarks>Uses a minimum similarity of 0.95 and returns at most 10 results.</remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => LookupAsync(fingerprint, DefaultMinSimilarity, DefaultMaxResults, cancellationToken);

    /// <inheritdoc/>
    /// <remarks>
    /// An entry whose ID equals the query's ID short-circuits the search: it is returned
    /// alone, as stored, without consulting <paramref name="minSimilarity"/>. Otherwise
    /// entries of the same algorithm are scored with <c>ComputeSimilarity</c>, filtered by
    /// <paramref name="minSimilarity"/>, ordered best-first and truncated to
    /// <paramref name="maxResults"/>; each returned entry carries the computed similarity.
    /// A non-positive <paramref name="maxResults"/> always yields an empty result.
    /// </remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Keep the exact-match path consistent with the similarity path, where
        // Take(maxResults) already yields nothing for a non-positive limit.
        if (maxResults <= 0)
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        // First try exact match.
        if (_exactIndex.TryGetValue(fingerprint.Id, out var exactMatch))
        {
            return Task.FromResult(ImmutableArray.Create(exactMatch));
        }

        // Similarity search is restricted to entries of the same algorithm.
        if (!_algorithmIndex.TryGetValue(fingerprint.Algorithm, out var algorithmList))
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        // Snapshot under the lock so the similarity computation does not block writers.
        FingerprintMatch[] candidates;
        lock (algorithmList)
        {
            candidates = algorithmList.ToArray();
        }

        var matches = new List<(FingerprintMatch Match, float Similarity)>();
        foreach (var candidate in candidates)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var similarity = fingerprint.ComputeSimilarity(candidate.Fingerprint);
            if (similarity >= minSimilarity)
            {
                matches.Add((candidate, similarity));
            }
        }

        var result = matches
            .OrderByDescending(m => m.Similarity)
            .Take(maxResults)
            .Select(m => m.Match with { Similarity = m.Similarity })
            .ToImmutableArray();

        return Task.FromResult(result);
    }

    /// <inheritdoc/>
    public Task ClearAsync(CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        _exactIndex.Clear();
        _algorithmIndex.Clear();

        // Clearing is a mutation of the index, so it refreshes the last-updated timestamp.
        var now = _timeProvider.GetUtcNow();
        lock (_stateLock)
        {
            _packages.Clear();
            _lastUpdated = now;
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The individual figures are read without a global lock, so under concurrent
    /// writes they may not describe one single instant.
    /// </remarks>
    public FingerprintIndexStatistics GetStatistics()
    {
        // ConcurrentDictionary enumeration is safe without external locking.
        var vulnerabilityCount = 0;
        foreach (var entry in _exactIndex)
        {
            var ids = entry.Value.VulnerabilityIds;

            // A default (uninitialized) ImmutableArray throws on Length; count it as zero.
            if (!ids.IsDefault)
            {
                vulnerabilityCount += ids.Length;
            }
        }

        int packageCount;
        DateTimeOffset lastUpdated;
        lock (_stateLock)
        {
            packageCount = _packages.Count;
            lastUpdated = _lastUpdated;
        }

        return new FingerprintIndexStatistics(
            TotalFingerprints: Count,
            TotalPackages: packageCount,
            TotalVulnerabilities: vulnerabilityCount,
            IndexedAt: lastUpdated);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The entry is stored with no version, no line number, no vulnerability IDs and a
    /// similarity of 1.0. A duplicate fingerprint ID is silently ignored.
    /// </remarks>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray<string>.Empty,
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        // Task<bool> is a Task: return it directly rather than scheduling an empty
        // continuation that could report cancellation after the entry was already stored.
        return AddAsync(match, cancellationToken);
    }
}
|
|
|
|
/// <summary>
/// Vulnerability-aware fingerprint index that tracks known-vulnerable functions.
/// </summary>
/// <remarks>
/// All <see cref="IFingerprintIndex"/> members delegate to an inner
/// <see cref="InMemoryFingerprintIndex"/>. Vulnerability details are recorded only by
/// <see cref="AddVulnerableAsync"/>; entries added through the plain <c>AddAsync</c>
/// overloads have no <see cref="VulnerabilityInfo"/> attached.
/// </remarks>
public sealed class VulnerableFingerprintIndex : IFingerprintIndex
{
    // Minimum similarity for a lookup result to be considered by CheckVulnerableAsync.
    private const float VulnerableMatchMinSimilarity = 0.95f;

    private readonly InMemoryFingerprintIndex _baseIndex;
    private readonly TimeProvider _timeProvider;

    // Vulnerability details keyed by fingerprint ID.
    private readonly ConcurrentDictionary<string, VulnerabilityInfo> _vulnerabilities = new();

    /// <summary>
    /// Creates a new vulnerability-aware fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public VulnerableFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _baseIndex = new InMemoryFingerprintIndex(_timeProvider);
    }

    /// <inheritdoc/>
    public int Count => _baseIndex.Count;

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages => _baseIndex.IndexedPackages;

    /// <summary>
    /// Adds a fingerprint with associated vulnerability information.
    /// </summary>
    /// <param name="fingerprint">The fingerprint of the vulnerable function.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="vulnerabilityId">Identifier of the vulnerability to associate.</param>
    /// <param name="vulnerableVersions">Affected-version specification stored with the vulnerability record.</param>
    /// <param name="severity">Severity stored with the vulnerability record.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// True if the fingerprint was added; false if its ID was already indexed, in which
    /// case no vulnerability information is recorded or changed.
    /// </returns>
    public async Task<bool> AddVulnerableAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string vulnerabilityId,
        string vulnerableVersions,
        VulnerabilitySeverity severity,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray.Create(vulnerabilityId),
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        var added = await _baseIndex.AddAsync(match, cancellationToken).ConfigureAwait(false);

        // Only the call that actually inserted the fingerprint records the details,
        // so an existing entry's vulnerability information is never overwritten.
        if (added)
        {
            _vulnerabilities[fingerprint.Id] = new VulnerabilityInfo(
                vulnerabilityId,
                vulnerableVersions,
                severity);
        }

        return added;
    }

    /// <inheritdoc/>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(match, cancellationToken);

    /// <inheritdoc/>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(fingerprint, sourcePackage, functionName, sourceFile, cancellationToken);

    /// <inheritdoc/>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupExactAsync(fingerprint, cancellationToken);

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, cancellationToken);

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, minSimilarity, maxResults, cancellationToken);

    /// <summary>
    /// Looks up vulnerability information for a fingerprint.
    /// </summary>
    /// <param name="fingerprintId">ID of the fingerprint to look up.</param>
    /// <returns>The recorded vulnerability information, or <c>null</c> when none is recorded.</returns>
    public VulnerabilityInfo? GetVulnerability(string fingerprintId)
        => _vulnerabilities.TryGetValue(fingerprintId, out var info) ? info : null;

    /// <summary>
    /// Checks if a fingerprint matches a known-vulnerable function.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to check.</param>
    /// <param name="functionOffset">Offset of the function, passed through to the result.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A match built from the single best lookup result, or <c>null</c> when no entry
    /// reaches a similarity of 0.95 or the best entry has no vulnerability information
    /// recorded.
    /// </returns>
    public async Task<VulnerableFunctionMatch?> CheckVulnerableAsync(
        CodeFingerprint fingerprint,
        long functionOffset,
        CancellationToken cancellationToken = default)
    {
        // Only the top-ranked candidate is considered.
        var matches = await LookupAsync(fingerprint, VulnerableMatchMinSimilarity, 1, cancellationToken)
            .ConfigureAwait(false);
        if (matches.IsEmpty)
        {
            return null;
        }

        var match = matches[0];
        var vulnInfo = GetVulnerability(match.Fingerprint.Id);
        if (vulnInfo is null)
        {
            return null;
        }

        // NOTE(review): match.FunctionName is supplied for two positional arguments;
        // confirm against VulnerableFunctionMatch's parameter list that this is intended.
        return new VulnerableFunctionMatch(
            functionOffset,
            match.FunctionName,
            vulnInfo.VulnerabilityId,
            match.SourcePackage,
            vulnInfo.VulnerableVersions,
            match.FunctionName,
            match.Similarity,
            CorrelationEvidence.FingerprintMatch,
            vulnInfo.Severity);
    }

    /// <inheritdoc/>
    public async Task ClearAsync(CancellationToken cancellationToken = default)
    {
        await _baseIndex.ClearAsync(cancellationToken).ConfigureAwait(false);
        _vulnerabilities.Clear();
    }

    /// <inheritdoc/>
    public FingerprintIndexStatistics GetStatistics() => _baseIndex.GetStatistics();

    /// <summary>
    /// Vulnerability information associated with a fingerprint.
    /// </summary>
    /// <param name="VulnerabilityId">Identifier of the vulnerability.</param>
    /// <param name="VulnerableVersions">Affected-version specification.</param>
    /// <param name="Severity">Severity of the vulnerability.</param>
    public sealed record VulnerabilityInfo(
        string VulnerabilityId,
        string VulnerableVersions,
        VulnerabilitySeverity Severity);
}
|