// Licensed to StellaOps under the AGPL-3.0-or-later license.

using System.Collections.Concurrent;
using System.Collections.Immutable;

namespace StellaOps.Scanner.EntryTrace.Binary;

/// <summary>
/// Interface for an index of fingerprints enabling fast lookup.
/// </summary>
public interface IFingerprintIndex
{
    /// <summary>
    /// Adds a fingerprint to the index.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to add.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Adds a fingerprint match to the index.
    /// </summary>
    /// <param name="match">The fingerprint match to add.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if added, false if duplicate.</returns>
    Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint and returns matching entries.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a fingerprint with additional options.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="minSimilarity">Minimum similarity score a candidate must reach to be returned.</param>
    /// <param name="maxResults">Maximum number of matches to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entries; empty when nothing matches.</returns>
    Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up an exact fingerprint match.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to look up.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The exact match, or <see langword="null"/> when there is none.</returns>
    Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the number of fingerprints in the index.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Gets all packages indexed.
    /// </summary>
    ImmutableHashSet<string> IndexedPackages { get; }

    /// <summary>
    /// Clears the index.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task ClearAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets statistics about the index.
    /// </summary>
    FingerprintIndexStatistics GetStatistics();
}

/// <summary>
/// Statistics about a fingerprint index.
/// </summary>
/// <param name="TotalFingerprints">Total fingerprints in the index.</param>
/// <param name="TotalPackages">Total unique packages indexed.</param>
/// <param name="TotalVulnerabilities">Total vulnerability associations.</param>
/// <param name="IndexedAt">When the index was last updated.</param>
public sealed record FingerprintIndexStatistics(
    int TotalFingerprints,
    int TotalPackages,
    int TotalVulnerabilities,
    DateTimeOffset IndexedAt);

/// <summary>
/// Result of a fingerprint lookup.
/// </summary>
/// <param name="Fingerprint">The matched fingerprint.</param>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="SourcePackage">PURL of the source package.</param>
/// <param name="SourceVersion">Version of the source package.</param>
/// <param name="SourceFile">Source file path.</param>
/// <param name="SourceLine">Source line number.</param>
/// <param name="VulnerabilityIds">Associated vulnerability IDs.</param>
/// <param name="Similarity">Similarity score (0.0-1.0).</param>
/// <param name="MatchedAt">When the match was found.</param>
public sealed record FingerprintMatch(
    CodeFingerprint Fingerprint,
    string FunctionName,
    string SourcePackage,
    string? SourceVersion,
    string? SourceFile,
    int? SourceLine,
    ImmutableArray<string> VulnerabilityIds,
    float Similarity,
    DateTimeOffset MatchedAt)
{
    /// <summary>
    /// Whether this is an exact match (similarity of at least 0.999).
    /// </summary>
    public bool IsExactMatch => Similarity >= 0.999f;

    /// <summary>
    /// Whether this is a high-confidence match (similarity of at least 0.95).
    /// </summary>
    public bool IsHighConfidence => Similarity >= 0.95f;

    /// <summary>
    /// Whether this match has associated vulnerabilities.
    /// </summary>
    /// <remarks>
    /// Uses <c>IsDefaultOrEmpty</c> so that a record built with a
    /// <see langword="default"/> array reports <see langword="false"/> instead of throwing.
    /// </remarks>
    public bool HasVulnerabilities => !VulnerabilityIds.IsDefaultOrEmpty;
}

/// <summary>
/// In-memory fingerprint index for fast lookups.
/// </summary>
public sealed class InMemoryFingerprintIndex : IFingerprintIndex
{
    // Exact lookups, keyed by fingerprint id.
    private readonly ConcurrentDictionary<string, FingerprintMatch> _exactIndex = new();

    // Per-algorithm buckets scanned by the similarity search. Each list is locked on itself.
    // NOTE(review): the key type was reconstructed as FingerprintAlgorithm, i.e. the type of
    // CodeFingerprint.Algorithm — confirm against the CodeFingerprint declaration.
    private readonly ConcurrentDictionary<FingerprintAlgorithm, List<FingerprintMatch>> _algorithmIndex = new();

    private readonly HashSet<string> _packages = new();

    // Guards _packages and _lastUpdated.
    private readonly object _packagesLock = new();

    private readonly TimeProvider _timeProvider;
    private DateTimeOffset _lastUpdated;

    /// <summary>
    /// Creates a new in-memory fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public InMemoryFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _lastUpdated = _timeProvider.GetUtcNow();
    }

    /// <inheritdoc/>
    public int Count => _exactIndex.Count;

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages
    {
        get
        {
            lock (_packagesLock)
            {
                return _packages.ToImmutableHashSet();
            }
        }
    }

    /// <inheritdoc/>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(match);
        cancellationToken.ThrowIfCancellationRequested();

        // The fingerprint id is the identity of an entry; a second add with the same id is a duplicate.
        if (!_exactIndex.TryAdd(match.Fingerprint.Id, match))
        {
            return Task.FromResult(false);
        }

        // Add to the algorithm-specific bucket used by the similarity search.
        var bucket = _algorithmIndex.GetOrAdd(
            match.Fingerprint.Algorithm,
            static _ => new List<FingerprintMatch>());

        lock (bucket)
        {
            bucket.Add(match);
        }

        // Track the package and the update timestamp together. DateTimeOffset is a multi-word
        // struct, so it is written under the same lock GetStatistics reads it with.
        var now = _timeProvider.GetUtcNow();
        lock (_packagesLock)
        {
            _packages.Add(match.SourcePackage);
            _lastUpdated = now;
        }

        return Task.FromResult(true);
    }

    /// <inheritdoc/>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);
        cancellationToken.ThrowIfCancellationRequested();

        return Task.FromResult<FingerprintMatch?>(
            _exactIndex.TryGetValue(fingerprint.Id, out var match) ? match : null);
    }

    /// <inheritdoc/>
    /// <remarks>Uses a minimum similarity of 0.95 and returns at most 10 results.</remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => LookupAsync(fingerprint, 0.95f, 10, cancellationToken);

    /// <inheritdoc/>
    /// <remarks>
    /// An entry with the same fingerprint id is returned on its own, as stored. Otherwise the
    /// entries indexed under the same algorithm are scored with <c>ComputeSimilarity</c>, filtered
    /// by <paramref name="minSimilarity"/>, ordered by descending score and truncated to
    /// <paramref name="maxResults"/>. Returned entries carry the computed score in
    /// <see cref="FingerprintMatch.Similarity"/>.
    /// </remarks>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);
        cancellationToken.ThrowIfCancellationRequested();

        // A non-positive limit yields nothing on the similarity path (Take), so honour it
        // on the exact-match fast path as well.
        if (maxResults <= 0)
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        // First try exact match.
        if (_exactIndex.TryGetValue(fingerprint.Id, out var exactMatch))
        {
            return Task.FromResult(ImmutableArray.Create(exactMatch));
        }

        // Search for similar fingerprints produced by the same algorithm.
        if (!_algorithmIndex.TryGetValue(fingerprint.Algorithm, out var bucket))
        {
            return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
        }

        var candidates = new List<(FingerprintMatch Match, float Similarity)>();

        lock (bucket)
        {
            foreach (var entry in bucket)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var similarity = fingerprint.ComputeSimilarity(entry.Fingerprint);
                if (similarity >= minSimilarity)
                {
                    candidates.Add((entry, similarity));
                }
            }
        }

        var result = candidates
            .OrderByDescending(static c => c.Similarity)
            .Take(maxResults)
            .Select(static c => c.Match with { Similarity = c.Similarity })
            .ToImmutableArray();

        return Task.FromResult(result);
    }

    /// <inheritdoc/>
    public Task ClearAsync(CancellationToken cancellationToken = default)
    {
        _exactIndex.Clear();
        _algorithmIndex.Clear();

        lock (_packagesLock)
        {
            _packages.Clear();
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    public FingerprintIndexStatistics GetStatistics()
    {
        // _exactIndex is a concurrent collection and is not guarded by _packagesLock, so it is
        // summed outside the lock. A default (uninitialized) id array counts as zero.
        var vulnerabilityCount = _exactIndex.Values.Sum(
            static m => m.VulnerabilityIds.IsDefault ? 0 : m.VulnerabilityIds.Length);

        int packageCount;
        DateTimeOffset indexedAt;
        lock (_packagesLock)
        {
            packageCount = _packages.Count;
            indexedAt = _lastUpdated;
        }

        return new FingerprintIndexStatistics(
            TotalFingerprints: Count,
            TotalPackages: packageCount,
            TotalVulnerabilities: vulnerabilityCount,
            IndexedAt: indexedAt);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The entry is stored with no version, no source line, no vulnerability ids and a
    /// similarity of 1.0. A duplicate fingerprint id is silently ignored.
    /// </remarks>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray<string>.Empty,
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        // Task<bool> is a Task; returning it directly keeps any failure observable instead of
        // masking it behind an always-successful continuation.
        return AddAsync(match, cancellationToken);
    }
}

/// <summary>
/// Vulnerability-aware fingerprint index that tracks known-vulnerable functions.
/// </summary>
public sealed class VulnerableFingerprintIndex : IFingerprintIndex
{
    // Thresholds used by CheckVulnerableAsync: best single match at high confidence.
    private const float VulnerableMatchMinSimilarity = 0.95f;
    private const int VulnerableMatchMaxResults = 1;

    private readonly InMemoryFingerprintIndex _baseIndex;
    private readonly TimeProvider _timeProvider;

    // Vulnerability details keyed by fingerprint id.
    private readonly ConcurrentDictionary<string, VulnerabilityInfo> _vulnerabilities = new();

    /// <summary>
    /// Creates a new vulnerability-aware fingerprint index.
    /// </summary>
    /// <param name="timeProvider">Optional time provider for deterministic timestamps.</param>
    public VulnerableFingerprintIndex(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        _baseIndex = new InMemoryFingerprintIndex(_timeProvider);
    }

    /// <inheritdoc/>
    public int Count => _baseIndex.Count;

    /// <inheritdoc/>
    public ImmutableHashSet<string> IndexedPackages => _baseIndex.IndexedPackages;

    /// <summary>
    /// Adds a fingerprint with associated vulnerability information.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to add.</param>
    /// <param name="sourcePackage">Source package PURL.</param>
    /// <param name="functionName">Function name.</param>
    /// <param name="vulnerabilityId">Identifier of the vulnerability to associate.</param>
    /// <param name="vulnerableVersions">Vulnerable versions, stored as given.</param>
    /// <param name="severity">Severity recorded with the vulnerability.</param>
    /// <param name="sourceFile">Source file path.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// True if the fingerprint was added; false if its id was already indexed, in which case
    /// no vulnerability information is recorded.
    /// </returns>
    public async Task<bool> AddVulnerableAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string vulnerabilityId,
        string vulnerableVersions,
        VulnerabilitySeverity severity,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
    {
        var match = new FingerprintMatch(
            Fingerprint: fingerprint,
            FunctionName: functionName,
            SourcePackage: sourcePackage,
            SourceVersion: null,
            SourceFile: sourceFile,
            SourceLine: null,
            VulnerabilityIds: ImmutableArray.Create(vulnerabilityId),
            Similarity: 1.0f,
            MatchedAt: _timeProvider.GetUtcNow());

        var added = await _baseIndex.AddAsync(match, cancellationToken).ConfigureAwait(false);
        if (added)
        {
            _vulnerabilities[fingerprint.Id] = new VulnerabilityInfo(
                vulnerabilityId,
                vulnerableVersions,
                severity);
        }

        return added;
    }

    /// <inheritdoc/>
    public Task<bool> AddAsync(FingerprintMatch match, CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(match, cancellationToken);

    /// <inheritdoc/>
    public Task AddAsync(
        CodeFingerprint fingerprint,
        string sourcePackage,
        string functionName,
        string? sourceFile = null,
        CancellationToken cancellationToken = default)
        => _baseIndex.AddAsync(fingerprint, sourcePackage, functionName, sourceFile, cancellationToken);

    /// <inheritdoc/>
    public Task<FingerprintMatch?> LookupExactAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupExactAsync(fingerprint, cancellationToken);

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, cancellationToken);

    /// <inheritdoc/>
    public Task<ImmutableArray<FingerprintMatch>> LookupAsync(
        CodeFingerprint fingerprint,
        float minSimilarity,
        int maxResults,
        CancellationToken cancellationToken = default)
        => _baseIndex.LookupAsync(fingerprint, minSimilarity, maxResults, cancellationToken);

    /// <summary>
    /// Looks up vulnerability information for a fingerprint.
    /// </summary>
    /// <param name="fingerprintId">Id of the fingerprint.</param>
    /// <returns>The recorded information, or <see langword="null"/> when none is recorded for that id.</returns>
    public VulnerabilityInfo? GetVulnerability(string fingerprintId)
        => _vulnerabilities.TryGetValue(fingerprintId, out var info) ? info : null;

    /// <summary>
    /// Checks if a fingerprint matches a known-vulnerable function.
    /// </summary>
    /// <param name="fingerprint">The fingerprint to check.</param>
    /// <param name="functionOffset">Offset passed through to the resulting match.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// A match built from the single best lookup result at a minimum similarity of 0.95, or
    /// <see langword="null"/> when nothing matches or the best match has no recorded vulnerability.
    /// </returns>
    public async Task<VulnerableFunctionMatch?> CheckVulnerableAsync(
        CodeFingerprint fingerprint,
        long functionOffset,
        CancellationToken cancellationToken = default)
    {
        var matches = await LookupAsync(
                fingerprint,
                VulnerableMatchMinSimilarity,
                VulnerableMatchMaxResults,
                cancellationToken)
            .ConfigureAwait(false);

        if (matches.IsEmpty)
        {
            return null;
        }

        var match = matches[0];
        var vulnInfo = GetVulnerability(match.Fingerprint.Id);
        if (vulnInfo is null)
        {
            return null;
        }

        // NOTE(review): match.FunctionName is passed for two positional arguments, exactly as
        // in the original — confirm against the VulnerableFunctionMatch constructor.
        return new VulnerableFunctionMatch(
            functionOffset,
            match.FunctionName,
            vulnInfo.VulnerabilityId,
            match.SourcePackage,
            vulnInfo.VulnerableVersions,
            match.FunctionName,
            match.Similarity,
            CorrelationEvidence.FingerprintMatch,
            vulnInfo.Severity);
    }

    /// <inheritdoc/>
    public async Task ClearAsync(CancellationToken cancellationToken = default)
    {
        await _baseIndex.ClearAsync(cancellationToken).ConfigureAwait(false);
        _vulnerabilities.Clear();
    }

    /// <inheritdoc/>
    public FingerprintIndexStatistics GetStatistics() => _baseIndex.GetStatistics();

    /// <summary>
    /// Vulnerability information associated with a fingerprint.
    /// </summary>
    /// <param name="VulnerabilityId">Identifier of the vulnerability.</param>
    /// <param name="VulnerableVersions">Vulnerable versions, as supplied when the entry was added.</param>
    /// <param name="Severity">Severity of the vulnerability.</param>
    public sealed record VulnerabilityInfo(
        string VulnerabilityId,
        string VulnerableVersions,
        VulnerabilitySeverity Severity);
}