// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
// Sprint: SPRINT_20260112_004_BINIDX_b2r2_lowuir_perf_cache (BINIDX-CACHE-03)
// Task: Function-level cache for canonical IR and semantic fingerprints

using Microsoft.Extensions.Caching.Distributed;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using System.Collections.Concurrent;
|
|
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
|
|
namespace StellaOps.BinaryIndex.Cache;
|
|
|
|
/// <summary>
/// Settings for the function IR cache.
/// </summary>
public sealed class FunctionIrCacheOptions
{
    /// <summary>
    /// Name of the configuration section associated with these options.
    /// </summary>
    public const string SectionName = "StellaOps:BinaryIndex:FunctionIrCache";

    /// <summary>
    /// Prefix prepended to every function IR cache key (Valkey key prefix).
    /// </summary>
    public string KeyPrefix { get; init; } = "stellaops:binidx:funccache:";

    /// <summary>
    /// Time-to-live applied to cached function IR entries. Defaults to four hours.
    /// </summary>
    public TimeSpan CacheTtl { get; init; } = TimeSpan.FromHours(4);

    /// <summary>
    /// Upper bound on the time-to-live of any cache entry. Defaults to twenty-four hours.
    /// </summary>
    public TimeSpan MaxTtl { get; init; } = TimeSpan.FromHours(24);

    /// <summary>
    /// Turns the cache on or off. Defaults to <see langword="true"/>.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// B2R2 version string that is embedded in cache keys.
    /// </summary>
    public string B2R2Version { get; init; } = "0.9.1";

    /// <summary>
    /// Version of the normalization recipe, embedded in cache keys to keep them stable.
    /// </summary>
    public string NormalizationRecipeVersion { get; init; } = "v1";
}
|
|
|
|
/// <summary>
/// The components that together identify one cached function IR entry.
/// </summary>
/// <param name="Isa">ISA identifier (e.g., "intel-64").</param>
/// <param name="B2R2Version">B2R2 version string.</param>
/// <param name="NormalizationRecipe">Normalization recipe version.</param>
/// <param name="CanonicalIrHash">SHA-256 hash of the canonical IR bytes.</param>
public sealed record FunctionCacheKey(
    string Isa,
    string B2R2Version,
    string NormalizationRecipe,
    string CanonicalIrHash)
{
    /// <summary>
    /// Renders the key as a deterministic string: the four components in declaration
    /// order, separated by a colon.
    /// </summary>
    /// <returns>A string of the form <c>isa:b2r2Version:recipe:irHash</c>.</returns>
    public string ToKeyString()
    {
        // Every component is a plain string, so joining is culture-independent;
        // a null component is rendered as an empty segment.
        return string.Join(':', Isa, B2R2Version, NormalizationRecipe, CanonicalIrHash);
    }
}
|
|
|
|
/// <summary>
/// One cache entry: a function's semantic fingerprint together with the metadata
/// recorded when it was computed.
/// </summary>
/// <param name="FunctionAddress">Address of the original function.</param>
/// <param name="FunctionName">Name of the original function.</param>
/// <param name="SemanticFingerprint">The computed semantic fingerprint.</param>
/// <param name="IrStatementCount">How many IR statements the function has.</param>
/// <param name="BasicBlockCount">How many basic blocks the function has.</param>
/// <param name="ComputedAtUtc">Timestamp of the fingerprint computation (ISO-8601).</param>
/// <param name="B2R2Version">B2R2 version that was used.</param>
/// <param name="NormalizationRecipe">Normalization recipe that was used.</param>
public sealed record CachedFunctionFingerprint(
    ulong FunctionAddress,
    string FunctionName,
    string SemanticFingerprint,
    int IrStatementCount,
    int BasicBlockCount,
    string ComputedAtUtc,
    string B2R2Version,
    string NormalizationRecipe);
|
|
|
|
/// <summary>
/// Point-in-time statistics reported by the function IR cache.
/// </summary>
/// <param name="Hits">Number of cache hits recorded.</param>
/// <param name="Misses">Number of cache misses recorded.</param>
/// <param name="Evictions">Number of evictions recorded.</param>
/// <param name="HitRate">Hit rate value supplied by the producer of the statistics.</param>
/// <param name="IsEnabled">Whether the cache is enabled.</param>
/// <param name="KeyPrefix">Key prefix used for cache entries.</param>
/// <param name="CacheTtl">Configured time-to-live for cache entries.</param>
public sealed record FunctionIrCacheStats(
    long Hits,
    long Misses,
    long Evictions,
    double HitRate,
    bool IsEnabled,
    string KeyPrefix,
    TimeSpan CacheTtl);
|
|
|
|
/// <summary>
/// Service for caching function IR and semantic fingerprints.
/// Uses Valkey as hot cache (through <see cref="IDistributedCache"/>) with deterministic key generation.
/// </summary>
/// <remarks>
/// The cache is best-effort: failures of the underlying cache or of JSON (de)serialization are
/// logged as warnings and never propagate to the caller. Reads degrade to a miss; writes and
/// removals degrade to a no-op. Hit, miss, and eviction counters are maintained with
/// <see cref="Interlocked"/> operations.
/// </remarks>
public sealed class FunctionIrCacheService
{
    // Serializer settings shared by reads and writes: camelCase property names, compact output.
    private static readonly JsonSerializerOptions s_jsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false
    };

    private readonly IDistributedCache _cache;
    private readonly ILogger<FunctionIrCacheService> _logger;
    private readonly FunctionIrCacheOptions _options;

    // Not read by any member of this class as written; kept because it is part of the
    // constructor contract.
    private readonly TimeProvider _timeProvider;

    // Statistics counters; only touched through Interlocked.
    private long _hits;
    private long _misses;
    private long _evictions;

    /// <summary>
    /// Creates a new function IR cache service.
    /// </summary>
    /// <param name="cache">Distributed cache that stores the serialized entries.</param>
    /// <param name="logger">Logger for trace and warning output.</param>
    /// <param name="options">Cache options; when missing, default options are used.</param>
    /// <param name="timeProvider">Time provider.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="cache"/>, <paramref name="logger"/>, or <paramref name="timeProvider"/> is null.
    /// </exception>
    public FunctionIrCacheService(
        IDistributedCache cache,
        ILogger<FunctionIrCacheService> logger,
        IOptions<FunctionIrCacheOptions> options,
        TimeProvider timeProvider)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        // A null options wrapper (or a null Value) falls back to the built-in defaults.
        _options = options?.Value ?? new FunctionIrCacheOptions();
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <summary>
    /// Gets the current cache statistics.
    /// </summary>
    /// <returns>
    /// A snapshot of the counters plus the configured enabled flag, key prefix, and TTL.
    /// The hit rate is <c>hits / (hits + misses)</c>, or 0 when no lookup has been counted.
    /// </returns>
    public FunctionIrCacheStats GetStats()
    {
        var hits = Interlocked.Read(ref _hits);
        var misses = Interlocked.Read(ref _misses);
        var total = hits + misses;
        var hitRate = total > 0 ? (double)hits / total : 0.0;

        return new FunctionIrCacheStats(
            Hits: hits,
            Misses: misses,
            Evictions: Interlocked.Read(ref _evictions),
            HitRate: hitRate,
            IsEnabled: _options.Enabled,
            KeyPrefix: _options.KeyPrefix,
            CacheTtl: _options.CacheTtl);
    }

    /// <summary>
    /// Tries to get a cached function fingerprint.
    /// </summary>
    /// <param name="key">The cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// The cached fingerprint if found; <see langword="null"/> when the cache is disabled,
    /// the entry is absent or empty, the payload does not deserialize to a fingerprint, or
    /// the lookup fails.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// The cache is enabled and <paramref name="key"/> is null.
    /// </exception>
    public async Task<CachedFunctionFingerprint?> TryGetAsync(
        FunctionCacheKey key,
        CancellationToken ct = default)
    {
        // A disabled cache is a pure no-op: no validation, no statistics.
        if (!_options.Enabled)
        {
            return null;
        }

        ArgumentNullException.ThrowIfNull(key);

        var cacheKey = BuildCacheKey(key);

        try
        {
            var bytes = await _cache.GetAsync(cacheKey, ct).ConfigureAwait(false);

            if (bytes is null || bytes.Length == 0)
            {
                Interlocked.Increment(ref _misses);
                return null;
            }

            var result = JsonSerializer.Deserialize<CachedFunctionFingerprint>(bytes, s_jsonOptions);

            if (result is null)
            {
                // The payload was the JSON literal null. The caller receives nothing usable,
                // so this must be counted as a miss rather than a hit.
                Interlocked.Increment(ref _misses);
                _logger.LogWarning(
                    "Cached function fingerprint for key {Key} deserialized to null; treating as a miss",
                    cacheKey);
                return null;
            }

            Interlocked.Increment(ref _hits);

            _logger.LogTrace(
                "Cache hit for function {FunctionName} at {Address}",
                result.FunctionName,
                result.FunctionAddress);

            return result;
        }
        catch (Exception ex)
        {
            // Best-effort: any backend or deserialization failure is reported as a miss.
            _logger.LogWarning(ex, "Failed to get cached function fingerprint for key {Key}", cacheKey);
            Interlocked.Increment(ref _misses);
            return null;
        }
    }

    /// <summary>
    /// Sets a function fingerprint in the cache.
    /// </summary>
    /// <remarks>
    /// The entry expires after <see cref="FunctionIrCacheOptions.CacheTtl"/>, capped at
    /// <see cref="FunctionIrCacheOptions.MaxTtl"/> when that limit is positive. Failures while
    /// serializing or storing are logged and swallowed.
    /// </remarks>
    /// <param name="key">The cache key.</param>
    /// <param name="fingerprint">The fingerprint to cache.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="ArgumentNullException">
    /// The cache is enabled and <paramref name="key"/> or <paramref name="fingerprint"/> is null.
    /// </exception>
    public async Task SetAsync(
        FunctionCacheKey key,
        CachedFunctionFingerprint fingerprint,
        CancellationToken ct = default)
    {
        if (!_options.Enabled)
        {
            return;
        }

        ArgumentNullException.ThrowIfNull(key);

        // A null fingerprint would otherwise be stored as the JSON literal "null".
        ArgumentNullException.ThrowIfNull(fingerprint);

        var cacheKey = BuildCacheKey(key);

        try
        {
            var bytes = JsonSerializer.SerializeToUtf8Bytes(fingerprint, s_jsonOptions);
            var entryOptions = new DistributedCacheEntryOptions
            {
                AbsoluteExpirationRelativeToNow = ResolveEntryTtl()
            };

            await _cache.SetAsync(cacheKey, bytes, entryOptions, ct).ConfigureAwait(false);

            _logger.LogTrace(
                "Cached function {FunctionName} fingerprint with key {Key}",
                fingerprint.FunctionName,
                cacheKey);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to cache function fingerprint for key {Key}", cacheKey);
        }
    }

    /// <summary>
    /// Removes a cached function fingerprint.
    /// </summary>
    /// <remarks>
    /// The eviction counter is incremented whenever the removal call completes without
    /// throwing. Failures are logged and swallowed.
    /// </remarks>
    /// <param name="key">The cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="ArgumentNullException">
    /// The cache is enabled and <paramref name="key"/> is null.
    /// </exception>
    public async Task RemoveAsync(FunctionCacheKey key, CancellationToken ct = default)
    {
        if (!_options.Enabled)
        {
            return;
        }

        ArgumentNullException.ThrowIfNull(key);

        var cacheKey = BuildCacheKey(key);

        try
        {
            await _cache.RemoveAsync(cacheKey, ct).ConfigureAwait(false);
            Interlocked.Increment(ref _evictions);

            _logger.LogTrace("Removed cached function fingerprint for key {Key}", cacheKey);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to remove cached function fingerprint for key {Key}", cacheKey);
        }
    }

    /// <summary>
    /// Computes a canonical IR hash from function bytes.
    /// </summary>
    /// <param name="irBytes">The canonical IR bytes.</param>
    /// <returns>Lowercase hex-encoded SHA-256 hash (64 characters).</returns>
    public static string ComputeCanonicalIrHash(ReadOnlySpan<byte> irBytes)
    {
        // A SHA-256 digest is 32 bytes; hash into a stack buffer to avoid a heap allocation.
        Span<byte> hashBytes = stackalloc byte[32];
        SHA256.HashData(irBytes, hashBytes);
        return Convert.ToHexString(hashBytes).ToLowerInvariant();
    }

    /// <summary>
    /// Creates a cache key for a function.
    /// </summary>
    /// <remarks>
    /// The B2R2 version and normalization recipe are taken from the configured options.
    /// </remarks>
    /// <param name="isa">ISA identifier.</param>
    /// <param name="canonicalIrBytes">The canonical IR bytes.</param>
    /// <returns>The cache key.</returns>
    public FunctionCacheKey CreateKey(string isa, ReadOnlySpan<byte> canonicalIrBytes)
    {
        var hash = ComputeCanonicalIrHash(canonicalIrBytes);
        return new FunctionCacheKey(
            Isa: isa,
            B2R2Version: _options.B2R2Version,
            NormalizationRecipe: _options.NormalizationRecipeVersion,
            CanonicalIrHash: hash);
    }

    // Full cache key: the configured prefix followed by the deterministic key string.
    private string BuildCacheKey(FunctionCacheKey key) =>
        _options.KeyPrefix + key.ToKeyString();

    // Entry lifetime: the configured TTL, capped at MaxTtl. A non-positive MaxTtl is treated
    // as "no cap" so that a missing or zeroed limit cannot invalidate every write.
    private TimeSpan ResolveEntryTtl()
    {
        var ttl = _options.CacheTtl;
        var ceiling = _options.MaxTtl;
        return ceiling > TimeSpan.Zero && ttl > ceiling ? ceiling : ttl;
    }
}
|