doctor enhancements, setup enhancements, ui functionality and design consolidation, test project fixes, product advisory attestation/rekor and delta verification enhancements

This commit is contained in:
master
2026-01-19 09:02:59 +02:00
parent 8c4bf54aed
commit 17419ba7c4
809 changed files with 170738 additions and 12244 deletions

View File

@@ -0,0 +1,344 @@
// -----------------------------------------------------------------------------
// CallNgramGenerator.cs
// Sprint: SPRINT_20260118_026_BinaryIndex_deltasig_enhancements
// Task: DS-ENH-003 - Implement call-ngrams fingerprinting
// Description: Generates call-ngram fingerprints for cross-compiler resilience
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.BinaryIndex.Semantic;
/// <summary>
/// Generates call-ngram fingerprints from lifted IR for cross-compiler resilient matching.
/// Call-ngrams capture function call sequences which are more stable across different
/// compilers and optimization levels than raw instruction sequences.
/// </summary>
/// <remarks>
/// A fingerprint hash is the SHA-256 of the sorted, de-duplicated set of n-grams built from the
/// normalized call targets of a function, for every size in <see cref="CallNgramOptions.NgramSizes"/>.
/// NOTE(review): <see cref="CallNgramOptions.MinCallCount"/> is not consulted by this class — confirm
/// whether short call sequences are meant to be rejected here.
/// </remarks>
public sealed class CallNgramGenerator : ICallNgramGenerator
{
    /// <summary>Separator placed between call targets inside one n-gram.</summary>
    private const string NgramSeparator = "->";

    /// <summary>Token used for calls whose target is null or empty.</summary>
    private const string IndirectTarget = "INDIRECT";

    private readonly CallNgramOptions _options;

    // NOTE(review): required by the constructor but never written to in this class.
    private readonly ILogger<CallNgramGenerator> _logger;

    /// <summary>
    /// Creates a new call-ngram generator.
    /// </summary>
    /// <param name="options">Generator options; default options are used when null or when its value is null.</param>
    /// <param name="logger">Logger instance; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public CallNgramGenerator(
        IOptions<CallNgramOptions> options,
        ILogger<CallNgramGenerator> logger)
    {
        _options = options?.Value ?? new CallNgramOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="function"/> is null.</exception>
    public CallNgramFingerprint Generate(LiftedFunction function)
    {
        ArgumentNullException.ThrowIfNull(function);

        // Call targets in block-address order, already normalized.
        var callSequence = ExtractCallSequence(function);
        if (callSequence.Count == 0)
        {
            return CallNgramFingerprint.Empty;
        }

        // The hash is computed over the union of n-grams of every configured size;
        // per-size counts keep duplicates so they reflect the raw window count.
        var allNgrams = new HashSet<string>();
        var ngramCounts = new Dictionary<int, int>();
        foreach (var n in _options.NgramSizes)
        {
            var ngrams = GenerateNgrams(callSequence, n);
            ngramCounts[n] = ngrams.Count;
            allNgrams.UnionWith(ngrams);
        }

        return new CallNgramFingerprint
        {
            Hash = ComputeFingerprintHash(allNgrams),
            CallCount = callSequence.Count,
            UniqueTargets = callSequence.Distinct().Count(),
            NgramCounts = ngramCounts,
            CallSequence = _options.IncludeSequence ? callSequence : null
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns 0.0 when either fingerprint is null, 1.0 when the hashes are equal, and otherwise
    /// the Jaccard similarity of the two n-gram sets. When either fingerprint carries no call
    /// sequence only the hash can be compared, so differing hashes yield 0.0.
    /// </remarks>
    public double ComputeSimilarity(CallNgramFingerprint a, CallNgramFingerprint b)
    {
        if (a is null || b is null)
        {
            return 0.0;
        }

        if (a.Hash == b.Hash)
        {
            return 1.0;
        }

        if (a.CallSequence is null || b.CallSequence is null)
        {
            // Hashes already differ and there is nothing finer-grained to compare.
            return 0.0;
        }

        // Jaccard similarity on call n-grams: |A ∩ B| / |A ∪ B|.
        var ngramsA = GenerateAllNgrams(a.CallSequence);
        var ngramsB = GenerateAllNgrams(b.CallSequence);
        var intersection = ngramsA.Count(ngramsB.Contains);
        var union = ngramsA.Count + ngramsB.Count - intersection;

        return union == 0 ? 0.0 : (double)intersection / union;
    }

    /// <summary>
    /// Collects the normalized targets of all call statements, visiting basic blocks in
    /// ascending address order and statements in their stored order. Targets that normalize
    /// to an empty string are skipped.
    /// </summary>
    private static IReadOnlyList<string> ExtractCallSequence(LiftedFunction function)
    {
        var calls = new List<string>();
        foreach (var block in function.BasicBlocks.OrderBy(b => b.Address))
        {
            foreach (var call in block.Statements.OfType<CallStatement>())
            {
                var target = NormalizeCallTarget(call.Target);
                if (!string.IsNullOrEmpty(target))
                {
                    calls.Add(target);
                }
            }
        }

        return calls;
    }

    /// <summary>
    /// Normalizes a call target: null/empty becomes <c>INDIRECT</c>; otherwise everything from
    /// the first '@' onward is dropped, leading underscores are trimmed and the result is
    /// upper-cased. The result may be empty (e.g. the name starts with '@' or is all underscores).
    /// </summary>
    private static string NormalizeCallTarget(string? target)
    {
        if (string.IsNullOrEmpty(target))
        {
            return IndirectTarget;
        }

        // Cutting at the first '@' removes '@'-delimited suffixes in a single step, so both
        // "memcpy@GLIBC_2.14" and "memcpy@@GLIBC_2.14" reduce to "memcpy". (A separate "@@"
        // pass would be unreachable after this cut.)
        var at = target.IndexOf('@');
        if (at >= 0)
        {
            target = target[..at];
        }

        // Drop leading underscores so decorated and undecorated spellings compare equal.
        target = target.TrimStart('_');

        return target.ToUpperInvariant();
    }

    /// <summary>
    /// Builds every contiguous window of <paramref name="n"/> call targets, joined with "->".
    /// Returns an empty list when <paramref name="n"/> is not positive or exceeds the sequence length.
    /// </summary>
    private static IReadOnlyList<string> GenerateNgrams(IReadOnlyList<string> sequence, int n)
    {
        // A non-positive size is a configuration error; without this guard the loop below
        // would emit empty-string "n-grams" and pollute the fingerprint.
        if (n <= 0 || sequence.Count < n)
        {
            return [];
        }

        var ngrams = new List<string>(sequence.Count - n + 1);
        var window = new string[n];
        for (var start = 0; start + n <= sequence.Count; start++)
        {
            for (var k = 0; k < n; k++)
            {
                window[k] = sequence[start + k];
            }

            ngrams.Add(string.Join(NgramSeparator, window));
        }

        return ngrams;
    }

    /// <summary>
    /// Returns the set of distinct n-grams of <paramref name="sequence"/> across all configured sizes.
    /// </summary>
    private HashSet<string> GenerateAllNgrams(IReadOnlyList<string> sequence)
    {
        var allNgrams = new HashSet<string>();
        foreach (var n in _options.NgramSizes)
        {
            allNgrams.UnionWith(GenerateNgrams(sequence, n));
        }

        return allNgrams;
    }

    /// <summary>
    /// Hashes the n-grams: ordinal sort, join with '\n', UTF-8 encode, SHA-256, lowercase hex.
    /// </summary>
    private static string ComputeFingerprintHash(IEnumerable<string> ngrams)
    {
        // Sort for determinism — set enumeration order is not part of the contract.
        var sorted = ngrams.OrderBy(x => x, StringComparer.Ordinal);
        var bytes = Encoding.UTF8.GetBytes(string.Join("\n", sorted));
        var hash = SHA256.HashData(bytes);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
/// <summary>
/// Interface for call-ngram generation: builds fingerprints from lifted functions
/// and compares two fingerprints.
/// </summary>
public interface ICallNgramGenerator
{
/// <summary>
/// Generates a call-ngram fingerprint for a lifted function.
/// </summary>
/// <param name="function">Lifted function whose call statements are fingerprinted.</param>
/// <returns>The fingerprint computed for <paramref name="function"/>.</returns>
CallNgramFingerprint Generate(LiftedFunction function);
/// <summary>
/// Computes similarity between two fingerprints.
/// </summary>
/// <param name="a">First fingerprint.</param>
/// <param name="b">Second fingerprint.</param>
/// <returns>Similarity score; the scale is defined by the implementation.</returns>
double ComputeSimilarity(CallNgramFingerprint a, CallNgramFingerprint b);
}
/// <summary>
/// Call-ngram generator options.
/// </summary>
public sealed record CallNgramOptions
{
/// <summary>Configuration section name.</summary>
public const string SectionName = "BinaryIndex:CallNgram";
/// <summary>N-gram sizes to generate (default: 2, 3, 4). One n-gram set is built per listed size.</summary>
public int[] NgramSizes { get; init; } = [2, 3, 4];
/// <summary>
/// Whether to include the full call sequence in output (default: false).
/// Fingerprints produced without a sequence can only be compared by hash.
/// </summary>
public bool IncludeSequence { get; init; } = false;
/// <summary>Minimum call count to generate fingerprint (default: 2).</summary>
// NOTE(review): CallNgramGenerator in this file does not read this value — confirm where it is enforced.
public int MinCallCount { get; init; } = 2;
}
/// <summary>
/// Call-ngram fingerprint result.
/// </summary>
public sealed record CallNgramFingerprint
{
/// <summary>
/// Empty fingerprint for functions with no calls: an all-zero 64-character hash,
/// zero counts, an empty n-gram count map and no call sequence.
/// </summary>
public static readonly CallNgramFingerprint Empty = new()
{
Hash = "0000000000000000000000000000000000000000000000000000000000000000",
CallCount = 0,
UniqueTargets = 0,
NgramCounts = new Dictionary<int, int>()
};
/// <summary>SHA-256 hash of all n-grams, as a hex string.</summary>
public required string Hash { get; init; }
/// <summary>Total call count in function.</summary>
public int CallCount { get; init; }
/// <summary>Number of unique call targets.</summary>
public int UniqueTargets { get; init; }
/// <summary>N-gram count per size (key: n-gram size, value: number of n-grams of that size).</summary>
public required IReadOnlyDictionary<int, int> NgramCounts { get; init; }
/// <summary>Original call sequence (if included); null when the sequence was not retained.</summary>
public IReadOnlyList<string>? CallSequence { get; init; }
}
/// <summary>
/// Symbol signature (version 2) that carries a call-ngram hash and an SBOM bom-ref
/// alongside the canonical IR hash.
/// </summary>
public sealed record SymbolSignatureV2
{
    /// <summary>Function identifier in the form module:bom-ref:offset:canonical-IR-hash.</summary>
    public required string FuncId { get; init; }

    /// <summary>Human-readable function name, when known.</summary>
    public string? Name { get; init; }

    /// <summary>Demangled function name, when known.</summary>
    public string? Demangled { get; init; }

    /// <summary>Name of the module containing the function.</summary>
    public required string Module { get; init; }

    /// <summary>SBOM bom-ref linking the function to a component.</summary>
    public string? BomRef { get; init; }

    /// <summary>Offset of the function within its module.</summary>
    public ulong Offset { get; init; }

    /// <summary>Canonical IR hash (Weisfeiler-Lehman).</summary>
    public required string CanonicalIrHash { get; init; }

    /// <summary>Hash of the call-ngram fingerprint, when computed.</summary>
    public string? CallNgramHash { get; init; }

    /// <summary>Target architecture.</summary>
    public string? Architecture { get; init; }

    /// <summary>Name of the IR lifter that was used.</summary>
    public string? Lifter { get; init; }

    /// <summary>IR version, used for cache invalidation (default "v1.0.0").</summary>
    public string IrVersion { get; init; } = "v1.0.0";

    /// <summary>
    /// Builds a func_id in advisory format: <c>module:bomRef:0xOFFSET:canonicalHash</c>.
    /// </summary>
    /// <param name="module">Module name.</param>
    /// <param name="bomRef">SBOM bom-ref; the literal "unknown" is substituted when null.</param>
    /// <param name="offset">Function offset, rendered as upper-case hexadecimal after "0x".</param>
    /// <param name="canonicalHash">Canonical IR hash.</param>
    /// <returns>The colon-separated function identifier.</returns>
    public static string GenerateFuncId(string module, string? bomRef, ulong offset, string canonicalHash)
        => $"{module}:{bomRef ?? "unknown"}:0x{offset:X}:{canonicalHash}";
}
// Placeholder models
/// <summary>
/// Placeholder model of a lifted function, exposing only its basic blocks.
/// </summary>
public sealed record LiftedFunction
{
/// <summary>Basic blocks of the function; defaults to an empty list.</summary>
public IReadOnlyList<BasicBlock> BasicBlocks { get; init; } = [];
}
/// <summary>
/// Placeholder model of a basic block: an address plus its IR statements.
/// </summary>
public sealed record BasicBlock
{
/// <summary>Block address.</summary>
public ulong Address { get; init; }
/// <summary>Statements in the block; defaults to an empty list.</summary>
public IReadOnlyList<IrStatement> Statements { get; init; } = [];
}
/// <summary>
/// Placeholder base type for IR statements.
/// </summary>
public abstract record IrStatement;
/// <summary>
/// Placeholder IR statement representing a call.
/// </summary>
public sealed record CallStatement : IrStatement
{
/// <summary>Call target name; may be null.</summary>
public string? Target { get; init; }
}
/// <summary>
/// Placeholder options accessor exposing a single options value.
/// </summary>
/// <typeparam name="T">Options type (reference type).</typeparam>
public interface IOptions<T> where T : class
{
/// <summary>The options value.</summary>
T Value { get; }
}
/// <summary>
/// Placeholder logger interface; declares no members.
/// </summary>
/// <typeparam name="T">Category type.</typeparam>
public interface ILogger<T> { }

View File

@@ -0,0 +1,351 @@
// -----------------------------------------------------------------------------
// B2R2LifterPool.cs
// Sprint: SPRINT_20260118_027_BinaryIndex_b2r2_full_integration
// Task: B2R2-003 - Implement B2R2LifterPool
// Description: Resource-managed pool of B2R2 lifter instances
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using System.Threading.Channels;
namespace StellaOps.BinaryIndex.Semantic.Lifting;
/// <summary>
/// Pooled B2R2 lifter for resource management.
/// Bounds concurrent lifter instances to prevent memory exhaustion.
/// </summary>
public interface IB2R2LifterPool : IAsyncDisposable
{
/// <summary>
/// Acquires a pooled lifter instance.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Pooled lifter lease; disposing the lease hands the lifter back to the pool.
/// </returns>
ValueTask<PooledLifterLease> AcquireAsync(CancellationToken ct = default);
/// <summary>
/// Gets pool statistics.
/// </summary>
/// <returns>A snapshot of the pool counters.</returns>
B2R2PoolStats GetStats();
}
/// <summary>
/// Default B2R2 lifter pool implementation.
/// </summary>
/// <remarks>
/// Idle lifters wait in a bounded channel. A lifter is created on demand while fewer than
/// <see cref="B2R2PoolOptions.MaxPoolSize"/> are alive; otherwise callers wait for a return,
/// up to <see cref="B2R2PoolOptions.AcquireTimeout"/>. A lifter whose use count has reached
/// <see cref="B2R2PoolOptions.MaxUsesPerLifter"/> is replaced when it is returned.
/// </remarks>
public sealed class B2R2LifterPool : IB2R2LifterPool
{
    private readonly B2R2PoolOptions _options;

    // Idle lifters ready to be leased.
    private readonly Channel<PooledB2R2Lifter> _availableLifters;

    // Every live lifter, idle or leased. Successfully removing an entry is the permission to
    // dispose that lifter, which guarantees each one is disposed exactly once.
    private readonly ConcurrentDictionary<Guid, PooledB2R2Lifter> _allLifters = new();

    private int _liveCount;      // lifters currently alive (bounded by MaxPoolSize)
    private int _createdCount;   // lifters ever created (statistics only)
    private int _acquiredCount;
    private int _returnedCount;
    private int _disposed;       // 0 = active, 1 = disposed (int so it can be swapped atomically)

    /// <summary>
    /// Creates a new B2R2 lifter pool and pre-creates up to
    /// <see cref="B2R2PoolOptions.MinPoolSize"/> lifters (never more than the maximum pool size).
    /// </summary>
    /// <param name="options">Pool options; defaults are used when null.</param>
    public B2R2LifterPool(B2R2PoolOptions? options = null)
    {
        _options = options ?? new B2R2PoolOptions();
        _availableLifters = Channel.CreateBounded<PooledB2R2Lifter>(
            new BoundedChannelOptions(_options.MaxPoolSize)
            {
                FullMode = BoundedChannelFullMode.Wait
            });

        // Pre-warm pool. TryCreateLifter stops at MaxPoolSize, so a MinPoolSize larger than
        // the maximum cannot create lifters that would never fit into the channel.
        for (var i = 0; i < _options.MinPoolSize; i++)
        {
            var lifter = TryCreateLifter();
            if (lifter is null)
            {
                break;
            }

            _availableLifters.Writer.TryWrite(lifter);
        }
    }

    /// <inheritdoc />
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    /// <exception cref="TimeoutException">No lifter became available within the acquire timeout.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async ValueTask<PooledLifterLease> AcquireAsync(CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(Volatile.Read(ref _disposed) != 0, this);

        // Fast path: an idle lifter is waiting.
        if (_availableLifters.Reader.TryRead(out var lifter))
        {
            Interlocked.Increment(ref _acquiredCount);
            return new PooledLifterLease(lifter, this);
        }

        ct.ThrowIfCancellationRequested();

        // Grow the pool if a slot can be reserved atomically.
        var created = TryCreateLifter();
        if (created is not null)
        {
            Interlocked.Increment(ref _acquiredCount);
            return new PooledLifterLease(created, this);
        }

        // At capacity: wait for a lifter to be returned, bounded by the acquire timeout.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(_options.AcquireTimeout);
        try
        {
            lifter = await _availableLifters.Reader.ReadAsync(cts.Token);
            Interlocked.Increment(ref _acquiredCount);
            return new PooledLifterLease(lifter, this);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Only the internal timeout fired; caller cancellation propagates unchanged.
            throw new TimeoutException($"Failed to acquire lifter within {_options.AcquireTimeout}");
        }
        catch (ChannelClosedException)
        {
            // The pool was disposed while this caller was waiting.
            throw new ObjectDisposedException(nameof(B2R2LifterPool));
        }
    }

    /// <inheritdoc />
    public B2R2PoolStats GetStats()
    {
        return new B2R2PoolStats
        {
            TotalCreated = Volatile.Read(ref _createdCount),
            TotalAcquired = Volatile.Read(ref _acquiredCount),
            TotalReturned = Volatile.Read(ref _returnedCount),
            CurrentAvailable = _availableLifters.Reader.Count,
            MaxPoolSize = _options.MaxPoolSize
        };
    }

    /// <summary>
    /// Returns a lifter to the pool. After disposal the lifter is disposed instead.
    /// </summary>
    /// <param name="lifter">Lifter previously handed out by <see cref="AcquireAsync"/>.</param>
    internal void Return(PooledB2R2Lifter lifter)
    {
        if (Volatile.Read(ref _disposed) != 0)
        {
            Retire(lifter);
            return;
        }

        Interlocked.Increment(ref _returnedCount);

        if (_options.MaxUsesPerLifter > 0 && lifter.UseCount >= _options.MaxUsesPerLifter)
        {
            // Worn out: swap in a fresh instance so waiters still receive a lifter.
            Retire(lifter);
            var replacement = TryCreateLifter();
            if (replacement is null)
            {
                // Another caller claimed the freed slot; it will return its lifter later.
                return;
            }

            lifter = replacement;
        }
        else
        {
            // Reset lifter state if needed
            lifter.ResetState();
        }

        if (!_availableLifters.Writer.TryWrite(lifter))
        {
            // Channel is full or was completed by a concurrent DisposeAsync.
            Retire(lifter);
        }
    }

    /// <summary>
    /// Atomically reserves a live slot and creates a tracked lifter, or returns null when
    /// <see cref="B2R2PoolOptions.MaxPoolSize"/> lifters are already alive.
    /// </summary>
    private PooledB2R2Lifter? TryCreateLifter()
    {
        // Compare-and-swap loop: check-then-increment must be a single atomic step,
        // otherwise concurrent callers could all pass the check and overshoot the bound.
        int live;
        do
        {
            live = Volatile.Read(ref _liveCount);
            if (live >= _options.MaxPoolSize)
            {
                return null;
            }
        }
        while (Interlocked.CompareExchange(ref _liveCount, live + 1, live) != live);

        try
        {
            var lifter = new PooledB2R2Lifter();
            _allLifters[lifter.Id] = lifter;
            Interlocked.Increment(ref _createdCount);
            return lifter;
        }
        catch
        {
            // Give the reserved slot back if construction fails.
            Interlocked.Decrement(ref _liveCount);
            throw;
        }
    }

    /// <summary>
    /// Stops tracking a lifter and disposes it; a no-op when the lifter was already retired.
    /// </summary>
    private void Retire(PooledB2R2Lifter lifter)
    {
        if (_allLifters.TryRemove(lifter.Id, out _))
        {
            Interlocked.Decrement(ref _liveCount);
            lifter.Dispose();
        }
    }

    /// <inheritdoc />
    public ValueTask DisposeAsync()
    {
        // Atomic flip so concurrent or repeated calls run the teardown only once.
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return ValueTask.CompletedTask;
        }

        // Completing the writer wakes pending ReadAsync callers and rejects further returns.
        _availableLifters.Writer.TryComplete();

        // Drain idle entries; disposal happens below via the tracking dictionary so that
        // idle lifters are not disposed twice.
        while (_availableLifters.Reader.TryRead(out _))
        {
        }

        foreach (var lifter in _allLifters.Values)
        {
            Retire(lifter);
        }

        return ValueTask.CompletedTask;
    }
}
/// <summary>
/// A single lifter instance managed by the pool.
/// </summary>
public sealed class PooledB2R2Lifter : IDisposable
{
    /// <summary>Identifier assigned at construction, used for tracking.</summary>
    public Guid Id { get; } = Guid.NewGuid();

    /// <summary>UTC timestamp taken when this lifter was constructed.</summary>
    public DateTimeOffset CreatedAt { get; } = DateTimeOffset.UtcNow;

    /// <summary>How many times <see cref="LiftToIr"/> has been called on this instance.</summary>
    public int UseCount { get; private set; }

    // Internal B2R2 state would be here

    /// <summary>
    /// Lifts a binary to IR. Currently a stub: it records the use and returns a function
    /// named after the base address with no statements and no basic blocks.
    /// </summary>
    /// <param name="code">Machine code to lift (not read by the current stub).</param>
    /// <param name="arch">Target architecture, copied into the result.</param>
    /// <param name="baseAddress">Base address, copied into the result and used in the name.</param>
    /// <returns>The lifted function.</returns>
    public LiftedFunction LiftToIr(byte[] code, Architecture arch, ulong baseAddress)
    {
        UseCount += 1;

        // Would call B2R2 LowUIR lifting here
        var placeholder = new LiftedFunction
        {
            Name = $"func_{baseAddress:X}",
            Architecture = arch,
            BaseAddress = baseAddress,
            Statements = [],
            BasicBlocks = []
        };

        return placeholder;
    }

    /// <summary>
    /// Resets lifter state so the instance can be reused; currently nothing to reset.
    /// </summary>
    internal void ResetState()
    {
        // Clear any cached state
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Cleanup B2R2 resources
    }
}
/// <summary>
/// Lease on a pooled lifter; disposing the lease returns the lifter to its pool.
/// </summary>
/// <remarks>
/// This is a struct, so <c>default(PooledLifterLease)</c> has no pool and no lifter;
/// disposing such a value does nothing.
/// </remarks>
public readonly struct PooledLifterLease : IDisposable
{
    private readonly PooledB2R2Lifter _lifter;
    private readonly B2R2LifterPool _pool;

    /// <summary>Creates a lease binding <paramref name="lifter"/> to the pool it came from.</summary>
    internal PooledLifterLease(PooledB2R2Lifter lifter, B2R2LifterPool pool)
    {
        _lifter = lifter;
        _pool = pool;
    }

    /// <summary>Gets the lifter.</summary>
    public PooledB2R2Lifter Lifter => _lifter;

    /// <inheritdoc />
    public void Dispose()
    {
        // A default-initialized lease has a null pool; Dispose must not throw for it.
        _pool?.Return(_lifter);
    }
}
/// <summary>
/// B2R2 pool configuration.
/// </summary>
public sealed record B2R2PoolOptions
{
/// <summary>Minimum pool size: number of lifters the pool pre-creates at construction (default 2).</summary>
public int MinPoolSize { get; init; } = 2;
/// <summary>Maximum pool size; also the capacity of the pool's idle-lifter channel (default 8).</summary>
public int MaxPoolSize { get; init; } = 8;
/// <summary>Timeout for acquiring a lifter when none is immediately available (default 30 seconds).</summary>
public TimeSpan AcquireTimeout { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>Maximum uses before recycling a lifter (default 1000).</summary>
public int MaxUsesPerLifter { get; init; } = 1000;
}
/// <summary>
/// B2R2 pool statistics, as returned by the pool's GetStats method.
/// </summary>
public sealed record B2R2PoolStats
{
/// <summary>Total lifters created.</summary>
public int TotalCreated { get; init; }
/// <summary>Total acquisitions (leases handed out).</summary>
public int TotalAcquired { get; init; }
/// <summary>Total returns (leases handed back).</summary>
public int TotalReturned { get; init; }
/// <summary>Currently available: number of idle lifters waiting in the pool.</summary>
public int CurrentAvailable { get; init; }
/// <summary>Max pool size, copied from the pool options.</summary>
public int MaxPoolSize { get; init; }
}
/// <summary>
/// Lifted function result: name, architecture, base address and the lifted IR.
/// </summary>
public sealed record LiftedFunction
{
/// <summary>Function name (required).</summary>
public required string Name { get; init; }
/// <summary>Target architecture.</summary>
public Architecture Architecture { get; init; }
/// <summary>Base address.</summary>
public ulong BaseAddress { get; init; }
/// <summary>IR statements (required).</summary>
public required IReadOnlyList<IrStatement> Statements { get; init; }
/// <summary>Basic blocks (required).</summary>
public required IReadOnlyList<BasicBlock> BasicBlocks { get; init; }
}
/// <summary>
/// IR statement placeholder: abstract base record with no members of its own.
/// </summary>
public abstract record IrStatement;
/// <summary>
/// Basic block placeholder: an address plus the statements it contains.
/// </summary>
public sealed record BasicBlock
{
/// <summary>Block address.</summary>
public ulong Address { get; init; }
/// <summary>Statements in block; defaults to an empty list.</summary>
public IReadOnlyList<IrStatement> Statements { get; init; } = [];
}
/// <summary>
/// Target architecture. <see cref="X64"/> is the first member and therefore the default value.
/// </summary>
public enum Architecture
{
/// <summary>x86-64.</summary>
X64,
/// <summary>ARM64/AArch64.</summary>
ARM64,
/// <summary>MIPS32.</summary>
MIPS32,
/// <summary>MIPS64.</summary>
MIPS64,
/// <summary>RISC-V 64.</summary>
RISCV64
}