using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;

namespace StellaOps.Signals.Services;

/// <summary>
/// Builds execution evidence predicates from runtime trace data.
/// Produces deterministic, idempotent DSSE-ready predicates.
/// Sprint: SPRINT_20260219_013 (SEE-02)
/// </summary>
public interface IExecutionEvidenceBuilder
{
    /// <summary>
    /// Builds an execution evidence predicate from runtime trace events.
    /// </summary>
    /// <param name="request">Trace events plus the artifact/environment they were observed for.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// <c>null</c> when the pipeline is disabled or the request is below the minimum event
    /// threshold; a result with <c>RateLimited = true</c> (and empty ids/digests) when the
    /// (artifact, environment) pair is inside its rate-limit window; otherwise the result
    /// describing the freshly built predicate.
    /// </returns>
    Task<ExecutionEvidenceResult?> BuildAsync(
        ExecutionEvidenceRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the last generated predicate for an (artifact, environment) pair, if any.
    /// </summary>
    /// <param name="artifactId">Artifact identifier used when the predicate was built.</param>
    /// <param name="environmentId">Environment identifier used when the predicate was built.</param>
    /// <returns>The cached predicate, or <c>null</c> when none has been built for the pair.</returns>
    ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId);
}

/// <summary>
/// Default implementation of execution evidence builder.
/// Uses address canonicalization and hot-symbol aggregation from existing Signals infrastructure.
/// </summary>
public sealed class ExecutionEvidenceBuilder : IExecutionEvidenceBuilder
{
    private readonly IOptionsMonitor<ExecutionEvidenceOptions> _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ExecutionEvidenceBuilder> _logger;

    // Rate limiting: tracks last generation time per (artifact_id, environment_id).
    private readonly ConcurrentDictionary<string, DateTimeOffset> _rateLimitTracker = new(StringComparer.Ordinal);

    // Cache of last generated predicates for retrieval.
    private readonly ConcurrentDictionary<string, ExecutionEvidencePredicate> _predicateCache = new(StringComparer.Ordinal);

    // Serializer settings used for the bytes the predicate digest is computed over.
    private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        WriteIndented = false,
    };

    // Known syscall families for classification (lookup is case-insensitive).
    private static readonly IReadOnlyDictionary<string, string> SyscallFamilyMap =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            ["socket"] = "network",
            ["connect"] = "network",
            ["bind"] = "network",
            ["listen"] = "network",
            ["accept"] = "network",
            ["send"] = "network",
            ["recv"] = "network",
            ["open"] = "filesystem",
            ["read"] = "filesystem",
            ["write"] = "filesystem",
            ["close"] = "filesystem",
            ["stat"] = "filesystem",
            ["unlink"] = "filesystem",
            ["fork"] = "process",
            ["exec"] = "process",
            ["clone"] = "process",
            ["wait"] = "process",
            ["mmap"] = "memory",
            ["mprotect"] = "memory",
            ["brk"] = "memory",
        };

    /// <summary>
    /// Creates the builder.
    /// </summary>
    /// <param name="options">Options monitor; the current value is re-read on every build.</param>
    /// <param name="timeProvider">Clock used for rate limiting and predicate timestamps.</param>
    /// <param name="logger">Logger.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public ExecutionEvidenceBuilder(
        IOptionsMonitor<ExecutionEvidenceOptions> options,
        TimeProvider timeProvider,
        ILogger<ExecutionEvidenceBuilder> logger)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    public Task<ExecutionEvidenceResult?> BuildAsync(
        ExecutionEvidenceRequest request,
        CancellationToken cancellationToken = default)
    {
        // NOTE(review): the work below is fully synchronous and cancellationToken is not observed.
        ArgumentNullException.ThrowIfNull(request);

        var opts = _options.CurrentValue;

        if (!opts.Enabled)
        {
            _logger.LogDebug("Execution evidence pipeline is disabled");
            return Task.FromResult<ExecutionEvidenceResult?>(null);
        }

        // The threshold is applied to the raw event count, before BuildPredicate filters events.
        if (request.Events.Count < opts.MinEventsThreshold)
        {
            _logger.LogDebug(
                "Below minimum event threshold ({Count} < {Threshold}) for {ArtifactId}",
                request.Events.Count,
                opts.MinEventsThreshold,
                request.ArtifactId);
            return Task.FromResult<ExecutionEvidenceResult?>(null);
        }

        var rateKey = BuildRateKey(request.ArtifactId, request.EnvironmentId);
        var now = _timeProvider.GetUtcNow();

        // NOTE(review): this check and the tracker update further down are separate dictionary
        // operations, so two concurrent calls for the same key can both pass the check.
        if (IsRateLimited(rateKey, now, opts.RateLimitWindowMinutes))
        {
            _logger.LogDebug(
                "Rate limited for {ArtifactId} in {EnvironmentId}",
                request.ArtifactId,
                request.EnvironmentId);

            // Rate-limited callers get a marker result rather than null.
            return Task.FromResult<ExecutionEvidenceResult?>(new ExecutionEvidenceResult
            {
                EvidenceId = string.Empty,
                ArtifactId = request.ArtifactId,
                EnvironmentId = request.EnvironmentId,
                TraceDigest = string.Empty,
                PredicateDigest = string.Empty,
                CreatedAt = now,
                RateLimited = true,
            });
        }

        var predicate = BuildPredicate(request, now, opts);
        var predicateBytes = JsonSerializer.SerializeToUtf8Bytes(predicate, CanonicalJsonOptions);
        var predicateDigest = ComputeSha256(predicateBytes);

        // Evidence id is the first 16 hex characters of the predicate digest.
        var evidenceId = $"see-{predicateDigest[..16]}";

        // Update rate limit tracker and cache.
        _rateLimitTracker[rateKey] = now;
        _predicateCache[rateKey] = predicate;

        _logger.LogInformation(
            "Built execution evidence {EvidenceId} for {ArtifactId} in {EnvironmentId} ({EventCount} events)",
            evidenceId,
            request.ArtifactId,
            request.EnvironmentId,
            request.Events.Count);

        return Task.FromResult<ExecutionEvidenceResult?>(new ExecutionEvidenceResult
        {
            EvidenceId = evidenceId,
            ArtifactId = request.ArtifactId,
            EnvironmentId = request.EnvironmentId,
            TraceDigest = predicate.TraceDigest,
            PredicateDigest = predicateDigest,
            CreatedAt = now,
        });
    }

    /// <inheritdoc />
    public ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId)
    {
        var key = BuildRateKey(artifactId, environmentId);
        return _predicateCache.TryGetValue(key, out var predicate) ? predicate : null;
    }

    /// <summary>
    /// Assembles the predicate for a request: filters events, canonicalizes addresses,
    /// aggregates hot symbols, classifies syscall families and computes the digests.
    /// </summary>
    /// <param name="request">The evidence request.</param>
    /// <param name="timestamp">Value stored in the predicate's <c>Timestamp</c>.</param>
    /// <param name="opts">Options snapshot; only <c>MaxHotSymbols</c> is read here.</param>
    private ExecutionEvidencePredicate BuildPredicate(
        ExecutionEvidenceRequest request,
        DateTimeOffset timestamp,
        ExecutionEvidenceOptions opts)
    {
        // Only events that carry a symbol id contribute to the predicate.
        var events = request.Events
            .Where(e => e is not null && !string.IsNullOrWhiteSpace(e.SymbolId))
            .ToList();

        // Canonicalize addresses (strip ASLR noise from LoaderBase).
        // NOTE(review): this writes to the caller's event objects in place. The socket-address
        // canonicalization is written so that re-applying it to its own output is a no-op
        // for host:port and bracketed/bare IPv6 inputs.
        foreach (var evt in events)
        {
            if (!string.IsNullOrWhiteSpace(evt.LoaderBase))
            {
                evt.LoaderBase = "0x0";
            }

            if (!string.IsNullOrWhiteSpace(evt.SocketAddress))
            {
                evt.SocketAddress = CanonicalizeSocketAddress(evt.SocketAddress);
            }
        }

        // Aggregate hot symbols (sorted by hit count descending, then by name for determinism).
        var hotSymbols = events
            .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
            .Select(g => new { Symbol = g.Key, HitCount = g.Sum(e => e.HitCount) })
            .OrderByDescending(x => x.HitCount)
            .ThenBy(x => x.Symbol, StringComparer.Ordinal)
            .Take(opts.MaxHotSymbols)
            .Select(x => x.Symbol)
            .ToList();

        // Classify syscall families from process metadata.
        var syscallFamilies = ClassifySyscallFamilies(events);

        // Count unique call paths (approximate by distinct CodeId values).
        var uniqueCallPaths = events
            .Where(e => !string.IsNullOrWhiteSpace(e.CodeId))
            .Select(e => e.CodeId!)
            .Distinct(StringComparer.Ordinal)
            .Count();

        // Distinct symbols across all retained events (not capped by MaxHotSymbols).
        var distinctSymbolCount = events
            .Select(e => e.SymbolId)
            .Distinct(StringComparer.Ordinal)
            .Count();

        // Compute trace digest over canonical event representation.
        var traceDigest = ComputeTraceDigest(events);

        // Compute inputs digest for replay determinism.
        var inputsDigest = ComputeInputsDigest(request);

        // An End earlier than Start is reported as a zero-length window.
        var durationMs = (long)(request.ObservationEnd - request.ObservationStart).TotalMilliseconds;

        return new ExecutionEvidencePredicate
        {
            ArtifactId = request.ArtifactId,
            EnvironmentId = request.EnvironmentId,
            TraceSource = request.TraceSource,
            ObservationWindow = new ObservationWindow
            {
                Start = request.ObservationStart,
                End = request.ObservationEnd,
                DurationMs = Math.Max(0L, durationMs),
            },
            TraceSummary = new TraceSummary
            {
                SyscallFamiliesObserved = syscallFamilies,
                HotSymbols = hotSymbols,
                HotSymbolCount = distinctSymbolCount,
                UniqueCallPaths = uniqueCallPaths,
                AddressCanonicalized = true,
            },
            TraceDigest = $"sha256:{traceDigest}",
            Determinism = new DeterminismMetadata
            {
                InputsDigest = $"sha256:{inputsDigest}",
            },
            Timestamp = timestamp,
        };
    }

    /// <summary>
    /// Derives the ordinally sorted set of syscall families observed in the events:
    /// "network" when a socket address is present, "process" when a process name is present,
    /// plus any family mapped from a metadata key in <see cref="SyscallFamilyMap"/>.
    /// </summary>
    // NOTE(review): the element type's generic argument was missing from the reviewed text and
    // has been restored as RuntimeFactEvent — confirm against ExecutionEvidenceRequest.Events.
    private static IReadOnlyList<string> ClassifySyscallFamilies(IReadOnlyList<RuntimeFactEvent> events)
    {
        var families = new SortedSet<string>(StringComparer.Ordinal);

        foreach (var evt in events)
        {
            if (!string.IsNullOrWhiteSpace(evt.SocketAddress))
            {
                families.Add("network");
            }

            if (!string.IsNullOrWhiteSpace(evt.ProcessName))
            {
                families.Add("process");
            }

            if (evt.Metadata is null)
            {
                continue;
            }

            foreach (var key in evt.Metadata.Keys)
            {
                if (SyscallFamilyMap.TryGetValue(key, out var family))
                {
                    families.Add(family);
                }
            }
        }

        // Always include process if we have events (something executed).
        if (events.Count > 0 && families.Count == 0)
        {
            families.Add("process");
        }

        return families.ToList().AsReadOnly();
    }

    /// <summary>
    /// Hashes the canonical trace representation: one <c>symbol:totalHits\n</c> line per
    /// distinct symbol id, in ordinal symbol order. Returns lowercase hex SHA-256.
    /// </summary>
    private static string ComputeTraceDigest(IReadOnlyList<RuntimeFactEvent> events)
    {
        // Canonical representation: sorted symbol IDs with hit counts.
        var sb = new StringBuilder();
        var groups = events
            .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
            .OrderBy(g => g.Key, StringComparer.Ordinal);

        foreach (var group in groups)
        {
            var totalHits = group.Sum(e => e.HitCount);

            sb.Append(group.Key);
            sb.Append(':');
            // Invariant culture keeps the digest independent of the host's culture settings.
            sb.Append(CultureInfo.InvariantCulture, $"{totalHits}");
            sb.Append('\n');
        }

        return ComputeSha256(Encoding.UTF8.GetBytes(sb.ToString()));
    }

    /// <summary>
    /// Hashes <c>artifactId|environmentId|traceSource|eventCount</c> (raw, unfiltered event
    /// count). Returns lowercase hex SHA-256.
    /// </summary>
    private static string ComputeInputsDigest(ExecutionEvidenceRequest request)
    {
        var sb = new StringBuilder();
        sb.Append(request.ArtifactId);
        sb.Append('|');
        sb.Append(request.EnvironmentId);
        sb.Append('|');
        sb.Append(request.TraceSource);
        sb.Append('|');
        // Invariant culture keeps the digest independent of the host's culture settings.
        sb.Append(CultureInfo.InvariantCulture, $"{request.Events.Count}");

        return ComputeSha256(Encoding.UTF8.GetBytes(sb.ToString()));
    }

    /// <summary>
    /// True when a predicate was generated for <paramref name="rateKey"/> less than
    /// <paramref name="windowMinutes"/> minutes before <paramref name="now"/>.
    /// A key that has never been generated is not rate limited.
    /// </summary>
    private bool IsRateLimited(string rateKey, DateTimeOffset now, int windowMinutes) =>
        _rateLimitTracker.TryGetValue(rateKey, out var lastGeneration)
        && (now - lastGeneration).TotalMinutes < windowMinutes;

    /// <summary>Builds the shared tracker/cache key for an (artifact, environment) pair.</summary>
    private static string BuildRateKey(string artifactId, string environmentId)
        => $"{artifactId}|{environmentId}";

    /// <summary>
    /// Removes the trailing <c>:port</c> from a socket address while leaving IPv6 literals intact.
    /// </summary>
    /// <remarks>
    /// <list type="bullet">
    /// <item><c>host:port</c> and <c>scheme://host:port</c> are cut at the last colon.</item>
    /// <item><c>[v6]:port</c> and <c>[v6]</c> both yield <c>[v6]</c>.</item>
    /// <item>A bare IPv6 literal (e.g. <c>fe80::1</c>) is returned unchanged; its colons are
    /// part of the address, not a port separator.</item>
    /// </list>
    /// </remarks>
    private static string CanonicalizeSocketAddress(string address)
    {
        // Strip port for privacy; keep protocol family indicator.

        // Bracketed IPv6: anything after the closing bracket is the port.
        if (address.StartsWith('['))
        {
            var closingBracket = address.IndexOf(']');
            var bracketEndsHost = closingBracket > 0
                && (closingBracket == address.Length - 1 || address[closingBracket + 1] == ':');

            if (bracketEndsHost)
            {
                return address[..(closingBracket + 1)];
            }
        }

        // Bare IPv6 literal without a port: cutting at the last colon would truncate the address.
        if (IPAddress.TryParse(address, out var parsed)
            && parsed.AddressFamily == AddressFamily.InterNetworkV6)
        {
            return address;
        }

        var colonIndex = address.LastIndexOf(':');
        return colonIndex > 0 ? address[..colonIndex] : address;
    }

    /// <summary>Returns the SHA-256 of <paramref name="data"/> as lowercase hex.</summary>
    private static string ComputeSha256(byte[] data)
        => Convert.ToHexStringLower(SHA256.HashData(data));
}