// File: git.stella-ops.org/src/Signals/StellaOps.Signals/Services/ExecutionEvidenceBuilder.cs
// 2026-02-19 22:10:54 +02:00
// 346 lines, 12 KiB, C#

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
namespace StellaOps.Signals.Services;
/// <summary>
/// Builds execution evidence predicates from runtime trace data.
/// Produces deterministic, idempotent DSSE-ready predicates.
/// Sprint: SPRINT_20260219_013 (SEE-02)
/// </summary>
public interface IExecutionEvidenceBuilder
{
/// <summary>
/// Builds an execution evidence predicate from the runtime trace events in
/// <paramref name="request"/>.
/// </summary>
/// <param name="request">Artifact, environment, observation window and trace events to summarize.</param>
/// <param name="cancellationToken">Token supplied by the caller to cancel the operation.</param>
/// <returns>
/// The evidence result. The default implementation (<c>ExecutionEvidenceBuilder</c>) returns
/// <c>null</c> when the pipeline is disabled or the request holds fewer events than the
/// configured minimum, and returns a result with <c>RateLimited = true</c> and empty
/// identifiers/digests when the (artifact, environment) pair is inside its rate-limit window.
/// </returns>
Task<ExecutionEvidenceResult?> BuildAsync(
ExecutionEvidenceRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the last generated predicate for an (artifact, environment) pair, if any.
/// </summary>
/// <param name="artifactId">Artifact identifier the predicate was built for.</param>
/// <param name="environmentId">Environment identifier the predicate was built for.</param>
/// <returns>The most recently built predicate for the pair, or <c>null</c> when none is cached.</returns>
ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId);
}
/// <summary>
/// Default implementation of execution evidence builder.
/// Uses address canonicalization and hot-symbol aggregation from existing Signals infrastructure.
/// </summary>
public sealed class ExecutionEvidenceBuilder : IExecutionEvidenceBuilder
{
// Collaborators supplied through the constructor.
private readonly IOptionsMonitor<ExecutionEvidenceOptions> _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<ExecutionEvidenceBuilder> _logger;

// Time of the last successful build per (artifact_id, environment_id) key; drives rate limiting.
private readonly ConcurrentDictionary<string, DateTimeOffset> _rateLimitTracker =
    new ConcurrentDictionary<string, DateTimeOffset>(StringComparer.Ordinal);

// Most recent predicate per (artifact_id, environment_id) key, served by GetCachedPredicate.
private readonly ConcurrentDictionary<string, ExecutionEvidencePredicate> _predicateCache =
    new ConcurrentDictionary<string, ExecutionEvidencePredicate>(StringComparer.Ordinal);

// Serializer settings used when hashing a predicate: snake_case names, nulls omitted, compact output.
private static readonly JsonSerializerOptions CanonicalJsonOptions = new JsonSerializerOptions
{
    WriteIndented = false,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
};

// Syscall name -> family label; matched case-insensitively against event metadata keys.
private static readonly IReadOnlyDictionary<string, string> SyscallFamilyMap =
    new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        // network
        { "socket", "network" },
        { "connect", "network" },
        { "bind", "network" },
        { "listen", "network" },
        { "accept", "network" },
        { "send", "network" },
        { "recv", "network" },
        // filesystem
        { "open", "filesystem" },
        { "read", "filesystem" },
        { "write", "filesystem" },
        { "close", "filesystem" },
        { "stat", "filesystem" },
        { "unlink", "filesystem" },
        // process
        { "fork", "process" },
        { "exec", "process" },
        { "clone", "process" },
        { "wait", "process" },
        // memory
        { "mmap", "memory" },
        { "mprotect", "memory" },
        { "brk", "memory" },
    };
/// <summary>
/// Creates the builder from its options monitor, clock and logger.
/// </summary>
/// <exception cref="ArgumentNullException">Any of the three arguments is <c>null</c>.</exception>
public ExecutionEvidenceBuilder(
    IOptionsMonitor<ExecutionEvidenceOptions> options,
    TimeProvider timeProvider,
    ILogger<ExecutionEvidenceBuilder> logger)
{
    // Validate in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(timeProvider);
    ArgumentNullException.ThrowIfNull(logger);

    _options = options;
    _timeProvider = timeProvider;
    _logger = logger;
}
/// <summary>
/// Builds an execution evidence predicate for the request, caches it, and returns its identifiers.
/// </summary>
/// <param name="request">Artifact, environment, observation window and trace events.</param>
/// <param name="cancellationToken">
/// Checked once on entry; the build itself runs synchronously and is not interrupted midway.
/// </param>
/// <returns>
/// <c>null</c> when the pipeline is disabled or the request has fewer events than
/// <c>MinEventsThreshold</c>; a result with <c>RateLimited = true</c> and empty ids/digests when
/// the (artifact, environment) pair is inside its rate-limit window; otherwise the result for the
/// freshly built predicate. The task is cancelled when cancellation was already requested.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
public Task<ExecutionEvidenceResult?> BuildAsync(
    ExecutionEvidenceRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    // Honor the token instead of silently ignoring it; surface it as a cancelled task
    // so awaiting callers observe an OperationCanceledException.
    if (cancellationToken.IsCancellationRequested)
    {
        return Task.FromCanceled<ExecutionEvidenceResult?>(cancellationToken);
    }

    var opts = _options.CurrentValue;
    if (!opts.Enabled)
    {
        _logger.LogDebug("Execution evidence pipeline is disabled");
        return Task.FromResult<ExecutionEvidenceResult?>(null);
    }

    if (request.Events.Count < opts.MinEventsThreshold)
    {
        _logger.LogDebug(
            "Below minimum event threshold ({Count} < {Threshold}) for {ArtifactId}",
            request.Events.Count, opts.MinEventsThreshold, request.ArtifactId);
        return Task.FromResult<ExecutionEvidenceResult?>(null);
    }

    var rateKey = BuildRateKey(request.ArtifactId, request.EnvironmentId);
    var now = _timeProvider.GetUtcNow();

    // NOTE(review): this check and the tracker update further down are separate dictionary
    // operations, so concurrent calls for the same pair can both pass the check.
    if (IsRateLimited(rateKey, now, opts.RateLimitWindowMinutes))
    {
        _logger.LogDebug(
            "Rate limited for {ArtifactId} in {EnvironmentId}",
            request.ArtifactId, request.EnvironmentId);

        // Rate-limited callers get a marker result rather than null.
        return Task.FromResult<ExecutionEvidenceResult?>(new ExecutionEvidenceResult
        {
            EvidenceId = string.Empty,
            ArtifactId = request.ArtifactId,
            EnvironmentId = request.EnvironmentId,
            TraceDigest = string.Empty,
            PredicateDigest = string.Empty,
            CreatedAt = now,
            RateLimited = true,
        });
    }

    var predicate = BuildPredicate(request, now, opts);
    var predicateBytes = JsonSerializer.SerializeToUtf8Bytes(predicate, CanonicalJsonOptions);
    var predicateDigest = ComputeSha256(predicateBytes);

    // The evidence id is the first 16 hex characters of the predicate digest.
    var evidenceId = $"see-{predicateDigest[..16]}";

    // Record the generation time and keep the predicate for GetCachedPredicate.
    _rateLimitTracker[rateKey] = now;
    _predicateCache[rateKey] = predicate;

    _logger.LogInformation(
        "Built execution evidence {EvidenceId} for {ArtifactId} in {EnvironmentId} ({EventCount} events)",
        evidenceId, request.ArtifactId, request.EnvironmentId, request.Events.Count);

    var result = new ExecutionEvidenceResult
    {
        EvidenceId = evidenceId,
        ArtifactId = request.ArtifactId,
        EnvironmentId = request.EnvironmentId,
        TraceDigest = predicate.TraceDigest,
        PredicateDigest = predicateDigest,
        CreatedAt = now,
    };
    return Task.FromResult<ExecutionEvidenceResult?>(result);
}
/// <summary>
/// Looks up the predicate most recently stored for the (artifact, environment) pair.
/// </summary>
/// <returns>The cached predicate, or <c>null</c> when nothing has been stored for the pair.</returns>
public ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId)
    => _predicateCache.TryGetValue(BuildRateKey(artifactId, environmentId), out var cached)
        ? cached
        : null;
/// <summary>
/// Assembles the predicate for one request: keeps events that carry a symbol id, canonicalizes
/// their addresses, and derives the trace summary and the two digests.
/// </summary>
/// <remarks>
/// Canonicalization assigns to <c>LoaderBase</c> and <c>SocketAddress</c> on the event objects
/// taken from <paramref name="request"/>, so the caller's event instances are modified.
/// </remarks>
/// <param name="request">Source of the ids, trace source, observation window and events.</param>
/// <param name="timestamp">Value stored as the predicate's <c>Timestamp</c>.</param>
/// <param name="opts">Supplies <c>MaxHotSymbols</c>, the cap on the hot-symbol list.</param>
private ExecutionEvidencePredicate BuildPredicate(
    ExecutionEvidenceRequest request,
    DateTimeOffset timestamp,
    ExecutionEvidenceOptions opts)
{
    // Events without a symbol id cannot contribute to the summary.
    var usableEvents = request.Events
        .Where(candidate => candidate is not null && !string.IsNullOrWhiteSpace(candidate.SymbolId))
        .ToList();

    foreach (var usable in usableEvents)
    {
        // Any recorded loader base is replaced by a fixed value (strips ASLR noise).
        if (!string.IsNullOrWhiteSpace(usable.LoaderBase))
        {
            usable.LoaderBase = "0x0";
        }

        if (!string.IsNullOrWhiteSpace(usable.SocketAddress))
        {
            usable.SocketAddress = CanonicalizeSocketAddress(usable.SocketAddress);
        }
    }

    // Rank symbols by total hits (highest first); ties are broken by ordinal name for determinism.
    var rankedSymbols = usableEvents
        .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
        .Select(g => (Symbol: g.Key, Hits: g.Sum(e => e.HitCount)))
        .OrderByDescending(entry => entry.Hits)
        .ThenBy(entry => entry.Symbol, StringComparer.Ordinal)
        .Take(opts.MaxHotSymbols)
        .Select(entry => entry.Symbol)
        .ToList();

    // Number of distinct symbols overall (not limited by MaxHotSymbols).
    var distinctSymbolCount = usableEvents
        .Select(e => e.SymbolId)
        .ToHashSet(StringComparer.Ordinal)
        .Count;

    // Call paths are approximated by the number of distinct non-blank CodeId values.
    var distinctCodeIds = usableEvents
        .Select(e => e.CodeId)
        .Where(codeId => !string.IsNullOrWhiteSpace(codeId))
        .Distinct(StringComparer.Ordinal)
        .Count();

    var summary = new TraceSummary
    {
        SyscallFamiliesObserved = ClassifySyscallFamilies(usableEvents),
        HotSymbols = rankedSymbols,
        HotSymbolCount = distinctSymbolCount,
        UniqueCallPaths = distinctCodeIds,
        AddressCanonicalized = true,
    };

    // A window whose end precedes its start is reported with a zero duration.
    var elapsed = request.ObservationEnd - request.ObservationStart;
    var window = new ObservationWindow
    {
        Start = request.ObservationStart,
        End = request.ObservationEnd,
        DurationMs = Math.Max(0L, (long)elapsed.TotalMilliseconds),
    };

    var traceDigestHex = ComputeTraceDigest(usableEvents);
    var inputsDigestHex = ComputeInputsDigest(request);

    return new ExecutionEvidencePredicate
    {
        ArtifactId = request.ArtifactId,
        EnvironmentId = request.EnvironmentId,
        TraceSource = request.TraceSource,
        ObservationWindow = window,
        TraceSummary = summary,
        TraceDigest = $"sha256:{traceDigestHex}",
        Determinism = new DeterminismMetadata
        {
            InputsDigest = $"sha256:{inputsDigestHex}",
        },
        Timestamp = timestamp,
    };
}
/// <summary>
/// Derives the set of syscall family labels observed across the events.
/// </summary>
/// <remarks>
/// An event adds "network" when it has a non-blank socket address, "process" when it has a
/// non-blank process name, and the mapped family for every metadata key found in
/// <c>SyscallFamilyMap</c>. When there are events but nothing matched, "process" is reported.
/// </remarks>
/// <returns>A read-only list of the labels in ordinal order, without duplicates.</returns>
private static IReadOnlyList<string> ClassifySyscallFamilies(IReadOnlyList<RuntimeFactEvent> events)
{
    // SortedSet gives both de-duplication and a stable ordinal ordering.
    var observed = new SortedSet<string>(StringComparer.Ordinal);

    for (var index = 0; index < events.Count; index++)
    {
        var current = events[index];

        if (!string.IsNullOrWhiteSpace(current.SocketAddress))
        {
            observed.Add("network");
        }

        if (!string.IsNullOrWhiteSpace(current.ProcessName))
        {
            observed.Add("process");
        }

        var metadata = current.Metadata;
        if (metadata is null)
        {
            continue;
        }

        foreach (var metadataKey in metadata.Keys)
        {
            if (SyscallFamilyMap.TryGetValue(metadataKey, out var mappedFamily))
            {
                observed.Add(mappedFamily);
            }
        }
    }

    // Events exist but none was classified: something still executed, so report "process".
    if (observed.Count == 0 && events.Count > 0)
    {
        observed.Add("process");
    }

    return new List<string>(observed).AsReadOnly();
}
/// <summary>
/// Hashes a canonical text form of the events: one <c>symbolId:totalHitCount</c> line per
/// distinct symbol id, in ordinal symbol order, each terminated by a line feed.
/// </summary>
/// <param name="events">Events to summarize; grouped by <c>SymbolId</c> with ordinal comparison.</param>
/// <returns>Lowercase hex SHA-256 of the UTF-8 encoded canonical text.</returns>
private static string ComputeTraceDigest(IReadOnlyList<RuntimeFactEvent> events)
{
    var canonical = new StringBuilder();

    var groupsInSymbolOrder = events
        .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
        .OrderBy(g => g.Key, StringComparer.Ordinal);

    foreach (var symbolGroup in groupsInSymbolOrder)
    {
        var totalHits = symbolGroup.Sum(e => e.HitCount);

        canonical.Append(symbolGroup.Key);
        canonical.Append(':');
        // Format with the invariant culture so the digest never depends on the thread's
        // current culture (e.g. a culture-specific negative sign).
        canonical.Append(System.Globalization.CultureInfo.InvariantCulture, $"{totalHits}");
        canonical.Append('\n');
    }

    return ComputeSha256(Encoding.UTF8.GetBytes(canonical.ToString()));
}
/// <summary>
/// Hashes the request's identifying inputs: artifact id, environment id, trace source and
/// event count, joined with '|'.
/// </summary>
/// <returns>Lowercase hex SHA-256 of the UTF-8 encoded joined text.</returns>
private static string ComputeInputsDigest(ExecutionEvidenceRequest request)
{
    // Only the number of events is included, not their content.
    var joinedInputs =
        $"{request.ArtifactId}|{request.EnvironmentId}|{request.TraceSource}|{request.Events.Count}";

    return ComputeSha256(Encoding.UTF8.GetBytes(joinedInputs));
}
/// <summary>
/// Tells whether a predicate was already generated for <paramref name="rateKey"/> less than
/// <paramref name="windowMinutes"/> minutes before <paramref name="now"/>.
/// </summary>
/// <returns><c>false</c> when the key has no recorded generation or the window has elapsed.</returns>
private bool IsRateLimited(string rateKey, DateTimeOffset now, int windowMinutes)
    => _rateLimitTracker.TryGetValue(rateKey, out var previousGeneration)
        && (now - previousGeneration).TotalMinutes < windowMinutes;
/// <summary>
/// Builds the dictionary key used by both the rate-limit tracker and the predicate cache
/// for an (artifact, environment) pair.
/// </summary>
/// <remarks>
/// The artifact id is length-prefixed so that ids containing '|' cannot produce the same key
/// for different pairs (a plain join maps both ("a|b", "c") and ("a", "b|c") to "a|b|c").
/// </remarks>
private static string BuildRateKey(string artifactId, string environmentId)
    => $"{artifactId?.Length ?? 0}:{artifactId}|{environmentId}";
/// <summary>
/// Removes a trailing <c>:port</c> from a socket address (for privacy) and keeps the rest.
/// </summary>
/// <remarks>
/// The text after the last ':' is only removed when it is a non-empty run of ASCII digits.
/// Addresses without a port, such as <c>[::1]</c> or <c>tcp://host</c>, are returned unchanged
/// instead of being cut at an unrelated colon.
/// NOTE(review): an unbracketed IPv6 address whose last group is all digits (e.g. <c>fe80::1</c>)
/// is indistinguishable from host:port here and still loses that group — confirm the formats
/// producers actually emit.
/// </remarks>
/// <param name="address">Socket address text; callers pass a non-blank value.</param>
/// <returns>The address without its numeric port suffix, or the input when no such suffix exists.</returns>
private static string CanonicalizeSocketAddress(string address)
{
    var colonIndex = address.LastIndexOf(':');
    if (colonIndex <= 0)
    {
        // No colon, or nothing in front of it: there is no host part to keep.
        return address;
    }

    var portCandidate = address.AsSpan(colonIndex + 1);
    if (portCandidate.IsEmpty)
    {
        return address;
    }

    foreach (var ch in portCandidate)
    {
        if (!char.IsAsciiDigit(ch))
        {
            // The last colon belongs to the address itself, not to a port separator.
            return address;
        }
    }

    return address[..colonIndex];
}
/// <summary>
/// Computes the SHA-256 hash of <paramref name="data"/>.
/// </summary>
/// <returns>The 64-character lowercase hexadecimal form of the hash.</returns>
private static string ComputeSha256(byte[] data)
    => Convert.ToHexStringLower(SHA256.HashData(data));
}