346 lines
12 KiB
C#
346 lines
12 KiB
C#
using System;
|
|
using System.Collections.Concurrent;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using StellaOps.Signals.Models;
|
|
using StellaOps.Signals.Options;
|
|
|
|
namespace StellaOps.Signals.Services;
|
|
|
|
/// <summary>
/// Builds execution evidence predicates from runtime trace data.
/// Produces deterministic, idempotent DSSE-ready predicates.
/// Sprint: SPRINT_20260219_013 (SEE-02)
/// </summary>
public interface IExecutionEvidenceBuilder
{
    /// <summary>
    /// Builds an execution evidence predicate from runtime trace events.
    /// </summary>
    /// <param name="request">Artifact, environment, observation window and runtime events to summarize.</param>
    /// <param name="cancellationToken">Token for cooperative cancellation.</param>
    /// <returns>
    /// The build result, or <c>null</c> when no evidence was attempted. In the default
    /// implementation in this file, <c>null</c> is returned when the pipeline is disabled or the
    /// request carries fewer events than the configured minimum threshold. A rate-limited request
    /// does NOT return <c>null</c>: it yields a result whose <c>RateLimited</c> flag is set and whose
    /// evidence ID and digests are empty strings.
    /// </returns>
    Task<ExecutionEvidenceResult?> BuildAsync(
        ExecutionEvidenceRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the last generated predicate for an (artifact, environment) pair, if any.
    /// </summary>
    /// <param name="artifactId">Artifact identifier the predicate was built for.</param>
    /// <param name="environmentId">Environment identifier the predicate was built for.</param>
    /// <returns>The most recently built predicate for the pair, or <c>null</c> when none is cached.</returns>
    ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId);
}
|
|
|
|
/// <summary>
/// Default implementation of execution evidence builder.
/// Canonicalizes event addresses, aggregates hot symbols, classifies syscall families and
/// hashes the result into a predicate plus digests.
/// </summary>
/// <remarks>
/// Rate-limit timestamps and the most recent predicate per (artifact, environment) pair are held
/// in in-memory dictionaries owned by this instance.
/// </remarks>
public sealed class ExecutionEvidenceBuilder : IExecutionEvidenceBuilder
{
    private readonly IOptionsMonitor<ExecutionEvidenceOptions> _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ExecutionEvidenceBuilder> _logger;

    // Rate limiting: tracks last generation time per (artifact_id, environment_id).
    private readonly ConcurrentDictionary<string, DateTimeOffset> _rateLimitTracker = new(StringComparer.Ordinal);

    // Cache of last generated predicates for retrieval.
    private readonly ConcurrentDictionary<string, ExecutionEvidencePredicate> _predicateCache = new(StringComparer.Ordinal);

    // Serializer settings used when hashing the predicate: snake_case names, nulls omitted, no indentation.
    private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        WriteIndented = false,
    };

    // Known syscall families for classification (looked up case-insensitively by event metadata key).
    private static readonly IReadOnlyDictionary<string, string> SyscallFamilyMap = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        ["socket"] = "network",
        ["connect"] = "network",
        ["bind"] = "network",
        ["listen"] = "network",
        ["accept"] = "network",
        ["send"] = "network",
        ["recv"] = "network",
        ["open"] = "filesystem",
        ["read"] = "filesystem",
        ["write"] = "filesystem",
        ["close"] = "filesystem",
        ["stat"] = "filesystem",
        ["unlink"] = "filesystem",
        ["fork"] = "process",
        ["exec"] = "process",
        ["clone"] = "process",
        ["wait"] = "process",
        ["mmap"] = "memory",
        ["mprotect"] = "memory",
        ["brk"] = "memory",
    };

    /// <summary>
    /// Creates the builder.
    /// </summary>
    /// <param name="options">Live options; the current value is read on every build.</param>
    /// <param name="timeProvider">Clock used for rate limiting and result timestamps.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public ExecutionEvidenceBuilder(
        IOptionsMonitor<ExecutionEvidenceOptions> options,
        TimeProvider timeProvider,
        ILogger<ExecutionEvidenceBuilder> logger)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Builds an execution evidence predicate from the request's runtime events.
    /// </summary>
    /// <param name="request">Artifact, environment, observation window and runtime events.</param>
    /// <param name="cancellationToken">
    /// Accepted for interface compatibility; this implementation completes synchronously and does
    /// not observe the token.
    /// </param>
    /// <returns>
    /// <c>null</c> when the pipeline is disabled or the request has fewer events than
    /// <c>MinEventsThreshold</c>; a result with <c>RateLimited = true</c> and empty ID/digests when a
    /// predicate for the same (artifact, environment) pair was generated within the rate-limit
    /// window; otherwise the result describing the newly built predicate.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    public Task<ExecutionEvidenceResult?> BuildAsync(
        ExecutionEvidenceRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var opts = _options.CurrentValue;

        if (!opts.Enabled)
        {
            _logger.LogDebug("Execution evidence pipeline is disabled");
            return Task.FromResult<ExecutionEvidenceResult?>(null);
        }

        if (request.Events.Count < opts.MinEventsThreshold)
        {
            _logger.LogDebug(
                "Below minimum event threshold ({Count} < {Threshold}) for {ArtifactId}",
                request.Events.Count, opts.MinEventsThreshold, request.ArtifactId);
            return Task.FromResult<ExecutionEvidenceResult?>(null);
        }

        var rateKey = BuildRateKey(request.ArtifactId, request.EnvironmentId);
        var now = _timeProvider.GetUtcNow();

        // NOTE(review): the check here and the tracker update further down are separate steps, so two
        // concurrent calls for the same pair can both pass the check.
        if (IsRateLimited(rateKey, now, opts.RateLimitWindowMinutes))
        {
            _logger.LogDebug(
                "Rate limited for {ArtifactId} in {EnvironmentId}",
                request.ArtifactId, request.EnvironmentId);

            return Task.FromResult<ExecutionEvidenceResult?>(new ExecutionEvidenceResult
            {
                EvidenceId = string.Empty,
                ArtifactId = request.ArtifactId,
                EnvironmentId = request.EnvironmentId,
                TraceDigest = string.Empty,
                PredicateDigest = string.Empty,
                CreatedAt = now,
                RateLimited = true,
            });
        }

        var predicate = BuildPredicate(request, now, opts);
        var predicateBytes = JsonSerializer.SerializeToUtf8Bytes(predicate, CanonicalJsonOptions);
        var predicateDigest = ComputeSha256(predicateBytes);

        // Evidence ID is "see-" plus the first 16 hex characters of the predicate digest.
        var evidenceId = $"see-{predicateDigest[..16]}";

        // Update rate limit tracker and cache.
        _rateLimitTracker[rateKey] = now;
        _predicateCache[rateKey] = predicate;

        _logger.LogInformation(
            "Built execution evidence {EvidenceId} for {ArtifactId} in {EnvironmentId} ({EventCount} events)",
            evidenceId, request.ArtifactId, request.EnvironmentId, request.Events.Count);

        var result = new ExecutionEvidenceResult
        {
            EvidenceId = evidenceId,
            ArtifactId = request.ArtifactId,
            EnvironmentId = request.EnvironmentId,
            TraceDigest = predicate.TraceDigest,
            PredicateDigest = predicateDigest,
            CreatedAt = now,
        };

        return Task.FromResult<ExecutionEvidenceResult?>(result);
    }

    /// <summary>
    /// Gets the last predicate built by this instance for an (artifact, environment) pair.
    /// </summary>
    /// <param name="artifactId">Artifact identifier.</param>
    /// <param name="environmentId">Environment identifier.</param>
    /// <returns>The cached predicate, or <c>null</c> when none has been built for the pair.</returns>
    public ExecutionEvidencePredicate? GetCachedPredicate(string artifactId, string environmentId)
    {
        var key = BuildRateKey(artifactId, environmentId);
        _predicateCache.TryGetValue(key, out var predicate);
        return predicate;
    }

    /// <summary>
    /// Assembles the predicate: filters events, canonicalizes addresses, aggregates hot symbols,
    /// classifies syscall families and computes the trace and inputs digests.
    /// </summary>
    /// <remarks>
    /// Address canonicalization is written back onto the event objects held by
    /// <paramref name="request"/>, so the caller's events are modified in place.
    /// </remarks>
    private ExecutionEvidencePredicate BuildPredicate(
        ExecutionEvidenceRequest request,
        DateTimeOffset timestamp,
        ExecutionEvidenceOptions opts)
    {
        // Only events that carry a symbol ID contribute to the predicate.
        var events = request.Events
            .Where(e => e is not null && !string.IsNullOrWhiteSpace(e.SymbolId))
            .ToList();

        // Canonicalize addresses (strip ASLR noise from LoaderBase).
        foreach (var evt in events)
        {
            if (!string.IsNullOrWhiteSpace(evt.LoaderBase))
            {
                evt.LoaderBase = "0x0";
            }

            if (!string.IsNullOrWhiteSpace(evt.SocketAddress))
            {
                evt.SocketAddress = CanonicalizeSocketAddress(evt.SocketAddress);
            }
        }

        // Group once per symbol; both the hot-symbol ranking and the distinct-symbol count reuse it.
        var symbolTotals = events
            .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
            .Select(g => new { Symbol = g.Key, HitCount = g.Sum(e => e.HitCount) })
            .ToList();

        // Aggregate hot symbols (sorted by hit count descending, then by name for determinism).
        var hotSymbols = symbolTotals
            .OrderByDescending(x => x.HitCount)
            .ThenBy(x => x.Symbol, StringComparer.Ordinal)
            .Take(opts.MaxHotSymbols)
            .Select(x => x.Symbol)
            .ToList();

        // Classify syscall families from process metadata.
        var syscallFamilies = ClassifySyscallFamilies(events);

        // Count unique call paths (approximate by distinct CodeId values).
        var uniqueCallPaths = events
            .Where(e => !string.IsNullOrWhiteSpace(e.CodeId))
            .Select(e => e.CodeId!)
            .Distinct(StringComparer.Ordinal)
            .Count();

        // Compute trace digest over canonical event representation.
        var traceDigest = ComputeTraceDigest(events);

        // Compute inputs digest for replay determinism.
        var inputsDigest = ComputeInputsDigest(request);

        var durationMs = (long)(request.ObservationEnd - request.ObservationStart).TotalMilliseconds;

        return new ExecutionEvidencePredicate
        {
            ArtifactId = request.ArtifactId,
            EnvironmentId = request.EnvironmentId,
            TraceSource = request.TraceSource,
            ObservationWindow = new ObservationWindow
            {
                Start = request.ObservationStart,
                End = request.ObservationEnd,
                // An end before the start is reported as a zero-length window.
                DurationMs = durationMs > 0 ? durationMs : 0,
            },
            TraceSummary = new TraceSummary
            {
                SyscallFamiliesObserved = syscallFamilies,
                HotSymbols = hotSymbols,
                // Number of distinct symbols observed (not capped by MaxHotSymbols).
                HotSymbolCount = symbolTotals.Count,
                UniqueCallPaths = uniqueCallPaths,
                AddressCanonicalized = true,
            },
            TraceDigest = $"sha256:{traceDigest}",
            Determinism = new DeterminismMetadata
            {
                InputsDigest = $"sha256:{inputsDigest}",
            },
            Timestamp = timestamp,
        };
    }

    /// <summary>
    /// Derives the ordinally sorted set of syscall families seen across the events: "network" for
    /// any socket address, "process" for any process name, plus families mapped from metadata keys.
    /// </summary>
    private static IReadOnlyList<string> ClassifySyscallFamilies(IReadOnlyList<RuntimeFactEvent> events)
    {
        var families = new SortedSet<string>(StringComparer.Ordinal);

        foreach (var evt in events)
        {
            if (!string.IsNullOrWhiteSpace(evt.SocketAddress))
            {
                families.Add("network");
            }

            if (!string.IsNullOrWhiteSpace(evt.ProcessName))
            {
                families.Add("process");
            }

            if (evt.Metadata is not null)
            {
                foreach (var key in evt.Metadata.Keys)
                {
                    if (SyscallFamilyMap.TryGetValue(key, out var family))
                    {
                        families.Add(family);
                    }
                }
            }
        }

        // Always include process if we have events (something executed).
        if (events.Count > 0 && families.Count == 0)
        {
            families.Add("process");
        }

        return families.ToList().AsReadOnly();
    }

    /// <summary>
    /// Hashes the canonical trace form: one "symbol:totalHits\n" line per symbol, in ordinal symbol order.
    /// </summary>
    private static string ComputeTraceDigest(IReadOnlyList<RuntimeFactEvent> events)
    {
        // Numbers are formatted with the invariant culture so the digest does not depend on the
        // host's current culture (e.g. a culture-specific negative sign).
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Canonical representation: sorted symbol IDs with hit counts.
        var sb = new StringBuilder();
        foreach (var group in events
            .GroupBy(e => e.SymbolId, StringComparer.Ordinal)
            .OrderBy(g => g.Key, StringComparer.Ordinal))
        {
            var totalHits = group.Sum(e => e.HitCount);
            sb.Append(group.Key);
            sb.Append(':');
            sb.Append(invariant, $"{totalHits}");
            sb.Append('\n');
        }

        return ComputeSha256(Encoding.UTF8.GetBytes(sb.ToString()));
    }

    /// <summary>
    /// Hashes "artifact|environment|traceSource|eventCount" for the request.
    /// The event count is the unfiltered number of events on the request.
    /// </summary>
    private static string ComputeInputsDigest(ExecutionEvidenceRequest request)
    {
        var sb = new StringBuilder();
        sb.Append(request.ArtifactId);
        sb.Append('|');
        sb.Append(request.EnvironmentId);
        sb.Append('|');
        sb.Append(request.TraceSource);
        sb.Append('|');
        sb.Append(request.Events.Count);
        return ComputeSha256(Encoding.UTF8.GetBytes(sb.ToString()));
    }

    /// <summary>
    /// Returns true when a predicate for <paramref name="rateKey"/> was recorded less than
    /// <paramref name="windowMinutes"/> minutes before <paramref name="now"/>.
    /// </summary>
    private bool IsRateLimited(string rateKey, DateTimeOffset now, int windowMinutes)
    {
        if (_rateLimitTracker.TryGetValue(rateKey, out var lastGeneration))
        {
            return (now - lastGeneration).TotalMinutes < windowMinutes;
        }

        return false;
    }

    /// <summary>
    /// Builds the dictionary key for an (artifact, environment) pair.
    /// </summary>
    /// <remarks>
    /// The artifact ID is length-prefixed so the boundary between the two parts is unambiguous even
    /// when an ID contains '|'; a plain "a|b" join would map ("a|b","c") and ("a","b|c") to one key.
    /// </remarks>
    private static string BuildRateKey(string artifactId, string environmentId)
    {
        var artifact = artifactId ?? string.Empty;
        return $"{artifact.Length}:{artifact}|{environmentId}";
    }

    /// <summary>
    /// Strips a trailing ":port" from a socket address, leaving IPv6 literals intact.
    /// </summary>
    /// <remarks>
    /// Applying the method to its own output for IPv4, host-name and IPv6 inputs returns the same
    /// value, which matters because canonicalized values are written back onto the events.
    /// </remarks>
    private static string CanonicalizeSocketAddress(string address)
    {
        // Strip port for privacy; keep protocol family indicator.
        var colonIndex = address.LastIndexOf(':');
        var closingBracket = address.LastIndexOf(']');

        if (closingBracket >= 0)
        {
            // Bracketed IPv6 ("[::1]:443"): only a colon after ']' introduces a port. Colons before it
            // belong to the address itself and must not be cut.
            return colonIndex > closingBracket ? address[..colonIndex] : address;
        }

        // A bare IPv6 literal ("::1", "fe80::1") carries no port; every colon is part of the address.
        if (System.Net.IPAddress.TryParse(address, out var parsed)
            && parsed.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6)
        {
            return address;
        }

        return colonIndex > 0 ? address[..colonIndex] : address;
    }

    /// <summary>
    /// Returns the SHA-256 of <paramref name="data"/> as a lowercase hex string.
    /// </summary>
    private static string ComputeSha256(byte[] data)
    {
        var hash = SHA256.HashData(data);
        return Convert.ToHexStringLower(hash);
    }
}
|