up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-14 15:50:38 +02:00
parent f1a39c4ce3
commit 233873f620
249 changed files with 29746 additions and 154 deletions

View File

@@ -0,0 +1,261 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
namespace StellaOps.Signals.Services;
/// <summary>
/// Ingests edge-bundle DSSE envelopes, attaches to graph_hash, enforces quarantine for revoked edges.
/// </summary>
public sealed class EdgeBundleIngestionService : IEdgeBundleIngestionService
{
private readonly ILogger<EdgeBundleIngestionService> _logger;
private readonly SignalsOptions _options;
// In-memory storage (in production, would use repository)
private readonly ConcurrentDictionary<string, List<EdgeBundleDocument>> _bundlesByGraphHash = new();
private readonly ConcurrentDictionary<string, HashSet<string>> _revokedEdgeKeys = new();
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true
};
public EdgeBundleIngestionService(
ILogger<EdgeBundleIngestionService> logger,
IOptions<SignalsOptions> options)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
}
public async Task<EdgeBundleIngestResponse> IngestAsync(
string tenantId,
Stream bundleStream,
Stream? dsseStream,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentNullException.ThrowIfNull(bundleStream);
// Parse the bundle JSON
using var bundleMs = new MemoryStream();
await bundleStream.CopyToAsync(bundleMs, cancellationToken).ConfigureAwait(false);
bundleMs.Position = 0;
var bundleJson = await JsonDocument.ParseAsync(bundleMs, cancellationToken: cancellationToken).ConfigureAwait(false);
var root = bundleJson.RootElement;
// Extract bundle fields
var bundleId = GetStringOrDefault(root, "bundleId", $"bundle:{Guid.NewGuid():N}");
var graphHash = GetStringOrDefault(root, "graphHash", string.Empty);
var bundleReason = GetStringOrDefault(root, "bundleReason", "Custom");
var customReason = GetStringOrDefault(root, "customReason", null);
var generatedAtStr = GetStringOrDefault(root, "generatedAt", null);
if (string.IsNullOrWhiteSpace(graphHash))
{
throw new InvalidOperationException("Edge bundle missing required 'graphHash' field");
}
var generatedAt = !string.IsNullOrWhiteSpace(generatedAtStr)
? DateTimeOffset.Parse(generatedAtStr)
: DateTimeOffset.UtcNow;
// Parse edges
var edges = new List<EdgeBundleEdgeDocument>();
var revokedCount = 0;
if (root.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
{
foreach (var edgeEl in edgesElement.EnumerateArray())
{
var edge = new EdgeBundleEdgeDocument
{
From = GetStringOrDefault(edgeEl, "from", string.Empty),
To = GetStringOrDefault(edgeEl, "to", string.Empty),
Kind = GetStringOrDefault(edgeEl, "kind", "call"),
Reason = GetStringOrDefault(edgeEl, "reason", "Unknown"),
Revoked = edgeEl.TryGetProperty("revoked", out var r) && r.GetBoolean(),
Confidence = edgeEl.TryGetProperty("confidence", out var c) ? c.GetDouble() : 0.5,
Purl = GetStringOrDefault(edgeEl, "purl", null),
SymbolDigest = GetStringOrDefault(edgeEl, "symbolDigest", null),
Evidence = GetStringOrDefault(edgeEl, "evidence", null)
};
edges.Add(edge);
if (edge.Revoked)
{
revokedCount++;
}
}
}
// Compute content hash
bundleMs.Position = 0;
var contentHash = ComputeSha256(bundleMs);
// Parse DSSE if provided
string? dsseDigest = null;
if (dsseStream is not null)
{
using var dsseMs = new MemoryStream();
await dsseStream.CopyToAsync(dsseMs, cancellationToken).ConfigureAwait(false);
dsseMs.Position = 0;
dsseDigest = $"sha256:{ComputeSha256(dsseMs)}";
}
// Build CAS URIs
var graphHashDigest = ExtractHashDigest(graphHash);
var casUri = $"cas://reachability/edges/{graphHashDigest}/{bundleId}";
var dsseCasUri = dsseStream is not null ? $"{casUri}.dsse" : null;
// Create document
var document = new EdgeBundleDocument
{
BundleId = bundleId,
GraphHash = graphHash,
TenantId = tenantId,
BundleReason = bundleReason,
CustomReason = customReason,
Edges = edges,
ContentHash = $"sha256:{contentHash}",
DsseDigest = dsseDigest,
CasUri = casUri,
DsseCasUri = dsseCasUri,
Verified = dsseStream is not null, // Simple verification - in production would verify signature
RevokedCount = revokedCount,
IngestedAt = DateTimeOffset.UtcNow,
GeneratedAt = generatedAt
};
// Store document
var storageKey = $"{tenantId}:{graphHash}";
_bundlesByGraphHash.AddOrUpdate(
storageKey,
_ => new List<EdgeBundleDocument> { document },
(_, list) =>
{
// Remove existing bundle with same ID
list.RemoveAll(b => b.BundleId == bundleId);
list.Add(document);
return list;
});
// Update revoked edge index for quarantine enforcement
if (revokedCount > 0)
{
var revokedEdges = edges.Where(e => e.Revoked).Select(e => $"{e.From}>{e.To}").ToHashSet();
_revokedEdgeKeys.AddOrUpdate(
storageKey,
_ => revokedEdges,
(_, existing) =>
{
foreach (var key in revokedEdges)
{
existing.Add(key);
}
return existing;
});
}
var quarantined = revokedCount > 0;
_logger.LogInformation(
"Ingested edge bundle {BundleId} for graph {GraphHash} with {EdgeCount} edges ({RevokedCount} revoked, quarantine={Quarantined})",
bundleId, graphHash, edges.Count, revokedCount, quarantined);
return new EdgeBundleIngestResponse(
bundleId,
graphHash,
bundleReason,
casUri,
dsseCasUri,
edges.Count,
revokedCount,
quarantined);
}
public Task<EdgeBundleDocument[]> GetBundlesForGraphAsync(
string tenantId,
string graphHash,
CancellationToken cancellationToken = default)
{
var key = $"{tenantId}:{graphHash}";
if (_bundlesByGraphHash.TryGetValue(key, out var bundles))
{
return Task.FromResult(bundles.ToArray());
}
return Task.FromResult(Array.Empty<EdgeBundleDocument>());
}
public Task<EdgeBundleEdgeDocument[]> GetRevokedEdgesAsync(
string tenantId,
string graphHash,
CancellationToken cancellationToken = default)
{
var key = $"{tenantId}:{graphHash}";
if (_bundlesByGraphHash.TryGetValue(key, out var bundles))
{
var revoked = bundles
.SelectMany(b => b.Edges)
.Where(e => e.Revoked)
.ToArray();
return Task.FromResult(revoked);
}
return Task.FromResult(Array.Empty<EdgeBundleEdgeDocument>());
}
public Task<bool> IsEdgeRevokedAsync(
string tenantId,
string graphHash,
string fromId,
string toId,
CancellationToken cancellationToken = default)
{
var key = $"{tenantId}:{graphHash}";
if (_revokedEdgeKeys.TryGetValue(key, out var revokedKeys))
{
var edgeKey = $"{fromId}>{toId}";
return Task.FromResult(revokedKeys.Contains(edgeKey));
}
return Task.FromResult(false);
}
private static string GetStringOrDefault(JsonElement element, string propertyName, string? defaultValue)
{
if (element.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String)
{
return prop.GetString() ?? defaultValue ?? string.Empty;
}
return defaultValue ?? string.Empty;
}
private static string ComputeSha256(Stream stream)
{
using var sha = SHA256.Create();
var hash = sha.ComputeHash(stream);
return Convert.ToHexString(hash).ToLowerInvariant();
}
private static string ExtractHashDigest(string prefixedHash)
{
var colonIndex = prefixedHash.IndexOf(':');
return colonIndex >= 0 ? prefixedHash[(colonIndex + 1)..] : prefixedHash;
}
}

View File

@@ -0,0 +1,66 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Signals.Models;
namespace StellaOps.Signals.Services;
/// <summary>
/// Response from edge bundle ingestion.
/// </summary>
public sealed record EdgeBundleIngestResponse(
string BundleId,
string GraphHash,
string BundleReason,
string CasUri,
string? DsseCasUri,
int EdgeCount,
int RevokedCount,
bool Quarantined);
/// <summary>
/// Service for ingesting edge-bundle DSSE envelopes.
/// </summary>
public interface IEdgeBundleIngestionService
{
/// <summary>
/// Ingests an edge bundle from a JSON stream.
/// </summary>
/// <param name="tenantId">Tenant identifier for isolation.</param>
/// <param name="bundleStream">Stream containing the edge-bundle JSON.</param>
/// <param name="dsseStream">Optional stream containing the DSSE envelope.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Ingest response with bundle details.</returns>
Task<EdgeBundleIngestResponse> IngestAsync(
string tenantId,
Stream bundleStream,
Stream? dsseStream,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets all edge bundles for a graph hash.
/// </summary>
Task<EdgeBundleDocument[]> GetBundlesForGraphAsync(
string tenantId,
string graphHash,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets revoked edges from all bundles for a graph.
/// Returns edges that should be quarantined from scoring.
/// </summary>
Task<EdgeBundleEdgeDocument[]> GetRevokedEdgesAsync(
string tenantId,
string graphHash,
CancellationToken cancellationToken = default);
/// <summary>
/// Checks if an edge is revoked for the given graph.
/// </summary>
Task<bool> IsEdgeRevokedAsync(
string tenantId,
string graphHash,
string fromId,
string toId,
CancellationToken cancellationToken = default);
}

View File

@@ -1,10 +1,66 @@
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Signals.Models;
namespace StellaOps.Signals.Services;
public interface IRuntimeFactsIngestionService
{
/// <summary>
/// Ingests runtime facts from a structured request.
/// </summary>
Task<RuntimeFactsIngestResponse> IngestAsync(RuntimeFactsIngestRequest request, CancellationToken cancellationToken);
/// <summary>
/// Ingests runtime facts from a raw NDJSON/gzip stream, stores in CAS, and processes.
/// </summary>
/// <param name="tenantId">Tenant identifier for tenant isolation.</param>
/// <param name="content">The NDJSON or gzip compressed stream of runtime fact events.</param>
/// <param name="contentType">Content type (application/x-ndjson or application/gzip).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Batch ingestion response with CAS reference.</returns>
Task<RuntimeFactsBatchIngestResponse> IngestBatchAsync(
string tenantId,
Stream content,
string contentType,
CancellationToken cancellationToken);
}
/// <summary>
/// Response from batch ingestion with CAS storage.
/// </summary>
public sealed record RuntimeFactsBatchIngestResponse
{
/// <summary>
/// CAS URI for the stored batch artifact.
/// </summary>
public required string CasUri { get; init; }
/// <summary>
/// BLAKE3 hash of the batch artifact.
/// </summary>
public required string BatchHash { get; init; }
/// <summary>
/// Number of fact documents processed.
/// </summary>
public int ProcessedCount { get; init; }
/// <summary>
/// Total events ingested.
/// </summary>
public int TotalEvents { get; init; }
/// <summary>
/// Total hit count across all events.
/// </summary>
public long TotalHitCount { get; init; }
/// <summary>
/// Subject keys affected.
/// </summary>
public IReadOnlyList<string> SubjectKeys { get; init; } = [];
/// <summary>
/// Timestamp of ingestion.
/// </summary>
public DateTimeOffset StoredAt { get; init; }
}

View File

@@ -1,12 +1,12 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using System.IO.Compression;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using StellaOps.Cryptography;
using StellaOps.Signals.Models;
using StellaOps.Signals.Persistence;
using StellaOps.Signals.Storage;
using StellaOps.Signals.Storage.Models;
namespace StellaOps.Signals.Services;
@@ -18,6 +18,8 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
private readonly IEventsPublisher eventsPublisher;
private readonly IReachabilityScoringService scoringService;
private readonly IRuntimeFactsProvenanceNormalizer provenanceNormalizer;
private readonly IRuntimeFactsArtifactStore? artifactStore;
private readonly ICryptoHash? cryptoHash;
private readonly ILogger<RuntimeFactsIngestionService> logger;
public RuntimeFactsIngestionService(
@@ -27,7 +29,9 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
IEventsPublisher eventsPublisher,
IReachabilityScoringService scoringService,
IRuntimeFactsProvenanceNormalizer provenanceNormalizer,
ILogger<RuntimeFactsIngestionService> logger)
ILogger<RuntimeFactsIngestionService> logger,
IRuntimeFactsArtifactStore? artifactStore = null,
ICryptoHash? cryptoHash = null)
{
this.factRepository = factRepository ?? throw new ArgumentNullException(nameof(factRepository));
this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
@@ -35,6 +39,8 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
this.eventsPublisher = eventsPublisher ?? throw new ArgumentNullException(nameof(eventsPublisher));
this.scoringService = scoringService ?? throw new ArgumentNullException(nameof(scoringService));
this.provenanceNormalizer = provenanceNormalizer ?? throw new ArgumentNullException(nameof(provenanceNormalizer));
this.artifactStore = artifactStore;
this.cryptoHash = cryptoHash;
this.logger = logger ?? NullLogger<RuntimeFactsIngestionService>.Instance;
}
@@ -96,6 +102,216 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
};
}
public async Task<RuntimeFactsBatchIngestResponse> IngestBatchAsync(
string tenantId,
Stream content,
string contentType,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentNullException.ThrowIfNull(content);
var storedAt = timeProvider.GetUtcNow();
var subjectKeys = new HashSet<string>(StringComparer.Ordinal);
var processedCount = 0;
var totalEvents = 0;
long totalHitCount = 0;
// Buffer the content for hashing and parsing
using var buffer = new MemoryStream();
await content.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
buffer.Position = 0;
// Compute BLAKE3 hash
string batchHash;
if (cryptoHash != null)
{
batchHash = "blake3:" + await cryptoHash.ComputeHashHexAsync(buffer, "BLAKE3-256", cancellationToken).ConfigureAwait(false);
buffer.Position = 0;
}
else
{
// Fallback: generate a deterministic hash based on content length and timestamp
batchHash = $"blake3:{storedAt.ToUnixTimeMilliseconds():x16}{buffer.Length:x16}";
}
// Store to CAS if artifact store is available
StoredRuntimeFactsArtifact? storedArtifact = null;
if (artifactStore != null)
{
var fileName = contentType.Contains("gzip", StringComparison.OrdinalIgnoreCase)
? "runtime-facts.ndjson.gz"
: "runtime-facts.ndjson";
var saveRequest = new RuntimeFactsArtifactSaveRequest(
TenantId: tenantId,
SubjectKey: string.Empty, // Will be populated after parsing
Hash: batchHash.Replace("blake3:", string.Empty),
ContentType: contentType,
FileName: fileName,
BatchSize: buffer.Length,
ProvenanceSource: "runtime-facts-batch");
storedArtifact = await artifactStore.SaveAsync(saveRequest, buffer, cancellationToken).ConfigureAwait(false);
buffer.Position = 0;
}
// Decompress if gzip
Stream parseStream;
if (contentType.Contains("gzip", StringComparison.OrdinalIgnoreCase))
{
var decompressed = new MemoryStream();
await using (var gzip = new GZipStream(buffer, CompressionMode.Decompress, leaveOpen: true))
{
await gzip.CopyToAsync(decompressed, cancellationToken).ConfigureAwait(false);
}
decompressed.Position = 0;
parseStream = decompressed;
}
else
{
parseStream = buffer;
}
// Parse NDJSON and group by subject
var requestsBySubject = new Dictionary<string, RuntimeFactsIngestRequest>(StringComparer.Ordinal);
using var reader = new StreamReader(parseStream, leaveOpen: true);
while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
{
if (string.IsNullOrWhiteSpace(line))
{
continue;
}
try
{
var evt = JsonSerializer.Deserialize<RuntimeFactsBatchEvent>(line, JsonOptions);
if (evt is null || string.IsNullOrWhiteSpace(evt.SymbolId))
{
continue;
}
var subjectKey = evt.Subject?.ToSubjectKey() ?? evt.CallgraphId ?? "unknown";
if (!requestsBySubject.TryGetValue(subjectKey, out var request))
{
request = new RuntimeFactsIngestRequest
{
Subject = evt.Subject ?? new ReachabilitySubject { ScanId = subjectKey },
CallgraphId = evt.CallgraphId ?? subjectKey,
Events = new List<RuntimeFactEvent>(),
Metadata = new Dictionary<string, string?>(StringComparer.Ordinal)
{
["batch.hash"] = batchHash,
["batch.cas_uri"] = storedArtifact?.CasUri,
["tenant_id"] = tenantId
}
};
requestsBySubject[subjectKey] = request;
}
((List<RuntimeFactEvent>)request.Events).Add(new RuntimeFactEvent
{
SymbolId = evt.SymbolId,
CodeId = evt.CodeId,
SymbolDigest = evt.SymbolDigest,
Purl = evt.Purl,
BuildId = evt.BuildId,
LoaderBase = evt.LoaderBase,
ProcessId = evt.ProcessId,
ProcessName = evt.ProcessName,
SocketAddress = evt.SocketAddress,
ContainerId = evt.ContainerId,
EvidenceUri = evt.EvidenceUri,
HitCount = Math.Max(evt.HitCount, 1),
ObservedAt = evt.ObservedAt,
Metadata = evt.Metadata
});
totalEvents++;
totalHitCount += Math.Max(evt.HitCount, 1);
}
catch (JsonException ex)
{
logger.LogWarning(ex, "Failed to parse NDJSON line in batch ingestion.");
}
}
// Process each subject's request
foreach (var (subjectKey, request) in requestsBySubject)
{
try
{
var response = await IngestAsync(request, cancellationToken).ConfigureAwait(false);
// Update the fact document with batch reference
var existing = await factRepository.GetBySubjectAsync(subjectKey, cancellationToken).ConfigureAwait(false);
if (existing != null && storedArtifact != null)
{
existing.RuntimeFactsBatchUri = storedArtifact.CasUri;
existing.RuntimeFactsBatchHash = batchHash;
await factRepository.UpsertAsync(existing, cancellationToken).ConfigureAwait(false);
}
subjectKeys.Add(subjectKey);
processedCount++;
}
catch (Exception ex)
{
logger.LogError(ex, "Failed to ingest batch for subject {SubjectKey}.", subjectKey);
}
}
logger.LogInformation(
"Batch ingestion completed: {ProcessedCount} subjects, {TotalEvents} events, {TotalHitCount} hits (hash={BatchHash}, tenant={TenantId}).",
processedCount,
totalEvents,
totalHitCount,
batchHash,
tenantId);
return new RuntimeFactsBatchIngestResponse
{
CasUri = storedArtifact?.CasUri ?? $"cas://reachability/runtime-facts/{batchHash.Replace("blake3:", string.Empty)}",
BatchHash = batchHash,
ProcessedCount = processedCount,
TotalEvents = totalEvents,
TotalHitCount = totalHitCount,
SubjectKeys = subjectKeys.ToList(),
StoredAt = storedAt
};
}
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNameCaseInsensitive = true,
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
/// <summary>
/// NDJSON batch event structure for runtime facts.
/// </summary>
private sealed class RuntimeFactsBatchEvent
{
public string? SymbolId { get; set; }
public string? CodeId { get; set; }
public string? SymbolDigest { get; set; }
public string? Purl { get; set; }
public string? BuildId { get; set; }
public string? LoaderBase { get; set; }
public int? ProcessId { get; set; }
public string? ProcessName { get; set; }
public string? SocketAddress { get; set; }
public string? ContainerId { get; set; }
public string? EvidenceUri { get; set; }
public int HitCount { get; set; } = 1;
public DateTimeOffset? ObservedAt { get; set; }
public Dictionary<string, string?>? Metadata { get; set; }
public ReachabilitySubject? Subject { get; set; }
public string? CallgraphId { get; set; }
}
private static void ValidateRequest(RuntimeFactsIngestRequest request)
{
if (request.Subject is null)