up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,261 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Signals.Models;
|
||||
using StellaOps.Signals.Options;
|
||||
|
||||
namespace StellaOps.Signals.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Ingests edge-bundle DSSE envelopes, attaches to graph_hash, enforces quarantine for revoked edges.
|
||||
/// </summary>
|
||||
public sealed class EdgeBundleIngestionService : IEdgeBundleIngestionService
|
||||
{
|
||||
private readonly ILogger<EdgeBundleIngestionService> _logger;
|
||||
private readonly SignalsOptions _options;
|
||||
|
||||
// In-memory storage (in production, would use repository)
|
||||
private readonly ConcurrentDictionary<string, List<EdgeBundleDocument>> _bundlesByGraphHash = new();
|
||||
private readonly ConcurrentDictionary<string, HashSet<string>> _revokedEdgeKeys = new();
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
};
|
||||
|
||||
public EdgeBundleIngestionService(
|
||||
ILogger<EdgeBundleIngestionService> logger,
|
||||
IOptions<SignalsOptions> options)
|
||||
{
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
|
||||
}
|
||||
|
||||
public async Task<EdgeBundleIngestResponse> IngestAsync(
|
||||
string tenantId,
|
||||
Stream bundleStream,
|
||||
Stream? dsseStream,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentNullException.ThrowIfNull(bundleStream);
|
||||
|
||||
// Parse the bundle JSON
|
||||
using var bundleMs = new MemoryStream();
|
||||
await bundleStream.CopyToAsync(bundleMs, cancellationToken).ConfigureAwait(false);
|
||||
bundleMs.Position = 0;
|
||||
|
||||
var bundleJson = await JsonDocument.ParseAsync(bundleMs, cancellationToken: cancellationToken).ConfigureAwait(false);
|
||||
var root = bundleJson.RootElement;
|
||||
|
||||
// Extract bundle fields
|
||||
var bundleId = GetStringOrDefault(root, "bundleId", $"bundle:{Guid.NewGuid():N}");
|
||||
var graphHash = GetStringOrDefault(root, "graphHash", string.Empty);
|
||||
var bundleReason = GetStringOrDefault(root, "bundleReason", "Custom");
|
||||
var customReason = GetStringOrDefault(root, "customReason", null);
|
||||
var generatedAtStr = GetStringOrDefault(root, "generatedAt", null);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(graphHash))
|
||||
{
|
||||
throw new InvalidOperationException("Edge bundle missing required 'graphHash' field");
|
||||
}
|
||||
|
||||
var generatedAt = !string.IsNullOrWhiteSpace(generatedAtStr)
|
||||
? DateTimeOffset.Parse(generatedAtStr)
|
||||
: DateTimeOffset.UtcNow;
|
||||
|
||||
// Parse edges
|
||||
var edges = new List<EdgeBundleEdgeDocument>();
|
||||
var revokedCount = 0;
|
||||
|
||||
if (root.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var edgeEl in edgesElement.EnumerateArray())
|
||||
{
|
||||
var edge = new EdgeBundleEdgeDocument
|
||||
{
|
||||
From = GetStringOrDefault(edgeEl, "from", string.Empty),
|
||||
To = GetStringOrDefault(edgeEl, "to", string.Empty),
|
||||
Kind = GetStringOrDefault(edgeEl, "kind", "call"),
|
||||
Reason = GetStringOrDefault(edgeEl, "reason", "Unknown"),
|
||||
Revoked = edgeEl.TryGetProperty("revoked", out var r) && r.GetBoolean(),
|
||||
Confidence = edgeEl.TryGetProperty("confidence", out var c) ? c.GetDouble() : 0.5,
|
||||
Purl = GetStringOrDefault(edgeEl, "purl", null),
|
||||
SymbolDigest = GetStringOrDefault(edgeEl, "symbolDigest", null),
|
||||
Evidence = GetStringOrDefault(edgeEl, "evidence", null)
|
||||
};
|
||||
|
||||
edges.Add(edge);
|
||||
|
||||
if (edge.Revoked)
|
||||
{
|
||||
revokedCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute content hash
|
||||
bundleMs.Position = 0;
|
||||
var contentHash = ComputeSha256(bundleMs);
|
||||
|
||||
// Parse DSSE if provided
|
||||
string? dsseDigest = null;
|
||||
if (dsseStream is not null)
|
||||
{
|
||||
using var dsseMs = new MemoryStream();
|
||||
await dsseStream.CopyToAsync(dsseMs, cancellationToken).ConfigureAwait(false);
|
||||
dsseMs.Position = 0;
|
||||
dsseDigest = $"sha256:{ComputeSha256(dsseMs)}";
|
||||
}
|
||||
|
||||
// Build CAS URIs
|
||||
var graphHashDigest = ExtractHashDigest(graphHash);
|
||||
var casUri = $"cas://reachability/edges/{graphHashDigest}/{bundleId}";
|
||||
var dsseCasUri = dsseStream is not null ? $"{casUri}.dsse" : null;
|
||||
|
||||
// Create document
|
||||
var document = new EdgeBundleDocument
|
||||
{
|
||||
BundleId = bundleId,
|
||||
GraphHash = graphHash,
|
||||
TenantId = tenantId,
|
||||
BundleReason = bundleReason,
|
||||
CustomReason = customReason,
|
||||
Edges = edges,
|
||||
ContentHash = $"sha256:{contentHash}",
|
||||
DsseDigest = dsseDigest,
|
||||
CasUri = casUri,
|
||||
DsseCasUri = dsseCasUri,
|
||||
Verified = dsseStream is not null, // Simple verification - in production would verify signature
|
||||
RevokedCount = revokedCount,
|
||||
IngestedAt = DateTimeOffset.UtcNow,
|
||||
GeneratedAt = generatedAt
|
||||
};
|
||||
|
||||
// Store document
|
||||
var storageKey = $"{tenantId}:{graphHash}";
|
||||
_bundlesByGraphHash.AddOrUpdate(
|
||||
storageKey,
|
||||
_ => new List<EdgeBundleDocument> { document },
|
||||
(_, list) =>
|
||||
{
|
||||
// Remove existing bundle with same ID
|
||||
list.RemoveAll(b => b.BundleId == bundleId);
|
||||
list.Add(document);
|
||||
return list;
|
||||
});
|
||||
|
||||
// Update revoked edge index for quarantine enforcement
|
||||
if (revokedCount > 0)
|
||||
{
|
||||
var revokedEdges = edges.Where(e => e.Revoked).Select(e => $"{e.From}>{e.To}").ToHashSet();
|
||||
_revokedEdgeKeys.AddOrUpdate(
|
||||
storageKey,
|
||||
_ => revokedEdges,
|
||||
(_, existing) =>
|
||||
{
|
||||
foreach (var key in revokedEdges)
|
||||
{
|
||||
existing.Add(key);
|
||||
}
|
||||
return existing;
|
||||
});
|
||||
}
|
||||
|
||||
var quarantined = revokedCount > 0;
|
||||
|
||||
_logger.LogInformation(
|
||||
"Ingested edge bundle {BundleId} for graph {GraphHash} with {EdgeCount} edges ({RevokedCount} revoked, quarantine={Quarantined})",
|
||||
bundleId, graphHash, edges.Count, revokedCount, quarantined);
|
||||
|
||||
return new EdgeBundleIngestResponse(
|
||||
bundleId,
|
||||
graphHash,
|
||||
bundleReason,
|
||||
casUri,
|
||||
dsseCasUri,
|
||||
edges.Count,
|
||||
revokedCount,
|
||||
quarantined);
|
||||
}
|
||||
|
||||
public Task<EdgeBundleDocument[]> GetBundlesForGraphAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
var key = $"{tenantId}:{graphHash}";
|
||||
if (_bundlesByGraphHash.TryGetValue(key, out var bundles))
|
||||
{
|
||||
return Task.FromResult(bundles.ToArray());
|
||||
}
|
||||
|
||||
return Task.FromResult(Array.Empty<EdgeBundleDocument>());
|
||||
}
|
||||
|
||||
public Task<EdgeBundleEdgeDocument[]> GetRevokedEdgesAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
var key = $"{tenantId}:{graphHash}";
|
||||
if (_bundlesByGraphHash.TryGetValue(key, out var bundles))
|
||||
{
|
||||
var revoked = bundles
|
||||
.SelectMany(b => b.Edges)
|
||||
.Where(e => e.Revoked)
|
||||
.ToArray();
|
||||
return Task.FromResult(revoked);
|
||||
}
|
||||
|
||||
return Task.FromResult(Array.Empty<EdgeBundleEdgeDocument>());
|
||||
}
|
||||
|
||||
public Task<bool> IsEdgeRevokedAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
string fromId,
|
||||
string toId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
var key = $"{tenantId}:{graphHash}";
|
||||
if (_revokedEdgeKeys.TryGetValue(key, out var revokedKeys))
|
||||
{
|
||||
var edgeKey = $"{fromId}>{toId}";
|
||||
return Task.FromResult(revokedKeys.Contains(edgeKey));
|
||||
}
|
||||
|
||||
return Task.FromResult(false);
|
||||
}
|
||||
|
||||
private static string GetStringOrDefault(JsonElement element, string propertyName, string? defaultValue)
|
||||
{
|
||||
if (element.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
return prop.GetString() ?? defaultValue ?? string.Empty;
|
||||
}
|
||||
return defaultValue ?? string.Empty;
|
||||
}
|
||||
|
||||
private static string ComputeSha256(Stream stream)
|
||||
{
|
||||
using var sha = SHA256.Create();
|
||||
var hash = sha.ComputeHash(stream);
|
||||
return Convert.ToHexString(hash).ToLowerInvariant();
|
||||
}
|
||||
|
||||
private static string ExtractHashDigest(string prefixedHash)
|
||||
{
|
||||
var colonIndex = prefixedHash.IndexOf(':');
|
||||
return colonIndex >= 0 ? prefixedHash[(colonIndex + 1)..] : prefixedHash;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Signals.Models;
|
||||
|
||||
namespace StellaOps.Signals.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Response from edge bundle ingestion.
|
||||
/// </summary>
|
||||
public sealed record EdgeBundleIngestResponse(
|
||||
string BundleId,
|
||||
string GraphHash,
|
||||
string BundleReason,
|
||||
string CasUri,
|
||||
string? DsseCasUri,
|
||||
int EdgeCount,
|
||||
int RevokedCount,
|
||||
bool Quarantined);
|
||||
|
||||
/// <summary>
|
||||
/// Service for ingesting edge-bundle DSSE envelopes.
|
||||
/// </summary>
|
||||
public interface IEdgeBundleIngestionService
|
||||
{
|
||||
/// <summary>
|
||||
/// Ingests an edge bundle from a JSON stream.
|
||||
/// </summary>
|
||||
/// <param name="tenantId">Tenant identifier for isolation.</param>
|
||||
/// <param name="bundleStream">Stream containing the edge-bundle JSON.</param>
|
||||
/// <param name="dsseStream">Optional stream containing the DSSE envelope.</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
/// <returns>Ingest response with bundle details.</returns>
|
||||
Task<EdgeBundleIngestResponse> IngestAsync(
|
||||
string tenantId,
|
||||
Stream bundleStream,
|
||||
Stream? dsseStream,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets all edge bundles for a graph hash.
|
||||
/// </summary>
|
||||
Task<EdgeBundleDocument[]> GetBundlesForGraphAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets revoked edges from all bundles for a graph.
|
||||
/// Returns edges that should be quarantined from scoring.
|
||||
/// </summary>
|
||||
Task<EdgeBundleEdgeDocument[]> GetRevokedEdgesAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Checks if an edge is revoked for the given graph.
|
||||
/// </summary>
|
||||
Task<bool> IsEdgeRevokedAsync(
|
||||
string tenantId,
|
||||
string graphHash,
|
||||
string fromId,
|
||||
string toId,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
@@ -1,10 +1,66 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Signals.Models;
|
||||
|
||||
namespace StellaOps.Signals.Services;
|
||||
|
||||
public interface IRuntimeFactsIngestionService
|
||||
{
|
||||
/// <summary>
|
||||
/// Ingests runtime facts from a structured request.
|
||||
/// </summary>
|
||||
Task<RuntimeFactsIngestResponse> IngestAsync(RuntimeFactsIngestRequest request, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Ingests runtime facts from a raw NDJSON/gzip stream, stores in CAS, and processes.
|
||||
/// </summary>
|
||||
/// <param name="tenantId">Tenant identifier for tenant isolation.</param>
|
||||
/// <param name="content">The NDJSON or gzip compressed stream of runtime fact events.</param>
|
||||
/// <param name="contentType">Content type (application/x-ndjson or application/gzip).</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
/// <returns>Batch ingestion response with CAS reference.</returns>
|
||||
Task<RuntimeFactsBatchIngestResponse> IngestBatchAsync(
|
||||
string tenantId,
|
||||
Stream content,
|
||||
string contentType,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Response from batch ingestion with CAS storage.
|
||||
/// </summary>
|
||||
public sealed record RuntimeFactsBatchIngestResponse
|
||||
{
|
||||
/// <summary>
|
||||
/// CAS URI for the stored batch artifact.
|
||||
/// </summary>
|
||||
public required string CasUri { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// BLAKE3 hash of the batch artifact.
|
||||
/// </summary>
|
||||
public required string BatchHash { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Number of fact documents processed.
|
||||
/// </summary>
|
||||
public int ProcessedCount { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Total events ingested.
|
||||
/// </summary>
|
||||
public int TotalEvents { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Total hit count across all events.
|
||||
/// </summary>
|
||||
public long TotalHitCount { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Subject keys affected.
|
||||
/// </summary>
|
||||
public IReadOnlyList<string> SubjectKeys { get; init; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Timestamp of ingestion.
|
||||
/// </summary>
|
||||
public DateTimeOffset StoredAt { get; init; }
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.IO.Compression;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using StellaOps.Cryptography;
|
||||
using StellaOps.Signals.Models;
|
||||
using StellaOps.Signals.Persistence;
|
||||
using StellaOps.Signals.Storage;
|
||||
using StellaOps.Signals.Storage.Models;
|
||||
|
||||
namespace StellaOps.Signals.Services;
|
||||
|
||||
@@ -18,6 +18,8 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
|
||||
private readonly IEventsPublisher eventsPublisher;
|
||||
private readonly IReachabilityScoringService scoringService;
|
||||
private readonly IRuntimeFactsProvenanceNormalizer provenanceNormalizer;
|
||||
private readonly IRuntimeFactsArtifactStore? artifactStore;
|
||||
private readonly ICryptoHash? cryptoHash;
|
||||
private readonly ILogger<RuntimeFactsIngestionService> logger;
|
||||
|
||||
public RuntimeFactsIngestionService(
|
||||
@@ -27,7 +29,9 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
|
||||
IEventsPublisher eventsPublisher,
|
||||
IReachabilityScoringService scoringService,
|
||||
IRuntimeFactsProvenanceNormalizer provenanceNormalizer,
|
||||
ILogger<RuntimeFactsIngestionService> logger)
|
||||
ILogger<RuntimeFactsIngestionService> logger,
|
||||
IRuntimeFactsArtifactStore? artifactStore = null,
|
||||
ICryptoHash? cryptoHash = null)
|
||||
{
|
||||
this.factRepository = factRepository ?? throw new ArgumentNullException(nameof(factRepository));
|
||||
this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
@@ -35,6 +39,8 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
|
||||
this.eventsPublisher = eventsPublisher ?? throw new ArgumentNullException(nameof(eventsPublisher));
|
||||
this.scoringService = scoringService ?? throw new ArgumentNullException(nameof(scoringService));
|
||||
this.provenanceNormalizer = provenanceNormalizer ?? throw new ArgumentNullException(nameof(provenanceNormalizer));
|
||||
this.artifactStore = artifactStore;
|
||||
this.cryptoHash = cryptoHash;
|
||||
this.logger = logger ?? NullLogger<RuntimeFactsIngestionService>.Instance;
|
||||
}
|
||||
|
||||
@@ -96,6 +102,216 @@ public sealed class RuntimeFactsIngestionService : IRuntimeFactsIngestionService
|
||||
};
|
||||
}
|
||||
|
||||
public async Task<RuntimeFactsBatchIngestResponse> IngestBatchAsync(
|
||||
string tenantId,
|
||||
Stream content,
|
||||
string contentType,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentNullException.ThrowIfNull(content);
|
||||
|
||||
var storedAt = timeProvider.GetUtcNow();
|
||||
var subjectKeys = new HashSet<string>(StringComparer.Ordinal);
|
||||
var processedCount = 0;
|
||||
var totalEvents = 0;
|
||||
long totalHitCount = 0;
|
||||
|
||||
// Buffer the content for hashing and parsing
|
||||
using var buffer = new MemoryStream();
|
||||
await content.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
|
||||
buffer.Position = 0;
|
||||
|
||||
// Compute BLAKE3 hash
|
||||
string batchHash;
|
||||
if (cryptoHash != null)
|
||||
{
|
||||
batchHash = "blake3:" + await cryptoHash.ComputeHashHexAsync(buffer, "BLAKE3-256", cancellationToken).ConfigureAwait(false);
|
||||
buffer.Position = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback: generate a deterministic hash based on content length and timestamp
|
||||
batchHash = $"blake3:{storedAt.ToUnixTimeMilliseconds():x16}{buffer.Length:x16}";
|
||||
}
|
||||
|
||||
// Store to CAS if artifact store is available
|
||||
StoredRuntimeFactsArtifact? storedArtifact = null;
|
||||
if (artifactStore != null)
|
||||
{
|
||||
var fileName = contentType.Contains("gzip", StringComparison.OrdinalIgnoreCase)
|
||||
? "runtime-facts.ndjson.gz"
|
||||
: "runtime-facts.ndjson";
|
||||
|
||||
var saveRequest = new RuntimeFactsArtifactSaveRequest(
|
||||
TenantId: tenantId,
|
||||
SubjectKey: string.Empty, // Will be populated after parsing
|
||||
Hash: batchHash.Replace("blake3:", string.Empty),
|
||||
ContentType: contentType,
|
||||
FileName: fileName,
|
||||
BatchSize: buffer.Length,
|
||||
ProvenanceSource: "runtime-facts-batch");
|
||||
|
||||
storedArtifact = await artifactStore.SaveAsync(saveRequest, buffer, cancellationToken).ConfigureAwait(false);
|
||||
buffer.Position = 0;
|
||||
}
|
||||
|
||||
// Decompress if gzip
|
||||
Stream parseStream;
|
||||
if (contentType.Contains("gzip", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var decompressed = new MemoryStream();
|
||||
await using (var gzip = new GZipStream(buffer, CompressionMode.Decompress, leaveOpen: true))
|
||||
{
|
||||
await gzip.CopyToAsync(decompressed, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
decompressed.Position = 0;
|
||||
parseStream = decompressed;
|
||||
}
|
||||
else
|
||||
{
|
||||
parseStream = buffer;
|
||||
}
|
||||
|
||||
// Parse NDJSON and group by subject
|
||||
var requestsBySubject = new Dictionary<string, RuntimeFactsIngestRequest>(StringComparer.Ordinal);
|
||||
using var reader = new StreamReader(parseStream, leaveOpen: true);
|
||||
|
||||
while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(line))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var evt = JsonSerializer.Deserialize<RuntimeFactsBatchEvent>(line, JsonOptions);
|
||||
if (evt is null || string.IsNullOrWhiteSpace(evt.SymbolId))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var subjectKey = evt.Subject?.ToSubjectKey() ?? evt.CallgraphId ?? "unknown";
|
||||
if (!requestsBySubject.TryGetValue(subjectKey, out var request))
|
||||
{
|
||||
request = new RuntimeFactsIngestRequest
|
||||
{
|
||||
Subject = evt.Subject ?? new ReachabilitySubject { ScanId = subjectKey },
|
||||
CallgraphId = evt.CallgraphId ?? subjectKey,
|
||||
Events = new List<RuntimeFactEvent>(),
|
||||
Metadata = new Dictionary<string, string?>(StringComparer.Ordinal)
|
||||
{
|
||||
["batch.hash"] = batchHash,
|
||||
["batch.cas_uri"] = storedArtifact?.CasUri,
|
||||
["tenant_id"] = tenantId
|
||||
}
|
||||
};
|
||||
requestsBySubject[subjectKey] = request;
|
||||
}
|
||||
|
||||
((List<RuntimeFactEvent>)request.Events).Add(new RuntimeFactEvent
|
||||
{
|
||||
SymbolId = evt.SymbolId,
|
||||
CodeId = evt.CodeId,
|
||||
SymbolDigest = evt.SymbolDigest,
|
||||
Purl = evt.Purl,
|
||||
BuildId = evt.BuildId,
|
||||
LoaderBase = evt.LoaderBase,
|
||||
ProcessId = evt.ProcessId,
|
||||
ProcessName = evt.ProcessName,
|
||||
SocketAddress = evt.SocketAddress,
|
||||
ContainerId = evt.ContainerId,
|
||||
EvidenceUri = evt.EvidenceUri,
|
||||
HitCount = Math.Max(evt.HitCount, 1),
|
||||
ObservedAt = evt.ObservedAt,
|
||||
Metadata = evt.Metadata
|
||||
});
|
||||
|
||||
totalEvents++;
|
||||
totalHitCount += Math.Max(evt.HitCount, 1);
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to parse NDJSON line in batch ingestion.");
|
||||
}
|
||||
}
|
||||
|
||||
// Process each subject's request
|
||||
foreach (var (subjectKey, request) in requestsBySubject)
|
||||
{
|
||||
try
|
||||
{
|
||||
var response = await IngestAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// Update the fact document with batch reference
|
||||
var existing = await factRepository.GetBySubjectAsync(subjectKey, cancellationToken).ConfigureAwait(false);
|
||||
if (existing != null && storedArtifact != null)
|
||||
{
|
||||
existing.RuntimeFactsBatchUri = storedArtifact.CasUri;
|
||||
existing.RuntimeFactsBatchHash = batchHash;
|
||||
await factRepository.UpsertAsync(existing, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
subjectKeys.Add(subjectKey);
|
||||
processedCount++;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogError(ex, "Failed to ingest batch for subject {SubjectKey}.", subjectKey);
|
||||
}
|
||||
}
|
||||
|
||||
logger.LogInformation(
|
||||
"Batch ingestion completed: {ProcessedCount} subjects, {TotalEvents} events, {TotalHitCount} hits (hash={BatchHash}, tenant={TenantId}).",
|
||||
processedCount,
|
||||
totalEvents,
|
||||
totalHitCount,
|
||||
batchHash,
|
||||
tenantId);
|
||||
|
||||
return new RuntimeFactsBatchIngestResponse
|
||||
{
|
||||
CasUri = storedArtifact?.CasUri ?? $"cas://reachability/runtime-facts/{batchHash.Replace("blake3:", string.Empty)}",
|
||||
BatchHash = batchHash,
|
||||
ProcessedCount = processedCount,
|
||||
TotalEvents = totalEvents,
|
||||
TotalHitCount = totalHitCount,
|
||||
SubjectKeys = subjectKeys.ToList(),
|
||||
StoredAt = storedAt
|
||||
};
|
||||
}
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true,
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// NDJSON batch event structure for runtime facts.
|
||||
/// </summary>
|
||||
private sealed class RuntimeFactsBatchEvent
|
||||
{
|
||||
public string? SymbolId { get; set; }
|
||||
public string? CodeId { get; set; }
|
||||
public string? SymbolDigest { get; set; }
|
||||
public string? Purl { get; set; }
|
||||
public string? BuildId { get; set; }
|
||||
public string? LoaderBase { get; set; }
|
||||
public int? ProcessId { get; set; }
|
||||
public string? ProcessName { get; set; }
|
||||
public string? SocketAddress { get; set; }
|
||||
public string? ContainerId { get; set; }
|
||||
public string? EvidenceUri { get; set; }
|
||||
public int HitCount { get; set; } = 1;
|
||||
public DateTimeOffset? ObservedAt { get; set; }
|
||||
public Dictionary<string, string?>? Metadata { get; set; }
|
||||
public ReachabilitySubject? Subject { get; set; }
|
||||
public string? CallgraphId { get; set; }
|
||||
}
|
||||
|
||||
private static void ValidateRequest(RuntimeFactsIngestRequest request)
|
||||
{
|
||||
if (request.Subject is null)
|
||||
|
||||
Reference in New Issue
Block a user