using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
using StellaOps.Signals.Parsing;
using StellaOps.Signals.Persistence;
using StellaOps.Signals.Storage;
using StellaOps.Signals.Storage.Models;

namespace StellaOps.Signals.Services;

/// <summary>
/// Ingests a base64-encoded callgraph artifact: validates the request, parses and normalizes
/// the graph, computes artifact and graph hashes, saves the artifact together with a JSON
/// manifest, and upserts the resulting document and its nodes/edges.
/// </summary>
internal sealed class CallgraphIngestionService : ICallgraphIngestionService
{
    // Content types accepted by ValidateRequest; compared case-insensitively.
    private static readonly HashSet<string> AllowedContentTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "application/json",
        "application/vnd.stellaops.callgraph+json"
    };

    private readonly ICallgraphParserResolver parserResolver;
    private readonly ICallgraphArtifactStore artifactStore;
    private readonly ICallgraphRepository repository;
    private readonly IReachabilityStoreRepository reachabilityStore;
    private readonly ICallgraphNormalizationService normalizer;
    private readonly ILogger<CallgraphIngestionService> logger;
    private readonly SignalsOptions options;
    private readonly TimeProvider timeProvider;

    private static readonly JsonSerializerOptions ManifestSerializerOptions = new(JsonSerializerDefaults.Web);

    /// <summary>
    /// Creates the service. Every dependency is required.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Any argument is <c>null</c>, or <paramref name="options"/> carries a <c>null</c> value.
    /// </exception>
    public CallgraphIngestionService(
        ICallgraphParserResolver parserResolver,
        ICallgraphArtifactStore artifactStore,
        ICallgraphRepository repository,
        IReachabilityStoreRepository reachabilityStore,
        ICallgraphNormalizationService normalizer,
        IOptions<SignalsOptions> options,
        TimeProvider timeProvider,
        ILogger<CallgraphIngestionService> logger)
    {
        this.parserResolver = parserResolver ?? throw new ArgumentNullException(nameof(parserResolver));
        this.artifactStore = artifactStore ?? throw new ArgumentNullException(nameof(artifactStore));
        this.repository = repository ?? throw new ArgumentNullException(nameof(repository));
        this.reachabilityStore = reachabilityStore ?? throw new ArgumentNullException(nameof(reachabilityStore));
        this.normalizer = normalizer ?? throw new ArgumentNullException(nameof(normalizer));
        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
        this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        this.options = options?.Value ?? throw new ArgumentNullException(nameof(options));
    }

    /// <summary>
    /// Validates, parses, normalizes, stores and persists the callgraph carried by
    /// <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Ingestion request holding the base64 artifact and its descriptors.</param>
    /// <param name="cancellationToken">Token flowed to the parser, artifact store and repositories.</param>
    /// <returns>Identifiers, hashes and node/edge/root counts of the persisted callgraph.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="CallgraphIngestionValidationException">
    /// A required request field is blank or the artifact content type is not allowed.
    /// </exception>
    public async Task<CallgraphIngestResponse> IngestAsync(CallgraphIngestRequest request, CancellationToken cancellationToken)
    {
        ValidateRequest(request);

        var parser = parserResolver.Resolve(request.Language);

        // NOTE(review): Convert.FromBase64String throws FormatException on malformed input, which is
        // not a CallgraphIngestionValidationException - confirm callers translate it appropriately.
        var artifactBytes = Convert.FromBase64String(request.ArtifactContentBase64);
        await using var parseStream = new MemoryStream(artifactBytes, writable: false);
        var parsed = await parser.ParseAsync(parseStream, cancellationToken).ConfigureAwait(false);
        var normalized = normalizer.Normalize(parser.Language, parsed);

        // An explicit schema version / analyzer block on the request wins over the normalized values.
        var schemaVersion = !string.IsNullOrWhiteSpace(request.SchemaVersion)
            ? request.SchemaVersion!
            : normalized.SchemaVersion;
        var analyzerMeta = request.Analyzer ?? normalized.Analyzer;

        var artifactHash = ComputeSha256(artifactBytes);

        var document = new CallgraphDocument
        {
            Language = parser.Language,
            LanguageType = CallgraphLanguage.Unknown,
            Component = request.Component,
            Version = request.Version,
            // Copies, so the document does not alias the normalizer's collections.
            Nodes = normalized.Nodes.ToList(),
            Edges = normalized.Edges.ToList(),
            Roots = normalized.Roots.ToList(),
            Entrypoints = normalized.Entrypoints?.ToList() ?? new(),
            // NOTE(review): target-typed new assumes Metadata is declared as a concrete dictionary
            // type (generic arguments were not visible in the reviewed source) - confirm.
            Metadata = request.Metadata is null
                ? null
                : new(request.Metadata, StringComparer.OrdinalIgnoreCase),
            Artifact = new CallgraphArtifactMetadata
            {
                ContentType = request.ArtifactContentType
            },
            IngestedAt = timeProvider.GetUtcNow()
        };

        document.Metadata ??= new(StringComparer.OrdinalIgnoreCase);
        document.Metadata["formatVersion"] = normalized.FormatVersion;
        document.Metadata["schemaVersion"] = schemaVersion;

        if (analyzerMeta is not null)
        {
            foreach (var kv in analyzerMeta)
            {
                document.Metadata[$"analyzer.{kv.Key}"] = kv.Value;
            }
        }

        document.SchemaVersion = schemaVersion;
        document = CallgraphSchemaMigrator.EnsureV1(document);

        // The graph hash is taken after migration so it reflects the document as it will be stored.
        var graphHash = ComputeGraphHash(document);
        document.GraphHash = graphHash;

        var manifest = new CallgraphManifest
        {
            Language = request.Language,
            Component = request.Component,
            Version = request.Version,
            ArtifactHash = artifactHash,
            GraphHash = graphHash,
            SchemaVersion = schemaVersion,
            NodeCount = document.Nodes.Count,
            EdgeCount = document.Edges.Count,
            RootCount = document.Roots?.Count ?? 0,
            CreatedAt = timeProvider.GetUtcNow()
        };

        await using var manifestStream = new MemoryStream();
        await JsonSerializer.SerializeAsync(manifestStream, manifest, ManifestSerializerOptions, cancellationToken).ConfigureAwait(false);

        // Rewind both streams: the manifest was just written and the artifact was consumed by the parser.
        manifestStream.Position = 0;
        parseStream.Position = 0;

        var artifactMetadata = await artifactStore.SaveAsync(
            new CallgraphArtifactSaveRequest(
                request.Language,
                request.Component,
                request.Version,
                request.ArtifactFileName,
                request.ArtifactContentType,
                artifactHash,
                manifestStream),
            parseStream,
            cancellationToken).ConfigureAwait(false);

        document.Artifact.Path = artifactMetadata.Path;
        document.Artifact.Hash = artifactMetadata.Hash;
        document.Artifact.CasUri = artifactMetadata.CasUri;
        document.Artifact.ManifestPath = artifactMetadata.ManifestPath;
        document.Artifact.ManifestCasUri = artifactMetadata.ManifestCasUri;
        document.Artifact.GraphHash = graphHash;
        document.Artifact.ContentType = artifactMetadata.ContentType;
        document.Artifact.Length = artifactMetadata.Length;

        document = await repository.UpsertAsync(document, cancellationToken).ConfigureAwait(false);

        await reachabilityStore.UpsertGraphAsync(
            document.GraphHash,
            document.Nodes,
            document.Edges,
            cancellationToken).ConfigureAwait(false);

        logger.LogInformation(
            "Ingested callgraph {Language}:{Component}:{Version} (id={Id}) with {NodeCount} nodes and {EdgeCount} edges.",
            document.Language,
            document.Component,
            document.Version,
            document.Id,
            document.Nodes.Count,
            document.Edges.Count);

        return new CallgraphIngestResponse(
            document.Id,
            document.Artifact.Path,
            document.Artifact.Hash,
            document.Artifact.CasUri,
            document.GraphHash,
            document.Artifact.ManifestCasUri,
            schemaVersion,
            document.Nodes.Count,
            document.Edges.Count,
            document.Roots?.Count ?? 0);
    }

    /// <summary>
    /// Rejects requests with a blank required field or an artifact content type outside
    /// <see cref="AllowedContentTypes"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="CallgraphIngestionValidationException">The request fails a check.</exception>
    private static void ValidateRequest(CallgraphIngestRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (string.IsNullOrWhiteSpace(request.Language))
        {
            throw new CallgraphIngestionValidationException("Language is required.");
        }

        if (string.IsNullOrWhiteSpace(request.Component))
        {
            throw new CallgraphIngestionValidationException("Component is required.");
        }

        if (string.IsNullOrWhiteSpace(request.Version))
        {
            throw new CallgraphIngestionValidationException("Version is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactContentBase64))
        {
            throw new CallgraphIngestionValidationException("Artifact content is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactFileName))
        {
            throw new CallgraphIngestionValidationException("Artifact file name is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactContentType) || !AllowedContentTypes.Contains(request.ArtifactContentType))
        {
            throw new CallgraphIngestionValidationException($"Unsupported artifact content type '{request.ArtifactContentType}'.");
        }
    }

    /// <summary>
    /// Returns the SHA-256 digest of <paramref name="buffer"/> as a hexadecimal string
    /// (as produced by <see cref="Convert.ToHexString(ReadOnlySpan{byte})"/>).
    /// </summary>
    private static string ComputeSha256(ReadOnlySpan<byte> buffer)
    {
        Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
        SHA256.HashData(buffer, hash);
        return Convert.ToHexString(hash);
    }

    /// <summary>
    /// Computes a SHA-256 hash over a textual rendering of the document: schema and language
    /// header, then nodes, edges, roots and entrypoints, each group sorted so the result does
    /// not depend on collection order.
    /// </summary>
    /// <remarks>
    /// NOTE(review): records are terminated with <see cref="StringBuilder.AppendLine()"/>, which
    /// uses <see cref="Environment.NewLine"/>; the hash therefore differs between platforms with
    /// different newline sequences. Left unchanged because switching would alter existing hashes.
    /// </remarks>
    private static string ComputeGraphHash(CallgraphDocument document)
    {
        var builder = new StringBuilder();

        builder.Append("schema|").Append(document.Schema).AppendLine();
        builder.Append("language|").Append(document.LanguageType).Append('|').Append(document.Language).AppendLine();

        foreach (var node in document.Nodes.OrderBy(n => n.Id, StringComparer.Ordinal))
        {
            builder
                .Append(node.Id).Append('|')
                .Append(node.Name).Append('|')
                .Append(node.Kind).Append('|')
                .Append(node.Namespace).Append('|')
                .Append(node.File).Append('|')
                // Invariant culture keeps the hash input independent of the current thread culture,
                // matching how the other numeric fields below are rendered.
                .Append(node.Line?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(node.Purl).Append('|')
                .Append(node.SymbolDigest).Append('|')
                .Append(node.BuildId).Append('|')
                .Append(node.CodeId).Append('|')
                .Append(node.Language).Append('|')
                .Append(node.SymbolKey).Append('|')
                .Append(node.ArtifactKey).Append('|')
                .Append(node.Visibility).Append('|')
                .Append(node.IsEntrypointCandidate).Append('|')
                .Append(node.Flags).Append('|')
                .Append(Join(node.Evidence)).Append('|')
                .Append(JoinDict(node.Analyzer)).Append('|')
                .Append(JoinDict(node.Attributes))
                .AppendLine();
        }

        var orderedEdges = document.Edges
            .OrderBy(e => e.SourceId, StringComparer.Ordinal)
            .ThenBy(e => e.TargetId, StringComparer.Ordinal)
            .ThenBy(e => e.Type, StringComparer.Ordinal)
            .ThenBy(e => e.Offset ?? -1);

        foreach (var edge in orderedEdges)
        {
            builder
                .Append(edge.SourceId).Append("->").Append(edge.TargetId).Append('|')
                .Append(edge.Type).Append('|')
                .Append(edge.Kind).Append('|')
                .Append(edge.Reason).Append('|')
                .Append(edge.Weight.ToString("G17", CultureInfo.InvariantCulture)).Append('|')
                .Append(edge.Offset?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(edge.IsResolved).Append('|')
                .Append(edge.Provenance).Append('|')
                .Append(edge.Purl).Append('|')
                .Append(edge.SymbolDigest).Append('|')
                .Append(edge.Confidence?.ToString("G17", CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(Join(edge.Candidates)).Append('|')
                .Append(Join(edge.Evidence))
                .AppendLine();
        }

        // A missing roots collection contributes nothing, exactly like an empty one.
        if (document.Roots is not null)
        {
            foreach (var root in document.Roots.OrderBy(r => r.Id, StringComparer.Ordinal))
            {
                builder.Append("root|").Append(root.Id).Append('|').Append(root.Phase).Append('|').Append(root.Source).AppendLine();
            }
        }

        var orderedEntrypoints = document.Entrypoints
            .OrderBy(e => (int)e.Phase)
            .ThenBy(e => e.Order)
            .ThenBy(e => e.NodeId, StringComparer.Ordinal);

        foreach (var entrypoint in orderedEntrypoints)
        {
            builder
                .Append("entrypoint|").Append(entrypoint.NodeId).Append('|')
                .Append(entrypoint.Kind).Append('|')
                .Append(entrypoint.Framework).Append('|')
                .Append(entrypoint.Phase).Append('|')
                .Append(entrypoint.Route).Append('|')
                .Append(entrypoint.HttpMethod).Append('|')
                .Append(entrypoint.Source).Append('|')
                .Append(entrypoint.Order.ToString(CultureInfo.InvariantCulture))
                .AppendLine();
        }

        return ComputeSha256(Encoding.UTF8.GetBytes(builder.ToString()));
    }

    /// <summary>
    /// Joins <paramref name="values"/> with commas after ordinal sorting; <c>null</c> yields an
    /// empty string.
    /// </summary>
    private static string Join(IEnumerable<string>? values)
    {
        if (values is null)
        {
            return string.Empty;
        }

        return string.Join(',', values.OrderBy(v => v, StringComparer.Ordinal));
    }

    /// <summary>
    /// Renders <paramref name="values"/> as <c>key=value;</c> pairs ordered by key (ordinal);
    /// <c>null</c> yields an empty string.
    /// </summary>
    // NOTE(review): the value type argument was not visible in the reviewed source; string? is
    // assumed from how the values are appended - confirm against the node model.
    private static string JoinDict(IReadOnlyDictionary<string, string?>? values)
    {
        if (values is null)
        {
            return string.Empty;
        }

        var ordered = new StringBuilder();
        foreach (var kv in values.OrderBy(k => k.Key, StringComparer.Ordinal))
        {
            ordered.Append(kv.Key).Append('=').Append(kv.Value).Append(';');
        }

        return ordered.ToString();
    }
}

/// <summary>
/// Exception thrown when the ingestion request is invalid.
/// </summary>
public sealed class CallgraphIngestionValidationException : Exception
{
    /// <summary>
    /// Creates the exception with a message describing the failed validation.
    /// </summary>
    /// <param name="message">Description of the validation failure.</param>
    public CallgraphIngestionValidationException(string message)
        : base(message)
    {
    }
}