Files
git.stella-ops.org/src/Signals/StellaOps.Signals/Services/CallgraphIngestionService.cs
master 4391f35d8a Refactor SurfaceCacheValidator to simplify oldest entry calculation
Add global using for Xunit in test project

Enhance ImportValidatorTests with async validation and quarantine checks

Implement FileSystemQuarantineServiceTests for quarantine functionality

Add integration tests for ImportValidator to check monotonicity

Create BundleVersionTests to validate version parsing and comparison logic

Implement VersionMonotonicityCheckerTests for monotonicity checks and activation logic
2025-12-16 10:44:00 +02:00

344 lines
14 KiB
C#

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
using StellaOps.Signals.Parsing;
using StellaOps.Signals.Persistence;
using StellaOps.Signals.Storage;
using StellaOps.Signals.Storage.Models;
namespace StellaOps.Signals.Services;
/// <summary>
/// Ingests callgraph artifacts: validates the request, parses and normalizes the uploaded artifact,
/// computes the artifact and graph hashes, stores the raw artifact together with a manifest, and
/// persists the resulting document and its nodes/edges.
/// </summary>
internal sealed class CallgraphIngestionService : ICallgraphIngestionService
{
    // Content types accepted by ValidateRequest; membership is checked case-insensitively.
    private static readonly HashSet<string> AllowedContentTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "application/json",
        "application/vnd.stellaops.callgraph+json"
    };

    private readonly ICallgraphParserResolver parserResolver;
    private readonly ICallgraphArtifactStore artifactStore;
    private readonly ICallgraphRepository repository;
    private readonly IReachabilityStoreRepository reachabilityStore;
    private readonly ICallgraphNormalizationService normalizer;
    private readonly ILogger<CallgraphIngestionService> logger;
    private readonly SignalsOptions options;
    private readonly TimeProvider timeProvider;

    // Serializer settings used when writing the manifest JSON (web defaults).
    private static readonly JsonSerializerOptions ManifestSerializerOptions = new(JsonSerializerDefaults.Web);

    /// <summary>
    /// Creates the service from its collaborators.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Any dependency is <c>null</c>, or <paramref name="options"/> carries a <c>null</c> value.
    /// </exception>
    public CallgraphIngestionService(
        ICallgraphParserResolver parserResolver,
        ICallgraphArtifactStore artifactStore,
        ICallgraphRepository repository,
        IReachabilityStoreRepository reachabilityStore,
        ICallgraphNormalizationService normalizer,
        IOptions<SignalsOptions> options,
        TimeProvider timeProvider,
        ILogger<CallgraphIngestionService> logger)
    {
        this.parserResolver = parserResolver ?? throw new ArgumentNullException(nameof(parserResolver));
        this.artifactStore = artifactStore ?? throw new ArgumentNullException(nameof(artifactStore));
        this.repository = repository ?? throw new ArgumentNullException(nameof(repository));
        this.reachabilityStore = reachabilityStore ?? throw new ArgumentNullException(nameof(reachabilityStore));
        this.normalizer = normalizer ?? throw new ArgumentNullException(nameof(normalizer));
        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
        this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        this.options = options?.Value ?? throw new ArgumentNullException(nameof(options));
    }

    /// <summary>
    /// Ingests a single callgraph artifact and returns identifiers and counts for the stored result.
    /// </summary>
    /// <param name="request">The ingestion request carrying the base64-encoded artifact and its metadata.</param>
    /// <param name="cancellationToken">Token forwarded to parsing, serialization and storage calls.</param>
    /// <returns>A response describing the persisted document, artifact locations, hashes and counts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="CallgraphIngestionValidationException">
    /// A required request field is missing or the artifact content type is not supported.
    /// </exception>
    /// <exception cref="FormatException">The artifact content is not valid base64.</exception>
    public async Task<CallgraphIngestResponse> IngestAsync(CallgraphIngestRequest request, CancellationToken cancellationToken)
    {
        ValidateRequest(request);

        // Decode, parse and normalize the uploaded artifact.
        var parser = parserResolver.Resolve(request.Language);
        var artifactBytes = Convert.FromBase64String(request.ArtifactContentBase64);
        await using var parseStream = new MemoryStream(artifactBytes, writable: false);
        var parsed = await parser.ParseAsync(parseStream, cancellationToken).ConfigureAwait(false);
        var normalized = normalizer.Normalize(parser.Language, parsed);

        // An explicit schema version / analyzer block on the request wins over the normalized values.
        var schemaVersion = !string.IsNullOrWhiteSpace(request.SchemaVersion)
            ? request.SchemaVersion!
            : normalized.SchemaVersion;
        var analyzerMeta = request.Analyzer ?? normalized.Analyzer;
        var artifactHash = ComputeSha256(artifactBytes);

        // NOTE(review): the case-insensitive copy of request.Metadata below throws ArgumentException
        // when the request contains keys that differ only by case - confirm callers cannot send that.
        var document = new CallgraphDocument
        {
            Language = parser.Language,
            LanguageType = CallgraphLanguage.Unknown,
            Component = request.Component,
            Version = request.Version,
            Nodes = new List<CallgraphNode>(normalized.Nodes),
            Edges = new List<CallgraphEdge>(normalized.Edges),
            Roots = new List<CallgraphRoot>(normalized.Roots),
            Entrypoints = normalized.Entrypoints is null
                ? new List<CallgraphEntrypoint>()
                : new List<CallgraphEntrypoint>(normalized.Entrypoints),
            Metadata = request.Metadata is null
                ? null
                : new Dictionary<string, string?>(request.Metadata, StringComparer.OrdinalIgnoreCase),
            Artifact = new CallgraphArtifactMetadata
            {
                ContentType = request.ArtifactContentType
            },
            IngestedAt = timeProvider.GetUtcNow()
        };

        // Record format/schema versions and analyzer details alongside the caller-supplied metadata.
        document.Metadata ??= new Dictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
        document.Metadata["formatVersion"] = normalized.FormatVersion;
        document.Metadata["schemaVersion"] = schemaVersion;
        if (analyzerMeta is not null)
        {
            foreach (var kv in analyzerMeta)
            {
                document.Metadata[$"analyzer.{kv.Key}"] = kv.Value;
            }
        }

        document.SchemaVersion = schemaVersion;

        // The graph hash is computed on the migrated document, so it reflects whatever EnsureV1 returns.
        document = CallgraphSchemaMigrator.EnsureV1(document);
        var graphHash = ComputeGraphHash(document);
        document.GraphHash = graphHash;

        var manifest = new CallgraphManifest
        {
            Language = request.Language,
            Component = request.Component,
            Version = request.Version,
            ArtifactHash = artifactHash,
            GraphHash = graphHash,
            SchemaVersion = schemaVersion,
            NodeCount = document.Nodes.Count,
            EdgeCount = document.Edges.Count,
            RootCount = document.Roots?.Count ?? 0,
            CreatedAt = timeProvider.GetUtcNow()
        };

        await using var manifestStream = new MemoryStream();
        await JsonSerializer.SerializeAsync(manifestStream, manifest, ManifestSerializerOptions, cancellationToken).ConfigureAwait(false);

        // Rewind both streams: the manifest was just written and the artifact was consumed by the parser.
        manifestStream.Position = 0;
        parseStream.Position = 0;

        var artifactMetadata = await artifactStore.SaveAsync(
            new CallgraphArtifactSaveRequest(
                request.Language,
                request.Component,
                request.Version,
                request.ArtifactFileName,
                request.ArtifactContentType,
                artifactHash,
                manifestStream),
            parseStream,
            cancellationToken).ConfigureAwait(false);

        // Copy the storage result onto the document before persisting it.
        document.Artifact.Path = artifactMetadata.Path;
        document.Artifact.Hash = artifactMetadata.Hash;
        document.Artifact.CasUri = artifactMetadata.CasUri;
        document.Artifact.ManifestPath = artifactMetadata.ManifestPath;
        document.Artifact.ManifestCasUri = artifactMetadata.ManifestCasUri;
        document.Artifact.GraphHash = graphHash;
        document.Artifact.ContentType = artifactMetadata.ContentType;
        document.Artifact.Length = artifactMetadata.Length;

        document = await repository.UpsertAsync(document, cancellationToken).ConfigureAwait(false);

        await reachabilityStore.UpsertGraphAsync(
            document.GraphHash,
            document.Nodes,
            document.Edges,
            cancellationToken).ConfigureAwait(false);

        logger.LogInformation(
            "Ingested callgraph {Language}:{Component}:{Version} (id={Id}) with {NodeCount} nodes and {EdgeCount} edges.",
            document.Language,
            document.Component,
            document.Version,
            document.Id,
            document.Nodes.Count,
            document.Edges.Count);

        return new CallgraphIngestResponse(
            document.Id,
            document.Artifact.Path,
            document.Artifact.Hash,
            document.Artifact.CasUri,
            document.GraphHash,
            document.Artifact.ManifestCasUri,
            schemaVersion,
            document.Nodes.Count,
            document.Edges.Count,
            document.Roots?.Count ?? 0);
    }

    /// <summary>
    /// Checks that every required request field is present and that the content type is allowed.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="CallgraphIngestionValidationException">
    /// A required field is null/blank or the content type is not in <see cref="AllowedContentTypes"/>.
    /// </exception>
    private static void ValidateRequest(CallgraphIngestRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (string.IsNullOrWhiteSpace(request.Language))
        {
            throw new CallgraphIngestionValidationException("Language is required.");
        }

        if (string.IsNullOrWhiteSpace(request.Component))
        {
            throw new CallgraphIngestionValidationException("Component is required.");
        }

        if (string.IsNullOrWhiteSpace(request.Version))
        {
            throw new CallgraphIngestionValidationException("Version is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactContentBase64))
        {
            throw new CallgraphIngestionValidationException("Artifact content is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactFileName))
        {
            throw new CallgraphIngestionValidationException("Artifact file name is required.");
        }

        if (string.IsNullOrWhiteSpace(request.ArtifactContentType) || !AllowedContentTypes.Contains(request.ArtifactContentType))
        {
            throw new CallgraphIngestionValidationException($"Unsupported artifact content type '{request.ArtifactContentType}'.");
        }
    }

    /// <summary>
    /// Computes the SHA-256 digest of <paramref name="buffer"/> and returns it as a hexadecimal string.
    /// </summary>
    private static string ComputeSha256(ReadOnlySpan<byte> buffer)
    {
        Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
        SHA256.HashData(buffer, hash);
        return Convert.ToHexString(hash);
    }

    /// <summary>
    /// Builds a canonical, ordinally-sorted text rendering of the document (schema, language, nodes,
    /// edges, roots, entrypoints) and returns the SHA-256 of its UTF-8 bytes.
    /// </summary>
    /// <remarks>
    /// Numeric fields are formatted with <see cref="CultureInfo.InvariantCulture"/> so that the hash
    /// does not depend on the culture of the host running the ingestion.
    /// </remarks>
    private static string ComputeGraphHash(CallgraphDocument document)
    {
        var builder = new StringBuilder();
        builder.Append("schema|").Append(document.Schema).AppendLine();
        builder.Append("language|").Append(document.LanguageType).Append('|').Append(document.Language).AppendLine();

        foreach (var node in document.Nodes.OrderBy(n => n.Id, StringComparer.Ordinal))
        {
            builder
                .Append(node.Id).Append('|')
                .Append(node.Name).Append('|')
                .Append(node.Kind).Append('|')
                .Append(node.Namespace).Append('|')
                .Append(node.File).Append('|')
                // Invariant formatting keeps this consistent with the other numeric fields in the hash input.
                .Append(node.Line?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(node.Purl).Append('|')
                .Append(node.SymbolDigest).Append('|')
                .Append(node.BuildId).Append('|')
                .Append(node.CodeId).Append('|')
                .Append(node.Language).Append('|')
                .Append(node.SymbolKey).Append('|')
                .Append(node.ArtifactKey).Append('|')
                .Append(node.Visibility).Append('|')
                .Append(node.IsEntrypointCandidate).Append('|')
                .Append(node.Flags).Append('|')
                .Append(Join(node.Evidence)).Append('|')
                .Append(JoinDict(node.Analyzer)).Append('|')
                .Append(JoinDict(node.Attributes))
                .AppendLine();
        }

        // Edges without an offset sort before edges with one (missing offsets are treated as -1).
        foreach (var edge in document.Edges
            .OrderBy(e => e.SourceId, StringComparer.Ordinal)
            .ThenBy(e => e.TargetId, StringComparer.Ordinal)
            .ThenBy(e => e.Type, StringComparer.Ordinal)
            .ThenBy(e => e.Offset ?? -1))
        {
            builder
                .Append(edge.SourceId).Append("->").Append(edge.TargetId).Append('|')
                .Append(edge.Type).Append('|')
                .Append(edge.Kind).Append('|')
                .Append(edge.Reason).Append('|')
                .Append(edge.Weight.ToString("G17", CultureInfo.InvariantCulture)).Append('|')
                .Append(edge.Offset?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(edge.IsResolved).Append('|')
                .Append(edge.Provenance).Append('|')
                .Append(edge.Purl).Append('|')
                .Append(edge.SymbolDigest).Append('|')
                .Append(edge.Confidence?.ToString("G17", CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
                .Append(Join(edge.Candidates)).Append('|')
                .Append(Join(edge.Evidence))
                .AppendLine();
        }

        foreach (var root in (document.Roots ?? new List<CallgraphRoot>()).OrderBy(r => r.Id, StringComparer.Ordinal))
        {
            builder.Append("root|").Append(root.Id).Append('|').Append(root.Phase).Append('|').Append(root.Source).AppendLine();
        }

        foreach (var entrypoint in document.Entrypoints
            .OrderBy(e => (int)e.Phase)
            .ThenBy(e => e.Order)
            .ThenBy(e => e.NodeId, StringComparer.Ordinal))
        {
            builder
                .Append("entrypoint|").Append(entrypoint.NodeId).Append('|')
                .Append(entrypoint.Kind).Append('|')
                .Append(entrypoint.Framework).Append('|')
                .Append(entrypoint.Phase).Append('|')
                .Append(entrypoint.Route).Append('|')
                .Append(entrypoint.HttpMethod).Append('|')
                .Append(entrypoint.Source).Append('|')
                .Append(entrypoint.Order.ToString(CultureInfo.InvariantCulture))
                .AppendLine();
        }

        return ComputeSha256(Encoding.UTF8.GetBytes(builder.ToString()));
    }

    /// <summary>
    /// Joins the values with commas after sorting them ordinally; returns an empty string for <c>null</c>.
    /// </summary>
    private static string Join(IEnumerable<string>? values)
    {
        if (values is null)
        {
            return string.Empty;
        }

        return string.Join(',', values.OrderBy(v => v, StringComparer.Ordinal));
    }

    /// <summary>
    /// Renders the dictionary as <c>key=value;</c> pairs ordered ordinally by key; returns an empty
    /// string for <c>null</c>.
    /// </summary>
    private static string JoinDict(IReadOnlyDictionary<string, string?>? values)
    {
        if (values is null)
        {
            return string.Empty;
        }

        var ordered = new StringBuilder();
        foreach (var kv in values.OrderBy(k => k.Key, StringComparer.Ordinal))
        {
            ordered.Append(kv.Key).Append('=').Append(kv.Value).Append(';');
        }

        return ordered.ToString();
    }
}
/// <summary>
/// Exception thrown when the ingestion request is invalid.
/// </summary>
public sealed class CallgraphIngestionValidationException : Exception
{
    /// <summary>
    /// Initializes a new instance with the framework's default exception message.
    /// </summary>
    public CallgraphIngestionValidationException()
    {
    }

    /// <summary>
    /// Initializes a new instance with a message describing the validation failure.
    /// </summary>
    /// <param name="message">Description of why the request was rejected.</param>
    public CallgraphIngestionValidationException(string message) : base(message)
    {
    }

    /// <summary>
    /// Initializes a new instance with a message and the exception that caused the validation failure.
    /// </summary>
    /// <param name="message">Description of why the request was rejected.</param>
    /// <param name="innerException">The underlying cause, or <c>null</c> when there is none.</param>
    public CallgraphIngestionValidationException(string message, Exception? innerException) : base(message, innerException)
    {
    }
}