Files
git.stella-ops.org/src/Signals/StellaOps.Signals/Parsing/SimpleJsonCallgraphParser.cs
2026-02-01 21:37:40 +02:00

514 lines
19 KiB
C#

using StellaOps.Signals.Models;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
namespace StellaOps.Signals.Parsing;
/// <summary>
/// Simple JSON-based callgraph parser used for initial language coverage.
/// </summary>
public sealed class SimpleJsonCallgraphParser : ICallgraphParser
{
// NOTE(review): assigned in the constructor but not read anywhere in this file — confirm whether it is still needed.
private readonly JsonSerializerOptions serializerOptions;

/// <summary>
/// Creates a parser bound to a single language identifier.
/// </summary>
/// <param name="language">Language identifier exposed through <see cref="Language"/>; must not be null, empty or whitespace.</param>
/// <exception cref="ArgumentException"><paramref name="language"/> is empty or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="language"/> is <c>null</c>.</exception>
public SimpleJsonCallgraphParser(string language)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(language);

    serializerOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
    Language = language;
}

/// <summary>
/// Language identifier supplied at construction time.
/// </summary>
public string Language { get; }
/// <summary>
/// Reads a JSON callgraph artifact and parses it with the first matching format:
/// the legacy <c>graph</c> envelope, the schema v1 layout (top-level <c>nodes</c>/<c>edges</c>),
/// or a flat top-level array of edges.
/// </summary>
/// <param name="artifactStream">Stream containing the JSON artifact.</param>
/// <param name="cancellationToken">Token forwarded to the JSON reader.</param>
/// <returns>The parsed callgraph.</returns>
/// <exception cref="ArgumentNullException"><paramref name="artifactStream"/> is <c>null</c>.</exception>
/// <exception cref="CallgraphParserValidationException">No supported format matched the payload.</exception>
public async Task<CallgraphParseResult> ParseAsync(Stream artifactStream, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(artifactStream);

    using var document = await JsonDocument.ParseAsync(artifactStream, cancellationToken: cancellationToken).ConfigureAwait(false);
    var root = document.RootElement;

    // The legacy and schema v1 parsers look up properties on the root. JsonElement.TryGetProperty
    // throws InvalidOperationException for non-object elements, so only hand them object roots;
    // otherwise a top-level array could never reach the flat-graph parser.
    if (root.ValueKind == JsonValueKind.Object)
    {
        if (TryParseLegacy(root, out var legacyResult))
        {
            return legacyResult;
        }

        if (TryParseSchemaV1(root, out var schemaResult))
        {
            return schemaResult;
        }
    }

    if (TryParseFlatGraph(root, out var flatResult))
    {
        return flatResult;
    }

    throw new CallgraphParserValidationException("Callgraph artifact payload is empty or missing required fields.");
}
/// <summary>
/// Attempts to parse the legacy envelope, where nodes and edges are nested under a top-level <c>graph</c> object.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">The parsed callgraph when the method returns <c>true</c>; otherwise unset.</param>
/// <returns>
/// <c>true</c> when <paramref name="root"/> is an object whose <c>graph</c> property is an object holding a
/// <c>nodes</c> array; <c>false</c> otherwise, so the caller can try another format.
/// </returns>
/// <exception cref="CallgraphParserValidationException">
/// A node has no non-blank string <c>id</c>, or an edge lacks a non-blank string <c>source</c> or <c>target</c>.
/// </exception>
private static bool TryParseLegacy(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    // TryGetProperty/GetProperty throw on non-object elements and GetArrayLength throws on non-arrays,
    // so verify the envelope shape up front instead of surfacing InvalidOperationException/KeyNotFoundException.
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty("graph", out var graphElement)
        || graphElement.ValueKind != JsonValueKind.Object
        || !graphElement.TryGetProperty("nodes", out var nodesElement)
        || nodesElement.ValueKind != JsonValueKind.Array)
    {
        return false;
    }

    var nodes = new List<CallgraphNode>(nodesElement.GetArrayLength());
    foreach (var nodeElement in nodesElement.EnumerateArray())
    {
        // Non-object entries and missing/non-string ids are reported as validation errors.
        var id = nodeElement.ValueKind == JsonValueKind.Object ? GetString(nodeElement, "id") : null;
        if (string.IsNullOrWhiteSpace(id))
        {
            throw new CallgraphParserValidationException("Callgraph node is missing an id.");
        }

        var nodeId = id.Trim();
        nodes.Add(new CallgraphNode
        {
            Id = nodeId,
            Name = GetString(nodeElement, "name") ?? nodeId,
            Kind = GetString(nodeElement, "kind") ?? "function",
            Namespace = GetString(nodeElement, "namespace"),
            File = GetString(nodeElement, "file"),
            // TryGetInt32-based lookup: a fractional or out-of-range number yields null instead of a FormatException.
            Line = GetNullableInt(nodeElement, "line"),
            Purl = GetString(nodeElement, "purl"),
            SymbolDigest = GetString(nodeElement, "symbol_digest", "symbolDigest"),
            BuildId = GetString(nodeElement, "build_id", "buildId"),
            Language = GetString(nodeElement, "language"),
            Evidence = GetStringArray(nodeElement, "evidence"),
            Analyzer = GetStringDictionary(nodeElement, "analyzer"),
            CodeId = GetString(nodeElement, "code_id", "codeId")
        });
    }

    var edges = new List<CallgraphEdge>();
    if (graphElement.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var edgeElement in edgesElement.EnumerateArray())
        {
            var isObject = edgeElement.ValueKind == JsonValueKind.Object;
            var source = isObject ? GetString(edgeElement, "source") : null;
            var target = isObject ? GetString(edgeElement, "target") : null;
            if (string.IsNullOrWhiteSpace(source) || string.IsNullOrWhiteSpace(target))
            {
                throw new CallgraphParserValidationException("Callgraph edge requires both source and target.");
            }

            edges.Add(new CallgraphEdge
            {
                SourceId = source.Trim(),
                TargetId = target.Trim(),
                Type = GetString(edgeElement, "type") ?? "call",
                Purl = GetString(edgeElement, "purl"),
                SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
                Candidates = GetStringArray(edgeElement, "candidates"),
                Confidence = GetNullableDouble(edgeElement, "confidence"),
                Evidence = GetStringArray(edgeElement, "evidence"),
                Gates = ParseGates(edgeElement),
                GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
            });
        }
    }

    var formatVersion = GetString(root, "formatVersion");

    // When schema_version is present it wins even if it carries no usable string (falls back to "1.0" below);
    // formatVersion is only used when the property is absent altogether.
    var schemaVersion = root.TryGetProperty("schema_version", out var schemaEl)
        ? (schemaEl.ValueKind == JsonValueKind.String ? schemaEl.GetString() : null)
        : formatVersion;

    var roots = ParseRoots(root);
    var entrypoints = ParseEntrypoints(root);
    var analyzer = GetStringDictionary(root, "analyzer") ?? GetStringDictionary(root, "toolchain");

    result = new CallgraphParseResult(
        nodes,
        edges,
        roots,
        string.IsNullOrWhiteSpace(formatVersion) ? "1.0" : formatVersion.Trim(),
        string.IsNullOrWhiteSpace(schemaVersion) ? "1.0" : schemaVersion.Trim(),
        analyzer,
        entrypoints);
    return true;
}
/// <summary>
/// Attempts to parse the schema v1 layout, where <c>nodes</c> and/or <c>edges</c> sit directly on the root object.
/// When no nodes are parsed, nodes are synthesized from the edge endpoints.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">The parsed callgraph when the method returns <c>true</c>; otherwise unset.</param>
/// <returns><c>true</c> when <paramref name="root"/> is an object exposing <c>nodes</c> or <c>edges</c>; otherwise <c>false</c>.</returns>
/// <exception cref="CallgraphParserValidationException">
/// A node has no non-blank string <c>sid</c>/<c>id</c>, or an edge lacks a non-blank string
/// <c>from</c>/<c>source</c> or <c>to</c>/<c>target</c>.
/// </exception>
private static bool TryParseSchemaV1(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    // TryGetProperty throws InvalidOperationException on non-object elements.
    if (root.ValueKind != JsonValueKind.Object)
    {
        return false;
    }

    if (!root.TryGetProperty("nodes", out var nodesElement) && !root.TryGetProperty("edges", out _))
    {
        return false;
    }

    var nodes = new List<CallgraphNode>();
    if (nodesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var nodeElement in nodesElement.EnumerateArray())
        {
            // "sid" is preferred, "id" is the fallback; anything else is a validation error
            // rather than a KeyNotFoundException/InvalidOperationException.
            var id = nodeElement.ValueKind == JsonValueKind.Object ? GetString(nodeElement, "sid", "id") : null;
            if (string.IsNullOrWhiteSpace(id))
            {
                throw new CallgraphParserValidationException("Callgraph node is missing an id.");
            }

            var nodeId = id.Trim();
            nodes.Add(new CallgraphNode
            {
                Id = nodeId,
                Name = GetString(nodeElement, "name") ?? nodeId,
                Kind = GetString(nodeElement, "kind") ?? "function",
                Namespace = GetString(nodeElement, "namespace"),
                File = GetString(nodeElement, "file"),
                // TryGetInt32-based lookup: a fractional or out-of-range number yields null instead of a FormatException.
                Line = GetNullableInt(nodeElement, "line"),
                Purl = GetString(nodeElement, "purl"),
                SymbolDigest = GetString(nodeElement, "symbol_digest", "symbolDigest"),
                BuildId = GetString(nodeElement, "build_id", "buildId"),
                Language = GetString(nodeElement, "language"),
                Evidence = GetStringArray(nodeElement, "evidence"),
                Analyzer = GetStringDictionary(nodeElement, "analyzer"),
                CodeId = GetString(nodeElement, "code_id", "codeId")
            });
        }
    }

    var edges = new List<CallgraphEdge>();
    if (root.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var edgeElement in edgesElement.EnumerateArray())
        {
            var isObject = edgeElement.ValueKind == JsonValueKind.Object;
            var from = isObject ? GetString(edgeElement, "from", "source") : null;
            var to = isObject ? GetString(edgeElement, "to", "target") : null;
            if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
            {
                throw new CallgraphParserValidationException("Callgraph edge requires both source and target.");
            }

            edges.Add(new CallgraphEdge
            {
                SourceId = from.Trim(),
                TargetId = to.Trim(),
                // "kind" is preferred, "type" is the fallback, "call" the default.
                Type = GetString(edgeElement, "kind", "type") ?? "call",
                Purl = GetString(edgeElement, "purl"),
                SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
                Candidates = GetStringArray(edgeElement, "candidates"),
                Confidence = GetNullableDouble(edgeElement, "confidence"),
                Evidence = GetStringArray(edgeElement, "evidence"),
                Gates = ParseGates(edgeElement),
                GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
            });
        }
    }

    if (nodes.Count == 0)
    {
        // When nodes are omitted (framework overlay), derive them from the referenced edges.
        var uniqueNodeIds = new HashSet<string>(StringComparer.Ordinal);
        foreach (var edge in edges)
        {
            uniqueNodeIds.Add(edge.SourceId);
            uniqueNodeIds.Add(edge.TargetId);
        }

        foreach (var nodeId in uniqueNodeIds)
        {
            nodes.Add(new CallgraphNode { Id = nodeId, Name = nodeId, Kind = "function" });
        }
    }

    // A missing, null, blank or non-string schema_version falls back to "1.0".
    var schemaVersion = GetString(root, "schema_version");
    var version = string.IsNullOrWhiteSpace(schemaVersion) ? "1.0" : schemaVersion.Trim();

    var roots = ParseRoots(root);
    var analyzer = GetStringDictionary(root, "analyzer") ?? GetStringDictionary(root, "toolchain");

    // The same version string is passed for both version arguments, as in the original layout handling.
    result = new CallgraphParseResult(
        nodes,
        edges,
        roots,
        version,
        version,
        analyzer);
    return true;
}
/// <summary>
/// Attempts to parse the flat format: a top-level array of edge objects, with nodes derived from the edge endpoints.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">The parsed callgraph when the method returns <c>true</c>; otherwise unset.</param>
/// <returns>
/// <c>true</c> when <paramref name="root"/> is an array containing at least one entry with both a
/// <c>source</c>/<c>from</c> and a <c>target</c>/<c>to</c> string; otherwise <c>false</c>.
/// </returns>
private static bool TryParseFlatGraph(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    // Flat graph format: array of edges only, nodes derived from edge endpoints
    if (root.ValueKind != JsonValueKind.Array)
    {
        return false;
    }

    var edges = new List<CallgraphEdge>();
    var uniqueNodeIds = new HashSet<string>(StringComparer.Ordinal);
    foreach (var edgeElement in root.EnumerateArray())
    {
        // Property lookups throw InvalidOperationException on non-object elements (numbers, strings,
        // nested arrays); skip them the same way incomplete edges are skipped below.
        if (edgeElement.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var source = GetString(edgeElement, "source", "from");
        var target = GetString(edgeElement, "target", "to");
        if (string.IsNullOrWhiteSpace(source) || string.IsNullOrWhiteSpace(target))
        {
            continue;
        }

        var sourceId = source.Trim();
        var targetId = target.Trim();
        uniqueNodeIds.Add(sourceId);
        uniqueNodeIds.Add(targetId);

        edges.Add(new CallgraphEdge
        {
            SourceId = sourceId,
            TargetId = targetId,
            Type = GetString(edgeElement, "type", "kind") ?? "call",
            Purl = GetString(edgeElement, "purl"),
            SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
            Candidates = GetStringArray(edgeElement, "candidates"),
            Confidence = GetNullableDouble(edgeElement, "confidence"),
            Evidence = GetStringArray(edgeElement, "evidence"),
            Gates = ParseGates(edgeElement),
            GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
        });
    }

    if (edges.Count == 0)
    {
        return false;
    }

    var nodes = new List<CallgraphNode>(uniqueNodeIds.Count);
    foreach (var nodeId in uniqueNodeIds)
    {
        nodes.Add(new CallgraphNode { Id = nodeId, Name = nodeId, Kind = "function" });
    }

    result = new CallgraphParseResult(
        nodes,
        edges,
        Array.Empty<CallgraphRoot>(),
        "1.0",
        "1.0",
        null);
    return true;
}
/// <summary>
/// Reads the optional top-level <c>entrypoints</c> array.
/// </summary>
/// <param name="root">Root object of the artifact.</param>
/// <returns>
/// One entry per array element that is an object with a non-blank <c>nodeId</c>/<c>node_id</c>;
/// an empty list when the property is absent or not an array. <c>Order</c> counts kept entries only.
/// </returns>
private static IReadOnlyList<CallgraphEntrypoint> ParseEntrypoints(JsonElement root)
{
    if (!root.TryGetProperty("entrypoints", out var entrypointsEl) || entrypointsEl.ValueKind != JsonValueKind.Array)
    {
        return Array.Empty<CallgraphEntrypoint>();
    }

    var entrypoints = new List<CallgraphEntrypoint>(entrypointsEl.GetArrayLength());
    var order = 0;
    foreach (var ep in entrypointsEl.EnumerateArray())
    {
        // Property lookups throw InvalidOperationException on non-object elements; skip them
        // like entries without a node id.
        if (ep.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var nodeId = GetString(ep, "nodeId", "node_id");
        if (string.IsNullOrWhiteSpace(nodeId))
        {
            continue;
        }

        var kindStr = GetString(ep, "kind") ?? "unknown";
        var phaseStr = GetString(ep, "phase") ?? "runtime";
        var frameworkStr = GetString(ep, "framework") ?? "unknown";

        entrypoints.Add(new CallgraphEntrypoint
        {
            NodeId = nodeId.Trim(),
            Kind = ParseOrDefault(kindStr, EntrypointKind.Unknown),
            Phase = ParseOrDefault(phaseStr, EntrypointPhase.Runtime),
            Framework = ParseOrDefault(frameworkStr, EntrypointFramework.Unknown),
            Route = GetString(ep, "route"),
            HttpMethod = GetString(ep, "httpMethod", "http_method"),
            Source = GetString(ep, "source"),
            Order = order++
        });
    }

    return entrypoints;

    // Enum.TryParse succeeds for numeric text (e.g. "42") even when no member has that value;
    // Enum.IsDefined keeps such input on the fallback instead of producing an undeclared enum value.
    // NOTE(review): assumes these enums are not [Flags] — confirm against their declarations.
    static TEnum ParseOrDefault<TEnum>(string raw, TEnum fallback)
        where TEnum : struct, Enum
        => Enum.TryParse<TEnum>(raw, ignoreCase: true, out var parsed) && Enum.IsDefined(parsed)
            ? parsed
            : fallback;
}
/// <summary>
/// Reads the optional top-level <c>roots</c> array.
/// </summary>
/// <param name="root">Root object of the artifact.</param>
/// <returns>
/// One <see cref="CallgraphRoot"/> per array element that is an object with a non-blank string <c>id</c>;
/// an empty list when the property is absent or not an array. A missing <c>phase</c> defaults to <c>"runtime"</c>.
/// </returns>
private static IReadOnlyList<CallgraphRoot> ParseRoots(JsonElement root)
{
    if (!root.TryGetProperty("roots", out var rootsEl) || rootsEl.ValueKind != JsonValueKind.Array)
    {
        return Array.Empty<CallgraphRoot>();
    }

    var roots = new List<CallgraphRoot>(rootsEl.GetArrayLength());
    foreach (var r in rootsEl.EnumerateArray())
    {
        // Property lookups throw InvalidOperationException on non-object elements; skip them
        // like entries without an id.
        if (r.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var id = GetString(r, "id");
        if (string.IsNullOrWhiteSpace(id))
        {
            continue;
        }

        var phase = GetString(r, "phase") ?? "runtime";
        var source = GetString(r, "source");
        roots.Add(new CallgraphRoot(id.Trim(), phase.Trim(), source));
    }

    return roots;
}
/// <summary>
/// Returns the string value of <paramref name="name1"/>, or of <paramref name="name2"/> when the first
/// property is absent or not a JSON string.
/// </summary>
/// <param name="element">Element to read from; anything other than a JSON object yields <c>null</c>.</param>
/// <param name="name1">Preferred property name.</param>
/// <param name="name2">Optional alternative property name.</param>
/// <returns>The string value, or <c>null</c> when neither property holds a JSON string.</returns>
private static string? GetString(JsonElement element, string name1, string? name2 = null)
{
    // JsonElement.TryGetProperty throws InvalidOperationException unless the element is an object.
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (element.TryGetProperty(name1, out var primary) && primary.ValueKind == JsonValueKind.String)
    {
        return primary.GetString();
    }

    if (!string.IsNullOrEmpty(name2)
        && element.TryGetProperty(name2, out var alternate)
        && alternate.ValueKind == JsonValueKind.String)
    {
        return alternate.GetString();
    }

    return null;
}
/// <summary>
/// Returns the string items of the array stored under <paramref name="name"/>; non-string items are dropped.
/// </summary>
/// <param name="element">Element to read from; anything other than a JSON object yields <c>null</c>.</param>
/// <param name="name">Property name of the array.</param>
/// <returns>The string items in document order, or <c>null</c> when the property is absent or not an array.</returns>
private static IReadOnlyList<string>? GetStringArray(JsonElement element, string name)
{
    // JsonElement.TryGetProperty throws InvalidOperationException unless the element is an object.
    if (element.ValueKind != JsonValueKind.Object
        || !element.TryGetProperty(name, out var array)
        || array.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    var values = new List<string>(array.GetArrayLength());
    foreach (var item in array.EnumerateArray())
    {
        if (item.ValueKind == JsonValueKind.String)
        {
            values.Add(item.GetString()!);
        }
    }

    return values;
}
/// <summary>
/// Flattens the object stored under <paramref name="name"/> into a case-insensitive string dictionary.
/// String values are copied as-is; every other value is converted with <see cref="JsonElement.ToString"/>.
/// </summary>
/// <param name="element">Element to read from; anything other than a JSON object yields <c>null</c>.</param>
/// <param name="name">Property name of the nested object.</param>
/// <returns>The dictionary, or <c>null</c> when the property is absent or not an object.</returns>
private static IReadOnlyDictionary<string, string?>? GetStringDictionary(JsonElement element, string name)
{
    // JsonElement.TryGetProperty throws InvalidOperationException unless the element is an object.
    if (element.ValueKind != JsonValueKind.Object
        || !element.TryGetProperty(name, out var obj)
        || obj.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    var dict = new Dictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
    foreach (var prop in obj.EnumerateObject())
    {
        // Indexer assignment: a later property whose name differs only by case overwrites the earlier one.
        dict[prop.Name] = prop.Value.ValueKind == JsonValueKind.String
            ? prop.Value.GetString()
            : prop.Value.ToString();
    }

    return dict;
}
/// <summary>
/// Reads a JSON number stored under <paramref name="name"/> as a <see cref="double"/>.
/// </summary>
/// <param name="element">Element to read from; anything other than a JSON object yields <c>null</c>.</param>
/// <param name="name">Property name.</param>
/// <returns>The value, or <c>null</c> when the property is absent, not a number, or not representable as a double.</returns>
private static double? GetNullableDouble(JsonElement element, string name)
{
    // JsonElement.TryGetProperty throws InvalidOperationException unless the element is an object.
    if (element.ValueKind != JsonValueKind.Object || !element.TryGetProperty(name, out var value))
    {
        return null;
    }

    return value.ValueKind == JsonValueKind.Number && value.TryGetDouble(out var number)
        ? number
        : null;
}
/// <summary>
/// Reads a 32-bit integer from <paramref name="name1"/>, or from <paramref name="name2"/> when the first
/// property is absent, not a number, or not representable as an <see cref="int"/>.
/// </summary>
/// <param name="element">Element to read from; anything other than a JSON object yields <c>null</c>.</param>
/// <param name="name1">Preferred property name.</param>
/// <param name="name2">Optional alternative property name.</param>
/// <returns>The integer value, or <c>null</c> when neither property holds one.</returns>
private static int? GetNullableInt(JsonElement element, string name1, string? name2 = null)
{
    // JsonElement.TryGetProperty throws InvalidOperationException unless the element is an object.
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (element.TryGetProperty(name1, out var primary)
        && primary.ValueKind == JsonValueKind.Number
        && primary.TryGetInt32(out var primaryValue))
    {
        return primaryValue;
    }

    if (!string.IsNullOrEmpty(name2)
        && element.TryGetProperty(name2, out var alternate)
        && alternate.ValueKind == JsonValueKind.Number
        && alternate.TryGetInt32(out var alternateValue))
    {
        return alternateValue;
    }

    return null;
}
/// <summary>
/// Reads the optional <c>gates</c> array of an edge.
/// </summary>
/// <param name="edgeElement">Edge object to read from.</param>
/// <returns>
/// The gates whose entry is an object with a recognizable <c>type</c>; <c>null</c> when the property is
/// absent, not an array, or yields no usable gate.
/// </returns>
private static IReadOnlyList<CallgraphGate>? ParseGates(JsonElement edgeElement)
{
    var hasGateArray = edgeElement.TryGetProperty("gates", out var gateArray)
        && gateArray.ValueKind == JsonValueKind.Array;
    if (!hasGateArray)
    {
        return null;
    }

    var parsed = new List<CallgraphGate>(gateArray.GetArrayLength());
    foreach (var candidate in gateArray.EnumerateArray())
    {
        // Entries that are not objects, or whose type cannot be mapped, are ignored.
        if (candidate.ValueKind != JsonValueKind.Object
            || !TryParseGateType(GetString(candidate, "type"), out var kind))
        {
            continue;
        }

        var gate = new CallgraphGate
        {
            Type = kind,
            Detail = GetString(candidate, "detail") ?? string.Empty,
            GuardSymbol = GetString(candidate, "guard_symbol", "guardSymbol") ?? string.Empty,
            SourceFile = GetString(candidate, "source_file", "sourceFile"),
            LineNumber = GetNullableInt(candidate, "line_number", "lineNumber"),
            // A gate without an explicit confidence is treated as fully confident.
            Confidence = GetNullableDouble(candidate, "confidence") ?? 1.0,
            DetectionMethod = GetString(candidate, "detection_method", "detectionMethod") ?? string.Empty
        };
        parsed.Add(gate);
    }

    return parsed.Count > 0 ? parsed : null;
}
/// <summary>
/// Maps a raw gate type string to <see cref="CallgraphGateType"/>, ignoring case and the separators
/// <c>_</c> and <c>-</c>.
/// </summary>
/// <param name="raw">Raw type text; may be <c>null</c>.</param>
/// <param name="gateType">The parsed member on success; <c>default</c> otherwise.</param>
/// <returns><c>true</c> when <paramref name="raw"/> resolves to a declared enum member; otherwise <c>false</c>.</returns>
private static bool TryParseGateType(string? raw, out CallgraphGateType gateType)
{
    gateType = default;
    if (string.IsNullOrWhiteSpace(raw))
    {
        return false;
    }

    var normalized = raw.Trim()
        .Replace("_", string.Empty, StringComparison.Ordinal)
        .Replace("-", string.Empty, StringComparison.Ordinal);

    // Enum.TryParse also succeeds for numeric text (e.g. "42") that matches no declared member;
    // Enum.IsDefined rejects such values so callers never receive an undeclared gate type.
    // NOTE(review): assumes CallgraphGateType is not a [Flags] enum — confirm against its declaration.
    if (Enum.TryParse(normalized, ignoreCase: true, out CallgraphGateType parsed) && Enum.IsDefined(parsed))
    {
        gateType = parsed;
        return true;
    }

    return false;
}
}