514 lines
19 KiB
C#
514 lines
19 KiB
C#
|
|
using StellaOps.Signals.Models;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Text.Json;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace StellaOps.Signals.Parsing;
|
|
|
|
/// <summary>
|
|
/// Simple JSON-based callgraph parser used for initial language coverage.
|
|
/// </summary>
|
|
public sealed class SimpleJsonCallgraphParser : ICallgraphParser
|
|
{
|
|
private readonly JsonSerializerOptions serializerOptions;
|
|
|
|
/// <summary>
/// Creates a parser associated with the given language identifier.
/// </summary>
/// <param name="language">Value exposed through <see cref="Language"/>; must not be null, empty, or whitespace.</param>
/// <exception cref="ArgumentException"><paramref name="language"/> is null, empty, or whitespace.</exception>
public SimpleJsonCallgraphParser(string language)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(language);

    var options = new JsonSerializerOptions();
    options.PropertyNameCaseInsensitive = true;

    serializerOptions = options;
    Language = language;
}
|
|
|
|
/// <summary>
/// Language identifier that was passed to the constructor.
/// </summary>
public string Language { get; }
|
|
|
|
/// <summary>
/// Parses a JSON callgraph artifact, trying each supported layout in turn.
/// </summary>
/// <remarks>
/// Object payloads are probed as the legacy layout (top-level <c>graph</c> property) and then as the
/// schema v1 layout (top-level <c>nodes</c>/<c>edges</c>). Array payloads are treated as a flat edge list.
/// </remarks>
/// <param name="artifactStream">Stream containing the JSON artifact.</param>
/// <param name="cancellationToken">Token forwarded to the JSON reader.</param>
/// <returns>The parsed callgraph.</returns>
/// <exception cref="ArgumentNullException"><paramref name="artifactStream"/> is null.</exception>
/// <exception cref="CallgraphParserValidationException">No supported layout matched the payload.</exception>
public async Task<CallgraphParseResult> ParseAsync(Stream artifactStream, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(artifactStream);

    using var document = await JsonDocument.ParseAsync(artifactStream, cancellationToken: cancellationToken).ConfigureAwait(false);
    var root = document.RootElement;

    switch (root.ValueKind)
    {
        case JsonValueKind.Object:
            // The property-based layouts call JsonElement.TryGetProperty, which throws for
            // non-object elements, so they are only probed when the root is an object.
            if (TryParseLegacy(root, out var legacyResult))
            {
                return legacyResult;
            }

            if (TryParseSchemaV1(root, out var schemaResult))
            {
                return schemaResult;
            }

            break;

        case JsonValueKind.Array:
            // Previously unreachable: the legacy probe threw InvalidOperationException on array roots.
            if (TryParseFlatGraph(root, out var flatResult))
            {
                return flatResult;
            }

            break;
    }

    throw new CallgraphParserValidationException("Callgraph artifact payload is empty or missing required fields.");
}
|
|
|
|
/// <summary>
/// Attempts to parse the legacy layout, where nodes and edges are nested under a top-level <c>graph</c> object.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">Receives the parsed callgraph when the method returns <c>true</c>.</param>
/// <returns>
/// <c>true</c> when <paramref name="root"/> is an object with a <c>graph</c> property; otherwise <c>false</c>.
/// </returns>
/// <exception cref="CallgraphParserValidationException">
/// The <c>graph</c> value has no <c>nodes</c> array, a node has no usable string <c>id</c>,
/// or an edge lacks a usable string <c>source</c> or <c>target</c>.
/// </exception>
private static bool TryParseLegacy(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    // TryGetProperty throws for non-object elements, so check the kind first.
    if (root.ValueKind != JsonValueKind.Object)
    {
        return false;
    }

    if (!root.TryGetProperty("graph", out var graphElement))
    {
        return false;
    }

    if (graphElement.ValueKind != JsonValueKind.Object
        || !graphElement.TryGetProperty("nodes", out var nodesElement)
        || nodesElement.ValueKind != JsonValueKind.Array)
    {
        throw new CallgraphParserValidationException("Callgraph graph requires a nodes array.");
    }

    var nodes = new List<CallgraphNode>(nodesElement.GetArrayLength());
    foreach (var nodeElement in nodesElement.EnumerateArray())
    {
        // A missing or non-string id is reported as a validation error rather than
        // surfacing as KeyNotFoundException / InvalidOperationException.
        var id = nodeElement.ValueKind == JsonValueKind.Object ? GetString(nodeElement, "id")?.Trim() : null;
        if (string.IsNullOrWhiteSpace(id))
        {
            throw new CallgraphParserValidationException("Callgraph node is missing an id.");
        }

        nodes.Add(new CallgraphNode
        {
            Id = id,
            Name = GetString(nodeElement, "name") ?? id,
            Kind = GetString(nodeElement, "kind") ?? "function",
            Namespace = GetString(nodeElement, "namespace"),
            File = GetString(nodeElement, "file"),
            // Numbers that do not fit an Int32 are ignored instead of throwing FormatException.
            Line = GetNullableInt(nodeElement, "line"),
            Purl = GetString(nodeElement, "purl"),
            SymbolDigest = GetString(nodeElement, "symbol_digest", "symbolDigest"),
            BuildId = GetString(nodeElement, "build_id", "buildId"),
            Language = GetString(nodeElement, "language"),
            Evidence = GetStringArray(nodeElement, "evidence"),
            Analyzer = GetStringDictionary(nodeElement, "analyzer"),
            CodeId = GetString(nodeElement, "code_id", "codeId")
        });
    }

    var edges = new List<CallgraphEdge>();
    if (graphElement.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var edgeElement in edgesElement.EnumerateArray())
        {
            var isObject = edgeElement.ValueKind == JsonValueKind.Object;
            var source = isObject ? GetString(edgeElement, "source")?.Trim() : null;
            var target = isObject ? GetString(edgeElement, "target")?.Trim() : null;
            if (string.IsNullOrWhiteSpace(source) || string.IsNullOrWhiteSpace(target))
            {
                throw new CallgraphParserValidationException("Callgraph edge requires both source and target.");
            }

            edges.Add(new CallgraphEdge
            {
                SourceId = source,
                TargetId = target,
                Type = GetString(edgeElement, "type") ?? "call",
                Purl = GetString(edgeElement, "purl"),
                SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
                Candidates = GetStringArray(edgeElement, "candidates"),
                Confidence = GetNullableDouble(edgeElement, "confidence"),
                Evidence = GetStringArray(edgeElement, "evidence"),
                Gates = ParseGates(edgeElement),
                GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
            });
        }
    }

    var formatVersion = root.TryGetProperty("formatVersion", out var versionEl)
        ? versionEl.GetString()
        : null;

    // When schema_version is absent the format version doubles as the schema version.
    var schemaVersion = root.TryGetProperty("schema_version", out var schemaEl)
        ? schemaEl.GetString()
        : formatVersion;

    var roots = ParseRoots(root);
    var entrypoints = ParseEntrypoints(root);
    var analyzer = GetStringDictionary(root, "analyzer") ?? GetStringDictionary(root, "toolchain");

    result = new CallgraphParseResult(
        nodes,
        edges,
        roots,
        string.IsNullOrWhiteSpace(formatVersion) ? "1.0" : formatVersion!.Trim(),
        string.IsNullOrWhiteSpace(schemaVersion) ? "1.0" : schemaVersion!.Trim(),
        analyzer,
        entrypoints);
    return true;
}
|
|
|
|
/// <summary>
/// Attempts to parse the schema v1 layout, where <c>nodes</c> and/or <c>edges</c> sit directly on the root object.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">Receives the parsed callgraph when the method returns <c>true</c>.</param>
/// <returns>
/// <c>true</c> when <paramref name="root"/> is an object with a <c>nodes</c> or <c>edges</c> property; otherwise <c>false</c>.
/// </returns>
/// <exception cref="CallgraphParserValidationException">
/// A node has no usable string <c>sid</c>/<c>id</c>, or an edge lacks a usable string
/// <c>from</c>/<c>source</c> or <c>to</c>/<c>target</c>.
/// </exception>
private static bool TryParseSchemaV1(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    // TryGetProperty throws for non-object elements, so check the kind first.
    if (root.ValueKind != JsonValueKind.Object)
    {
        return false;
    }

    var hasNodes = root.TryGetProperty("nodes", out var nodesElement);
    var hasEdges = root.TryGetProperty("edges", out var edgesElement);
    if (!hasNodes && !hasEdges)
    {
        return false;
    }

    var nodes = new List<CallgraphNode>();
    if (hasNodes && nodesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var nodeElement in nodesElement.EnumerateArray())
        {
            // "sid" takes precedence over "id"; a missing or non-string identifier is a validation
            // error rather than a KeyNotFoundException / InvalidOperationException.
            var id = nodeElement.ValueKind == JsonValueKind.Object ? GetString(nodeElement, "sid", "id")?.Trim() : null;
            if (string.IsNullOrWhiteSpace(id))
            {
                throw new CallgraphParserValidationException("Callgraph node is missing an id.");
            }

            nodes.Add(new CallgraphNode
            {
                Id = id,
                Name = GetString(nodeElement, "name") ?? id,
                Kind = GetString(nodeElement, "kind") ?? "function",
                Namespace = GetString(nodeElement, "namespace"),
                File = GetString(nodeElement, "file"),
                // Numbers that do not fit an Int32 are ignored instead of throwing FormatException.
                Line = GetNullableInt(nodeElement, "line"),
                Purl = GetString(nodeElement, "purl"),
                SymbolDigest = GetString(nodeElement, "symbol_digest", "symbolDigest"),
                BuildId = GetString(nodeElement, "build_id", "buildId"),
                Language = GetString(nodeElement, "language"),
                Evidence = GetStringArray(nodeElement, "evidence"),
                Analyzer = GetStringDictionary(nodeElement, "analyzer"),
                CodeId = GetString(nodeElement, "code_id", "codeId")
            });
        }
    }

    var edges = new List<CallgraphEdge>();
    if (hasEdges && edgesElement.ValueKind == JsonValueKind.Array)
    {
        foreach (var edgeElement in edgesElement.EnumerateArray())
        {
            var isObject = edgeElement.ValueKind == JsonValueKind.Object;
            var from = isObject ? GetString(edgeElement, "from", "source")?.Trim() : null;
            var to = isObject ? GetString(edgeElement, "to", "target")?.Trim() : null;
            if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
            {
                throw new CallgraphParserValidationException("Callgraph edge requires both source and target.");
            }

            edges.Add(new CallgraphEdge
            {
                SourceId = from,
                TargetId = to,
                // "kind" takes precedence over "type"; "call" is the default.
                Type = GetString(edgeElement, "kind", "type") ?? "call",
                Purl = GetString(edgeElement, "purl"),
                SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
                Candidates = GetStringArray(edgeElement, "candidates"),
                Confidence = GetNullableDouble(edgeElement, "confidence"),
                Evidence = GetStringArray(edgeElement, "evidence"),
                Gates = ParseGates(edgeElement),
                GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
            });
        }
    }

    if (nodes.Count == 0)
    {
        // When nodes are omitted (framework overlay), derive them from the referenced edges.
        var uniqueNodeIds = new HashSet<string>(StringComparer.Ordinal);
        foreach (var edge in edges)
        {
            uniqueNodeIds.Add(edge.SourceId);
            uniqueNodeIds.Add(edge.TargetId);
        }

        foreach (var nodeId in uniqueNodeIds)
        {
            nodes.Add(new CallgraphNode { Id = nodeId, Name = nodeId, Kind = "function" });
        }
    }

    var schemaVersion = root.TryGetProperty("schema_version", out var schemaEl)
        ? schemaEl.GetString()
        : "1.0";

    var roots = ParseRoots(root);
    var analyzer = GetStringDictionary(root, "analyzer") ?? GetStringDictionary(root, "toolchain");

    // This layout has no separate format version; the schema version is passed for both arguments.
    var effectiveVersion = string.IsNullOrWhiteSpace(schemaVersion) ? "1.0" : schemaVersion!.Trim();

    result = new CallgraphParseResult(
        nodes,
        edges,
        roots,
        effectiveVersion,
        effectiveVersion,
        analyzer);
    return true;
}
|
|
|
|
/// <summary>
/// Attempts to parse the flat layout: a root array of edge objects, with nodes derived from the edge endpoints.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <param name="result">Receives the parsed callgraph when the method returns <c>true</c>.</param>
/// <returns>
/// <c>true</c> when <paramref name="root"/> is an array containing at least one usable edge; otherwise <c>false</c>.
/// </returns>
private static bool TryParseFlatGraph(JsonElement root, out CallgraphParseResult result)
{
    result = default!;

    if (root.ValueKind != JsonValueKind.Array)
    {
        return false;
    }

    var edges = new List<CallgraphEdge>();
    var uniqueNodeIds = new HashSet<string>(StringComparer.Ordinal);

    foreach (var edgeElement in root.EnumerateArray())
    {
        // Property lookups throw on non-object elements; skip them like any other unusable entry.
        if (edgeElement.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var source = GetString(edgeElement, "source", "from")?.Trim();
        var target = GetString(edgeElement, "target", "to")?.Trim();
        if (string.IsNullOrWhiteSpace(source) || string.IsNullOrWhiteSpace(target))
        {
            continue;
        }

        uniqueNodeIds.Add(source);
        uniqueNodeIds.Add(target);

        edges.Add(new CallgraphEdge
        {
            SourceId = source,
            TargetId = target,
            Type = GetString(edgeElement, "type", "kind") ?? "call",
            Purl = GetString(edgeElement, "purl"),
            SymbolDigest = GetString(edgeElement, "symbol_digest", "symbolDigest"),
            Candidates = GetStringArray(edgeElement, "candidates"),
            Confidence = GetNullableDouble(edgeElement, "confidence"),
            Evidence = GetStringArray(edgeElement, "evidence"),
            Gates = ParseGates(edgeElement),
            GateMultiplierBps = GetNullableInt(edgeElement, "gate_multiplier_bps", "gateMultiplierBps") ?? 10000
        });
    }

    if (edges.Count == 0)
    {
        return false;
    }

    var nodes = new List<CallgraphNode>(uniqueNodeIds.Count);
    foreach (var nodeId in uniqueNodeIds)
    {
        nodes.Add(new CallgraphNode { Id = nodeId, Name = nodeId, Kind = "function" });
    }

    result = new CallgraphParseResult(
        nodes,
        edges,
        Array.Empty<CallgraphRoot>(),
        "1.0",
        "1.0",
        null);
    return true;
}
|
|
|
|
/// <summary>
/// Reads the optional top-level <c>entrypoints</c> array.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <returns>
/// The entrypoints that carry a non-blank <c>nodeId</c>/<c>node_id</c>, numbered in the order they were accepted;
/// an empty list when the property is absent or not an array.
/// </returns>
private static IReadOnlyList<CallgraphEntrypoint> ParseEntrypoints(JsonElement root)
{
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty("entrypoints", out var entrypointsEl)
        || entrypointsEl.ValueKind != JsonValueKind.Array)
    {
        return Array.Empty<CallgraphEntrypoint>();
    }

    var entrypoints = new List<CallgraphEntrypoint>(entrypointsEl.GetArrayLength());
    var order = 0;
    foreach (var ep in entrypointsEl.EnumerateArray())
    {
        // Property lookups throw on non-object elements; skip them like entries without a node id.
        if (ep.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var nodeId = GetString(ep, "nodeId", "node_id");
        if (string.IsNullOrWhiteSpace(nodeId))
        {
            continue;
        }

        entrypoints.Add(new CallgraphEntrypoint
        {
            NodeId = nodeId.Trim(),
            Kind = ParseDefinedOrDefault(GetString(ep, "kind"), EntrypointKind.Unknown),
            Phase = ParseDefinedOrDefault(GetString(ep, "phase"), EntrypointPhase.Runtime),
            Framework = ParseDefinedOrDefault(GetString(ep, "framework"), EntrypointFramework.Unknown),
            Route = GetString(ep, "route"),
            HttpMethod = GetString(ep, "httpMethod", "http_method"),
            Source = GetString(ep, "source"),
            Order = order++
        });
    }

    return entrypoints;

    // Enum.TryParse also accepts numeric strings and returns values with no named member;
    // the IsDefined check makes those fall back to the default instead.
    static TEnum ParseDefinedOrDefault<TEnum>(string? raw, TEnum fallback)
        where TEnum : struct, Enum
    {
        return Enum.TryParse<TEnum>(raw, ignoreCase: true, out var parsed) && Enum.IsDefined(parsed)
            ? parsed
            : fallback;
    }
}
|
|
|
|
/// <summary>
/// Reads the optional top-level <c>roots</c> array.
/// </summary>
/// <param name="root">Root element of the artifact.</param>
/// <returns>
/// One <see cref="CallgraphRoot"/> per entry with a non-blank <c>id</c>; the phase defaults to <c>"runtime"</c>.
/// An empty list when the property is absent or not an array.
/// </returns>
private static IReadOnlyList<CallgraphRoot> ParseRoots(JsonElement root)
{
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty("roots", out var rootsEl)
        || rootsEl.ValueKind != JsonValueKind.Array)
    {
        return Array.Empty<CallgraphRoot>();
    }

    var roots = new List<CallgraphRoot>(rootsEl.GetArrayLength());
    foreach (var r in rootsEl.EnumerateArray())
    {
        // Property lookups throw on non-object elements (e.g. "roots": ["main"]);
        // skip them like entries without an id.
        if (r.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var id = GetString(r, "id");
        if (string.IsNullOrWhiteSpace(id))
        {
            continue;
        }

        var phase = GetString(r, "phase") ?? "runtime";
        var source = GetString(r, "source");
        roots.Add(new CallgraphRoot(id.Trim(), phase.Trim(), source));
    }

    return roots;
}
|
|
|
|
/// <summary>
/// Returns the first string-valued property among <paramref name="name1"/> and <paramref name="name2"/>.
/// </summary>
/// <param name="element">Element to read from.</param>
/// <param name="name1">Preferred property name.</param>
/// <param name="name2">Optional alternative property name, consulted when the first is absent or not a string.</param>
/// <returns>
/// The string value, or <c>null</c> when <paramref name="element"/> is not an object or neither property holds a string.
/// </returns>
private static string? GetString(JsonElement element, string name1, string? name2 = null)
{
    // JsonElement.TryGetProperty throws InvalidOperationException for non-object elements.
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (element.TryGetProperty(name1, out var v1) && v1.ValueKind == JsonValueKind.String)
    {
        return v1.GetString();
    }

    if (!string.IsNullOrEmpty(name2) && element.TryGetProperty(name2, out var v2) && v2.ValueKind == JsonValueKind.String)
    {
        return v2.GetString();
    }

    return null;
}
|
|
|
|
/// <summary>
/// Collects the string items of an array-valued property.
/// </summary>
/// <param name="element">Element to read from.</param>
/// <param name="name">Property name.</param>
/// <returns>
/// The string items in document order (non-string items are dropped), or <c>null</c> when the property
/// is absent or not an array.
/// </returns>
private static IReadOnlyList<string>? GetStringArray(JsonElement element, string name)
{
    if (element.TryGetProperty(name, out var candidate) && candidate.ValueKind == JsonValueKind.Array)
    {
        var strings = new List<string>(candidate.GetArrayLength());
        foreach (var entry in candidate.EnumerateArray())
        {
            if (entry.ValueKind != JsonValueKind.String)
            {
                continue;
            }

            strings.Add(entry.GetString()!);
        }

        return strings;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Flattens an object-valued property into a case-insensitive string dictionary.
/// </summary>
/// <param name="element">Element to read from.</param>
/// <param name="name">Property name.</param>
/// <returns>
/// A dictionary keyed by property name (ordinal, case-insensitive; later duplicates overwrite earlier ones).
/// String values are stored as-is and other values via <see cref="JsonElement.ToString"/>.
/// <c>null</c> when the property is absent or not an object.
/// </returns>
private static IReadOnlyDictionary<string, string?>? GetStringDictionary(JsonElement element, string name)
{
    if (element.TryGetProperty(name, out var section) && section.ValueKind == JsonValueKind.Object)
    {
        var map = new Dictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
        foreach (var property in section.EnumerateObject())
        {
            var value = property.Value;
            string? text;
            if (value.ValueKind == JsonValueKind.String)
            {
                text = value.GetString();
            }
            else
            {
                text = value.ToString();
            }

            map[property.Name] = text;
        }

        return map;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Reads a numeric property as a double.
/// </summary>
/// <param name="element">Element to read from.</param>
/// <param name="name">Property name.</param>
/// <returns>
/// The value when the property exists, is a JSON number, and converts to a double; otherwise <c>null</c>.
/// </returns>
private static double? GetNullableDouble(JsonElement element, string name)
{
    if (element.TryGetProperty(name, out var candidate)
        && candidate.ValueKind == JsonValueKind.Number
        && candidate.TryGetDouble(out var parsed))
    {
        return parsed;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Reads the first Int32-valued property among <paramref name="name1"/> and <paramref name="name2"/>.
/// </summary>
/// <param name="element">Element to read from.</param>
/// <param name="name1">Preferred property name.</param>
/// <param name="name2">Optional alternative property name, consulted when the first does not yield an Int32.</param>
/// <returns>The integer value, or <c>null</c> when neither property is a JSON number that fits an Int32.</returns>
private static int? GetNullableInt(JsonElement element, string name1, string? name2 = null)
{
    int value;

    if (element.TryGetProperty(name1, out var primary)
        && primary.ValueKind == JsonValueKind.Number
        && primary.TryGetInt32(out value))
    {
        return value;
    }

    if (string.IsNullOrEmpty(name2))
    {
        return null;
    }

    var found = element.TryGetProperty(name2, out var secondary)
        && secondary.ValueKind == JsonValueKind.Number
        && secondary.TryGetInt32(out value);

    return found ? value : null;
}
|
|
|
|
/// <summary>
/// Reads the optional <c>gates</c> array of an edge.
/// </summary>
/// <param name="edgeElement">Edge element to read from.</param>
/// <returns>
/// The gates whose <c>type</c> could be parsed, or <c>null</c> when the property is absent, not an array,
/// or yields no usable gate. Entries that are not objects are ignored.
/// </returns>
private static IReadOnlyList<CallgraphGate>? ParseGates(JsonElement edgeElement)
{
    var hasGateArray = edgeElement.TryGetProperty("gates", out var gateArray)
        && gateArray.ValueKind == JsonValueKind.Array;
    if (!hasGateArray)
    {
        return null;
    }

    var parsed = new List<CallgraphGate>(gateArray.GetArrayLength());
    foreach (var candidate in gateArray.EnumerateArray())
    {
        if (candidate.ValueKind == JsonValueKind.Object
            && TryParseGateType(GetString(candidate, "type"), out var gateType))
        {
            parsed.Add(new CallgraphGate
            {
                Type = gateType,
                Detail = GetString(candidate, "detail") ?? string.Empty,
                GuardSymbol = GetString(candidate, "guard_symbol", "guardSymbol") ?? string.Empty,
                SourceFile = GetString(candidate, "source_file", "sourceFile"),
                LineNumber = GetNullableInt(candidate, "line_number", "lineNumber"),
                // Gates without an explicit confidence get 1.0.
                Confidence = GetNullableDouble(candidate, "confidence") ?? 1.0,
                DetectionMethod = GetString(candidate, "detection_method", "detectionMethod") ?? string.Empty
            });
        }
    }

    if (parsed.Count > 0)
    {
        return parsed;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Converts a gate type string to <see cref="CallgraphGateType"/>, ignoring case, underscores, and hyphens.
/// </summary>
/// <param name="raw">Raw type text from the artifact.</param>
/// <param name="gateType">Receives the parsed value, or the enum default when parsing fails.</param>
/// <returns><c>true</c> when <paramref name="raw"/> maps to a defined enum member; otherwise <c>false</c>.</returns>
private static bool TryParseGateType(string? raw, out CallgraphGateType gateType)
{
    gateType = default;
    if (string.IsNullOrWhiteSpace(raw))
    {
        return false;
    }

    var normalized = raw.Trim()
        .Replace("_", string.Empty, StringComparison.Ordinal)
        .Replace("-", string.Empty, StringComparison.Ordinal);

    // Enum.TryParse also succeeds for numeric strings (including "-1", which becomes "1" after the
    // hyphen is stripped) and can return values with no named member; reject anything not defined.
    if (Enum.TryParse(normalized, ignoreCase: true, out gateType) && Enum.IsDefined(gateType))
    {
        return true;
    }

    gateType = default;
    return false;
}
|
|
|
|
}
|