using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Text.Json;

namespace StellaOps.Scanner.Reachability;

/// <summary>
/// Builds reachability graphs with full schema support including
/// rich node metadata, confidence levels, and source provenance.
/// </summary>
/// <remarks>
/// The builder keeps two parallel views: a "legacy" view made of bare symbol ids and
/// (from, to, kind) edges, and a "rich" view carrying the extra metadata. Every rich
/// node/edge is mirrored into the legacy view, so the legacy sets always describe the
/// complete graph.
/// </remarks>
public sealed class ReachabilityGraphBuilder
{
    private const string GraphSchemaVersion = "1.0";
    private const string DefaultNodeKind = "symbol";
    private const string DefaultEdgeKind = "call";
    private const string DefaultOrigin = "static";

    // Serializer options are cached: creating a JsonSerializerOptions instance per call
    // discards the serializer's metadata cache every time (analyzer rule CA1869).
    private static readonly JsonSerializerOptions IndentedJsonOptions = CreateJsonOptions(writeIndented: true);
    private static readonly JsonSerializerOptions CompactJsonOptions = CreateJsonOptions(writeIndented: false);

    // Fixture files read by FromFixture, in processing order.
    private static readonly string[] FixtureFileNames =
    {
        "callgraph.static.json",
        "callgraph.framework.json"
    };

    private readonly Dictionary<string, RichNode> _richNodes = new(StringComparer.Ordinal);
    private readonly HashSet<RichEdge> _richEdges = new();

    // Legacy compatibility
    private readonly HashSet<string> _nodes = new(StringComparer.Ordinal);
    private readonly HashSet<ReachabilityEdge> _edges = new();

    /// <summary>
    /// Gets the count of nodes in the graph.
    /// </summary>
    public int NodeCount => _nodes.Count;

    /// <summary>
    /// Gets the count of edges in the graph.
    /// </summary>
    /// <remarks>
    /// Every rich edge is also recorded in the legacy edge set with the same
    /// (from, to, type) triple, so the legacy set alone holds the number of distinct edges.
    /// </remarks>
    public int EdgeCount => _edges.Count;

    /// <summary>
    /// Adds a simple node (legacy API).
    /// </summary>
    /// <param name="symbolId">Symbol ID; null, empty or whitespace-only values are ignored.</param>
    /// <returns>This builder, for chaining.</returns>
    public ReachabilityGraphBuilder AddNode(string symbolId)
    {
        if (!string.IsNullOrWhiteSpace(symbolId))
        {
            _nodes.Add(symbolId.Trim());
        }

        return this;
    }

    /// <summary>
    /// Adds a rich node with full metadata.
    /// </summary>
    /// <param name="symbolId">Symbol ID; null, empty or whitespace-only values are ignored.</param>
    /// <param name="lang">Language of the node; stored trimmed, or as an empty string when null.</param>
    /// <param name="kind">Node kind; defaults to "symbol" when null, empty or whitespace.</param>
    /// <param name="display">Optional display name.</param>
    /// <param name="sourceFile">Optional source file used to build the node's source locator.</param>
    /// <param name="sourceLine">Optional line number appended to the source locator.</param>
    /// <param name="attributes">Optional attributes; a sorted snapshot is taken at call time.</param>
    /// <param name="purl">Optional package URL.</param>
    /// <param name="symbolDigest">Optional symbol digest.</param>
    /// <param name="symbol">Optional symbol description; stored in its trimmed form.</param>
    /// <param name="codeBlockHash">Optional code block hash.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <remarks>Adding the same symbol ID again replaces the previously stored metadata.</remarks>
    public ReachabilityGraphBuilder AddNode(
        string symbolId,
        string lang,
        string kind,
        string? display = null,
        string? sourceFile = null,
        int? sourceLine = null,
        // NOTE(review): the value type argument was lost in the source this file was recovered
        // from; <string, string> is inferred from the ordinal string key comparer - confirm.
        IReadOnlyDictionary<string, string>? attributes = null,
        string? purl = null,
        string? symbolDigest = null,
        ReachabilitySymbol? symbol = null,
        string? codeBlockHash = null)
    {
        if (string.IsNullOrWhiteSpace(symbolId))
        {
            return this;
        }

        var id = symbolId.Trim();
        var node = new RichNode(
            id,
            lang?.Trim() ?? string.Empty,
            // A blank kind falls back to the default, mirroring how edge kinds are handled.
            string.IsNullOrWhiteSpace(kind) ? DefaultNodeKind : kind.Trim(),
            display?.Trim(),
            sourceFile?.Trim(),
            sourceLine,
            attributes?.ToImmutableSortedDictionary(StringComparer.Ordinal)
                ?? ImmutableSortedDictionary<string, string>.Empty,
            purl?.Trim(),
            symbolDigest?.Trim(),
            symbol?.Trimmed(),
            codeBlockHash?.Trim());

        _richNodes[id] = node;
        _nodes.Add(id);
        return this;
    }

    /// <summary>
    /// Adds a simple edge (legacy API).
    /// </summary>
    /// <param name="from">Source symbol ID.</param>
    /// <param name="to">Target symbol ID.</param>
    /// <param name="kind">Edge kind; defaults to "call" when null, empty or whitespace.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <remarks>
    /// The edge is ignored when either endpoint is null, empty or whitespace.
    /// Both endpoints are registered as nodes.
    /// </remarks>
    public ReachabilityGraphBuilder AddEdge(string from, string to, string kind = "call")
    {
        if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
        {
            return this;
        }

        var edge = new ReachabilityEdge(from.Trim(), to.Trim(), NormalizeEdgeKind(kind));
        _edges.Add(edge);
        _nodes.Add(edge.From);
        _nodes.Add(edge.To);
        return this;
    }

    /// <summary>
    /// Adds a rich edge with confidence and provenance.
    /// </summary>
    /// <param name="from">Source symbol ID.</param>
    /// <param name="to">Target symbol ID.</param>
    /// <param name="edgeType">Edge type: call, import, inherits, loads, dynamic, reflects, dlopen, ffi, wasm, spawn.</param>
    /// <param name="confidence">Confidence level: certain, high, medium, low.</param>
    /// <param name="origin">Origin: static or runtime.</param>
    /// <param name="provenance">Provenance hint: jvm-bytecode, il, ts-ast, ssa, ebpf, etw, jfr, hook.</param>
    /// <param name="evidence">Evidence locator (e.g., "file:path:line").</param>
    /// <param name="purl">PURL of the component that defines the callee.</param>
    /// <param name="symbolDigest">Stable hash of the normalized callee signature.</param>
    /// <param name="candidates">Ranked candidate purls when resolution is ambiguous.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <remarks>
    /// The edge is ignored when either endpoint is null, empty or whitespace. A blank
    /// <paramref name="edgeType"/> defaults to "call" and a blank <paramref name="origin"/>
    /// defaults to "static".
    /// </remarks>
    public ReachabilityGraphBuilder AddEdge(
        string from,
        string to,
        string edgeType,
        EdgeConfidence confidence,
        string origin = "static",
        string? provenance = null,
        string? evidence = null,
        string? purl = null,
        string? symbolDigest = null,
        IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? candidates = null)
    {
        if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
        {
            return this;
        }

        var fromId = from.Trim();
        var toId = to.Trim();
        var type = NormalizeEdgeKind(edgeType);

        var richEdge = new RichEdge(
            fromId,
            toId,
            type,
            confidence,
            string.IsNullOrWhiteSpace(origin) ? DefaultOrigin : origin.Trim(),
            provenance?.Trim(),
            evidence?.Trim(),
            purl?.Trim(),
            symbolDigest?.Trim(),
            // Snapshot the list (as is done for node attributes) so that later mutation
            // by the caller cannot change what the builder emits.
            candidates?.ToArray());

        _richEdges.Add(richEdge);
        _nodes.Add(fromId);
        _nodes.Add(toId);

        // Also add to legacy set for compatibility
        _edges.Add(new ReachabilityEdge(fromId, toId, type));
        return this;
    }

    /// <summary>
    /// Serializes the legacy view of the graph (node ids and from/to/kind edges) to JSON.
    /// </summary>
    /// <param name="indented">Whether to pretty-print the output.</param>
    /// <returns>
    /// A JSON document with camelCase property names; nodes are ordered ordinally by id and
    /// edges ordinally by from, then to, then kind.
    /// </returns>
    public string BuildJson(bool indented = true)
    {
        var payload = new ReachabilityGraphPayload
        {
            SchemaVersion = GraphSchemaVersion,
            Nodes = _nodes
                .OrderBy(id => id, StringComparer.Ordinal)
                .Select(id => new ReachabilityNode(id))
                .ToList(),
            Edges = _edges
                .OrderBy(edge => edge.From, StringComparer.Ordinal)
                .ThenBy(edge => edge.To, StringComparer.Ordinal)
                .ThenBy(edge => edge.Kind, StringComparer.Ordinal)
                .Select(edge => new ReachabilityEdgePayload(edge.From, edge.To, edge.Kind))
                .ToList()
        };

        return JsonSerializer.Serialize(payload, indented ? IndentedJsonOptions : CompactJsonOptions);
    }

    /// <summary>
    /// Converts the builder contents to a union graph using rich metadata when available.
    /// </summary>
    /// <param name="language">Language assigned to nodes that have no rich metadata.</param>
    /// <returns>The union graph with nodes and edges in ordinal order.</returns>
    /// <exception cref="ArgumentException"><paramref name="language"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
    public ReachabilityUnionGraph ToUnionGraph(string language)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(language);

        var lang = language.Trim();

        // Build nodes: prefer rich metadata, fall back to simple nodes
        var nodeList = new List<ReachabilityUnionNode>(_nodes.Count);
        foreach (var id in _nodes.OrderBy(n => n, StringComparer.Ordinal))
        {
            if (!_richNodes.TryGetValue(id, out var rich))
            {
                nodeList.Add(new ReachabilityUnionNode(id, lang, DefaultNodeKind));
                continue;
            }

            // A source locator is only emitted when a file is known; the line is optional.
            var source = rich.SourceFile is not null
                ? new ReachabilitySource(
                    "static",
                    null,
                    rich.SourceLine.HasValue
                        ? $"file:{rich.SourceFile}:{rich.SourceLine}"
                        : $"file:{rich.SourceFile}")
                : null;

            nodeList.Add(new ReachabilityUnionNode(
                id,
                rich.Lang,
                rich.Kind,
                rich.Display,
                rich.CodeBlockHash,
                rich.Symbol,
                source,
                rich.Attributes.Count > 0 ? rich.Attributes : null,
                rich.Purl,
                rich.SymbolDigest));
        }

        // Build edges: prefer rich metadata, fall back to simple edges
        var edgeSet = new HashSet<(string, string, string)>();
        var edgeList = new List<ReachabilityUnionEdge>(_edges.Count);

        var orderedRichEdges = _richEdges
            .OrderBy(e => e.From, StringComparer.Ordinal)
            .ThenBy(e => e.To, StringComparer.Ordinal)
            .ThenBy(e => e.EdgeType, StringComparer.Ordinal);

        foreach (var rich in orderedRichEdges)
        {
            // Several rich edges may share a (from, to, type) key while differing in
            // metadata; only the first one encountered in the ordered sequence is emitted.
            if (!edgeSet.Add((rich.From, rich.To, rich.EdgeType)))
            {
                continue;
            }

            var source = new ReachabilitySource(rich.Origin, rich.Provenance, rich.Evidence);
            var candidates = rich.Candidates?
                .Select(c => new ReachabilityEdgeCandidate(c.Purl, c.SymbolDigest, c.Score))
                .ToList();

            edgeList.Add(new ReachabilityUnionEdge(
                rich.From,
                rich.To,
                rich.EdgeType,
                ConfidenceToString(rich.Confidence),
                source,
                rich.Purl,
                rich.SymbolDigest,
                candidates));
        }

        // Add any legacy edges not already covered
        var orderedLegacyEdges = _edges
            .OrderBy(e => e.From, StringComparer.Ordinal)
            .ThenBy(e => e.To, StringComparer.Ordinal)
            .ThenBy(e => e.Kind, StringComparer.Ordinal);

        foreach (var edge in orderedLegacyEdges)
        {
            if (!edgeSet.Add((edge.From, edge.To, edge.Kind)))
            {
                continue;
            }

            edgeList.Add(new ReachabilityUnionEdge(edge.From, edge.To, edge.Kind));
        }

        return new ReachabilityUnionGraph(nodeList, edgeList);
    }

    /// <summary>
    /// Creates a builder from the call graph fixture files found in a directory.
    /// </summary>
    /// <param name="variantPath">
    /// Directory that may contain "callgraph.static.json" and/or "callgraph.framework.json".
    /// Files that do not exist are skipped.
    /// </param>
    /// <returns>A builder populated with the legacy nodes and edges read from the fixtures.</returns>
    /// <remarks>
    /// Nodes are identified by their "sid" property, falling back to "id". Edge endpoints are
    /// read from "from"/"to", falling back to "source"/"target"; the edge kind is read from
    /// "kind", then "type", and defaults to "call". Entries without a usable string identifier
    /// are skipped, in the same way blank identifiers are ignored by the Add methods.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="variantPath"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="variantPath"/> is null.</exception>
    public static ReachabilityGraphBuilder FromFixture(string variantPath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(variantPath);

        var builder = new ReachabilityGraphBuilder();
        foreach (var fileName in FixtureFileNames)
        {
            var path = Path.Combine(variantPath, fileName);
            if (!File.Exists(path))
            {
                continue;
            }

            using var stream = File.OpenRead(path);
            using var document = JsonDocument.Parse(stream);
            var root = document.RootElement;

            if (root.TryGetProperty("nodes", out var nodesElement) && nodesElement.ValueKind == JsonValueKind.Array)
            {
                foreach (var node in nodesElement.EnumerateArray())
                {
                    // AddNode ignores blank ids, so a missing identifier simply adds nothing.
                    builder.AddNode(ReadString(node, "sid", "id") ?? string.Empty);
                }
            }

            if (root.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
            {
                foreach (var edge in edgesElement.EnumerateArray())
                {
                    var from = ReadString(edge, "from", "source");
                    var to = ReadString(edge, "to", "target");
                    var kind = ReadString(edge, "kind", "type");

                    // AddEdge ignores edges with a blank endpoint and defaults a blank kind.
                    builder.AddEdge(from ?? string.Empty, to ?? string.Empty, kind ?? DefaultEdgeKind);
                }
            }
        }

        return builder;
    }

    // Returns the string value of the first listed property that exists on the element and
    // holds a JSON string; null when the element is not an object or no property qualifies.
    private static string? ReadString(JsonElement element, params string[] propertyNames)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        foreach (var propertyName in propertyNames)
        {
            if (element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String)
            {
                return value.GetString();
            }
        }

        return null;
    }

    // Trims an edge kind/type, substituting the default for null, empty or whitespace input.
    private static string NormalizeEdgeKind(string? kind) =>
        string.IsNullOrWhiteSpace(kind) ? DefaultEdgeKind : kind.Trim();

    private static JsonSerializerOptions CreateJsonOptions(bool writeIndented) => new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = writeIndented
    };

    // Maps a confidence level to its schema string. Values outside the declared enum
    // members fall back to "certain".
    private static string ConfidenceToString(EdgeConfidence confidence) => confidence switch
    {
        EdgeConfidence.Certain => "certain",
        EdgeConfidence.High => "high",
        EdgeConfidence.Medium => "medium",
        EdgeConfidence.Low => "low",
        _ => "certain"
    };

    private sealed record ReachabilityEdge(string From, string To, string Kind);

    private sealed record ReachabilityNode(string Sid);

    private sealed record ReachabilityEdgePayload(string From, string To, string Kind);

    private sealed record ReachabilityGraphPayload
    {
        public string SchemaVersion { get; set; } = GraphSchemaVersion;

        public List<ReachabilityNode> Nodes { get; set; } = new();

        public List<ReachabilityEdgePayload> Edges { get; set; } = new();
    }

    private sealed record RichNode(
        string SymbolId,
        string Lang,
        string Kind,
        string? Display,
        string? SourceFile,
        int? SourceLine,
        ImmutableSortedDictionary<string, string> Attributes,
        string? Purl = null,
        string? SymbolDigest = null,
        ReachabilitySymbol? Symbol = null,
        string? CodeBlockHash = null);

    private sealed record RichEdge(
        string From,
        string To,
        string EdgeType,
        EdgeConfidence Confidence,
        string Origin,
        string? Provenance,
        string? Evidence,
        string? Purl = null,
        string? SymbolDigest = null,
        IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? Candidates = null);
}

/// <summary>
/// Confidence levels for reachability edges per the union schema.
/// </summary>
public enum EdgeConfidence
{
    /// <summary>
    /// Edge is certain (direct call, import statement).
    /// </summary>
    Certain,

    /// <summary>
    /// High confidence (type-constrained virtual call).
    /// </summary>
    High,

    /// <summary>
    /// Medium confidence (interface dispatch, some dynamic patterns).
    /// </summary>
    Medium,

    /// <summary>
    /// Low confidence (reflection, string-based loading).
    /// </summary>
    Low
}

/// <summary>
/// Well-known edge types per the reachability union schema.
/// </summary>
public static class EdgeTypes
{
    public const string Call = "call";
    public const string Import = "import";
    public const string Inherits = "inherits";
    public const string Loads = "loads";
    public const string Dynamic = "dynamic";
    public const string Reflects = "reflects";
    public const string Dlopen = "dlopen";
    public const string Ffi = "ffi";
    public const string Wasm = "wasm";
    public const string Spawn = "spawn";
}

/// <summary>
/// Well-known provenance hints per the reachability union schema.
/// </summary>
public static class Provenance
{
    public const string JvmBytecode = "jvm-bytecode";
    public const string Il = "il";
    public const string TsAst = "ts-ast";
    public const string Ssa = "ssa";
    public const string Ebpf = "ebpf";
    public const string Etw = "etw";
    public const string Jfr = "jfr";
    public const string Hook = "hook";
}