using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Text.Json;
namespace StellaOps.Scanner.Reachability;
/// <summary>
/// Builds reachability graphs with full schema support, including
/// rich node metadata, confidence levels, and source provenance.
/// </summary>
public sealed class ReachabilityGraphBuilder
{
// Schema version written to the SchemaVersion property of the JSON graph payload.
private const string GraphSchemaVersion = "1.0";

// Rich metadata recorded by the full-schema AddNode/AddEdge overloads.
// Nodes are keyed by their trimmed symbol ID (ordinal comparison).
private readonly Dictionary<string, RichNode> _richNodes = new(StringComparer.Ordinal);
private readonly HashSet<RichEdge> _richEdges = new();

// Legacy compatibility: every node ID and every (from, to, kind) edge is also
// tracked here, regardless of which overload added it.
private readonly HashSet<string> nodes = new(StringComparer.Ordinal);
private readonly HashSet<ReachabilityEdge> edges = new();
/// <summary>
/// Adds a simple node (legacy API).
/// </summary>
/// <param name="symbolId">Symbol ID; surrounding whitespace is trimmed. Null, empty, or whitespace-only values are ignored.</param>
/// <returns>This builder, so calls can be chained.</returns>
public ReachabilityGraphBuilder AddNode(string symbolId)
{
    // Blank identifiers are skipped silently rather than rejected.
    if (string.IsNullOrWhiteSpace(symbolId))
    {
        return this;
    }

    var trimmedId = symbolId.Trim();
    nodes.Add(trimmedId);
    return this;
}
/// <summary>
/// Adds a rich node with full metadata.
/// </summary>
/// <param name="symbolId">Symbol ID; trimmed. Null, empty, or whitespace-only values cause the call to be ignored.</param>
/// <param name="lang">Language of the node; trimmed, stored as an empty string when null.</param>
/// <param name="kind">Node kind; trimmed, defaults to "symbol" when null, empty, or whitespace-only.</param>
/// <param name="display">Optional display name; trimmed.</param>
/// <param name="sourceFile">Optional source file path; trimmed.</param>
/// <param name="sourceLine">Optional source line number.</param>
/// <param name="attributes">Optional attributes; copied into a dictionary sorted by ordinal key order.</param>
/// <param name="purl">Optional PURL; trimmed.</param>
/// <param name="symbolDigest">Optional symbol digest; trimmed.</param>
/// <param name="symbol">Optional symbol descriptor; stored in its trimmed form.</param>
/// <param name="codeBlockHash">Optional code block hash; trimmed.</param>
/// <returns>This builder, so calls can be chained.</returns>
public ReachabilityGraphBuilder AddNode(
    string symbolId,
    string lang,
    string kind,
    string? display = null,
    string? sourceFile = null,
    int? sourceLine = null,
    IReadOnlyDictionary<string, string>? attributes = null,
    string? purl = null,
    string? symbolDigest = null,
    ReachabilitySymbol? symbol = null,
    string? codeBlockHash = null)
{
    if (string.IsNullOrWhiteSpace(symbolId))
    {
        return this;
    }

    var id = symbolId.Trim();

    // NOTE(review): the attribute key/value types are assumed to be string/string;
    // confirm against the union node schema.
    var sortedAttributes = attributes?.ToImmutableSortedDictionary(StringComparer.Ordinal)
        ?? ImmutableSortedDictionary<string, string>.Empty;

    var node = new RichNode(
        id,
        lang?.Trim() ?? string.Empty,
        // A blank kind falls back to the default, matching how the AddEdge overloads treat a blank edge kind.
        string.IsNullOrWhiteSpace(kind) ? "symbol" : kind.Trim(),
        display?.Trim(),
        sourceFile?.Trim(),
        sourceLine,
        sortedAttributes,
        purl?.Trim(),
        symbolDigest?.Trim(),
        symbol?.Trimmed(),
        codeBlockHash?.Trim());

    // A later call with the same ID replaces the previously stored metadata.
    _richNodes[id] = node;
    nodes.Add(id);
    return this;
}
/// <summary>
/// Adds a simple edge (legacy API).
/// </summary>
/// <param name="from">Source symbol ID; trimmed.</param>
/// <param name="to">Target symbol ID; trimmed.</param>
/// <param name="kind">Edge kind; trimmed, defaults to "call" when null, empty, or whitespace-only.</param>
/// <returns>This builder, so calls can be chained.</returns>
public ReachabilityGraphBuilder AddEdge(string from, string to, string kind = "call")
{
    // Both endpoints are required; an edge with a blank endpoint is dropped silently.
    var hasBothEndpoints = !string.IsNullOrWhiteSpace(from) && !string.IsNullOrWhiteSpace(to);
    if (hasBothEndpoints)
    {
        var sourceId = from.Trim();
        var targetId = to.Trim();
        var edgeKind = string.IsNullOrWhiteSpace(kind) ? "call" : kind.Trim();

        edges.Add(new ReachabilityEdge(sourceId, targetId, edgeKind));

        // Endpoints are registered as nodes so the graph never has dangling edges.
        nodes.Add(sourceId);
        nodes.Add(targetId);
    }

    return this;
}
/// <summary>
/// Adds a rich edge with confidence and provenance.
/// </summary>
/// <param name="from">Source symbol ID.</param>
/// <param name="to">Target symbol ID.</param>
/// <param name="edgeType">Edge type: call, import, inherits, loads, dynamic, reflects, dlopen, ffi, wasm, spawn. Defaults to "call" when blank.</param>
/// <param name="confidence">Confidence level: certain, high, medium, low.</param>
/// <param name="origin">Origin: static or runtime. Defaults to "static" when null, empty, or whitespace-only.</param>
/// <param name="provenance">Provenance hint: jvm-bytecode, il, ts-ast, ssa, ebpf, etw, jfr, hook.</param>
/// <param name="evidence">Evidence locator (e.g., "file:path:line").</param>
/// <param name="purl">PURL of the component that defines the callee.</param>
/// <param name="symbolDigest">Stable hash of the normalized callee signature.</param>
/// <param name="candidates">Ranked candidate purls when resolution is ambiguous.</param>
/// <returns>This builder, so calls can be chained.</returns>
public ReachabilityGraphBuilder AddEdge(
    string from,
    string to,
    string edgeType,
    EdgeConfidence confidence,
    string origin = "static",
    string? provenance = null,
    string? evidence = null,
    string? purl = null,
    string? symbolDigest = null,
    IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? candidates = null)
{
    // Both endpoints are required; an edge with a blank endpoint is dropped silently.
    if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
    {
        return this;
    }

    var fromId = from.Trim();
    var toId = to.Trim();
    var type = string.IsNullOrWhiteSpace(edgeType) ? "call" : edgeType.Trim();

    // A blank origin falls back to the default, the same way a blank edge type does.
    var normalizedOrigin = string.IsNullOrWhiteSpace(origin) ? "static" : origin.Trim();

    var richEdge = new RichEdge(
        fromId,
        toId,
        type,
        confidence,
        normalizedOrigin,
        provenance?.Trim(),
        evidence?.Trim(),
        purl?.Trim(),
        symbolDigest?.Trim(),
        candidates);

    _richEdges.Add(richEdge);
    nodes.Add(fromId);
    nodes.Add(toId);

    // Also add to the legacy set for compatibility.
    edges.Add(new ReachabilityEdge(fromId, toId, type));
    return this;
}
// Serializer options are cached and reused: creating a new JsonSerializerOptions
// per call discards the serializer's metadata cache each time (analyzer rule CA1869).
private static readonly JsonSerializerOptions IndentedJsonOptions = CreateJsonOptions(writeIndented: true);
private static readonly JsonSerializerOptions CompactJsonOptions = CreateJsonOptions(writeIndented: false);

// Creates camelCase serializer options with the requested indentation setting.
private static JsonSerializerOptions CreateJsonOptions(bool writeIndented) => new()
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    WriteIndented = writeIndented
};

/// <summary>
/// Serializes the legacy view of the graph (node IDs and from/to/kind edges) to JSON.
/// Nodes and edges are emitted in ordinal sort order; rich metadata is not included.
/// </summary>
/// <param name="indented">Whether to pretty-print the JSON output.</param>
/// <returns>The JSON document, with camelCase property names.</returns>
public string BuildJson(bool indented = true)
{
    var payload = new ReachabilityGraphPayload
    {
        SchemaVersion = GraphSchemaVersion,
        Nodes = nodes
            .OrderBy(id => id, StringComparer.Ordinal)
            .Select(id => new ReachabilityNode(id))
            .ToList(),
        Edges = edges
            .OrderBy(edge => edge.From, StringComparer.Ordinal)
            .ThenBy(edge => edge.To, StringComparer.Ordinal)
            .ThenBy(edge => edge.Kind, StringComparer.Ordinal)
            .Select(edge => new ReachabilityEdgePayload(edge.From, edge.To, edge.Kind))
            .ToList()
    };

    return JsonSerializer.Serialize(payload, indented ? IndentedJsonOptions : CompactJsonOptions);
}
/// <summary>
/// Converts the builder contents to a union graph using rich metadata when available.
/// </summary>
/// <param name="language">Language assigned to nodes that have no rich metadata; trimmed.</param>
/// <returns>A union graph whose nodes and edges are listed in ordinal sort order.</returns>
/// <exception cref="ArgumentException"><paramref name="language"/> is null, empty, or whitespace-only.</exception>
public ReachabilityUnionGraph ToUnionGraph(string language)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(language);
    var lang = language.Trim();

    // Build nodes: prefer rich metadata, fall back to simple nodes.
    var nodeList = new List<ReachabilityUnionNode>(nodes.Count);
    foreach (var id in nodes.OrderBy(n => n, StringComparer.Ordinal))
    {
        if (_richNodes.TryGetValue(id, out var rich))
        {
            // The source locator is "file:<path>" with ":<line>" appended when a line is known.
            var source = rich.SourceFile is not null
                ? new ReachabilitySource("static", null, rich.SourceLine.HasValue ? $"file:{rich.SourceFile}:{rich.SourceLine}" : $"file:{rich.SourceFile}")
                : null;

            nodeList.Add(new ReachabilityUnionNode(
                id,
                rich.Lang,
                rich.Kind,
                rich.Display,
                rich.CodeBlockHash,
                rich.Symbol,
                source,
                rich.Attributes.Count > 0 ? rich.Attributes : null,
                rich.Purl,
                rich.SymbolDigest));
        }
        else
        {
            nodeList.Add(new ReachabilityUnionNode(id, lang, "symbol"));
        }
    }

    // Build edges: prefer rich metadata, fall back to simple edges.
    // edgeSet tracks the (from, to, type) keys already emitted so each key appears once.
    var edgeSet = new HashSet<(string, string, string)>();
    var edgeList = new List<ReachabilityUnionEdge>();
    foreach (var rich in _richEdges.OrderBy(e => e.From, StringComparer.Ordinal)
        .ThenBy(e => e.To, StringComparer.Ordinal)
        .ThenBy(e => e.EdgeType, StringComparer.Ordinal))
    {
        var key = (rich.From, rich.To, rich.EdgeType);
        if (!edgeSet.Add(key))
        {
            continue;
        }

        var source = new ReachabilitySource(
            rich.Origin,
            rich.Provenance,
            rich.Evidence);
        var candidates = rich.Candidates?.Select(c => new ReachabilityEdgeCandidate(c.Purl, c.SymbolDigest, c.Score)).ToList();

        edgeList.Add(new ReachabilityUnionEdge(
            rich.From,
            rich.To,
            rich.EdgeType,
            ConfidenceToString(rich.Confidence),
            source,
            rich.Purl,
            rich.SymbolDigest,
            candidates));
    }

    // Add any legacy edges not already covered by a rich edge with the same key.
    foreach (var edge in edges.OrderBy(e => e.From, StringComparer.Ordinal)
        .ThenBy(e => e.To, StringComparer.Ordinal)
        .ThenBy(e => e.Kind, StringComparer.Ordinal))
    {
        var key = (edge.From, edge.To, edge.Kind);
        if (!edgeSet.Add(key))
        {
            continue;
        }

        edgeList.Add(new ReachabilityUnionEdge(edge.From, edge.To, edge.Kind));
    }

    return new ReachabilityUnionGraph(nodeList, edgeList);
}
/// <summary>
/// Gets the count of nodes in the graph.
/// </summary>
public int NodeCount
{
    get { return nodes.Count; }
}
/// <summary>
/// Gets the count of edges in the graph: all legacy edges plus every rich edge
/// whose (from, to, type) triple is not present in the legacy edge set.
/// </summary>
public int EdgeCount
{
    get
    {
        var richOnlyCount = 0;
        foreach (var richEdge in _richEdges)
        {
            var legacyKey = new ReachabilityEdge(richEdge.From, richEdge.To, richEdge.EdgeType);
            if (!edges.Contains(legacyKey))
            {
                richOnlyCount++;
            }
        }

        return edges.Count + richOnlyCount;
    }
}
// Maps a confidence level to its lowercase string form.
// Any value outside the declared enum members is reported as "certain".
private static string ConfidenceToString(EdgeConfidence confidence)
{
    switch (confidence)
    {
        case EdgeConfidence.High:
            return "high";
        case EdgeConfidence.Medium:
            return "medium";
        case EdgeConfidence.Low:
            return "low";
        case EdgeConfidence.Certain:
        default:
            return "certain";
    }
}
/// <summary>
/// Builds a graph from the fixture files "callgraph.static.json" and
/// "callgraph.framework.json" found in <paramref name="variantPath"/>.
/// Missing files are skipped. Nodes are read from a "nodes" array ("sid", falling back to "id");
/// edges from an "edges" array ("from"/"source", "to"/"target", "kind"/"type", default "call").
/// Entries whose required string properties are absent or not strings are ignored.
/// </summary>
/// <param name="variantPath">Directory containing the fixture files.</param>
/// <returns>A builder populated through the legacy AddNode/AddEdge API.</returns>
/// <exception cref="ArgumentException"><paramref name="variantPath"/> is null, empty, or whitespace-only.</exception>
public static ReachabilityGraphBuilder FromFixture(string variantPath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(variantPath);

    var builder = new ReachabilityGraphBuilder();
    foreach (var fileName in new[] { "callgraph.static.json", "callgraph.framework.json" })
    {
        var path = Path.Combine(variantPath, fileName);
        if (!File.Exists(path))
        {
            continue;
        }

        using var stream = File.OpenRead(path);
        using var document = JsonDocument.Parse(stream);
        var root = document.RootElement;

        // TryGetProperty throws on non-object elements, so a non-object root is skipped up front.
        if (root.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        if (root.TryGetProperty("nodes", out var nodesElement) && nodesElement.ValueKind == JsonValueKind.Array)
        {
            foreach (var node in nodesElement.EnumerateArray())
            {
                // AddNode ignores blank IDs, so an unreadable node is simply skipped.
                builder.AddNode(ReadFirstString(node, "sid", "id") ?? string.Empty);
            }
        }

        if (root.TryGetProperty("edges", out var edgesElement) && edgesElement.ValueKind == JsonValueKind.Array)
        {
            foreach (var edge in edgesElement.EnumerateArray())
            {
                // AddEdge ignores edges with a blank endpoint, so an unreadable edge is simply skipped.
                builder.AddEdge(
                    ReadFirstString(edge, "from", "source") ?? string.Empty,
                    ReadFirstString(edge, "to", "target") ?? string.Empty,
                    ReadFirstString(edge, "kind", "type") ?? "call");
            }
        }
    }

    return builder;
}

// Returns the value of the first listed property that exists on the element and holds a JSON string;
// returns null when the element is not an object or no listed property is a string.
private static string? ReadFirstString(JsonElement element, params string[] propertyNames)
{
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    foreach (var propertyName in propertyNames)
    {
        if (element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString();
        }
    }

    return null;
}
// Legacy edge identity: source ID, target ID, and edge kind. Used as the element type of the legacy edge set.
private sealed record ReachabilityEdge(
    string From,
    string To,
    string Kind);

// JSON shape of a node in the legacy payload.
private sealed record ReachabilityNode(
    string Sid);

// JSON shape of an edge in the legacy payload.
private sealed record ReachabilityEdgePayload(
    string From,
    string To,
    string Kind);
// Root object serialized by BuildJson: the schema version plus the node and edge lists.
private sealed record ReachabilityGraphPayload
{
    public string SchemaVersion { get; set; } = GraphSchemaVersion;

    public List<ReachabilityNode> Nodes { get; set; } = new();

    public List<ReachabilityEdgePayload> Edges { get; set; } = new();
}
// Full metadata stored for a node added through the rich AddNode overload.
// NOTE(review): the attribute key/value types are assumed to be string/string — confirm against the union node schema.
private sealed record RichNode(
    string SymbolId,
    string Lang,
    string Kind,
    string? Display,
    string? SourceFile,
    int? SourceLine,
    ImmutableSortedDictionary<string, string> Attributes,
    string? Purl = null,
    string? SymbolDigest = null,
    ReachabilitySymbol? Symbol = null,
    string? CodeBlockHash = null);
// Full metadata stored for an edge added through the rich AddEdge overload.
// From/To/EdgeType identify the edge; the remaining members carry confidence,
// origin, provenance, evidence, and callee resolution details.
private sealed record RichEdge(
    string From,
    string To,
    string EdgeType,
    EdgeConfidence Confidence,
    string Origin,
    string? Provenance,
    string? Evidence,
    string? Purl = null,
    string? SymbolDigest = null,
    IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? Candidates = null);
}
/// <summary>
/// Confidence levels for reachability edges per the union schema.
/// </summary>
public enum EdgeConfidence
{
    /// <summary>Edge is certain (direct call, import statement).</summary>
    Certain = 0,

    /// <summary>High confidence (type-constrained virtual call).</summary>
    High = 1,

    /// <summary>Medium confidence (interface dispatch, some dynamic patterns).</summary>
    Medium = 2,

    /// <summary>Low confidence (reflection, string-based loading).</summary>
    Low = 3
}
/// <summary>
/// Well-known edge types per the reachability union schema.
/// </summary>
public static class EdgeTypes
{
    /// <summary>The "call" edge type.</summary>
    public const string Call = "call";

    /// <summary>The "import" edge type.</summary>
    public const string Import = "import";

    /// <summary>The "inherits" edge type.</summary>
    public const string Inherits = "inherits";

    /// <summary>The "loads" edge type.</summary>
    public const string Loads = "loads";

    /// <summary>The "dynamic" edge type.</summary>
    public const string Dynamic = "dynamic";

    /// <summary>The "reflects" edge type.</summary>
    public const string Reflects = "reflects";

    /// <summary>The "dlopen" edge type.</summary>
    public const string Dlopen = "dlopen";

    /// <summary>The "ffi" edge type.</summary>
    public const string Ffi = "ffi";

    /// <summary>The "wasm" edge type.</summary>
    public const string Wasm = "wasm";

    /// <summary>The "spawn" edge type.</summary>
    public const string Spawn = "spawn";
}
/// <summary>
/// Well-known provenance hints per the reachability union schema.
/// </summary>
public static class Provenance
{
    /// <summary>The "jvm-bytecode" provenance hint.</summary>
    public const string JvmBytecode = "jvm-bytecode";

    /// <summary>The "il" provenance hint.</summary>
    public const string Il = "il";

    /// <summary>The "ts-ast" provenance hint.</summary>
    public const string TsAst = "ts-ast";

    /// <summary>The "ssa" provenance hint.</summary>
    public const string Ssa = "ssa";

    /// <summary>The "ebpf" provenance hint.</summary>
    public const string Ebpf = "ebpf";

    /// <summary>The "etw" provenance hint.</summary>
    public const string Etw = "etw";

    /// <summary>The "jfr" provenance hint.</summary>
    public const string Jfr = "jfr";

    /// <summary>The "hook" provenance hint.</summary>
    public const string Hook = "hook";
}