428 lines
14 KiB
C#
428 lines
14 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Collections.Immutable;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.Json;
|
|
|
|
namespace StellaOps.Scanner.Reachability;
|
|
|
|
/// <summary>
/// Accumulates reachability nodes and edges and renders them either as the
/// legacy JSON payload (<see cref="BuildJson"/>) or as a union graph
/// (<see cref="ToUnionGraph"/>) carrying rich node metadata, edge confidence
/// levels, and source provenance.
/// </summary>
/// <remarks>
/// Two parallel stores are kept: the "legacy" sets hold bare symbol ids and
/// (from, to, kind) triples, while the "rich" stores hold the optional metadata.
/// Every rich node/edge is also mirrored into the legacy sets, so the legacy
/// sets always describe the complete graph.
/// </remarks>
public sealed class ReachabilityGraphBuilder
{
    private const string GraphSchemaVersion = "1.0";
    private const string DefaultNodeKind = "symbol";
    private const string DefaultOrigin = "static";

    // Fixture files probed by FromFixture, in load order.
    private static readonly string[] FixtureFileNames = { "callgraph.static.json", "callgraph.framework.json" };

    // Serializer options are cached: building a JsonSerializerOptions per call
    // discards the serializer's metadata cache every time.
    private static readonly JsonSerializerOptions IndentedJsonOptions = CreateJsonOptions(writeIndented: true);
    private static readonly JsonSerializerOptions CompactJsonOptions = CreateJsonOptions(writeIndented: false);

    // Rich metadata, keyed by trimmed symbol id / stored per distinct rich edge.
    private readonly Dictionary<string, RichNode> _richNodes = new(StringComparer.Ordinal);
    private readonly HashSet<RichEdge> _richEdges = new();

    // Legacy compatibility: the complete set of node ids and (from, to, kind) edges.
    private readonly HashSet<string> _nodes = new(StringComparer.Ordinal);
    private readonly HashSet<ReachabilityEdge> _edges = new();

    /// <summary>
    /// Gets the count of nodes in the graph.
    /// </summary>
    public int NodeCount => _nodes.Count;

    /// <summary>
    /// Gets the count of distinct (from, to, kind) edges in the graph.
    /// </summary>
    /// <remarks>
    /// Every rich edge is mirrored into the legacy edge set when it is added and
    /// edges are never removed, so the legacy set alone is the full edge count.
    /// </remarks>
    public int EdgeCount => _edges.Count;

    /// <summary>
    /// Adds a simple node (legacy API).
    /// </summary>
    /// <param name="symbolId">Symbol id; trimmed before storing. Null/blank ids are ignored.</param>
    /// <returns>This builder, for chaining.</returns>
    public ReachabilityGraphBuilder AddNode(string symbolId)
    {
        if (!string.IsNullOrWhiteSpace(symbolId))
        {
            _nodes.Add(symbolId.Trim());
        }

        return this;
    }

    /// <summary>
    /// Adds a rich node with full metadata. Adding the same id again replaces
    /// the previously stored metadata for that id.
    /// </summary>
    /// <param name="symbolId">Symbol id; trimmed before storing. Null/blank ids are ignored.</param>
    /// <param name="lang">Language of the node; null is stored as an empty string.</param>
    /// <param name="kind">Node kind; null or blank falls back to <c>symbol</c>.</param>
    /// <param name="display">Optional display name.</param>
    /// <param name="sourceFile">Optional source file, used to build the node's source evidence.</param>
    /// <param name="sourceLine">Optional source line, appended to the source evidence when present.</param>
    /// <param name="attributes">Optional attributes; copied into an ordinally sorted immutable dictionary.</param>
    /// <param name="purl">Optional PURL associated with the node.</param>
    /// <param name="symbolDigest">Optional symbol digest associated with the node.</param>
    /// <param name="symbol">Optional symbol descriptor; stored in its trimmed form.</param>
    /// <param name="codeBlockHash">Optional code block hash.</param>
    /// <returns>This builder, for chaining.</returns>
    public ReachabilityGraphBuilder AddNode(
        string symbolId,
        string lang,
        string kind,
        string? display = null,
        string? sourceFile = null,
        int? sourceLine = null,
        IReadOnlyDictionary<string, string>? attributes = null,
        string? purl = null,
        string? symbolDigest = null,
        ReachabilitySymbol? symbol = null,
        string? codeBlockHash = null)
    {
        if (string.IsNullOrWhiteSpace(symbolId))
        {
            return this;
        }

        var id = symbolId.Trim();

        _richNodes[id] = new RichNode(
            SymbolId: id,
            Lang: lang?.Trim() ?? string.Empty,
            // A blank kind gets the default too, matching how blank edge kinds are handled.
            Kind: NormalizeOrDefault(kind, DefaultNodeKind),
            Display: display?.Trim(),
            SourceFile: sourceFile?.Trim(),
            SourceLine: sourceLine,
            Attributes: attributes?.ToImmutableSortedDictionary(StringComparer.Ordinal)
                ?? ImmutableSortedDictionary<string, string>.Empty,
            Purl: purl?.Trim(),
            SymbolDigest: symbolDigest?.Trim(),
            Symbol: symbol?.Trimmed(),
            CodeBlockHash: codeBlockHash?.Trim());

        _nodes.Add(id);
        return this;
    }

    /// <summary>
    /// Adds a simple edge (legacy API). Both endpoints are registered as nodes.
    /// </summary>
    /// <param name="from">Source symbol id; the edge is ignored when null/blank.</param>
    /// <param name="to">Target symbol id; the edge is ignored when null/blank.</param>
    /// <param name="kind">Edge kind; null or blank falls back to <c>call</c>.</param>
    /// <returns>This builder, for chaining.</returns>
    public ReachabilityGraphBuilder AddEdge(string from, string to, string kind = EdgeTypes.Call)
    {
        if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
        {
            return this;
        }

        TrackEdge(from.Trim(), to.Trim(), NormalizeOrDefault(kind, EdgeTypes.Call));
        return this;
    }

    /// <summary>
    /// Adds a rich edge with confidence and provenance. The edge is also mirrored
    /// into the legacy edge set and both endpoints are registered as nodes.
    /// </summary>
    /// <param name="from">Source symbol ID; the edge is ignored when null/blank.</param>
    /// <param name="to">Target symbol ID; the edge is ignored when null/blank.</param>
    /// <param name="edgeType">Edge type: call, import, inherits, loads, dynamic, reflects, dlopen, ffi, wasm, spawn. Null or blank falls back to <c>call</c>.</param>
    /// <param name="confidence">Confidence level: certain, high, medium, low.</param>
    /// <param name="origin">Origin: static or runtime. Null or blank falls back to <c>static</c>.</param>
    /// <param name="provenance">Provenance hint: jvm-bytecode, il, ts-ast, ssa, ebpf, etw, jfr, hook.</param>
    /// <param name="evidence">Evidence locator (e.g., "file:path:line").</param>
    /// <param name="purl">PURL of the component that defines the callee.</param>
    /// <param name="symbolDigest">Stable hash of the normalized callee signature.</param>
    /// <param name="candidates">Ranked candidate purls when resolution is ambiguous. Stored by reference, not copied.</param>
    /// <returns>This builder, for chaining.</returns>
    public ReachabilityGraphBuilder AddEdge(
        string from,
        string to,
        string edgeType,
        EdgeConfidence confidence,
        string origin = DefaultOrigin,
        string? provenance = null,
        string? evidence = null,
        string? purl = null,
        string? symbolDigest = null,
        IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? candidates = null)
    {
        if (string.IsNullOrWhiteSpace(from) || string.IsNullOrWhiteSpace(to))
        {
            return this;
        }

        var fromId = from.Trim();
        var toId = to.Trim();
        var type = NormalizeOrDefault(edgeType, EdgeTypes.Call);

        _richEdges.Add(new RichEdge(
            From: fromId,
            To: toId,
            EdgeType: type,
            Confidence: confidence,
            // A blank origin gets the default too, instead of being stored as "".
            Origin: NormalizeOrDefault(origin, DefaultOrigin),
            Provenance: provenance?.Trim(),
            Evidence: evidence?.Trim(),
            Purl: purl?.Trim(),
            SymbolDigest: symbolDigest?.Trim(),
            Candidates: candidates));

        // Also add to legacy set for compatibility.
        TrackEdge(fromId, toId, type);
        return this;
    }

    /// <summary>
    /// Serializes the legacy view of the graph (node ids and from/to/kind edges)
    /// to camelCase JSON. Nodes and edges are written in ordinal order; rich
    /// metadata is not included.
    /// </summary>
    /// <param name="indented">Whether to pretty-print the JSON.</param>
    /// <returns>The serialized graph payload.</returns>
    public string BuildJson(bool indented = true)
    {
        var payload = new ReachabilityGraphPayload
        {
            SchemaVersion = GraphSchemaVersion,
            Nodes = _nodes
                .OrderBy(id => id, StringComparer.Ordinal)
                .Select(id => new ReachabilityNode(id))
                .ToList(),
            Edges = OrderedLegacyEdges()
                .Select(edge => new ReachabilityEdgePayload(edge.From, edge.To, edge.Kind))
                .ToList()
        };

        return JsonSerializer.Serialize(payload, indented ? IndentedJsonOptions : CompactJsonOptions);
    }

    /// <summary>
    /// Converts the builder contents to a union graph using rich metadata when available.
    /// </summary>
    /// <param name="language">Language assigned to nodes that have no rich metadata.</param>
    /// <returns>
    /// A union graph whose nodes are in ordinal id order. Edges built from rich
    /// metadata come first (ordered by from, to, type), followed by the remaining
    /// legacy-only edges in the same order.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="language"/> is null, empty, or whitespace.</exception>
    public ReachabilityUnionGraph ToUnionGraph(string language)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(language);

        var fallbackLang = language.Trim();

        // Build nodes: prefer rich metadata, fall back to simple nodes.
        var unionNodes = new List<ReachabilityUnionNode>(_nodes.Count);
        foreach (var id in _nodes.OrderBy(n => n, StringComparer.Ordinal))
        {
            unionNodes.Add(_richNodes.TryGetValue(id, out var rich)
                ? ToUnionNode(rich)
                : new ReachabilityUnionNode(id, fallbackLang, DefaultNodeKind));
        }

        // Build edges: prefer rich metadata, fall back to simple edges. The key
        // set guarantees one union edge per (from, to, type); when several rich
        // edges share a key, the first one in sort order wins.
        var emitted = new HashSet<(string, string, string)>();
        var unionEdges = new List<ReachabilityUnionEdge>(_edges.Count);

        var orderedRichEdges = _richEdges
            .OrderBy(e => e.From, StringComparer.Ordinal)
            .ThenBy(e => e.To, StringComparer.Ordinal)
            .ThenBy(e => e.EdgeType, StringComparer.Ordinal);

        foreach (var rich in orderedRichEdges)
        {
            if (emitted.Add((rich.From, rich.To, rich.EdgeType)))
            {
                unionEdges.Add(ToUnionEdge(rich));
            }
        }

        // Add any legacy edges not already covered.
        foreach (var edge in OrderedLegacyEdges())
        {
            if (emitted.Add((edge.From, edge.To, edge.Kind)))
            {
                unionEdges.Add(new ReachabilityUnionEdge(edge.From, edge.To, edge.Kind));
            }
        }

        return new ReachabilityUnionGraph(unionNodes, unionEdges);
    }

    /// <summary>
    /// Builds a graph from the fixture files <c>callgraph.static.json</c> and
    /// <c>callgraph.framework.json</c> found in <paramref name="variantPath"/>.
    /// Missing files are skipped.
    /// </summary>
    /// <remarks>
    /// Node ids are read from <c>sid</c> (or <c>id</c> when <c>sid</c> is absent);
    /// edges from <c>from</c>/<c>to</c> (or <c>source</c>/<c>target</c>) with the
    /// kind taken from <c>kind</c> (or <c>type</c>), defaulting to <c>call</c>.
    /// Entries that are not JSON objects, or whose identifier is missing or not a
    /// string, are skipped rather than aborting the whole load.
    /// </remarks>
    /// <param name="variantPath">Directory containing the fixture files.</param>
    /// <returns>A builder populated with the legacy nodes and edges of the fixtures.</returns>
    /// <exception cref="ArgumentException"><paramref name="variantPath"/> is null, empty, or whitespace.</exception>
    public static ReachabilityGraphBuilder FromFixture(string variantPath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(variantPath);

        var builder = new ReachabilityGraphBuilder();

        foreach (var fileName in FixtureFileNames)
        {
            var path = Path.Combine(variantPath, fileName);
            if (!File.Exists(path))
            {
                continue;
            }

            using var stream = File.OpenRead(path);
            using var document = JsonDocument.Parse(stream);
            var root = document.RootElement;

            foreach (var node in EnumerateObjects(root, "nodes"))
            {
                // AddNode ignores blank ids, so an unreadable id simply drops the entry.
                builder.AddNode(ReadString(node, "sid", "id") ?? string.Empty);
            }

            foreach (var edge in EnumerateObjects(root, "edges"))
            {
                var from = ReadString(edge, "from", "source");
                var to = ReadString(edge, "to", "target");
                var kind = ReadString(edge, "kind", "type");

                // AddEdge ignores edges with a blank endpoint.
                builder.AddEdge(from ?? string.Empty, to ?? string.Empty, kind ?? EdgeTypes.Call);
            }
        }

        return builder;
    }

    /// <summary>Creates the camelCase serializer options used by <see cref="BuildJson"/>.</summary>
    private static JsonSerializerOptions CreateJsonOptions(bool writeIndented) => new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = writeIndented
    };

    /// <summary>Returns the trimmed value, or <paramref name="fallback"/> when the value is null or blank.</summary>
    private static string NormalizeOrDefault(string? value, string fallback)
        => string.IsNullOrWhiteSpace(value) ? fallback : value.Trim();

    /// <summary>
    /// Yields the JSON-object items of the array property <paramref name="arrayName"/>;
    /// yields nothing when the root is not an object or the property is not an array.
    /// </summary>
    private static IEnumerable<JsonElement> EnumerateObjects(JsonElement root, string arrayName)
    {
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty(arrayName, out var array)
            || array.ValueKind != JsonValueKind.Array)
        {
            yield break;
        }

        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind == JsonValueKind.Object)
            {
                yield return item;
            }
        }
    }

    /// <summary>
    /// Reads <paramref name="primaryName"/> if that property exists, otherwise
    /// <paramref name="fallbackName"/>. Returns null when neither exists or the
    /// chosen property is not a JSON string.
    /// </summary>
    private static string? ReadString(JsonElement element, string primaryName, string fallbackName)
    {
        if (!element.TryGetProperty(primaryName, out var value)
            && !element.TryGetProperty(fallbackName, out value))
        {
            return null;
        }

        return value.ValueKind == JsonValueKind.String ? value.GetString() : null;
    }

    /// <summary>Maps a confidence level to its lower-case schema string.</summary>
    private static string ConfidenceToString(EdgeConfidence confidence) => confidence switch
    {
        EdgeConfidence.Certain => "certain",
        EdgeConfidence.High => "high",
        EdgeConfidence.Medium => "medium",
        EdgeConfidence.Low => "low",
        // NOTE(review): undeclared enum values are reported as the strongest
        // confidence; kept as-is for compatibility - confirm this is intended.
        _ => "certain"
    };

    /// <summary>Converts stored rich node metadata into a union node.</summary>
    private static ReachabilityUnionNode ToUnionNode(RichNode node)
    {
        // Source evidence is "file:<path>" or "file:<path>:<line>"; absent without a file.
        var source = node.SourceFile is not null
            ? new ReachabilitySource(
                DefaultOrigin,
                null,
                node.SourceLine.HasValue
                    ? $"file:{node.SourceFile}:{node.SourceLine}"
                    : $"file:{node.SourceFile}")
            : null;

        return new ReachabilityUnionNode(
            node.SymbolId,
            node.Lang,
            node.Kind,
            node.Display,
            node.CodeBlockHash,
            node.Symbol,
            source,
            node.Attributes.Count > 0 ? node.Attributes : null,
            node.Purl,
            node.SymbolDigest);
    }

    /// <summary>Converts a stored rich edge into a union edge.</summary>
    private static ReachabilityUnionEdge ToUnionEdge(RichEdge edge)
    {
        var source = new ReachabilitySource(edge.Origin, edge.Provenance, edge.Evidence);

        var candidates = edge.Candidates?
            .Select(c => new ReachabilityEdgeCandidate(c.Purl, c.SymbolDigest, c.Score))
            .ToList();

        return new ReachabilityUnionEdge(
            edge.From,
            edge.To,
            edge.EdgeType,
            ConfidenceToString(edge.Confidence),
            source,
            edge.Purl,
            edge.SymbolDigest,
            candidates);
    }

    /// <summary>Records an edge in the legacy set and registers both endpoints as nodes.</summary>
    private void TrackEdge(string fromId, string toId, string kind)
    {
        _edges.Add(new ReachabilityEdge(fromId, toId, kind));
        _nodes.Add(fromId);
        _nodes.Add(toId);
    }

    /// <summary>Legacy edges ordered ordinally by from, then to, then kind.</summary>
    private IOrderedEnumerable<ReachabilityEdge> OrderedLegacyEdges() => _edges
        .OrderBy(edge => edge.From, StringComparer.Ordinal)
        .ThenBy(edge => edge.To, StringComparer.Ordinal)
        .ThenBy(edge => edge.Kind, StringComparer.Ordinal);

    // Legacy edge identity: (from, to, kind).
    private sealed record ReachabilityEdge(string From, string To, string Kind);

    // JSON payload shapes written by BuildJson.
    private sealed record ReachabilityNode(string Sid);

    private sealed record ReachabilityEdgePayload(string From, string To, string Kind);

    private sealed record ReachabilityGraphPayload
    {
        public string SchemaVersion { get; set; } = GraphSchemaVersion;

        public List<ReachabilityNode> Nodes { get; set; } = new();

        public List<ReachabilityEdgePayload> Edges { get; set; } = new();
    }

    // Metadata captured by the rich AddNode overload.
    private sealed record RichNode(
        string SymbolId,
        string Lang,
        string Kind,
        string? Display,
        string? SourceFile,
        int? SourceLine,
        ImmutableSortedDictionary<string, string> Attributes,
        string? Purl = null,
        string? SymbolDigest = null,
        ReachabilitySymbol? Symbol = null,
        string? CodeBlockHash = null);

    // Metadata captured by the rich AddEdge overload.
    private sealed record RichEdge(
        string From,
        string To,
        string EdgeType,
        EdgeConfidence Confidence,
        string Origin,
        string? Provenance,
        string? Evidence,
        string? Purl = null,
        string? SymbolDigest = null,
        IReadOnlyList<(string Purl, string? SymbolDigest, double? Score)>? Candidates = null);
}
|
|
|
|
/// <summary>
/// How confident the analysis is that a reachability edge exists, per the union schema.
/// </summary>
/// <remarks>
/// Members are listed from most to least confident. The numeric values are
/// spelled out explicitly and match the implicit declaration order.
/// </remarks>
public enum EdgeConfidence
{
    /// <summary>The edge is certain (direct call, import statement).</summary>
    Certain = 0,

    /// <summary>High confidence (type-constrained virtual call).</summary>
    High = 1,

    /// <summary>Medium confidence (interface dispatch, some dynamic patterns).</summary>
    Medium = 2,

    /// <summary>Low confidence (reflection, string-based loading).</summary>
    Low = 3
}
|
|
|
|
/// <summary>
/// String constants for the well-known edge types of the reachability union schema.
/// </summary>
public static class EdgeTypes
{
    /// <summary>The <c>call</c> edge type.</summary>
    public const string Call = "call";

    /// <summary>The <c>import</c> edge type.</summary>
    public const string Import = "import";

    /// <summary>The <c>inherits</c> edge type.</summary>
    public const string Inherits = "inherits";

    /// <summary>The <c>loads</c> edge type.</summary>
    public const string Loads = "loads";

    /// <summary>The <c>dynamic</c> edge type.</summary>
    public const string Dynamic = "dynamic";

    /// <summary>The <c>reflects</c> edge type.</summary>
    public const string Reflects = "reflects";

    /// <summary>The <c>dlopen</c> edge type.</summary>
    public const string Dlopen = "dlopen";

    /// <summary>The <c>ffi</c> edge type.</summary>
    public const string Ffi = "ffi";

    /// <summary>The <c>wasm</c> edge type.</summary>
    public const string Wasm = "wasm";

    /// <summary>The <c>spawn</c> edge type.</summary>
    public const string Spawn = "spawn";
}
|
|
|
|
/// <summary>
/// String constants for the well-known provenance hints of the reachability union schema.
/// </summary>
public static class Provenance
{
    /// <summary>The <c>jvm-bytecode</c> provenance hint.</summary>
    public const string JvmBytecode = "jvm-bytecode";

    /// <summary>The <c>il</c> provenance hint.</summary>
    public const string Il = "il";

    /// <summary>The <c>ts-ast</c> provenance hint.</summary>
    public const string TsAst = "ts-ast";

    /// <summary>The <c>ssa</c> provenance hint.</summary>
    public const string Ssa = "ssa";

    /// <summary>The <c>ebpf</c> provenance hint.</summary>
    public const string Ebpf = "ebpf";

    /// <summary>The <c>etw</c> provenance hint.</summary>
    public const string Etw = "etw";

    /// <summary>The <c>jfr</c> provenance hint.</summary>
    public const string Jfr = "jfr";

    /// <summary>The <c>hook</c> provenance hint.</summary>
    public const string Hook = "hook";
}
|