using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;

namespace StellaOps.Scanner.Reachability.Ordering;

/// <summary>
/// Produces repeatable node and edge orderings for a <see cref="RichGraph"/>.
/// All string comparisons use <see cref="StringComparer.Ordinal"/>, so the result does not
/// depend on the current culture or on the order in which nodes and edges appear in the input.
/// Nodes whose identifier is null, empty or whitespace are ignored by every strategy.
/// </summary>
public sealed class DeterministicGraphOrderer : IGraphOrderer
{
    /// <summary>
    /// Returns the node identifiers of <paramref name="graph"/> in the order defined by
    /// <paramref name="strategy"/>.
    /// </summary>
    /// <param name="graph">Graph whose nodes are ordered.</param>
    /// <param name="strategy">
    /// Ordering strategy. Unrecognised values fall back to
    /// <see cref="GraphOrderingStrategy.TopologicalLexicographic"/>.
    /// </param>
    /// <returns>Distinct node identifiers in deterministic order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    public IReadOnlyList<string> OrderNodes(
        RichGraph graph,
        GraphOrderingStrategy strategy = GraphOrderingStrategy.TopologicalLexicographic)
    {
        ArgumentNullException.ThrowIfNull(graph);

        return strategy switch
        {
            GraphOrderingStrategy.TopologicalLexicographic => TopologicalLexicographicOrder(graph),
            GraphOrderingStrategy.BreadthFirstLexicographic => BreadthFirstLexicographicOrder(graph),
            GraphOrderingStrategy.DepthFirstLexicographic => DepthFirstLexicographicOrder(graph),
            GraphOrderingStrategy.Lexicographic => LexicographicOrder(graph),
            _ => TopologicalLexicographicOrder(graph)
        };
    }

    /// <summary>
    /// Returns the edges of <paramref name="graph"/> whose endpoints both appear in
    /// <paramref name="nodeOrder"/>, sorted by source position, target position, kind,
    /// purl and symbol digest (in that priority).
    /// </summary>
    /// <param name="graph">Graph whose edges are ordered.</param>
    /// <param name="nodeOrder">Node identifiers in their canonical order.</param>
    /// <returns>The retained edges in deterministic order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="graph"/> or <paramref name="nodeOrder"/> is null.
    /// </exception>
    public IReadOnlyList<RichGraphEdge> OrderEdges(
        RichGraph graph,
        IReadOnlyList<string> nodeOrder)
    {
        ArgumentNullException.ThrowIfNull(graph);
        ArgumentNullException.ThrowIfNull(nodeOrder);

        var index = BuildIndex(nodeOrder);

        return graph.Edges
            // A null endpoint can never be in the index; checking first avoids the
            // ArgumentNullException that Dictionary.ContainsKey(null) would throw.
            .Where(e => e.From is not null
                && e.To is not null
                && index.ContainsKey(e.From)
                && index.ContainsKey(e.To))
            .OrderBy(e => index[e.From])
            .ThenBy(e => index[e.To])
            .ThenBy(e => e.Kind, StringComparer.Ordinal)
            .ThenBy(e => e.Purl, StringComparer.Ordinal)
            .ThenBy(e => e.SymbolDigest, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Builds the canonical form of <paramref name="graph"/>: ordered nodes, ordered edges
    /// expressed as node indices, the anchor nodes and a SHA-256 content hash.
    /// </summary>
    /// <param name="graph">Graph to canonicalize.</param>
    /// <param name="strategy">Node ordering strategy; see <see cref="OrderNodes"/>.</param>
    /// <returns>The canonical graph.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    public CanonicalGraph Canonicalize(
        RichGraph graph,
        GraphOrderingStrategy strategy = GraphOrderingStrategy.TopologicalLexicographic)
    {
        ArgumentNullException.ThrowIfNull(graph);

        // When several nodes share an identifier, the first one in input order supplies the metadata.
        var nodeById = graph.Nodes
            .Where(n => !string.IsNullOrWhiteSpace(n.Id))
            .GroupBy(n => n.Id, StringComparer.Ordinal)
            .ToDictionary(g => g.Key, g => g.First(), StringComparer.Ordinal);

        var nodeOrder = OrderNodes(graph, strategy);
        var edges = OrderEdges(graph, nodeOrder);
        var index = BuildIndex(nodeOrder);

        var nodes = nodeOrder
            .Select(id =>
            {
                // Traversal strategies can yield identifiers that only occur as roots or
                // edge endpoints; those have no node record and are typed "unknown".
                nodeById.TryGetValue(id, out var node);
                return new CanonicalNode
                {
                    Index = index[id],
                    Id = id,
                    NodeType = node?.Kind ?? "unknown",
                    Label = node?.Display
                };
            })
            .ToList();

        // OrderEdges only returns edges whose endpoints are in nodeOrder, so both lookups succeed.
        var canonicalEdges = edges
            .Select(e => new CanonicalEdge
            {
                SourceIndex = index[e.From],
                TargetIndex = index[e.To],
                EdgeType = e.Kind
            })
            .ToList();

        var anchors = FindAnchorNodes(graph, nodeOrder);

        return new CanonicalGraph
        {
            Strategy = strategy,
            Nodes = nodes,
            Edges = canonicalEdges,
            ContentHash = ComputeCanonicalHash(nodes, canonicalEdges),
            AnchorNodes = anchors
        };
    }

    /// <summary>Maps each identifier in <paramref name="nodeOrder"/> to its position.</summary>
    private static Dictionary<string, int> BuildIndex(IReadOnlyList<string> nodeOrder)
    {
        var index = new Dictionary<string, int>(nodeOrder.Count, StringComparer.Ordinal);
        for (var i = 0; i < nodeOrder.Count; i++)
        {
            // Indexer assignment: if an identifier repeats, its last position wins.
            index[nodeOrder[i]] = i;
        }

        return index;
    }

    /// <summary>
    /// Kahn's algorithm with an ordinal-sorted ready set: among all nodes whose predecessors
    /// have been emitted, the smallest identifier is emitted next. Nodes that are never
    /// released (they sit on or behind a cycle) are appended in ordinal order.
    /// </summary>
    private static IReadOnlyList<string> TopologicalLexicographicOrder(RichGraph graph)
    {
        var nodes = graph.Nodes
            .Select(n => n.Id)
            .Where(id => !string.IsNullOrWhiteSpace(id))
            .Distinct(StringComparer.Ordinal)
            .ToList();
        nodes.Sort(StringComparer.Ordinal);

        var adjacency = nodes.ToDictionary(n => n, _ => new List<string>(), StringComparer.Ordinal);
        var indegree = nodes.ToDictionary(n => n, _ => 0, StringComparer.Ordinal);

        foreach (var edge in graph.Edges)
        {
            if (string.IsNullOrWhiteSpace(edge.From) || string.IsNullOrWhiteSpace(edge.To))
            {
                continue;
            }

            // Edges touching identifiers that are not declared nodes are ignored.
            if (!adjacency.TryGetValue(edge.From, out var neighbors)
                || !indegree.TryGetValue(edge.To, out var degree))
            {
                continue;
            }

            neighbors.Add(edge.To);
            indegree[edge.To] = degree + 1;
        }

        // The sorted ready set alone fixes the emission order, so the per-node
        // neighbor lists do not need to be sorted.
        var ready = new SortedSet<string>(
            indegree.Where(kv => kv.Value == 0).Select(kv => kv.Key),
            StringComparer.Ordinal);
        var result = new List<string>(nodes.Count);

        while (ready.Count > 0)
        {
            var next = ready.Min!;
            ready.Remove(next);
            result.Add(next);

            foreach (var neighbor in adjacency[next])
            {
                var remaining = indegree[neighbor] - 1;
                indegree[neighbor] = remaining;
                if (remaining == 0)
                {
                    ready.Add(neighbor);
                }
            }
        }

        if (result.Count == nodes.Count)
        {
            return result;
        }

        // 'nodes' is already ordinal-sorted, so filtering it preserves that order.
        var seen = new HashSet<string>(result, StringComparer.Ordinal);
        result.AddRange(nodes.Where(n => !seen.Contains(n)));
        return result;
    }

    /// <summary>
    /// Breadth-first traversal from the entry points (see <see cref="FindEntryPoints"/>),
    /// visiting neighbors in ordinal order. Declared nodes that the traversal does not reach
    /// are appended afterwards in ordinal order.
    /// </summary>
    private static IReadOnlyList<string> BreadthFirstLexicographicOrder(RichGraph graph)
    {
        var ordered = new List<string>();
        var visited = new HashSet<string>(StringComparer.Ordinal);
        var adjacency = BuildAdjacency(graph);

        // FindEntryPoints already returns its result in ordinal order.
        var queue = new Queue<string>(FindEntryPoints(graph));

        while (queue.Count > 0)
        {
            var current = queue.Dequeue();
            if (!visited.Add(current))
            {
                continue;
            }

            ordered.Add(current);

            if (!adjacency.TryGetValue(current, out var neighbors))
            {
                continue;
            }

            foreach (var neighbor in neighbors)
            {
                if (!visited.Contains(neighbor))
                {
                    queue.Enqueue(neighbor);
                }
            }
        }

        // Append unreached nodes deterministically. LexicographicOrder skips blank
        // identifiers, keeping this strategy consistent with the others.
        foreach (var nodeId in LexicographicOrder(graph))
        {
            if (visited.Add(nodeId))
            {
                ordered.Add(nodeId);
            }
        }

        return ordered;
    }

    /// <summary>
    /// Depth-first pre-order traversal from the entry points (see <see cref="FindEntryPoints"/>),
    /// visiting neighbors in ordinal order. Declared nodes that are still unvisited are then
    /// used as additional traversal starts, in ordinal order.
    /// </summary>
    private static IReadOnlyList<string> DepthFirstLexicographicOrder(RichGraph graph)
    {
        var ordered = new List<string>();
        var visited = new HashSet<string>(StringComparer.Ordinal);
        var adjacency = BuildAdjacency(graph);

        // FindEntryPoints already returns its result in ordinal order.
        foreach (var entry in FindEntryPoints(graph))
        {
            DfsVisit(entry, adjacency, visited, ordered);
        }

        // Cover unreached nodes deterministically. LexicographicOrder skips blank
        // identifiers, which would otherwise reach the adjacency lookup as a null key.
        foreach (var nodeId in LexicographicOrder(graph))
        {
            DfsVisit(nodeId, adjacency, visited, ordered);
        }

        return ordered;
    }

    /// <summary>
    /// Appends to <paramref name="result"/> the pre-order depth-first traversal that starts at
    /// <paramref name="node"/>, skipping anything already in <paramref name="visited"/>.
    /// </summary>
    /// <remarks>
    /// Implemented with an explicit stack of (neighbor list, next index) frames rather than
    /// recursion, so the depth of the graph is bounded by heap memory instead of the thread
    /// stack. The visit order is the same as the recursive formulation.
    /// </remarks>
    private static void DfsVisit(
        string node,
        IReadOnlyDictionary<string, IReadOnlyList<string>> adjacency,
        HashSet<string> visited,
        List<string> result)
    {
        if (!visited.Add(node))
        {
            return;
        }

        result.Add(node);

        var frames = new Stack<(IReadOnlyList<string> Neighbors, int Next)>();
        if (adjacency.TryGetValue(node, out var startNeighbors))
        {
            frames.Push((startNeighbors, 0));
        }

        while (frames.Count > 0)
        {
            var (neighbors, next) = frames.Pop();
            if (next >= neighbors.Count)
            {
                // Every neighbor of this frame has been handled; return to the parent frame.
                continue;
            }

            // Re-push the frame advanced by one so its remaining neighbors are resumed
            // after the subtree of the current neighbor is finished.
            frames.Push((neighbors, next + 1));

            var candidate = neighbors[next];
            if (!visited.Add(candidate))
            {
                continue;
            }

            result.Add(candidate);

            if (adjacency.TryGetValue(candidate, out var childNeighbors))
            {
                frames.Push((childNeighbors, 0));
            }
        }
    }

    /// <summary>Returns the distinct, non-blank node identifiers in ordinal order.</summary>
    private static IReadOnlyList<string> LexicographicOrder(RichGraph graph)
    {
        return graph.Nodes
            .Select(n => n.Id)
            .Where(id => !string.IsNullOrWhiteSpace(id))
            .Distinct(StringComparer.Ordinal)
            .OrderBy(id => id, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Builds a source-to-targets map from the graph's edges, skipping edges with a blank
    /// endpoint. Each target list is sorted ordinally so traversals visit neighbors in a
    /// fixed order. Duplicate edges produce duplicate list entries.
    /// </summary>
    private static IReadOnlyDictionary<string, IReadOnlyList<string>> BuildAdjacency(RichGraph graph)
    {
        var adjacency = new Dictionary<string, List<string>>(StringComparer.Ordinal);

        foreach (var edge in graph.Edges)
        {
            if (string.IsNullOrWhiteSpace(edge.From) || string.IsNullOrWhiteSpace(edge.To))
            {
                continue;
            }

            if (!adjacency.TryGetValue(edge.From, out var list))
            {
                list = new List<string>();
                adjacency[edge.From] = list;
            }

            list.Add(edge.To);
        }

        // Deterministic neighbor traversal
        foreach (var list in adjacency.Values)
        {
            list.Sort(StringComparer.Ordinal);
        }

        return adjacency.ToDictionary(
            kv => kv.Key,
            kv => (IReadOnlyList<string>)kv.Value,
            StringComparer.Ordinal);
    }

    /// <summary>
    /// Collects traversal start points: every non-blank root identifier, every node flagged as
    /// an entrypoint (see <see cref="IsEntrypointNode"/>) and every node that is not the target
    /// of any edge. The result is distinct and in ordinal order.
    /// </summary>
    private static IReadOnlyList<string> FindEntryPoints(RichGraph graph)
    {
        var inbound = new HashSet<string>(StringComparer.Ordinal);
        foreach (var edge in graph.Edges)
        {
            if (!string.IsNullOrWhiteSpace(edge.To))
            {
                inbound.Add(edge.To);
            }
        }

        var entryPoints = new HashSet<string>(StringComparer.Ordinal);

        foreach (var root in graph.Roots ?? Array.Empty<RichGraphRoot>())
        {
            if (!string.IsNullOrWhiteSpace(root.Id))
            {
                entryPoints.Add(root.Id);
            }
        }

        foreach (var node in graph.Nodes)
        {
            // Blank identifiers are never valid start points, flagged or not.
            if (string.IsNullOrWhiteSpace(node.Id))
            {
                continue;
            }

            if (IsEntrypointNode(node) || !inbound.Contains(node.Id))
            {
                entryPoints.Add(node.Id);
            }
        }

        return entryPoints.OrderBy(id => id, StringComparer.Ordinal).ToList();
    }

    /// <summary>
    /// Returns the anchor identifiers (non-blank roots and nodes flagged as entrypoints) that
    /// occur in <paramref name="nodeOrder"/>, in that order, or null when the graph declares
    /// no anchors at all.
    /// </summary>
    private static IReadOnlyList<string>? FindAnchorNodes(RichGraph graph, IReadOnlyList<string> nodeOrder)
    {
        var anchors = new HashSet<string>(StringComparer.Ordinal);

        foreach (var root in graph.Roots ?? Array.Empty<RichGraphRoot>())
        {
            if (!string.IsNullOrWhiteSpace(root.Id))
            {
                anchors.Add(root.Id);
            }
        }

        foreach (var node in graph.Nodes)
        {
            if (!string.IsNullOrWhiteSpace(node.Id) && IsEntrypointNode(node))
            {
                anchors.Add(node.Id);
            }
        }

        if (anchors.Count == 0)
        {
            return null;
        }

        return nodeOrder.Where(anchors.Contains).ToList();
    }

    /// <summary>
    /// Computes the content hash over the canonical nodes and edges. Each node contributes
    /// <c>N:{Index}:{Id}:{NodeType};</c> and each edge <c>E:{SourceIndex}:{TargetIndex}:{EdgeType};</c>;
    /// the concatenation is UTF-8 encoded and hashed with SHA-256.
    /// </summary>
    /// <returns>The digest as <c>sha256:</c> followed by lowercase hexadecimal.</returns>
    private static string ComputeCanonicalHash(
        IReadOnlyList<CanonicalNode> nodes,
        IReadOnlyList<CanonicalEdge> edges)
    {
        // The serialized layout must stay byte-identical: any change alters every content hash.
        var sb = new StringBuilder();

        foreach (var node in nodes)
        {
            sb.Append("N:")
                .Append(node.Index)
                .Append(':')
                .Append(node.Id)
                .Append(':')
                .Append(node.NodeType)
                .Append(';');
        }

        foreach (var edge in edges)
        {
            sb.Append("E:")
                .Append(edge.SourceIndex)
                .Append(':')
                .Append(edge.TargetIndex)
                .Append(':')
                .Append(edge.EdgeType)
                .Append(';');
        }

        var bytes = Encoding.UTF8.GetBytes(sb.ToString());
        var hash = SHA256.HashData(bytes);
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    /// <summary>
    /// Returns true when the node's attributes contain
    /// <see cref="RichGraphSemanticAttributes.IsEntrypoint"/> with a value that
    /// <see cref="bool.TryParse(string, out bool)"/> reads as true.
    /// </summary>
    private static bool IsEntrypointNode(RichGraphNode node)
    {
        if (node.Attributes?.TryGetValue(RichGraphSemanticAttributes.IsEntrypoint, out var value) != true
            || string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        return bool.TryParse(value, out var result) && result;
    }
}