// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.

using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;

namespace StellaOps.BinaryIndex.Semantic.Internal;

/// <summary>
/// Weisfeiler-Lehman graph hashing for deterministic semantic fingerprints.
/// Uses iterative label refinement to capture graph structure.
/// </summary>
/// <remarks>
/// NOTE(review): edge types are looked up by <c>(source, target)</c> pair, so when several
/// edges connect the same ordered pair of nodes the type of the last one in
/// <c>graph.Edges</c> is used for all of them. This is kept as-is so that existing
/// fingerprints do not change.
/// </remarks>
internal sealed class WeisfeilerLehmanHasher
{
    /// <summary>Marker string hashed when the graph has no nodes.</summary>
    private const string EmptyGraphMarker = "EMPTY_GRAPH";

    /// <summary>Number of leading hex characters of the SHA-256 digest kept as a refined label.</summary>
    private const int LabelHexLength = 16;

    private readonly int _iterations;

    /// <summary>
    /// Creates a new Weisfeiler-Lehman hasher.
    /// </summary>
    /// <param name="iterations">Number of WL iterations (default: 3).</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="iterations"/> is less than 1.
    /// </exception>
    public WeisfeilerLehmanHasher(int iterations = 3)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(iterations, 1);
        _iterations = iterations;
    }

    /// <summary>
    /// Compute a deterministic hash of the semantic graph.
    /// </summary>
    /// <param name="graph">The semantic graph to hash.</param>
    /// <returns>
    /// SHA-256 hash of the graph. A graph without nodes hashes the fixed marker
    /// <c>EMPTY_GRAPH</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">
    /// An edge references a node ID that is not present in <c>graph.Nodes</c>.
    /// </exception>
    public byte[] ComputeHash(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return SHA256.HashData(Encoding.UTF8.GetBytes(EmptyGraphMarker));
        }

        var labels = ComputeRefinedLabels(graph);

        // Compute final hash from sorted labels
        return ComputeFinalHash(labels);
    }

    /// <summary>
    /// Compute canonical labels for all nodes (useful for graph comparison).
    /// </summary>
    /// <param name="graph">The semantic graph.</param>
    /// <returns>
    /// Array of canonical labels indexed by node ID. Its length is the largest node ID plus
    /// one; positions whose ID does not belong to any node hold <see cref="string.Empty"/>.
    /// An empty graph yields an empty array.
    /// </returns>
    /// <remarks>
    /// Node IDs are used directly as array indices, so they must be non-negative.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">
    /// An edge references a node ID that is not present in <c>graph.Nodes</c>.
    /// </exception>
    public ImmutableArray<string> ComputeCanonicalLabels(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return [];
        }

        var labels = ComputeRefinedLabels(graph);

        // Return labels in node ID order
        var maxId = graph.Nodes.Max(n => n.Id);
        var result = new string[maxId + 1];

        // Pre-fill so that gaps between sparse node IDs are empty strings instead of nulls.
        Array.Fill(result, string.Empty);

        foreach (var node in graph.Nodes)
        {
            if (labels.TryGetValue(node.Id, out var label))
            {
                result[node.Id] = label;
            }
        }

        return [.. result];
    }

    /// <summary>
    /// Runs the full WL pipeline shared by both public entry points: builds the neighbor and
    /// edge-type lookups once, seeds the labels from node properties, then refines them
    /// <c>_iterations</c> times.
    /// </summary>
    /// <param name="graph">A non-null graph.</param>
    /// <returns>Final label for every node ID.</returns>
    private Dictionary<int, string> ComputeRefinedLabels(KeySemanticsGraph graph)
    {
        // Build adjacency lists and the edge-type lookup in a single pass over the edges.
        // None of these change between iterations, so they are built exactly once.
        var outEdges = new Dictionary<int, List<int>>();
        var inEdges = new Dictionary<int, List<int>>();
        var edgeTypes = new Dictionary<(int, int), SemanticEdgeType>(graph.Edges.Length);

        foreach (var edge in graph.Edges)
        {
            AddNeighbor(outEdges, edge.SourceId, edge.TargetId);
            AddNeighbor(inEdges, edge.TargetId, edge.SourceId);

            // Last edge wins for a repeated (source, target) pair.
            edgeTypes[(edge.SourceId, edge.TargetId)] = edge.Type;
        }

        // Initialize labels from node type and operation.
        var labels = new Dictionary<int, string>(graph.Nodes.Length);
        foreach (var node in graph.Nodes)
        {
            labels[node.Id] = string.Create(
                CultureInfo.InvariantCulture,
                $"{(int)node.Type}:{node.Operation}");
        }

        // WL iterations
        for (var i = 0; i < _iterations; i++)
        {
            labels = RefineLabels(graph, labels, outEdges, inEdges, edgeTypes);
        }

        return labels;
    }

    /// <summary>
    /// Appends <paramref name="neighbor"/> to the list stored under <paramref name="key"/>,
    /// creating the list on first use.
    /// </summary>
    private static void AddNeighbor(Dictionary<int, List<int>> adjacency, int key, int neighbor)
    {
        if (!adjacency.TryGetValue(key, out var list))
        {
            list = [];
            adjacency[key] = list;
        }

        list.Add(neighbor);
    }

    /// <summary>
    /// Performs one WL refinement round. Each node's new label is the first
    /// <see cref="LabelHexLength"/> hex characters of the SHA-256 of
    /// <c>current|sorted outgoing entries|sorted incoming entries</c>.
    /// </summary>
    /// <returns>A new dictionary of labels; <paramref name="currentLabels"/> is not modified.</returns>
    private static Dictionary<int, string> RefineLabels(
        KeySemanticsGraph graph,
        Dictionary<int, string> currentLabels,
        Dictionary<int, List<int>> outEdges,
        Dictionary<int, List<int>> inEdges,
        Dictionary<(int, int), SemanticEdgeType> edgeTypes)
    {
        var newLabels = new Dictionary<int, string>(graph.Nodes.Length);
        var sb = new StringBuilder();

        foreach (var node in graph.Nodes)
        {
            sb.Clear();
            sb.Append(currentLabels[node.Id]);
            sb.Append('|');

            // Sorted outgoing neighbor labels with edge types
            AppendNeighborLabels(sb, node.Id, outgoing: true, outEdges, currentLabels, edgeTypes);
            sb.Append('|');

            // Sorted incoming neighbor labels with edge types
            AppendNeighborLabels(sb, node.Id, outgoing: false, inEdges, currentLabels, edgeTypes);

            // Hash the combined string to create the new, fixed-length label
            var hash = SHA256.HashData(Encoding.UTF8.GetBytes(sb.ToString()));
            newLabels[node.Id] = Convert.ToHexString(hash)[..LabelHexLength];
        }

        return newLabels;
    }

    /// <summary>
    /// Appends the comma-separated, ordinally sorted neighbor entries of
    /// <paramref name="nodeId"/> to <paramref name="sb"/>. Each entry has the form
    /// <c>O{edgeType}:{label}</c> for outgoing or <c>I{edgeType}:{label}</c> for incoming
    /// neighbors. Appends nothing when the node has no neighbors in
    /// <paramref name="adjacency"/>.
    /// </summary>
    private static void AppendNeighborLabels(
        StringBuilder sb,
        int nodeId,
        bool outgoing,
        Dictionary<int, List<int>> adjacency,
        Dictionary<int, string> currentLabels,
        Dictionary<(int, int), SemanticEdgeType> edgeTypes)
    {
        if (!adjacency.TryGetValue(nodeId, out var neighbors))
        {
            return;
        }

        var prefix = outgoing ? 'O' : 'I';
        var entries = new List<string>(neighbors.Count);

        foreach (var neighbor in neighbors)
        {
            // The lookup is always keyed (source, target), regardless of traversal direction.
            var key = outgoing ? (nodeId, neighbor) : (neighbor, nodeId);
            var edgeType = edgeTypes.TryGetValue(key, out var type)
                ? type
                : SemanticEdgeType.Unknown;

            entries.Add(string.Create(
                CultureInfo.InvariantCulture,
                $"{prefix}{(int)edgeType}:{currentLabels[neighbor]}"));
        }

        // Ordinal sort makes the result independent of edge enumeration order.
        entries.Sort(StringComparer.Ordinal);
        sb.AppendJoin(',', entries);
    }

    /// <summary>
    /// Combines all node labels into the final graph hash: the labels are sorted ordinally,
    /// joined with <c>|</c> and hashed with SHA-256.
    /// </summary>
    private static byte[] ComputeFinalHash(Dictionary<int, string> labels)
    {
        // Sort labels for deterministic output
        var sortedLabels = labels.Values
            .OrderBy(l => l, StringComparer.Ordinal)
            .ToList();

        var combined = string.Join("|", sortedLabels);
        return SHA256.HashData(Encoding.UTF8.GetBytes(combined));
    }
}