Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Semantic/Internal/WeisfeilerLehmanHasher.cs
StellaOps Bot 37e11918e0 save progress
2026-01-06 09:42:20 +02:00

229 lines
7.3 KiB
C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.BinaryIndex.Semantic.Internal;
/// <summary>
/// Weisfeiler-Lehman graph hashing for deterministic semantic fingerprints.
/// Uses iterative label refinement to capture graph structure.
/// </summary>
/// <remarks>
/// Each node starts with a label derived from its type and operation. On every
/// iteration a node's label is replaced by a truncated SHA-256 digest of its
/// current label combined with the sorted, edge-type-tagged labels of its
/// outgoing and incoming neighbors.
/// </remarks>
internal sealed class WeisfeilerLehmanHasher
{
    /// <summary>
    /// Number of leading hex characters of the SHA-256 digest kept as a refined label.
    /// </summary>
    private const int LabelHexLength = 16;

    private readonly int _iterations;

    /// <summary>
    /// Creates a new Weisfeiler-Lehman hasher.
    /// </summary>
    /// <param name="iterations">Number of WL iterations (default: 3). Must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="iterations"/> is less than 1.
    /// </exception>
    public WeisfeilerLehmanHasher(int iterations = 3)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(iterations, 1);
        _iterations = iterations;
    }

    /// <summary>
    /// Compute a deterministic hash of the semantic graph.
    /// </summary>
    /// <param name="graph">The semantic graph to hash.</param>
    /// <returns>
    /// SHA-256 hash of the graph. A graph without nodes hashes to the SHA-256 of
    /// the UTF-8 string <c>EMPTY_GRAPH</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">
    /// An edge attached to a node references a node ID that is absent from the node list.
    /// </exception>
    public byte[] ComputeHash(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return SHA256.HashData(Encoding.UTF8.GetBytes("EMPTY_GRAPH"));
        }

        return ComputeFinalHash(ComputeRefinedLabels(graph));
    }

    /// <summary>
    /// Compute canonical labels for all nodes (useful for graph comparison).
    /// </summary>
    /// <param name="graph">The semantic graph.</param>
    /// <returns>
    /// Array of canonical labels indexed by node ID. Its length is the largest
    /// node ID plus one; positions whose ID belongs to no node hold
    /// <see cref="string.Empty"/>. An empty array is returned for a graph without nodes.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">
    /// An edge attached to a node references a node ID that is absent from the node list.
    /// </exception>
    public ImmutableArray<string> ComputeCanonicalLabels(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return [];
        }

        var labels = ComputeRefinedLabels(graph);

        // Return labels in node ID order.
        var maxId = graph.Nodes.Max(n => n.Id);
        var result = new string[maxId + 1];

        // Node IDs may be sparse: pre-fill so that gaps are empty strings rather
        // than null entries inside a non-nullable string array.
        Array.Fill(result, string.Empty);

        foreach (var node in graph.Nodes)
        {
            if (labels.TryGetValue(node.Id, out var label))
            {
                result[node.Id] = label;
            }
        }

        return [.. result];
    }

    /// <summary>
    /// Runs the full pipeline shared by the public entry points: builds both
    /// adjacency lists, assigns initial labels, then applies the configured
    /// number of refinement iterations.
    /// </summary>
    /// <param name="graph">A non-null graph.</param>
    /// <returns>Final label for every node, keyed by node ID.</returns>
    private Dictionary<int, string> ComputeRefinedLabels(KeySemanticsGraph graph)
    {
        // Build adjacency lists once; they do not change between iterations.
        var outEdges = BuildAdjacencyList(graph.Edges, e => e.SourceId, e => e.TargetId);
        var inEdges = BuildAdjacencyList(graph.Edges, e => e.TargetId, e => e.SourceId);

        var labels = InitializeLabels(graph.Nodes);

        for (var i = 0; i < _iterations; i++)
        {
            labels = RefineLabels(graph.Nodes, labels, outEdges, inEdges);
        }

        return labels;
    }

    /// <summary>
    /// Groups edges by the endpoint chosen with <paramref name="keySelector"/>.
    /// </summary>
    /// <remarks>
    /// The edge type is stored next to the neighbor ID so that several edges
    /// between the same pair of nodes each keep their own type. A lookup keyed
    /// only by (source, target) would retain just the last such edge.
    /// </remarks>
    /// <param name="edges">All edges of the graph.</param>
    /// <param name="keySelector">Selects the endpoint the edge is filed under.</param>
    /// <param name="valueSelector">Selects the endpoint recorded as the neighbor.</param>
    /// <returns>Map from node ID to the neighbors reached through its edges.</returns>
    private static Dictionary<int, List<Neighbor>> BuildAdjacencyList(
        ImmutableArray<SemanticEdge> edges,
        Func<SemanticEdge, int> keySelector,
        Func<SemanticEdge, int> valueSelector)
    {
        var result = new Dictionary<int, List<Neighbor>>();

        foreach (var edge in edges)
        {
            var key = keySelector(edge);

            if (!result.TryGetValue(key, out var list))
            {
                list = [];
                result[key] = list;
            }

            list.Add(new Neighbor(valueSelector(edge), edge.Type));
        }

        return result;
    }

    /// <summary>
    /// Creates the initial label of every node from its numeric type and its operation.
    /// </summary>
    /// <param name="nodes">Nodes of the graph.</param>
    /// <returns>Initial labels keyed by node ID.</returns>
    private static Dictionary<int, string> InitializeLabels(ImmutableArray<SemanticNode> nodes)
    {
        var labels = new Dictionary<int, string>(nodes.Length);

        foreach (var node in nodes)
        {
            // Invariant culture keeps the label independent of the current thread culture.
            labels[node.Id] = string.Create(
                CultureInfo.InvariantCulture,
                $"{(int)node.Type}:{node.Operation}");
        }

        return labels;
    }

    /// <summary>
    /// Performs one refinement round: every node receives a new label computed
    /// from <c>current|sorted-out-neighbors|sorted-in-neighbors</c>.
    /// </summary>
    /// <param name="nodes">Nodes of the graph.</param>
    /// <param name="currentLabels">Labels produced by the previous round.</param>
    /// <param name="outEdges">Outgoing adjacency (source to targets).</param>
    /// <param name="inEdges">Incoming adjacency (target to sources).</param>
    /// <returns>New labels keyed by node ID.</returns>
    private static Dictionary<int, string> RefineLabels(
        ImmutableArray<SemanticNode> nodes,
        Dictionary<int, string> currentLabels,
        Dictionary<int, List<Neighbor>> outEdges,
        Dictionary<int, List<Neighbor>> inEdges)
    {
        var newLabels = new Dictionary<int, string>(nodes.Length);
        var sb = new StringBuilder();

        foreach (var node in nodes)
        {
            sb.Clear();
            sb.Append(currentLabels[node.Id]);
            sb.Append('|');
            AppendNeighborLabels(sb, 'O', node.Id, outEdges, currentLabels);
            sb.Append('|');
            AppendNeighborLabels(sb, 'I', node.Id, inEdges, currentLabels);

            // Hash the combined string and keep a fixed-length prefix as the new label.
            var hash = SHA256.HashData(Encoding.UTF8.GetBytes(sb.ToString()));
            newLabels[node.Id] = Convert.ToHexString(hash)[..LabelHexLength];
        }

        return newLabels;
    }

    /// <summary>
    /// Appends the comma-separated, ordinally sorted neighbor labels of a node.
    /// Each entry has the form <c>{direction}{edgeType}:{neighborLabel}</c>.
    /// Appends nothing when the node has no neighbors in <paramref name="adjacency"/>.
    /// </summary>
    /// <param name="sb">Builder receiving the text.</param>
    /// <param name="direction">Prefix character: 'O' for outgoing, 'I' for incoming.</param>
    /// <param name="nodeId">ID of the node whose neighbors are appended.</param>
    /// <param name="adjacency">Adjacency list for the requested direction.</param>
    /// <param name="currentLabels">Labels produced by the previous round.</param>
    private static void AppendNeighborLabels(
        StringBuilder sb,
        char direction,
        int nodeId,
        Dictionary<int, List<Neighbor>> adjacency,
        Dictionary<int, string> currentLabels)
    {
        if (!adjacency.TryGetValue(nodeId, out var neighbors))
        {
            return;
        }

        // Sorting makes the result independent of the order in which edges were listed.
        var neighborLabels = neighbors
            .Select(n => string.Create(
                CultureInfo.InvariantCulture,
                $"{direction}{(int)n.EdgeType}:{currentLabels[n.NodeId]}"))
            .OrderBy(l => l, StringComparer.Ordinal)
            .ToList();

        sb.AppendJoin(',', neighborLabels);
    }

    /// <summary>
    /// Combines all node labels into the final graph hash.
    /// </summary>
    /// <param name="labels">Final labels keyed by node ID.</param>
    /// <returns>SHA-256 of the ordinally sorted labels joined with <c>|</c>.</returns>
    private static byte[] ComputeFinalHash(Dictionary<int, string> labels)
    {
        // Sort labels so the hash does not depend on node IDs or dictionary order.
        var sortedLabels = labels.Values
            .OrderBy(l => l, StringComparer.Ordinal)
            .ToList();

        var combined = string.Join("|", sortedLabels);
        return SHA256.HashData(Encoding.UTF8.GetBytes(combined));
    }

    /// <summary>
    /// One adjacency entry: the node at the other end of an edge and that edge's type.
    /// </summary>
    private readonly record struct Neighbor(int NodeId, SemanticEdgeType EdgeType);
}