// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
|
|
|
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
|
|
namespace StellaOps.BinaryIndex.Semantic.Internal;
|
|
|
|
/// <summary>
/// Weisfeiler-Lehman graph hashing for deterministic semantic fingerprints.
/// Uses iterative label refinement to capture graph structure.
/// </summary>
/// <remarks>
/// Every node starts with a label built from its type and operation. On each
/// iteration a node's label is replaced by a truncated SHA-256 digest of its
/// current label combined with the sorted, edge-type-tagged labels of its
/// outgoing and incoming neighbors. All sorting is ordinal and all formatting
/// uses the invariant culture, so the result does not depend on the order of
/// nodes or edges in the input, nor on the current culture.
/// </remarks>
internal sealed class WeisfeilerLehmanHasher
{
    /// <summary>Number of leading hex characters of the SHA-256 digest kept as a refined label.</summary>
    private const int LabelHexLength = 16;

    /// <summary>Text hashed in place of a label set when the graph has no nodes.</summary>
    private const string EmptyGraphMarker = "EMPTY_GRAPH";

    private readonly int _iterations;

    /// <summary>
    /// One adjacency entry: the node on the other end of an edge together with that edge's type.
    /// Keeping the type here (rather than in a lookup keyed by node pair) preserves each
    /// parallel edge's own type.
    /// </summary>
    private readonly record struct Neighbor(int NodeId, SemanticEdgeType EdgeType);

    /// <summary>
    /// Creates a new Weisfeiler-Lehman hasher.
    /// </summary>
    /// <param name="iterations">Number of WL iterations (default: 3). Must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="iterations"/> is less than 1.</exception>
    public WeisfeilerLehmanHasher(int iterations = 3)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(iterations, 1);
        _iterations = iterations;
    }

    /// <summary>
    /// Compute a deterministic hash of the semantic graph.
    /// </summary>
    /// <param name="graph">The semantic graph to hash.</param>
    /// <returns>
    /// SHA-256 hash of the ordinally sorted refined node labels, or the SHA-256 hash of a
    /// fixed marker string when the graph has no nodes.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">An edge references a node id that is not present in the graph's nodes.</exception>
    public byte[] ComputeHash(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return SHA256.HashData(Encoding.UTF8.GetBytes(EmptyGraphMarker));
        }

        return ComputeFinalHash(ComputeRefinedLabels(graph));
    }

    /// <summary>
    /// Compute canonical labels for all nodes (useful for graph comparison).
    /// </summary>
    /// <param name="graph">The semantic graph.</param>
    /// <returns>
    /// Array of canonical labels indexed by node ID. Its length is the largest node ID plus one;
    /// positions whose ID does not occur in the graph hold <see cref="string.Empty"/>.
    /// An empty array is returned when the graph has no nodes.
    /// </returns>
    /// <remarks>
    /// Node IDs are used directly as array indices, so they are assumed to be non-negative.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">An edge references a node id that is not present in the graph's nodes.</exception>
    public ImmutableArray<string> ComputeCanonicalLabels(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsEmpty)
        {
            return [];
        }

        var labels = ComputeRefinedLabels(graph);

        // Return labels in node ID order.
        var maxId = graph.Nodes.Max(n => n.Id);
        var result = new string[maxId + 1];

        // Pre-fill so that gaps in a sparse ID space are empty strings instead of nulls.
        Array.Fill(result, string.Empty);

        foreach (var node in graph.Nodes)
        {
            result[node.Id] = labels[node.Id];
        }

        return [.. result];
    }

    /// <summary>
    /// Runs the shared pipeline: build both adjacency maps once, seed the initial labels,
    /// then apply the configured number of refinement iterations.
    /// </summary>
    /// <param name="graph">A non-null graph.</param>
    /// <returns>Map from node ID to its refined label.</returns>
    private Dictionary<int, string> ComputeRefinedLabels(KeySemanticsGraph graph)
    {
        // Build adjacency lists once; they do not change between iterations.
        var outEdges = BuildAdjacencyList(graph.Edges, e => e.SourceId, e => e.TargetId);
        var inEdges = BuildAdjacencyList(graph.Edges, e => e.TargetId, e => e.SourceId);

        // Initialize labels from node properties.
        var labels = InitializeLabels(graph.Nodes);

        // WL iterations.
        for (var i = 0; i < _iterations; i++)
        {
            labels = RefineLabels(graph.Nodes, labels, outEdges, inEdges);
        }

        return labels;
    }

    /// <summary>
    /// Groups edges by the node ID chosen by <paramref name="keySelector"/>, recording for each
    /// edge the node ID chosen by <paramref name="valueSelector"/> and the edge's type.
    /// </summary>
    /// <param name="edges">All edges of the graph.</param>
    /// <param name="keySelector">Selects the endpoint the edge is filed under.</param>
    /// <param name="valueSelector">Selects the endpoint recorded as the neighbor.</param>
    /// <returns>Map from node ID to its neighbor entries, one entry per edge.</returns>
    private static Dictionary<int, List<Neighbor>> BuildAdjacencyList(
        ImmutableArray<SemanticEdge> edges,
        Func<SemanticEdge, int> keySelector,
        Func<SemanticEdge, int> valueSelector)
    {
        var result = new Dictionary<int, List<Neighbor>>();

        foreach (var edge in edges)
        {
            var key = keySelector(edge);

            if (!result.TryGetValue(key, out var list))
            {
                list = [];
                result[key] = list;
            }

            list.Add(new Neighbor(valueSelector(edge), edge.Type));
        }

        return result;
    }

    /// <summary>
    /// Creates the initial label of every node from its numeric type and its operation.
    /// </summary>
    /// <param name="nodes">All nodes of the graph.</param>
    /// <returns>Map from node ID to its initial label.</returns>
    private static Dictionary<int, string> InitializeLabels(ImmutableArray<SemanticNode> nodes)
    {
        var labels = new Dictionary<int, string>(nodes.Length);

        foreach (var node in nodes)
        {
            labels[node.Id] = string.Create(
                CultureInfo.InvariantCulture,
                $"{(int)node.Type}:{node.Operation}");
        }

        return labels;
    }

    /// <summary>
    /// Performs one refinement step: each node's new label is a truncated SHA-256 digest of
    /// <c>current|sorted-out-neighbors|sorted-in-neighbors</c>.
    /// </summary>
    /// <param name="nodes">All nodes of the graph.</param>
    /// <param name="currentLabels">Labels produced by the previous step.</param>
    /// <param name="outEdges">Outgoing adjacency (source ID to targets).</param>
    /// <param name="inEdges">Incoming adjacency (target ID to sources).</param>
    /// <returns>A new map from node ID to refined label; <paramref name="currentLabels"/> is not modified.</returns>
    private static Dictionary<int, string> RefineLabels(
        ImmutableArray<SemanticNode> nodes,
        Dictionary<int, string> currentLabels,
        Dictionary<int, List<Neighbor>> outEdges,
        Dictionary<int, List<Neighbor>> inEdges)
    {
        var newLabels = new Dictionary<int, string>(nodes.Length);
        var sb = new StringBuilder();

        foreach (var node in nodes)
        {
            sb.Clear();
            sb.Append(currentLabels[node.Id]);
            sb.Append('|');

            // Sorted outgoing neighbor labels with edge types.
            AppendNeighborLabels(sb, 'O', outEdges, node.Id, currentLabels);

            sb.Append('|');

            // Sorted incoming neighbor labels with edge types.
            AppendNeighborLabels(sb, 'I', inEdges, node.Id, currentLabels);

            // Hash the combined string to create the new label.
            var hash = SHA256.HashData(Encoding.UTF8.GetBytes(sb.ToString()));
            newLabels[node.Id] = Convert.ToHexString(hash)[..LabelHexLength];
        }

        return newLabels;
    }

    /// <summary>
    /// Appends the comma-separated, ordinally sorted neighbor descriptors of one node.
    /// Each descriptor has the form <c>{direction}{edgeType}:{neighborLabel}</c>.
    /// Appends nothing when the node has no neighbors in <paramref name="adjacency"/>.
    /// </summary>
    /// <param name="sb">Builder receiving the descriptors.</param>
    /// <param name="direction">Direction tag placed at the start of every descriptor.</param>
    /// <param name="adjacency">Adjacency map for the direction being written.</param>
    /// <param name="nodeId">ID of the node whose neighbors are written.</param>
    /// <param name="currentLabels">Labels produced by the previous refinement step.</param>
    private static void AppendNeighborLabels(
        StringBuilder sb,
        char direction,
        Dictionary<int, List<Neighbor>> adjacency,
        int nodeId,
        Dictionary<int, string> currentLabels)
    {
        if (!adjacency.TryGetValue(nodeId, out var neighbors))
        {
            return;
        }

        // Sorting makes the descriptor list independent of the order edges were supplied in.
        var neighborLabels = neighbors
            .Select(n => string.Create(
                CultureInfo.InvariantCulture,
                $"{direction}{(int)n.EdgeType}:{currentLabels[n.NodeId]}"))
            .OrderBy(l => l, StringComparer.Ordinal)
            .ToList();

        sb.AppendJoin(',', neighborLabels);
    }

    /// <summary>
    /// Folds all node labels into a single digest.
    /// </summary>
    /// <param name="labels">Refined labels keyed by node ID.</param>
    /// <returns>SHA-256 hash of the ordinally sorted labels joined with <c>|</c>.</returns>
    private static byte[] ComputeFinalHash(Dictionary<int, string> labels)
    {
        // Sort labels so the result does not depend on dictionary enumeration order.
        var sortedLabels = labels.Values
            .OrderBy(l => l, StringComparer.Ordinal)
            .ToList();

        var combined = string.Join("|", sortedLabels);
        return SHA256.HashData(Encoding.UTF8.GetBytes(combined));
    }
}
|