save progress
This commit is contained in:
@@ -0,0 +1,228 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Semantic.Internal;
|
||||
|
||||
/// <summary>
/// Weisfeiler-Lehman graph hashing for deterministic semantic fingerprints.
/// Uses iterative label refinement to capture graph structure.
/// </summary>
/// <remarks>
/// Every node starts with a label built from its type and operation. Each iteration replaces a
/// node's label with a truncated SHA-256 digest of its current label followed by the sorted,
/// edge-type-tagged labels of its outgoing and incoming neighbors. Neighbor labels and final labels
/// are sorted with ordinal comparison, so (provided node IDs are unique) the result does not depend
/// on the order in which nodes or edges are listed.
/// </remarks>
internal sealed class WeisfeilerLehmanHasher
{
    /// <summary>Input hashed when the graph contains no nodes.</summary>
    private const string EmptyGraphMarker = "EMPTY_GRAPH";

    /// <summary>Number of leading digest bytes kept per refined label (8 bytes = 16 hex characters).</summary>
    private const int LabelDigestBytes = 8;

    private readonly int _iterations;

    /// <summary>
    /// Creates a new Weisfeiler-Lehman hasher.
    /// </summary>
    /// <param name="iterations">Number of WL iterations (default: 3).</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="iterations"/> is less than 1.</exception>
    public WeisfeilerLehmanHasher(int iterations = 3)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(iterations, 1);
        _iterations = iterations;
    }

    /// <summary>
    /// Compute a deterministic hash of the semantic graph.
    /// </summary>
    /// <param name="graph">The semantic graph to hash.</param>
    /// <returns>
    /// SHA-256 hash of the graph. A graph without nodes hashes to the SHA-256 of a fixed marker string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">An edge references a node ID that is not in the graph.</exception>
    public byte[] ComputeHash(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        // IsDefaultOrEmpty also covers an uninitialized ImmutableArray, on which IsEmpty would throw.
        if (graph.Nodes.IsDefaultOrEmpty)
        {
            return SHA256.HashData(Encoding.UTF8.GetBytes(EmptyGraphMarker));
        }

        return ComputeFinalHash(ComputeRefinedLabels(graph));
    }

    /// <summary>
    /// Compute canonical labels for all nodes (useful for graph comparison).
    /// </summary>
    /// <param name="graph">The semantic graph.</param>
    /// <returns>
    /// Array of canonical labels indexed by node ID. Its length is the largest node ID plus one;
    /// slots whose index is not a node ID hold <see cref="string.Empty"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">An edge references a node ID that is not in the graph.</exception>
    public ImmutableArray<string> ComputeCanonicalLabels(KeySemanticsGraph graph)
    {
        ArgumentNullException.ThrowIfNull(graph);

        if (graph.Nodes.IsDefaultOrEmpty)
        {
            return [];
        }

        var labels = ComputeRefinedLabels(graph);

        // Return labels in node ID order
        var maxId = graph.Nodes.Max(n => n.Id);
        var result = new string[maxId + 1];

        // Node IDs may be sparse; without this, unused slots would surface as null strings.
        Array.Fill(result, string.Empty);

        foreach (var node in graph.Nodes)
        {
            result[node.Id] = labels.TryGetValue(node.Id, out var label) ? label : string.Empty;
        }

        return [.. result];
    }

    /// <summary>
    /// Runs the shared pipeline: build adjacency, seed the labels, then refine them
    /// for the configured number of iterations.
    /// </summary>
    private Dictionary<int, string> ComputeRefinedLabels(KeySemanticsGraph graph)
    {
        var (outEdges, inEdges) = BuildAdjacency(graph.Edges);
        var labels = InitializeLabels(graph.Nodes);

        for (var i = 0; i < _iterations; i++)
        {
            labels = RefineLabels(graph.Nodes, labels, outEdges, inEdges);
        }

        return labels;
    }

    /// <summary>
    /// Builds outgoing and incoming adjacency lists in a single pass over the edges.
    /// </summary>
    /// <remarks>
    /// Each entry keeps the type of the edge it came from, so parallel edges between the same
    /// pair of nodes contribute their own types instead of sharing one per (source, target) pair.
    /// </remarks>
    private static (Dictionary<int, List<Neighbor>> Outgoing, Dictionary<int, List<Neighbor>> Incoming) BuildAdjacency(
        ImmutableArray<SemanticEdge> edges)
    {
        var outgoing = new Dictionary<int, List<Neighbor>>();
        var incoming = new Dictionary<int, List<Neighbor>>();

        // A default (uninitialized) edge array is treated as "no edges".
        if (edges.IsDefault)
        {
            return (outgoing, incoming);
        }

        foreach (var edge in edges)
        {
            AddNeighbor(outgoing, edge.SourceId, new Neighbor(edge.TargetId, edge.Type));
            AddNeighbor(incoming, edge.TargetId, new Neighbor(edge.SourceId, edge.Type));
        }

        return (outgoing, incoming);
    }

    /// <summary>Appends <paramref name="neighbor"/> to the list stored under <paramref name="key"/>, creating the list on first use.</summary>
    private static void AddNeighbor(Dictionary<int, List<Neighbor>> adjacency, int key, Neighbor neighbor)
    {
        if (!adjacency.TryGetValue(key, out var list))
        {
            list = [];
            adjacency[key] = list;
        }

        list.Add(neighbor);
    }

    /// <summary>
    /// Creates the initial label of every node from its type and operation.
    /// </summary>
    private static Dictionary<int, string> InitializeLabels(ImmutableArray<SemanticNode> nodes)
    {
        var labels = new Dictionary<int, string>(nodes.Length);

        foreach (var node in nodes)
        {
            // Invariant culture keeps the numeric formatting identical on every machine.
            labels[node.Id] = string.Create(
                CultureInfo.InvariantCulture,
                $"{(int)node.Type}:{node.Operation}");
        }

        return labels;
    }

    /// <summary>
    /// Performs one refinement round: each node's new label is the first
    /// <see cref="LabelDigestBytes"/> bytes (as hex) of the SHA-256 digest of
    /// <c>current|sorted outgoing neighbor labels|sorted incoming neighbor labels</c>.
    /// </summary>
    private static Dictionary<int, string> RefineLabels(
        ImmutableArray<SemanticNode> nodes,
        Dictionary<int, string> currentLabels,
        Dictionary<int, List<Neighbor>> outEdges,
        Dictionary<int, List<Neighbor>> inEdges)
    {
        var newLabels = new Dictionary<int, string>(nodes.Length);
        var sb = new StringBuilder();

        foreach (var node in nodes)
        {
            sb.Clear();
            sb.Append(currentLabels[node.Id]);
            sb.Append('|');
            AppendNeighborLabels(sb, 'O', outEdges, node.Id, currentLabels);
            sb.Append('|');
            AppendNeighborLabels(sb, 'I', inEdges, node.Id, currentLabels);

            // Hash the combined string to create the new label.
            var digest = SHA256.HashData(Encoding.UTF8.GetBytes(sb.ToString()));
            newLabels[node.Id] = Convert.ToHexString(digest, 0, LabelDigestBytes);
        }

        return newLabels;
    }

    /// <summary>
    /// Appends the comma-separated, ordinally sorted labels of a node's neighbors, each formatted
    /// as <c>{directionTag}{edge type}:{neighbor label}</c>. Appends nothing when the node has no
    /// neighbors in <paramref name="adjacency"/>.
    /// </summary>
    /// <exception cref="KeyNotFoundException">A neighbor ID has no entry in <paramref name="currentLabels"/>.</exception>
    private static void AppendNeighborLabels(
        StringBuilder sb,
        char directionTag,
        Dictionary<int, List<Neighbor>> adjacency,
        int nodeId,
        Dictionary<int, string> currentLabels)
    {
        if (!adjacency.TryGetValue(nodeId, out var neighbors))
        {
            return;
        }

        var tagged = new List<string>(neighbors.Count);

        foreach (var neighbor in neighbors)
        {
            if (!currentLabels.TryGetValue(neighbor.NodeId, out var neighborLabel))
            {
                // Same exception type the dictionary indexer would raise, with a usable message.
                throw new KeyNotFoundException(string.Create(
                    CultureInfo.InvariantCulture,
                    $"An edge references node {neighbor.NodeId}, which is not present in the graph."));
            }

            tagged.Add(string.Create(
                CultureInfo.InvariantCulture,
                $"{directionTag}{(int)neighbor.EdgeType}:{neighborLabel}"));
        }

        // Ordinal sort makes the result independent of edge enumeration order.
        tagged.Sort(StringComparer.Ordinal);
        sb.AppendJoin(',', tagged);
    }

    /// <summary>
    /// Combines all node labels into the final graph hash: labels are sorted ordinally,
    /// joined with '|', and hashed with SHA-256.
    /// </summary>
    private static byte[] ComputeFinalHash(Dictionary<int, string> labels)
    {
        // Sort labels for deterministic output
        var sortedLabels = new List<string>(labels.Values);
        sortedLabels.Sort(StringComparer.Ordinal);

        var combined = string.Join("|", sortedLabels);
        return SHA256.HashData(Encoding.UTF8.GetBytes(combined));
    }

    /// <summary>An adjacency entry: the node at the other end of an edge and that edge's type.</summary>
    private readonly record struct Neighbor(int NodeId, SemanticEdgeType EdgeType);
}
|
||||
Reference in New Issue
Block a user