up

2025-12-01 21:16:22 +02:00
parent c11d87d252
commit 909d9b6220
208 changed files with 860954 additions and 832 deletions
--- a/src/Bench/StellaOps.Bench/Graph/README.md
+++ b/src/Bench/StellaOps.Bench/Graph/README.md
@@ -0,0 +1,30 @@
+# Graph Bench Harness (BENCH-GRAPH-21-001)
+
+Purpose: measure basic graph load/adjacency build and shallow path exploration over deterministic fixtures.
+
+## Fixtures
+- Use interim synthetic fixtures under `samples/graph/interim/graph-50k` or `graph-100k`.
+- Each fixture includes `nodes.ndjson`, `edges.ndjson`, and `manifest.json` with hashes/counts.
+
+## Usage
+```bash
+python graph_bench.py \
+  --fixture ../../../samples/graph/interim/graph-50k \
+  --output results/graph-50k.json \
+  --samples 100
+```
+
+Outputs a JSON summary with:
+- `fixture` (fixture directory name), `nodes`, `edges`
+- `build_ms` — time to build adjacency (ms)
+- `bfs_ms` — total time (ms) for the depth-3 BFS over the sampled start nodes; `bfs_samples` — how many start nodes were sampled
+- `avg_reach_3`, `max_reach_3` — nodes reached within depth 3
+- `manifest` — copied from fixture for traceability
+
+Determinism:
+- Sorted node ids, fixed sample size, stable ordering, no randomness beyond fixture content.
+- No network access; pure local file reads.
+
+Next steps (after overlay schema lands):
+- Extend to load overlay snapshots and measure overlay-join overhead.
+- Add p95/median latency over multiple runs and optional concurrency knobs.
--- a/src/Bench/StellaOps.Bench/Graph/graph_bench.py
+++ b/src/Bench/StellaOps.Bench/Graph/graph_bench.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""
+Graph benchmark harness (BENCH-GRAPH-21-001)
+
+Reads deterministic NDJSON fixtures (nodes/edges) and computes basic metrics plus
+lightweight path queries to exercise adjacency building. Uses only local files,
+no network, and a sorted (non-random) node sample for reproducibility.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import time
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+def load_ndjson(path: Path):
+    """Lazily yield one parsed JSON value per non-blank line of ``path``.
+
+    The file is read as UTF-8. Lines that are empty or contain only
+    whitespace are skipped; every other line is handed to ``json.loads``.
+    """
+    with path.open("r", encoding="utf-8") as handle:
+        for raw_line in handle:
+            if not raw_line.strip():
+                continue
+            yield json.loads(raw_line)
+
+
+def build_graph(nodes_path: Path, edges_path: Path) -> Tuple[Dict[str, List[str]], int]:
+    """Build a directed adjacency list from NDJSON node and edge files.
+
+    Args:
+        nodes_path: NDJSON file whose records each carry an ``"id"`` key.
+        edges_path: NDJSON file whose records each carry ``"source"`` and
+            ``"target"`` keys.
+
+    Returns:
+        ``(adjacency, edge_count)``. ``adjacency`` maps every node id to the
+        sorted list of its edge targets; ``edge_count`` is the number of
+        edges that were kept.
+
+    Raises:
+        KeyError: if a record lacks one of the keys named above.
+    """
+    adjacency: Dict[str, List[str]] = {}
+    for node in load_ndjson(nodes_path):
+        # setdefault makes a repeated node id harmless.
+        adjacency.setdefault(node["id"], [])
+    edge_count = 0
+    for edge in load_ndjson(edges_path):
+        source = edge["source"]
+        target = edge["target"]
+        # Only keep edges whose endpoints both exist as nodes.
+        if source in adjacency and target in adjacency:
+            adjacency[source].append(target)
+            edge_count += 1
+    # Sort neighbours so traversal order does not depend on edge-file order.
+    for neighbours in adjacency.values():
+        neighbours.sort()
+    return adjacency, edge_count
+
+
+def bfs_limited(adjacency: Dict[str, List[str]], start: str, max_depth: int = 3) -> int:
+    """Count the nodes reachable from ``start`` within ``max_depth`` hops.
+
+    The count includes ``start`` itself. Nodes absent from ``adjacency`` are
+    treated as having no outgoing edges.
+    """
+    seen = {start}
+    level = [start]
+    for _ in range(max_depth):
+        # Nothing left to expand: the reachable set is already complete.
+        if not level:
+            break
+        upcoming = []
+        for vertex in level:
+            for neighbour in adjacency.get(vertex, []):
+                if neighbour in seen:
+                    continue
+                seen.add(neighbour)
+                upcoming.append(neighbour)
+        level = upcoming
+    return len(seen)
+
+
+def run_bench(fixture_dir: Path, sample_size: int = 100) -> dict:
+    """Time adjacency construction and depth-3 BFS over one fixture directory.
+
+    Args:
+        fixture_dir: Directory holding ``nodes.ndjson`` and ``edges.ndjson``,
+            plus an optional ``manifest.json``.
+        sample_size: Number of start nodes to run BFS from. Values below
+            zero are treated as zero.
+
+    Returns:
+        A JSON-serialisable summary: fixture name, node and edge counts,
+        build and BFS timings in milliseconds, the number of BFS samples,
+        average and maximum depth-3 reach, and the fixture manifest (an empty
+        dict when no manifest file exists).
+    """
+    nodes_path = fixture_dir / "nodes.ndjson"
+    edges_path = fixture_dir / "edges.ndjson"
+    manifest_path = fixture_dir / "manifest.json"
+
+    # Read the manifest as UTF-8, the same encoding used for the NDJSON
+    # fixtures, rather than the platform's locale default.
+    if manifest_path.exists():
+        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+    else:
+        manifest = {}
+
+    t0 = time.perf_counter()
+    adjacency, edge_count = build_graph(nodes_path, edges_path)
+    build_ms = (time.perf_counter() - t0) * 1000
+
+    # Deterministic sample: the first N node ids in sorted order. Clamp at
+    # zero because a negative slice bound would select all but the last N
+    # ids instead of none.
+    node_ids = sorted(adjacency)[: max(sample_size, 0)]
+    t1 = time.perf_counter()
+    reach_counts = [bfs_limited(adjacency, node_id, max_depth=3) for node_id in node_ids]
+    bfs_ms = (time.perf_counter() - t1) * 1000
+
+    avg_reach = sum(reach_counts) / len(reach_counts) if reach_counts else 0
+    max_reach = max(reach_counts) if reach_counts else 0
+
+    return {
+        "fixture": fixture_dir.name,
+        "nodes": len(adjacency),
+        "edges": edge_count,
+        "build_ms": round(build_ms, 2),
+        "bfs_ms": round(bfs_ms, 2),
+        "bfs_samples": len(node_ids),
+        "avg_reach_3": round(avg_reach, 2),
+        "max_reach_3": max_reach,
+        "manifest": manifest,
+    }
+
+
+def main() -> int:
+    """Parse CLI arguments, run the benchmark, and write the JSON summary.
+
+    Returns:
+        ``0`` on success. Invalid arguments end the process through
+        ``parser.error`` (which raises ``SystemExit``) before any output
+        directory or file is created.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fixture", required=True, help="Path to fixture directory (nodes.ndjson, edges.ndjson)")
+    parser.add_argument("--output", required=True, help="Path to write results JSON")
+    parser.add_argument("--samples", type=int, default=100, help="Number of starting nodes to sample deterministically")
+    args = parser.parse_args()
+
+    # Reject bad input up front with a usage message instead of a traceback
+    # or a silently skewed sample.
+    if args.samples < 0:
+        parser.error("--samples must be non-negative")
+    fixture_dir = Path(args.fixture).resolve()
+    if not fixture_dir.is_dir():
+        parser.error(f"fixture directory not found: {fixture_dir}")
+
+    out_path = Path(args.output).resolve()
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    result = run_bench(fixture_dir, sample_size=args.samples)
+    # sort_keys keeps the report layout stable between runs.
+    out_path.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
+    print(f"Wrote results to {out_path}")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/src/Bench/StellaOps.Bench/TASKS.md
+++ b/src/Bench/StellaOps.Bench/TASKS.md
@@ -3,3 +3,4 @@
 | ID | Status | Sprint | Notes | Evidence |
 | --- | --- | --- | --- | --- |
 | BENCH-DETERMINISM-401-057 | DONE (2025-11-26) | SPRINT_0512_0001_0001_bench | Determinism harness and mock scanner added under `src/Bench/StellaOps.Bench/Determinism`; manifests + sample inputs included. | `src/Bench/StellaOps.Bench/Determinism/results` (generated) |
+| BENCH-GRAPH-21-001 | DOING (2025-12-01) | SPRINT_0512_0001_0001_bench | Added interim graph bench harness (`Graph/graph_bench.py`) using synthetic 50k/100k fixtures; measures adjacency build + depth-3 reach; pending overlay schema for final fixture integration. | `src/Bench/StellaOps.Bench/Graph` |