up
This commit is contained in:
30
src/Bench/StellaOps.Bench/Graph/README.md
Normal file
30
src/Bench/StellaOps.Bench/Graph/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Graph Bench Harness (BENCH-GRAPH-21-001)
|
||||
|
||||
Purpose: measure basic graph load/adjacency build and shallow path exploration over deterministic fixtures.
|
||||
|
||||
## Fixtures
|
||||
- Use interim synthetic fixtures under `samples/graph/interim/graph-50k` or `graph-100k`.
|
||||
- Each fixture includes `nodes.ndjson`, `edges.ndjson`, and `manifest.json` with hashes/counts.
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
python graph_bench.py \
|
||||
--fixture ../../../samples/graph/interim/graph-50k \
|
||||
--output results/graph-50k.json \
|
||||
--samples 100
|
||||
```
|
||||
|
||||
Outputs a JSON summary with:
|
||||
- `fixture`, `nodes`, `edges` — fixture directory name, node count, and count of edges whose endpoints both exist
|
||||
- `build_ms` — time to build adjacency (ms)
|
||||
- `bfs_ms` — total time (ms) for depth-3 BFS over the sampled start nodes; `bfs_samples` records how many were used
|
||||
- `avg_reach_3`, `max_reach_3` — nodes reached within depth 3 (the start node is counted)
|
||||
- `manifest` — copied from fixture for traceability
|
||||
|
||||
Determinism:
|
||||
- Sorted node ids, fixed sample size, stable ordering, no randomness beyond fixture content.
|
||||
- No network access; pure local file reads.
|
||||
|
||||
Next steps (after overlay schema lands):
|
||||
- Extend to load overlay snapshots and measure overlay-join overhead.
|
||||
- Add p95/median latency over multiple runs and optional concurrency knobs.
|
||||
114
src/Bench/StellaOps.Bench/Graph/graph_bench.py
Normal file
114
src/Bench/StellaOps.Bench/Graph/graph_bench.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Graph benchmark harness (BENCH-GRAPH-21-001)
|
||||
|
||||
Reads deterministic NDJSON fixtures (nodes/edges) and computes basic metrics plus
|
||||
lightweight path queries to exercise adjacency building. Uses only local files,
|
||||
no network, and deterministic (sorted) sampling for reproducibility.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
def load_ndjson(path: Path):
    """Lazily yield one decoded JSON value per non-blank line of *path*.

    The file is read as UTF-8 and stays open only while the generator is
    being consumed. Lines that are empty or whitespace-only are skipped.
    """
    with path.open("r", encoding="utf-8") as handle:
        yield from (json.loads(raw) for raw in handle if raw.strip())
|
||||
|
||||
|
||||
def build_graph(nodes_path: Path, edges_path: Path) -> Tuple[Dict[str, List[str]], int]:
    """Build a directed adjacency map from NDJSON node and edge files.

    Args:
        nodes_path: NDJSON file whose records each carry an ``"id"`` key.
        edges_path: NDJSON file whose records each carry ``"source"`` and
            ``"target"`` keys.

    Returns:
        ``(adjacency, edge_count)``. ``adjacency`` maps every node id to the
        sorted list of its out-neighbours (repeated edges are kept as
        repeated entries). ``edge_count`` is the number of edges retained.

    Raises:
        KeyError: if a node record lacks ``"id"`` or an edge record lacks
            ``"source"`` / ``"target"``.
    """
    # The adjacency dict doubles as the set of known node ids, so no separate
    # membership set is needed.
    adjacency: Dict[str, List[str]] = {}
    for node in load_ndjson(nodes_path):
        adjacency.setdefault(node["id"], [])

    edge_count = 0
    for edge in load_ndjson(edges_path):
        source = edge["source"]
        target = edge["target"]
        # Only keep edges whose endpoints are both declared nodes.
        if source in adjacency and target in adjacency:
            adjacency[source].append(target)
            edge_count += 1

    # Sort neighbours so traversal order does not depend on edge file order.
    for neighbours in adjacency.values():
        neighbours.sort()
    return adjacency, edge_count
|
||||
|
||||
|
||||
def bfs_limited(adjacency: Dict[str, List[str]], start: str, max_depth: int = 3) -> int:
    """Count the nodes within ``max_depth`` hops of ``start``.

    The start node itself is always counted, so the result is at least 1.
    Nodes missing from ``adjacency`` are treated as having no out-edges.
    """
    seen = {start}
    current_level = [start]
    depth = 0
    # Expand one level per iteration; stop early once a level comes up empty.
    while depth < max_depth and current_level:
        upcoming = []
        for node in current_level:
            for neighbour in adjacency.get(node, []):
                if neighbour in seen:
                    continue
                seen.add(neighbour)
                upcoming.append(neighbour)
        current_level = upcoming
        depth += 1
    return len(seen)
|
||||
|
||||
|
||||
def run_bench(fixture_dir: Path, sample_size: int = 100) -> dict:
    """Run the graph benchmark against one fixture directory.

    Args:
        fixture_dir: Directory containing ``nodes.ndjson`` and
            ``edges.ndjson``; ``manifest.json`` is optional.
        sample_size: Number of start nodes for the depth-3 BFS. The sample is
            the first ``sample_size`` node ids in sorted order. Values below
            zero are treated as zero.

    Returns:
        A JSON-serialisable dict with the fixture name, node/edge counts,
        adjacency build time and total BFS time (both in milliseconds,
        rounded to 2 decimals), the number of BFS start nodes, average and
        maximum depth-3 reach, and the fixture manifest (``{}`` when the
        manifest file is absent).
    """
    nodes_path = fixture_dir / "nodes.ndjson"
    edges_path = fixture_dir / "edges.ndjson"
    manifest_path = fixture_dir / "manifest.json"

    # Decode explicitly as UTF-8 (as the NDJSON fixtures are) so parsing does
    # not depend on the platform's locale encoding.
    manifest = (
        json.loads(manifest_path.read_text(encoding="utf-8"))
        if manifest_path.exists()
        else {}
    )

    t0 = time.perf_counter()
    adjacency, edge_count = build_graph(nodes_path, edges_path)
    build_ms = (time.perf_counter() - t0) * 1000

    # Deterministic sample: the first N node ids in sorted order. Clamp at
    # zero because a negative slice bound would mean "all but the last N".
    node_ids = sorted(adjacency)[: max(sample_size, 0)]

    t1 = time.perf_counter()
    reach_counts = [bfs_limited(adjacency, node_id, max_depth=3) for node_id in node_ids]
    bfs_ms = (time.perf_counter() - t1) * 1000

    # Guard the empty-sample case to avoid ZeroDivisionError / max() on [].
    avg_reach = sum(reach_counts) / len(reach_counts) if reach_counts else 0
    max_reach = max(reach_counts) if reach_counts else 0

    return {
        "fixture": fixture_dir.name,
        "nodes": len(adjacency),
        "edges": edge_count,
        "build_ms": round(build_ms, 2),
        "bfs_ms": round(bfs_ms, 2),
        "bfs_samples": len(node_ids),
        "avg_reach_3": round(avg_reach, 2),
        "max_reach_3": max_reach,
        "manifest": manifest,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: run the benchmark and write the JSON report.

    Reads ``--fixture``, ``--output`` and ``--samples`` from the command line,
    creates the output file's parent directory if needed, writes the result
    as indented JSON with sorted keys, and returns 0.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--fixture",
        required=True,
        help="Path to fixture directory (nodes.ndjson, edges.ndjson)",
    )
    cli.add_argument(
        "--output",
        required=True,
        help="Path to write results JSON",
    )
    cli.add_argument(
        "--samples",
        type=int,
        default=100,
        help="Number of starting nodes to sample deterministically",
    )
    options = cli.parse_args()

    fixture_dir = Path(options.fixture).resolve()
    report_path = Path(options.output).resolve()
    # Create the destination directory up front, before the benchmark runs.
    report_path.parent.mkdir(parents=True, exist_ok=True)

    summary = run_bench(fixture_dir, sample_size=options.samples)
    serialized = json.dumps(summary, indent=2, sort_keys=True)
    report_path.write_text(serialized)
    print(f"Wrote results to {report_path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
@@ -3,3 +3,4 @@
|
||||
| ID | Status | Sprint | Notes | Evidence |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| BENCH-DETERMINISM-401-057 | DONE (2025-11-26) | SPRINT_0512_0001_0001_bench | Determinism harness and mock scanner added under `src/Bench/StellaOps.Bench/Determinism`; manifests + sample inputs included. | `src/Bench/StellaOps.Bench/Determinism/results` (generated) |
|
||||
| BENCH-GRAPH-21-001 | DOING (2025-12-01) | SPRINT_0512_0001_0001_bench | Added interim graph bench harness (`Graph/graph_bench.py`) using synthetic 50k/100k fixtures; measures adjacency build + depth-3 reach; pending overlay schema for final fixture integration. | `src/Bench/StellaOps.Bench/Graph` |
|
||||
|
||||
Reference in New Issue
Block a user