#!/usr/bin/env python3
"""
Deterministic interim graph fixture generator.

Produces two fixtures (50k and 100k nodes) with simple package/version nodes
and dependency edges. Output shape is NDJSON with stable ordering.
"""
from __future__ import annotations

import hashlib
import json
import math
import random
from pathlib import Path
from typing import Iterable, Iterator, List

ROOT = Path(__file__).resolve().parent
OUT_DIR = ROOT

TENANT = "demo-tenant"


def chunked(seq: Iterable, size: int) -> Iterator[list]:
    """Yield consecutive lists of at most ``size`` items taken from ``seq``.

    Every yielded list holds exactly ``size`` items except possibly the last
    one, which holds whatever remains. Nothing is yielded for an empty input.
    """
    chunk = []
    for item in seq:
        chunk.append(item)
        if len(chunk) >= size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk


def make_nodes(count: int) -> List[dict]:
    """Build ``count`` package nodes numbered 1..count.

    Ids and names are zero-padded to six digits; the version string is derived
    from the node number so the output is fully reproducible.
    """
    return [
        {
            "id": f"pkg-{i:06d}",
            "kind": "package",
            "name": f"package-{i:06d}",
            "version": f"1.{(i % 10)}.{(i % 7)}",
            "tenant": TENANT,
        }
        for i in range(1, count + 1)
    ]


def make_edges(nodes: List[dict], fanout: int) -> List[dict]:
    """Build ``depends_on`` edges from each node to later nodes only.

    The node at list position ``idx`` is treated as node number ``idx + 1``
    and is connected to ``min(fanout, number_of_later_nodes)`` distinct nodes
    with a strictly higher number. Because every edge points "forward", the
    result contains no self-loops and no cycles.

    Targets are rendered as ``pkg-{number:06d}``, i.e. this assumes ``nodes``
    is numbered consecutively from 1 in list order, as ``make_nodes`` produces.

    A fixed RNG seed keeps the output identical from run to run; edges are
    emitted in source order, then ascending target order.
    """
    edges: List[dict] = []
    rng = random.Random(42)
    n = len(nodes)
    for idx, node in enumerate(nodes):
        # Node numbers are 1-based, so this node is idx + 1 and the first
        # strictly-later candidate is idx + 2.
        first_later = idx + 2
        later_count = n - (idx + 1)
        # Never ask for more distinct targets than exist; this also guarantees
        # the sampling loop below terminates (and is skipped for the last node
        # or a non-positive fanout).
        wanted = min(fanout, later_count)
        targets = set()
        while len(targets) < wanted:
            targets.add(rng.randint(first_later, n))
        for t in sorted(targets):
            edges.append(
                {
                    "id": f"edge-{node['id']}-{t:06d}",
                    "kind": "depends_on",
                    "source": node["id"],
                    "target": f"pkg-{t:06d}",
                    "tenant": TENANT,
                }
            )
    return edges


def write_ndjson(path: Path, records: Iterable[dict]) -> None:
    """Write ``records`` to ``path`` as compact, key-sorted JSON, one per line."""
    # newline="\n" disables platform newline translation so the bytes on disk
    # (and therefore the manifest hashes) are the same on every OS.
    with path.open("w", encoding="utf-8", newline="\n") as f:
        for rec in records:
            f.write(json.dumps(rec, separators=(",", ":"), sort_keys=True))
            f.write("\n")


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        # Read in fixed-size blocks so large fixtures are not loaded at once.
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()


def generate_fixture(name: str, node_count: int) -> None:
    """Generate one fixture directory ``OUT_DIR / name``.

    Writes ``nodes.ndjson``, ``edges.ndjson`` and a ``manifest.json`` holding
    the record counts and the SHA-256 hash of each NDJSON file.
    """
    fixture_dir = OUT_DIR / name
    fixture_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {name} with {node_count} nodes…")
    nodes = make_nodes(node_count)

    # keep fanout small to limit edges and file size
    fanout = max(1, int(math.log10(node_count)))
    edges = make_edges(nodes, fanout=fanout)

    nodes_path = fixture_dir / "nodes.ndjson"
    edges_path = fixture_dir / "edges.ndjson"
    manifest_path = fixture_dir / "manifest.json"

    write_ndjson(nodes_path, nodes)
    write_ndjson(edges_path, edges)

    manifest = {
        "version": "1.0.0",
        "tenant": TENANT,
        "counts": {"nodes": len(nodes), "edges": len(edges)},
        "hashes": {
            "nodes.ndjson": sha256_file(nodes_path),
            "edges.ndjson": sha256_file(edges_path),
        },
    }
    # Same newline/encoding pinning as the NDJSON files, for stable bytes.
    with manifest_path.open("w", encoding="utf-8", newline="\n") as f:
        f.write(json.dumps(manifest, indent=2, sort_keys=True))
    print(f"Wrote manifest {manifest_path}")


def main() -> None:
    """Generate the 50k-node and 100k-node fixtures."""
    generate_fixture("graph-50k", 50_000)
    generate_fixture("graph-100k", 100_000)


if __name__ == "__main__":
    main()