126 lines
3.4 KiB
Python
126 lines
3.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Deterministic interim graph fixture generator.
|
|
|
|
Produces two fixtures (50k and 100k nodes) with simple package/version nodes
|
|
and dependency edges. Output shape is NDJSON with stable ordering.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
import math
|
|
import random
|
|
from pathlib import Path
|
|
from typing import Iterable, List
|
|
|
|
# Absolute directory that contains this script.
ROOT = Path(__file__).resolve().parent

# Base directory under which each fixture's sub-directory is created.
OUT_DIR = ROOT

# Tenant identifier stamped on every node, every edge, and the manifest.
TENANT = "demo-tenant"
|
|
|
|
|
|
def chunked(seq: Iterable, size: int):
    """Yield consecutive lists of items taken from ``seq``.

    A batch is emitted as soon as it holds at least ``size`` items; whatever
    is left over once ``seq`` is exhausted is emitted as a final, shorter
    batch. An empty ``seq`` yields nothing. Because the length check runs
    after every append, a ``size`` of zero or less produces one-item batches.
    """
    batch = []
    for element in seq:
        batch.append(element)
        if len(batch) < size:
            continue
        yield batch
        # Start a fresh list so the batch already handed out is not mutated.
        batch = []
    if len(batch) > 0:
        yield batch
|
|
|
|
|
|
def make_nodes(count: int) -> List[dict]:
    """Return ``count`` package node records numbered from 1 to ``count``.

    Every record carries a zero-padded ``id`` (``pkg-000001``), a matching
    ``name`` (``package-000001``), a ``version`` of the form
    ``1.<seq % 10>.<seq % 7>``, the fixed kind ``"package"`` and the module
    tenant. A ``count`` below 1 yields an empty list.
    """
    return [
        {
            "id": f"pkg-{seq:06d}",
            "kind": "package",
            "name": f"package-{seq:06d}",
            "version": f"1.{seq % 10}.{seq % 7}",
            "tenant": TENANT,
        }
        for seq in range(1, count + 1)
    ]
|
|
|
|
|
|
def make_edges(nodes: List[dict], fanout: int) -> List[dict]:
    """Build deterministic ``depends_on`` edges that only point at later nodes.

    Each node is linked to ``fanout`` distinct nodes positioned after it in
    ``nodes`` (or to all of them when fewer than ``fanout`` remain). Every
    edge therefore points forward, so the graph has no self-loops or cycles.

    Target ids are derived from list position as ``pkg-<position:06d>``
    (1-based), which matches the ids produced by ``make_nodes``.

    Args:
        nodes: Node records in id order; each must provide an ``"id"`` key.
        fanout: Maximum number of outgoing edges per node. Values below 1
            produce no edges.

    Returns:
        Edge records ordered by source position, then by ascending target.
    """
    edges: List[dict] = []
    # Fixed seed: repeated runs must produce the same edge set.
    rng = random.Random(42)
    n = len(nodes)
    for idx, node in enumerate(nodes):
        # nodes[idx] carries the 1-based id idx + 1, so the strictly later ids
        # are idx + 2 .. n. Starting at idx + 1 would allow a node to depend
        # on itself (and would force a self-loop on the last node).
        first_later = idx + 2
        later_count = n - idx - 1
        wanted = min(fanout, later_count)
        if wanted <= 0:
            continue
        # sample() returns distinct picks in bounded time, even when `wanted`
        # equals the number of remaining candidates.
        targets = rng.sample(range(first_later, n + 1), wanted)
        edges.extend(
            {
                "id": f"edge-{node['id']}-{t:06d}",
                "kind": "depends_on",
                "source": node["id"],
                "target": f"pkg-{t:06d}",
                "tenant": TENANT,
            }
            for t in sorted(targets)
        )
    return edges
|
|
|
|
|
|
def write_ndjson(path: Path, records: Iterable[dict]) -> None:
    """Write ``records`` to ``path`` as newline-delimited JSON.

    Each record is serialised on its own line in compact form (no spaces
    after separators) with keys sorted, so the same records always produce
    the same text. An existing file at ``path`` is overwritten.

    Args:
        path: Destination file.
        records: JSON-serialisable dictionaries, written in iteration order.
    """
    # newline="\n" disables platform newline translation; without it Windows
    # would emit CRLF and the file's bytes (and any hash of them) would
    # differ between platforms.
    with path.open("w", encoding="utf-8", newline="\n") as f:
        for rec in records:
            f.write(json.dumps(rec, separators=(",", ":"), sort_keys=True))
            f.write("\n")
|
|
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and consumed in 8192-byte blocks, so
    the whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
def generate_fixture(name: str, node_count: int) -> None:
    """Generate one fixture directory named ``name`` under ``OUT_DIR``.

    Writes three files into the directory (created if missing):

    * ``nodes.ndjson`` - ``node_count`` package nodes.
    * ``edges.ndjson`` - dependency edges between those nodes.
    * ``manifest.json`` - version, tenant, record counts and the SHA-256
      digest of each NDJSON file.

    Args:
        name: Sub-directory name for this fixture.
        node_count: Number of nodes to generate; must be at least 1.

    Raises:
        ValueError: If ``node_count`` is less than 1.
    """
    # Validate before touching the filesystem: log10 is undefined for values
    # <= 0 and would otherwise fail with an opaque "math domain error" after
    # the fixture directory had already been created.
    if node_count < 1:
        raise ValueError(
            f"node_count must be a positive integer, got {node_count!r}"
        )

    fixture_dir = OUT_DIR / name
    fixture_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {name} with {node_count} nodes…")
    nodes = make_nodes(node_count)
    # keep fanout small to limit edges and file size
    fanout = max(1, int(math.log10(node_count)))
    edges = make_edges(nodes, fanout=fanout)

    nodes_path = fixture_dir / "nodes.ndjson"
    edges_path = fixture_dir / "edges.ndjson"
    manifest_path = fixture_dir / "manifest.json"

    write_ndjson(nodes_path, nodes)
    write_ndjson(edges_path, edges)

    manifest = {
        "version": "1.0.0",
        "tenant": TENANT,
        "counts": {"nodes": len(nodes), "edges": len(edges)},
        "hashes": {
            "nodes.ndjson": sha256_file(nodes_path),
            "edges.ndjson": sha256_file(edges_path),
        },
    }
    # Explicit UTF-8 and LF line endings keep the manifest byte-identical on
    # every platform instead of depending on locale and newline translation.
    with manifest_path.open("w", encoding="utf-8", newline="\n") as f:
        f.write(json.dumps(manifest, indent=2, sort_keys=True))
    print(f"Wrote manifest {manifest_path}")
|
|
|
|
|
|
def main():
    """Generate the two interim fixtures: ``graph-50k`` then ``graph-100k``."""
    fixtures = (
        ("graph-50k", 50_000),
        ("graph-100k", 100_000),
    )
    for fixture_name, node_total in fixtures:
        generate_fixture(fixture_name, node_total)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|