up
This commit is contained in:
125
samples/graph/interim/generate.py
Normal file
125
samples/graph/interim/generate.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Deterministic interim graph fixture generator.
|
||||
|
||||
Produces two fixtures (50k and 100k nodes) with simple package/version nodes
|
||||
and dependency edges. Output shape is NDJSON with stable ordering.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List
|
||||
|
||||
# Directory that holds this script; fixture folders are created beneath it.
ROOT = Path(__file__).resolve().parents[0]
OUT_DIR = ROOT
# Tenant label stamped on every generated node, edge, and manifest.
TENANT = "demo-tenant"
|
||||
|
||||
|
||||
def chunked(seq: Iterable, size: int):
    """Yield successive lists of items drawn from ``seq``.

    A batch is emitted as soon as it holds at least ``size`` items, so for
    ``size >= 1`` every batch except possibly the last has exactly ``size``
    items. A trailing partial batch is emitted only when it is non-empty.
    Each yielded batch is a fresh list.
    """
    batch = []
    for element in seq:
        batch.append(element)
        if len(batch) < size:
            continue
        yield batch
        batch = []
    if batch:
        yield batch
|
||||
|
||||
|
||||
def make_nodes(count: int) -> List[dict]:
    """Build ``count`` package node records, numbered from 1 to ``count``.

    Each record carries a zero-padded ``id`` (``pkg-000001``), a matching
    ``name``, a version derived from the number (``1.<n % 10>.<n % 7>``),
    and the module-level ``TENANT``. A ``count`` below 1 yields an empty
    list.
    """
    return [
        {
            "id": f"pkg-{number:06d}",
            "kind": "package",
            "name": f"package-{number:06d}",
            "version": f"1.{(number % 10)}.{(number % 7)}",
            "tenant": TENANT,
        }
        for number in range(1, count + 1)
    ]
|
||||
|
||||
|
||||
def make_edges(nodes: List[dict], fanout: int) -> List[dict]:
    """Build deterministic ``depends_on`` edges that form a sparse DAG.

    Every node is connected to up to ``fanout`` distinct nodes that come
    strictly after it in ``nodes``, so the result contains no self-loops
    and no cycles. Nodes near the end of the list get fewer edges (the last
    node gets none) because fewer later nodes exist.

    Target ids are rendered as ``pkg-<number>`` using 1-based positions,
    which assumes ``nodes[i]["id"]`` is ``pkg-{i + 1:06d}`` as produced by
    ``make_nodes``.

    Args:
        nodes: Node records; only each record's ``"id"`` is read.
        fanout: Maximum number of outgoing edges per node. Values below 1
            produce no edges.

    Returns:
        Edge records ordered by source position, then by target number.
    """
    edges: List[dict] = []
    rng = random.Random(42)  # fixed seed keeps the fixture reproducible
    n = len(nodes)
    for idx, node in enumerate(nodes):
        # nodes[idx] is package number idx + 1, so sampling must start at
        # idx + 2; starting at idx + 1 would allow a self-loop.
        first_later = idx + 2
        wanted = min(fanout, n - idx - 1)
        if wanted <= 0:
            continue
        targets = rng.sample(range(first_later, n + 1), wanted)
        for t in sorted(targets):
            edges.append(
                {
                    "id": f"edge-{node['id']}-{t:06d}",
                    "kind": "depends_on",
                    "source": node["id"],
                    "target": f"pkg-{t:06d}",
                    "tenant": TENANT,
                }
            )
    return edges
|
||||
|
||||
|
||||
def write_ndjson(path: Path, records: Iterable[dict]):
    """Write ``records`` to ``path`` as newline-delimited JSON.

    Each record becomes one compact JSON line with sorted keys, terminated
    by a single ``\\n``. Any existing file at ``path`` is overwritten; an
    empty ``records`` iterable produces an empty file.

    Args:
        path: Destination file.
        records: JSON-serialisable dicts, written in iteration order.
    """
    # newline="\n" turns off platform newline translation so the bytes on
    # disk are identical on every OS.
    with path.open("w", encoding="utf-8", newline="\n") as f:
        for rec in records:
            f.write(json.dumps(rec, separators=(",", ":"), sort_keys=True))
            f.write("\n")
|
||||
|
||||
|
||||
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and consumed in 8192-byte reads, so
    the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def generate_fixture(name: str, node_count: int):
    """Generate one fixture directory containing nodes, edges, and a manifest.

    Creates ``OUT_DIR / name`` if needed and writes three files into it:
    ``nodes.ndjson``, ``edges.ndjson``, and ``manifest.json``. The manifest
    records the tenant, the node and edge counts, and the SHA-256 digest of
    both NDJSON files.

    Args:
        name: Fixture directory name, relative to ``OUT_DIR``.
        node_count: Number of nodes to generate; must be at least 1.

    Raises:
        ValueError: If ``node_count`` is below 1.
    """
    # Fail before touching the filesystem; math.log10 would otherwise raise
    # an unhelpful "math domain error" after the directory is created.
    if node_count < 1:
        raise ValueError(f"node_count must be at least 1, got {node_count}")

    fixture_dir = OUT_DIR / name
    fixture_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {name} with {node_count} nodes…")
    nodes = make_nodes(node_count)
    # keep fanout small to limit edges and file size
    fanout = max(1, int(math.log10(node_count)))
    edges = make_edges(nodes, fanout=fanout)

    nodes_path = fixture_dir / "nodes.ndjson"
    edges_path = fixture_dir / "edges.ndjson"
    manifest_path = fixture_dir / "manifest.json"

    write_ndjson(nodes_path, nodes)
    write_ndjson(edges_path, edges)

    manifest = {
        "version": "1.0.0",
        "tenant": TENANT,
        "counts": {"nodes": len(nodes), "edges": len(edges)},
        "hashes": {
            "nodes.ndjson": sha256_file(nodes_path),
            "edges.ndjson": sha256_file(edges_path),
        },
    }
    # Explicit encoding and newline keep the manifest bytes identical on
    # every platform, independent of locale and OS line endings.
    with manifest_path.open("w", encoding="utf-8", newline="\n") as f:
        f.write(json.dumps(manifest, indent=2, sort_keys=True))
    print(f"Wrote manifest {manifest_path}")
|
||||
|
||||
|
||||
def main():
    """Generate both interim fixtures: ``graph-50k`` first, then ``graph-100k``."""
    fixtures = (
        ("graph-50k", 50_000),
        ("graph-100k", 100_000),
    )
    for fixture_name, node_total in fixtures:
        generate_fixture(fixture_name, node_total)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user