Some checks failed
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled

StellaOps Bot
2025-12-12 09:35:37 +02:00
parent ce5ec9c158
commit efaf3cb789
238 changed files with 146274 additions and 5767 deletions

View File

@@ -0,0 +1,22 @@
# ImpactIndex Throughput Benchmark
This harness replays a deterministic set of productKeys to measure cold vs warm lookup performance for the ImpactIndex planner. It is offline-only and relies on the bundled NDJSON dataset.
## Inputs
- `docs/samples/impactindex/products-10k.ndjson` (+ `.sha256`), generated with seed `2025-01-01T00:00:00Z`; a verification sketch follows this list.
- No network calls are performed; all data is local.
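The dataset ships with a `.sha256` sidecar in the `<hash>  <filename>` layout used throughout this commit. A minimal verification sketch (the `verify_sha256` helper below is illustrative, not part of the harness):

```python
import hashlib
from pathlib import Path

def verify_sha256(data_path: str, sidecar_path: str) -> bool:
    """Compare a file's SHA-256 digest against its sidecar (`<hash>  <filename>`)."""
    expected = Path(sidecar_path).read_text(encoding="utf-8").split()[0]
    actual = hashlib.sha256(Path(data_path).read_bytes()).hexdigest()
    return actual == expected

# Example, relative to the repository root:
# verify_sha256("docs/samples/impactindex/products-10k.ndjson",
#               "docs/samples/impactindex/products-10k.ndjson.sha256")
```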
## Running
```bash
python impact_index_bench.py --input ../../../../docs/samples/impactindex/products-10k.ndjson --output results/impactindex.ndjson --threads 1 --seed 20250101
```
## Output
- NDJSON with one record per pass (`cold`, `warm`), fields:
`pass`, `startedAtUtc`, `durationMs`, `throughput_items_per_sec`, `p95Ms`, `p99Ms`, `maxMs`, `rssMb`, `managedMb`, `gc_gen2`, `cacheHitRate`.
- Use `results/impactindex.ndjson` as evidence and publish hashes alongside runs when promoting to CI; a quick inspection sketch follows.
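A small sketch that loads the two pass records, assuming the default output path:

```python
import json

with open("results/impactindex.ndjson", encoding="utf-8") as handle:
    cold, warm = (json.loads(line) for line in handle)

# A primed cache should show a large warm-over-cold speedup.
ratio = warm["throughput_items_per_sec"] / cold["throughput_items_per_sec"]
print(f"warm/cold throughput ratio: {ratio:.1f}x")
```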
## Determinism Notes
- Fixed seed controls per-product work and cache access order.
- Single-threaded by default; keep `--threads 1` so timings stay reproducible.
- Property order is sorted in output NDJSON for stable diffs.

View File

@@ -0,0 +1 @@
# Package marker for ImpactIndex bench harness.

View File

@@ -0,0 +1,146 @@
"""ImpactIndex throughput benchmark harness.
This harness replays a deterministic productKey dataset and records cold vs warm
lookup performance. It is intentionally offline-friendly and relies only on the
provided NDJSON inputs.
"""
import argparse
import gc
import hashlib
import json
import random
import statistics
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List, Tuple
def percentile(values: List[float], pct: float) -> float:
"""Return an interpolated percentile to keep outputs deterministic."""
if not values:
return 0.0
ordered = sorted(values)
k = (len(ordered) - 1) * (pct / 100.0)
lower = int(k)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return float(ordered[lower])
fraction = k - lower
return float(ordered[lower] + (ordered[upper] - ordered[lower]) * fraction)
def load_product_keys(path: Path) -> List[str]:
with path.open(encoding="utf-8") as handle:
return [json.loads(line)["productKey"] for line in handle if line.strip()]
class ImpactIndexBench:
def __init__(self, seed: int, threads: int):
self.rng = random.Random(seed)
self.threads = threads
self.cache = {}
self.cache_hits = 0
self.cache_misses = 0
def _compute_cost(self, product_key: str) -> int:
digest = hashlib.blake2b(product_key.encode("utf-8"), digest_size=16).digest()
local_rng = random.Random(hashlib.sha1(product_key.encode("utf-8")).hexdigest())
iterations = 40 + (digest[0] % 30)
value = 0
for i in range(iterations):
value ^= (digest[i % len(digest)] + i * 31) & 0xFFFFFFFF
value ^= local_rng.randint(0, 1024)
# Simple deterministic cost proxy
return value
def resolve(self, product_key: str) -> int:
if product_key in self.cache:
self.cache_hits += 1
return self.cache[product_key]
cost = self._compute_cost(product_key)
enriched = (cost % 1000) + 1
self.cache[product_key] = enriched
self.cache_misses += 1
return enriched
def run_pass(pass_name: str, bench: ImpactIndexBench, product_keys: Iterable[str]) -> Tuple[dict, List[float]]:
started_at = datetime.now(timezone.utc).isoformat()
timings_ms: List[float] = []
gc.collect()
import tracemalloc
tracemalloc.start()
start = time.perf_counter()
for key in product_keys:
t0 = time.perf_counter()
bench.resolve(key)
timings_ms.append((time.perf_counter() - t0) * 1000.0)
duration_ms = (time.perf_counter() - start) * 1000.0
current_bytes, peak_bytes = tracemalloc.get_traced_memory()
tracemalloc.stop()
# GC stats are coarse; we surface gen2 collections as a proxy for managed pressure.
if hasattr(gc, "get_stats"):
gc_stats = gc.get_stats()
gc_gen2 = gc_stats[2]["collections"] if len(gc_stats) > 2 else 0
else:
counts = gc.get_count()
gc_gen2 = counts[2] if len(counts) > 2 else 0
throughput = (len(timings_ms) / (duration_ms / 1000.0)) if duration_ms else 0.0
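    # tracemalloc peak stands in for both rssMb and managedMb; it tracks Python allocations, not true process RSS.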
record = {
"pass": pass_name,
"startedAtUtc": started_at,
"durationMs": round(duration_ms, 3),
"throughput_items_per_sec": round(throughput, 3),
"p95Ms": round(percentile(timings_ms, 95), 3),
"p99Ms": round(percentile(timings_ms, 99), 3),
"maxMs": round(max(timings_ms) if timings_ms else 0.0, 3),
"rssMb": round(peak_bytes / (1024 * 1024), 3),
"managedMb": round(peak_bytes / (1024 * 1024), 3),
"gc_gen2": gc_gen2,
"cacheHitRate": round(
bench.cache_hits / max(1, (bench.cache_hits + bench.cache_misses)), 4
),
}
return record, timings_ms
def write_ndjson(records: List[dict], output: Path):
output.parent.mkdir(parents=True, exist_ok=True)
with output.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":"), sort_keys=True) + "\n")
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(description="ImpactIndex throughput benchmark")
parser.add_argument("--input", required=True, help="Path to products-10k.ndjson dataset")
parser.add_argument("--output", default="results/impactindex.ndjson", help="Output NDJSON path")
parser.add_argument("--threads", type=int, default=1, help="Thread count (deterministic when 1)")
parser.add_argument("--seed", type=int, default=20250101, help="Seed for deterministic runs")
return parser.parse_args(argv)
def main(argv: List[str] | None = None):
args = parse_args(argv)
dataset_path = Path(args.input)
product_keys = load_product_keys(dataset_path)
bench = ImpactIndexBench(seed=args.seed, threads=args.threads)
cold_record, cold_timings = run_pass("cold", bench, product_keys)
warm_record, warm_timings = run_pass("warm", bench, product_keys)
output_path = Path(args.output)
write_ndjson([cold_record, warm_record], output_path)
print(f"Wrote {output_path} with {len(product_keys)} productKeys")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,2 @@
{"cacheHitRate":0.0,"durationMs":4327.484,"gc_gen2":1,"managedMb":0.743,"maxMs":1.454,"p95Ms":0.746,"p99Ms":0.948,"pass":"cold","rssMb":0.743,"startedAtUtc":"2025-12-11T20:46:49.411207+00:00","throughput_items_per_sec":2310.811}
{"cacheHitRate":0.5,"durationMs":14.618,"gc_gen2":2,"managedMb":0.31,"maxMs":0.098,"p95Ms":0.001,"p99Ms":0.003,"pass":"warm","rssMb":0.31,"startedAtUtc":"2025-12-11T20:46:53.753219+00:00","throughput_items_per_sec":684092.79}

View File

@@ -0,0 +1 @@
7e9f1041a4be6f1b0eeed26f1b4e730ae918876dc2846e36dab4403f9164485e impactindex.ndjson

View File

@@ -0,0 +1 @@
# Package marker for unit test discovery.

View File

@@ -0,0 +1,61 @@
import json
import sys
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
import impact_index_bench as bench
def build_dataset(tmp_path: Path) -> Path:
path = tmp_path / "products.ndjson"
samples = [
{"productKey": "pkg:npm/alpha@1.0.0", "tenant": "bench"},
{"productKey": "pkg:npm/bravo@1.0.1", "tenant": "bench"},
{"productKey": "pkg:pypi/charlie@2.0.0", "tenant": "bench"},
]
with path.open("w", encoding="utf-8") as handle:
for item in samples:
handle.write(json.dumps(item, separators=(",", ":")) + "\n")
return path
class ImpactIndexBenchTests(unittest.TestCase):
def test_percentile_interpolation(self):
values = [1, 2, 3, 4, 5]
self.assertEqual(bench.percentile(values, 50), 3)
self.assertAlmostEqual(bench.percentile(values, 95), 4.8, places=3)
def test_bench_runs_cold_and_warm(self):
tmp_path = Path(self._get_tempdir())
dataset = build_dataset(tmp_path)
keys = bench.load_product_keys(dataset)
harness = bench.ImpactIndexBench(seed=20250101, threads=1)
cold_record, cold_timings = bench.run_pass("cold", harness, keys)
warm_record, warm_timings = bench.run_pass("warm", harness, keys)
self.assertEqual(cold_record["pass"], "cold")
self.assertEqual(warm_record["pass"], "warm")
self.assertEqual(len(cold_timings), len(keys))
self.assertEqual(len(warm_timings), len(keys))
self.assertGreater(warm_record["cacheHitRate"], cold_record["cacheHitRate"])
def test_write_ndjson_orders_properties(self):
tmp_path = Path(self._get_tempdir())
output = tmp_path / "out.ndjson"
bench.write_ndjson([{"b": 2, "a": 1}], output)
content = output.read_text(encoding="utf-8").strip()
self.assertEqual(content, '{"a":1,"b":2}')
def _get_tempdir(self) -> Path:
import tempfile
return Path(tempfile.mkdtemp(prefix="impact-bench-test-"))
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,21 @@
# Policy Evaluation with Reachability Cache
Benchmarks policy evaluation overhead with cold, warm, and mixed reachability cache scenarios.
## Inputs
- Policies: `docs/samples/policy/policy-delta-baseline.ndjson` (or another baseline).
- Reachability cache: `src/Bench/StellaOps.Bench/Signals/results/reachability-cache-10k.ndjson` (or 50k variant).
## Running
```bash
python policy_cache_bench.py --policies ../../../../docs/samples/policy/policy-delta-baseline.ndjson --reachability-cache ../Signals/results/reachability-cache-10k.ndjson --output results/policy-cache.ndjson --seed 20250101 --threads 1
```
## Output
- NDJSON with three records: `cold`, `warm`, `mixed` (70/30 warm/cold split).
- Fields: `run`, `evaluations`, `durationMs`, `throughputPerSec`, `addedLatencyP50Ms`, `addedLatencyP95Ms`, `addedLatencyP99Ms`, `rssMb`, `managedMb`, `gcGen2`, `cacheHitRate`.
## Determinism
- Policy-to-function mapping hashes the policy id with blake2b, so assignments are stable across runs (see the sketch after this list).
- Single-threaded execution keeps evaluation order deterministic; absolute timings still vary with hardware.
- JSON output uses sorted keys and UTC timestamps.
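The mapping described above can be reproduced standalone; a minimal sketch mirroring the harness's `map_policy_to_function`:

```python
import hashlib

def map_policy_to_function(policy_id: str, function_index: list[str]) -> str:
    """Stable policy -> function assignment via blake2b, as in the harness."""
    digest = hashlib.blake2b(policy_id.encode("utf-8"), digest_size=8).digest()
    return function_index[int.from_bytes(digest, "big") % len(function_index)]

# The same policy id always lands on the same function for a given index:
print(map_policy_to_function("pol-0001", ["fn-A", "fn-B", "fn-C"]))
```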

View File

@@ -0,0 +1 @@
# Package marker for policy reachability cache bench.

View File

@@ -0,0 +1,165 @@
"""Policy evaluation with reachability cache benchmark.
Uses reachability cache outputs (from 26-001) to measure cold vs warm cache
latency impact on policy evaluation.
"""
import argparse
import gc
import hashlib
import json
import random
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List, Tuple
def percentile(values: List[float], pct: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
k = (len(ordered) - 1) * (pct / 100.0)
lower = int(k)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return float(ordered[lower])
fraction = k - lower
return float(ordered[lower] + (ordered[upper] - ordered[lower]) * fraction)
def load_ndjson(path: Path) -> List[dict]:
with path.open(encoding="utf-8") as handle:
return [json.loads(line) for line in handle if line.strip()]
class PolicyCacheBench:
def __init__(self, policies: List[dict], reachability_cache: List[dict], seed: int):
self.policies = policies
self.reachability_cache = reachability_cache
self.seed = seed
self.rng = random.Random(seed)
self.cache_lookup = {rec["function"]: rec for rec in reachability_cache}
self.function_index = [rec["function"] for rec in reachability_cache]
self.evaluation_cache: dict[str, float] = {}
self.cache_hits = 0
self.cache_misses = 0
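        # Hit/miss counters accumulate across cold/warm/mixed runs, so cacheHitRate is cumulative.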
def map_policy_to_function(self, policy_id: str) -> str:
if not self.function_index:
return ""
digest = hashlib.blake2b(policy_id.encode("utf-8"), digest_size=8).digest()
idx = int.from_bytes(digest, "big") % len(self.function_index)
return self.function_index[idx]
def evaluate(self, policy: dict, mode: str) -> float:
"""Simulate evaluation cost and return added latency in ms."""
function = self.map_policy_to_function(policy["policyId"])
cached = self.evaluation_cache.get(function)
if mode == "warm" and cached is not None:
self.cache_hits += 1
return cached
self.cache_misses += 1
rec = self.cache_lookup.get(function, {"fanout": 1, "runtimeCount": 0})
fanout = rec.get("fanout", 1) or 1
runtime_count = rec.get("runtimeCount", 0)
work_units = (fanout * 2) + (runtime_count or 1)
t0 = time.perf_counter()
acc = 0
for i in range(work_units):
acc ^= (i * 31) ^ fanout
acc ^= runtime_count
latency_ms = (time.perf_counter() - t0) * 1000.0
self.evaluation_cache[function] = latency_ms
return latency_ms
def run(self, mode: str) -> Tuple[dict, List[float]]:
import tracemalloc
started_at = datetime.now(timezone.utc).isoformat()
timings: List[float] = []
gc.collect()
tracemalloc.start()
start = time.perf_counter()
if mode == "mixed":
cutoff = int(len(self.policies) * 0.3)
cold_policies = self.policies[:cutoff]
warm_policies = self.policies[cutoff:]
ordered = cold_policies + warm_policies
warm_override = [False] * len(cold_policies) + [True] * len(warm_policies)
else:
ordered = self.policies
warm_override = [mode == "warm"] * len(self.policies)
for policy, use_warm in zip(ordered, warm_override):
timings.append(self.evaluate(policy, "warm" if use_warm else "cold"))
duration_ms = (time.perf_counter() - start) * 1000.0
_, peak_bytes = tracemalloc.get_traced_memory()
tracemalloc.stop()
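        # GC stats are coarse; gen2 collections are surfaced as a proxy for managed pressure.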
if hasattr(gc, "get_stats"):
gc_stats = gc.get_stats()
gc_gen2 = gc_stats[2]["collections"] if len(gc_stats) > 2 else 0
else:
counts = gc.get_count()
gc_gen2 = counts[2] if len(counts) > 2 else 0
throughput = (len(timings) / (duration_ms / 1000.0)) if duration_ms else 0.0
record = {
"run": mode,
"startedAtUtc": started_at,
"evaluations": len(timings),
"durationMs": round(duration_ms, 3),
"throughputPerSec": round(throughput, 3),
"addedLatencyP50Ms": round(percentile(timings, 50), 4),
"addedLatencyP95Ms": round(percentile(timings, 95), 4),
"addedLatencyP99Ms": round(percentile(timings, 99), 4),
"rssMb": round(peak_bytes / (1024 * 1024), 3),
"managedMb": round(peak_bytes / (1024 * 1024), 3),
"gcGen2": gc_gen2,
"cacheHitRate": round(self.cache_hits / max(1, self.cache_hits + self.cache_misses), 4),
}
return record, timings
def write_ndjson(records: Iterable[dict], output: Path):
output.parent.mkdir(parents=True, exist_ok=True)
with output.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":"), sort_keys=True) + "\n")
def parse_args(argv=None) -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Policy evaluation reachability cache benchmark")
parser.add_argument("--policies", required=True, help="Policy baseline NDJSON")
parser.add_argument("--reachability-cache", required=True, help="Cache NDJSON from reachability bench")
parser.add_argument("--output", default="results/policy-cache.ndjson", help="Output NDJSON path")
parser.add_argument("--seed", type=int, default=20250101, help="Seed for deterministic ordering")
parser.add_argument("--threads", type=int, default=1, help="Thread count (unused; deterministic single-thread)")
return parser.parse_args(argv)
def main(argv=None):
args = parse_args(argv)
policies = load_ndjson(Path(args.policies))
reachability_cache = load_ndjson(Path(args.reachability_cache))
bench = PolicyCacheBench(policies=policies, reachability_cache=reachability_cache, seed=args.seed)
cold_record, _ = bench.run("cold")
warm_record, _ = bench.run("warm")
mixed_record, _ = bench.run("mixed")
output = Path(args.output)
write_ndjson([cold_record, warm_record, mixed_record], output)
print(f"Wrote {output} with {len(policies)} policy evaluations per run")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,3 @@
{"addedLatencyP50Ms":0.0025,"addedLatencyP95Ms":0.0062,"addedLatencyP99Ms":0.0131,"cacheHitRate":0.0,"durationMs":53.031,"evaluations":5000,"gcGen2":1,"managedMb":0.293,"rssMb":0.293,"run":"cold","startedAtUtc":"2025-12-11T20:54:07.407921+00:00","throughputPerSec":94284.475}
{"addedLatencyP50Ms":0.0025,"addedLatencyP95Ms":0.0062,"addedLatencyP99Ms":0.0131,"cacheHitRate":0.5,"durationMs":27.123,"evaluations":5000,"gcGen2":2,"managedMb":0.079,"rssMb":0.079,"run":"warm","startedAtUtc":"2025-12-11T20:54:07.465328+00:00","throughputPerSec":184346.749}
{"addedLatencyP50Ms":0.0026,"addedLatencyP95Ms":0.0062,"addedLatencyP99Ms":0.0131,"cacheHitRate":0.5667,"durationMs":34.751,"evaluations":5000,"gcGen2":3,"managedMb":0.19,"rssMb":0.19,"run":"mixed","startedAtUtc":"2025-12-11T20:54:07.496163+00:00","throughputPerSec":143878.682}

View File

@@ -0,0 +1 @@
b25802b72d8e2d3767b0d608e80e899f15e897b175cc419cb28fd714e8c82a74 policy-cache.ndjson

View File

@@ -0,0 +1 @@
# Package marker for policy cache bench tests.

View File

@@ -0,0 +1,43 @@
import sys
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
import policy_cache_bench as bench
class PolicyCacheBenchTests(unittest.TestCase):
def test_runs_cold_warm_mixed(self):
policies = [
{"policyId": "pol-0001", "package": "bench.pkg.0001"},
{"policyId": "pol-0002", "package": "bench.pkg.0002"},
{"policyId": "pol-0003", "package": "bench.pkg.0003"},
]
reachability_cache = [
{"function": "fn-A", "fanout": 2, "runtimeCount": 3},
{"function": "fn-B", "fanout": 1, "runtimeCount": 1},
{"function": "fn-C", "fanout": 0, "runtimeCount": 0},
]
harness = bench.PolicyCacheBench(policies, reachability_cache, seed=20250101)
cold_record, _ = harness.run("cold")
warm_record, _ = harness.run("warm")
mixed_record, _ = harness.run("mixed")
self.assertEqual(cold_record["run"], "cold")
self.assertEqual(warm_record["run"], "warm")
self.assertEqual(mixed_record["run"], "mixed")
self.assertGreaterEqual(warm_record["cacheHitRate"], cold_record["cacheHitRate"])
self.assertGreater(mixed_record["evaluations"], 0)
def test_percentile(self):
values = [1, 2, 3, 4]
self.assertAlmostEqual(bench.percentile(values, 50), 2.5)
self.assertAlmostEqual(bench.percentile(values, 95), 3.85, places=2)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,22 @@
# Policy Delta Benchmark
Measures the performance difference between full policy evaluation and incremental (delta) updates.
## Inputs
- Baseline: `docs/samples/policy/policy-delta-baseline.ndjson` (+ `.sha256`).
- Delta patch: `docs/samples/policy/policy-delta-changes.ndjson` (+ `.sha256`).
- All inputs are deterministic and offline.
## Running
```bash
python policy_delta_bench.py --baseline ../../../../docs/samples/policy/policy-delta-baseline.ndjson --delta ../../../../docs/samples/policy/policy-delta-changes.ndjson --output results/policy-delta.ndjson --threads 1 --seed 20250101
```
## Output
- NDJSON with two records: `full` and `delta`.
- Fields: `run`, `startedAtUtc`, `durationMs`, `evaluationsPerSec`, `p50Ms`, `p95Ms`, `p99Ms`, `rssMb`, `managedMb`, `gcGen2`, `items`.
## Determinism
- Fixed seed controls any randomized selection; processing order follows dataset order.
- Single-threaded mode recommended (`--threads 1`) for reproducible timing.
- JSON output uses sorted keys for stable diffs.
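Delta records carry an `op` field (`upsert` by default, or `delete`); a minimal sketch of the replay semantics, mirroring the harness's `apply_delta`:

```python
# Baseline snapshot keyed by policyId (toy data for illustration).
store = {
    "pol-0001": {"policyId": "pol-0001", "version": "1.0.0"},
    "pol-0002": {"policyId": "pol-0002", "version": "1.0.0"},
}
delta = [
    {"op": "delete", "policyId": "pol-0001"},
    {"op": "upsert", "policyId": "pol-0003", "version": "1.0.0"},
]
touched = []
for record in delta:
    if record.get("op", "upsert") == "delete":
        store.pop(record["policyId"], None)
    else:
        store[record["policyId"]] = record
        touched.append(record["policyId"])

# Only touched (upserted) policies are re-evaluated in the delta pass.
print(sorted(store), touched)  # ['pol-0002', 'pol-0003'] ['pol-0003']
```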

View File

@@ -0,0 +1 @@
# Package marker for policy delta benchmark harness.

View File

@@ -0,0 +1,152 @@
"""Policy delta benchmark harness.
Runs a full evaluation over a baseline snapshot, then replays delta operations
to measure incremental evaluation cost. Outputs deterministic NDJSON suitable
for offline/CI runs.
"""
import argparse
import gc
import hashlib
import json
import random
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List, Tuple
def percentile(values: List[float], pct: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
k = (len(ordered) - 1) * (pct / 100.0)
lower = int(k)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return float(ordered[lower])
fraction = k - lower
return float(ordered[lower] + (ordered[upper] - ordered[lower]) * fraction)
def load_ndjson(path: Path) -> List[dict]:
with path.open(encoding="utf-8") as handle:
return [json.loads(line) for line in handle if line.strip()]
class PolicyDeltaBench:
def __init__(self, seed: int, threads: int):
self.seed = seed
self.threads = threads
self.rng = random.Random(seed)
self.store: dict[str, dict] = {}
def load_baseline(self, records: Iterable[dict]):
self.store = {record["policyId"]: record for record in records}
def apply_delta(self, delta_records: Iterable[dict]) -> List[str]:
touched: List[str] = []
for record in delta_records:
policy_id = record["policyId"]
op = record.get("op", "upsert")
if op == "delete":
self.store.pop(policy_id, None)
else:
self.store[policy_id] = record
touched.append(policy_id)
return touched
def evaluate_policy(self, record: dict) -> int:
key = f"{record['policyId']}|{record.get('package','')}|{record.get('version','')}"
digest = hashlib.sha256(key.encode("utf-8")).digest()
iterations = 35 + digest[0] % 25
score = 0
for i in range(iterations):
score ^= (digest[i % len(digest)] + (i * 17)) & 0xFFFFFFFF
score = (score * 31 + digest[(i + 3) % len(digest)]) % 1_000_003
return score
def run_pass(pass_name: str, bench: PolicyDeltaBench, records: Iterable[dict]) -> Tuple[dict, List[float]]:
import tracemalloc
started_at = datetime.now(timezone.utc).isoformat()
timings_ms: List[float] = []
gc.collect()
tracemalloc.start()
start = time.perf_counter()
for record in records:
t0 = time.perf_counter()
bench.evaluate_policy(record)
timings_ms.append((time.perf_counter() - t0) * 1000.0)
duration_ms = (time.perf_counter() - start) * 1000.0
_, peak_bytes = tracemalloc.get_traced_memory()
tracemalloc.stop()
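    # GC stats are coarse; gen2 collections are surfaced as a proxy for managed pressure.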
if hasattr(gc, "get_stats"):
gc_stats = gc.get_stats()
gc_gen2 = gc_stats[2]["collections"] if len(gc_stats) > 2 else 0
else:
counts = gc.get_count()
gc_gen2 = counts[2] if len(counts) > 2 else 0
throughput = (len(timings_ms) / (duration_ms / 1000.0)) if duration_ms else 0.0
record = {
"run": pass_name,
"startedAtUtc": started_at,
"durationMs": round(duration_ms, 3),
"evaluationsPerSec": round(throughput, 3),
"p50Ms": round(percentile(timings_ms, 50), 3),
"p95Ms": round(percentile(timings_ms, 95), 3),
"p99Ms": round(percentile(timings_ms, 99), 3),
"rssMb": round(peak_bytes / (1024 * 1024), 3),
"managedMb": round(peak_bytes / (1024 * 1024), 3),
"gcGen2": gc_gen2,
"items": len(timings_ms),
}
return record, timings_ms
def write_ndjson(records: List[dict], output: Path):
output.parent.mkdir(parents=True, exist_ok=True)
with output.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":"), sort_keys=True) + "\n")
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Policy delta benchmark harness")
parser.add_argument("--baseline", required=True, help="Path to baseline NDJSON")
parser.add_argument("--delta", required=True, help="Path to delta NDJSON")
parser.add_argument("--output", default="results/policy-delta.ndjson", help="Output NDJSON path")
parser.add_argument("--threads", type=int, default=1, help="Thread count (1 for deterministic mode)")
parser.add_argument("--seed", type=int, default=20250101, help="Seed for deterministic replay")
return parser.parse_args(argv)
def main(argv: List[str] | None = None):
args = parse_args(argv)
baseline_path = Path(args.baseline)
delta_path = Path(args.delta)
baseline_records = load_ndjson(baseline_path)
delta_records = load_ndjson(delta_path)
bench = PolicyDeltaBench(seed=args.seed, threads=args.threads)
bench.load_baseline(baseline_records)
full_record, _ = run_pass("full", bench, baseline_records)
touched_policy_ids = bench.apply_delta(delta_records)
to_evaluate = [bench.store[pid] for pid in touched_policy_ids if pid in bench.store]
delta_record, _ = run_pass("delta", bench, to_evaluate)
output_path = Path(args.output)
write_ndjson([full_record, delta_record], output_path)
print(f"Wrote {output_path} with {len(baseline_records)} baseline items and {len(to_evaluate)} delta items")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,2 @@
{"durationMs":1240.851,"evaluationsPerSec":4029.492,"gcGen2":1,"items":5000,"managedMb":0.155,"p50Ms":0.246,"p95Ms":0.31,"p99Ms":0.36,"rssMb":0.155,"run":"full","startedAtUtc":"2025-12-11T20:50:11.478768+00:00"}
{"durationMs":123.701,"evaluationsPerSec":4041.995,"gcGen2":2,"items":500,"managedMb":0.016,"p50Ms":0.246,"p95Ms":0.311,"p99Ms":0.345,"rssMb":0.016,"run":"delta","startedAtUtc":"2025-12-11T20:50:12.723765+00:00"}

View File

@@ -0,0 +1 @@
73ab974a9df5facc9c1f848bd2c953576bcca898e2af3269058bbc287c8f03dc policy-delta.ndjson

View File

@@ -0,0 +1 @@
# Package marker for policy delta bench tests.

View File

@@ -0,0 +1,68 @@
import json
import sys
import tempfile
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
import policy_delta_bench as bench
def build_baseline(tmp_path: Path) -> Path:
path = tmp_path / "baseline.ndjson"
records = [
{"policyId": "pol-0001", "package": "bench.pkg.0001", "version": "1.0.0", "decision": "allow"},
{"policyId": "pol-0002", "package": "bench.pkg.0002", "version": "1.0.0", "decision": "deny"},
]
with path.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":")) + "\n")
return path
def build_delta(tmp_path: Path) -> Path:
path = tmp_path / "delta.ndjson"
records = [
{"op": "upsert", "policyId": "pol-0002", "package": "bench.pkg.0002", "version": "1.1.0", "decision": "allow"},
{"op": "delete", "policyId": "pol-0001", "package": "bench.pkg.0001", "version": "1.0.0"},
{"op": "upsert", "policyId": "pol-0003", "package": "bench.pkg.0003", "version": "1.0.0", "decision": "allow"},
]
with path.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":")) + "\n")
return path
class PolicyDeltaBenchTests(unittest.TestCase):
def test_runs_full_and_delta(self):
tmp_dir = Path(tempfile.mkdtemp(prefix="policy-bench-test-"))
baseline = build_baseline(tmp_dir)
delta = build_delta(tmp_dir)
baseline_records = bench.load_ndjson(baseline)
delta_records = bench.load_ndjson(delta)
harness = bench.PolicyDeltaBench(seed=20250101, threads=1)
harness.load_baseline(baseline_records)
full_record, _ = bench.run_pass("full", harness, baseline_records)
touched = harness.apply_delta(delta_records)
evaluate_set = [harness.store[pid] for pid in touched if pid in harness.store]
delta_record, _ = bench.run_pass("delta", harness, evaluate_set)
self.assertEqual(full_record["run"], "full")
self.assertEqual(delta_record["run"], "delta")
self.assertLessEqual(delta_record["items"], full_record["items"])
self.assertGreaterEqual(delta_record["evaluationsPerSec"], 0)
def test_percentile(self):
values = [1, 2, 3, 4]
self.assertAlmostEqual(bench.percentile(values, 50), 2.5)
self.assertAlmostEqual(bench.percentile(values, 95), 3.85, places=2)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,24 @@
# Reachability Bench
Benchmarks the reachability scoring pipeline using offline synthetic fixtures.
## Inputs
- Callgraph fixtures: `docs/samples/signals/reachability/callgraph-10k.ndjson` and `callgraph-50k.ndjson`.
- Runtime traces: `docs/samples/signals/reachability/runtime-10k.ndjson` and `runtime-50k.ndjson`.
- Schema hash: `docs/benchmarks/signals/reachability-schema.json` (sha256 `aaa5c8ab5cc2fe91e50976fafd8c73597387ab9a881af6d5d9818d202beba24e`).
## Running
```bash
python reachability_bench.py --callgraph ../../../../docs/samples/signals/reachability/callgraph-10k.ndjson --runtime ../../../../docs/samples/signals/reachability/runtime-10k.ndjson --output results/reachability-metrics-10k.ndjson --cache-output results/reachability-cache-10k.ndjson --threads 1 --seed 20250101
```
Swap the input paths for the 50k fixtures to exercise the larger dataset.
## Output
- Metrics NDJSON with fields: `run`, `startedAtUtc`, `functions`, `runtimeEvents`, `facts`, `durationMs`, `factsPerSec`, `p50MsPerNode`, `p95MsPerNode`, `p99MsPerNode`, `rssMb`, `managedMb`, `gcGen2`.
- Cache NDJSON (`reachability-cache-*.ndjson`) with per-function reachability flags, fanout, and runtime counts for downstream policy benches.
## Determinism
- Processing is seeded from runtime function ids in sorted order, and the FIFO traversal keeps queueing deterministic (worked example below).
- Single-threaded execution avoids nondeterministic scheduling.
- Output JSON keys are sorted for stable diffs; timestamps use UTC ISO-8601.
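As a worked example of the traversal (mirroring the unit test in this commit), a three-function chain with one runtime-observed entry point reaches all three functions and emits two facts (edges):

```python
from collections import deque

graph = {"fn-A": ["fn-B"], "fn-B": ["fn-C"], "fn-C": []}
runtime = {"fn-A": 2}  # runtime-observed functions seed the frontier

visited, facts = set(), 0
queue = deque(sorted(runtime))
while queue:
    fn = queue.popleft()
    if fn in visited:
        continue
    visited.add(fn)
    calls = graph.get(fn, [])
    facts += len(calls)
    queue.extend(c for c in calls if c not in visited)

print(len(visited), facts)  # 3 2
```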

View File

@@ -0,0 +1 @@
# Package marker for reachability bench.

View File

@@ -0,0 +1,151 @@
"""Reachability scoring benchmark.
Processes synthetic callgraph and runtime traces to measure facts/sec, latency,
and memory. Outputs both metrics and a cache file consumable by policy benches.
"""
import argparse
import gc
import json
import time
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
def percentile(values: List[float], pct: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
k = (len(ordered) - 1) * (pct / 100.0)
lower = int(k)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return float(ordered[lower])
fraction = k - lower
return float(ordered[lower] + (ordered[upper] - ordered[lower]) * fraction)
def load_ndjson(path: Path) -> List[dict]:
with path.open(encoding="utf-8") as handle:
return [json.loads(line) for line in handle if line.strip()]
def build_graph(callgraph_records: Iterable[dict]) -> Dict[str, List[str]]:
graph: Dict[str, List[str]] = {}
for record in callgraph_records:
graph[record["function"]] = record.get("calls", [])
return graph
def build_runtime(runtime_records: Iterable[dict]) -> Dict[str, int]:
runtime: Dict[str, int] = {}
for record in runtime_records:
runtime[record["function"]] = runtime.get(record["function"], 0) + int(record.get("count", 0))
return runtime
def run_reachability(graph: Dict[str, List[str]], runtime: Dict[str, int]) -> Tuple[dict, List[dict]]:
import tracemalloc
started_at = datetime.now(timezone.utc).isoformat()
visited = set()
cache_records: List[dict] = []
timings_ms: List[float] = []
facts = 0
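    # BFS frontier seeded with runtime-observed functions; sorted order keeps traversal deterministic.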
queue = deque(sorted(runtime.keys()))
gc.collect()
tracemalloc.start()
start = time.perf_counter()
while queue:
fn = queue.popleft()
if fn in visited:
continue
t0 = time.perf_counter()
visited.add(fn)
calls = graph.get(fn, [])
facts += len(calls)
for callee in calls:
if callee not in visited:
queue.append(callee)
timings_ms.append((time.perf_counter() - t0) * 1000.0)
cache_records.append(
{
"function": fn,
"reachable": True,
"runtimeCount": runtime.get(fn, 0),
"fanout": len(calls),
}
)
duration_ms = (time.perf_counter() - start) * 1000.0
_, peak_bytes = tracemalloc.get_traced_memory()
tracemalloc.stop()
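    # GC stats are coarse; gen2 collections are surfaced as a proxy for managed pressure.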
if hasattr(gc, "get_stats"):
gc_stats = gc.get_stats()
gc_gen2 = gc_stats[2]["collections"] if len(gc_stats) > 2 else 0
else:
counts = gc.get_count()
gc_gen2 = counts[2] if len(counts) > 2 else 0
metrics = {
"run": "reachability",
"startedAtUtc": started_at,
"functions": len(graph),
"runtimeEvents": len(runtime),
"facts": facts,
"durationMs": round(duration_ms, 3),
"factsPerSec": round(facts / (duration_ms / 1000.0) if duration_ms else 0.0, 3),
"p50MsPerNode": round(percentile(timings_ms, 50), 4),
"p95MsPerNode": round(percentile(timings_ms, 95), 4),
"p99MsPerNode": round(percentile(timings_ms, 99), 4),
"rssMb": round(peak_bytes / (1024 * 1024), 3),
"managedMb": round(peak_bytes / (1024 * 1024), 3),
"gcGen2": gc_gen2,
}
return metrics, cache_records
def write_ndjson(records: Iterable[dict], output: Path):
output.parent.mkdir(parents=True, exist_ok=True)
with output.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, separators=(",", ":"), sort_keys=True) + "\n")
def parse_args(argv=None) -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Reachability scoring benchmark")
parser.add_argument("--callgraph", required=True, help="Path to callgraph NDJSON")
parser.add_argument("--runtime", required=True, help="Path to runtime NDJSON")
parser.add_argument("--output", default="results/reachability-metrics.ndjson", help="Output metrics NDJSON")
parser.add_argument("--cache-output", default="results/reachability-cache.ndjson", help="Cache output NDJSON")
parser.add_argument("--threads", type=int, default=1, help="Thread count (unused; for compatibility)")
parser.add_argument("--seed", type=int, default=20250101, help="Seed placeholder for deterministic behaviour")
return parser.parse_args(argv)
def main(argv=None):
args = parse_args(argv)
callgraph_records = load_ndjson(Path(args.callgraph))
runtime_records = load_ndjson(Path(args.runtime))
graph = build_graph(callgraph_records)
runtime = build_runtime(runtime_records)
metrics, cache_records = run_reachability(graph, runtime)
write_ndjson([metrics], Path(args.output))
write_ndjson(cache_records, Path(args.cache_output))
print(
f"Wrote metrics to {args.output} and cache with {len(cache_records)} entries to {args.cache_output}"
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

File diff suppressed because it is too large.

View File

@@ -0,0 +1 @@
415490c6ea6185c918a2205ad3a5bca99420d58da322084c6f61cbc1242fde2b reachability-cache-10k.ndjson

File diff suppressed because it is too large.

View File

@@ -0,0 +1 @@
a8d80bf1914e3b0a339520a9a2ba7b60718434ef0a7d44687f0d09ebe1ac5830 reachability-cache-50k.ndjson

View File

@@ -0,0 +1 @@
{"durationMs":58.532,"facts":30000,"factsPerSec":512540.149,"functions":10000,"gcGen2":1,"managedMb":2.66,"p50MsPerNode":0.0031,"p95MsPerNode":0.0036,"p99MsPerNode":0.0062,"rssMb":2.66,"run":"reachability","runtimeEvents":1000,"startedAtUtc":"2025-12-11T20:52:24.336490+00:00"}

View File

@@ -0,0 +1 @@
c6bf61890712d802b3ad288446b4754396774dfaa0d1e8502dc01ba6e8391cd0 reachability-metrics-10k.ndjson

View File

@@ -0,0 +1 @@
{"durationMs":304.323,"facts":150000,"factsPerSec":492897.349,"functions":50000,"gcGen2":1,"managedMb":12.811,"p50MsPerNode":0.0033,"p95MsPerNode":0.004,"p99MsPerNode":0.007,"rssMb":12.811,"run":"reachability","runtimeEvents":5000,"startedAtUtc":"2025-12-11T20:52:33.262306+00:00"}

View File

@@ -0,0 +1 @@
7686b8ffef892a6fa6e207a8a051786facbfa085e713ea3c717a7c2ae8ade97c reachability-metrics-50k.ndjson

View File

@@ -0,0 +1 @@
# Package marker for reachability bench tests.

View File

@@ -0,0 +1,52 @@
import json
import sys
import tempfile
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
import reachability_bench as bench
class ReachabilityBenchTests(unittest.TestCase):
def test_reachability_metrics_and_cache(self):
tmp_dir = Path(tempfile.mkdtemp(prefix="reachability-bench-test-"))
callgraph_path = tmp_dir / "callgraph.ndjson"
runtime_path = tmp_dir / "runtime.ndjson"
callgraph_records = [
{"function": "fn-A", "calls": ["fn-B"], "weight": 1},
{"function": "fn-B", "calls": ["fn-C"], "weight": 1},
{"function": "fn-C", "calls": [], "weight": 1},
]
runtime_records = [
{"function": "fn-A", "count": 2, "timestamp": "2025-01-01T00:00:00Z"},
]
with callgraph_path.open("w", encoding="utf-8") as handle:
for rec in callgraph_records:
handle.write(json.dumps(rec) + "\n")
with runtime_path.open("w", encoding="utf-8") as handle:
for rec in runtime_records:
handle.write(json.dumps(rec) + "\n")
graph = bench.build_graph(bench.load_ndjson(callgraph_path))
runtime = bench.build_runtime(bench.load_ndjson(runtime_path))
metrics, cache = bench.run_reachability(graph, runtime)
self.assertEqual(metrics["functions"], 3)
self.assertEqual(metrics["runtimeEvents"], 1)
self.assertEqual(metrics["facts"], 2)
self.assertEqual(len(cache), 3)
def test_percentile(self):
values = [1, 2, 3]
self.assertAlmostEqual(bench.percentile(values, 50), 2.0)
self.assertAlmostEqual(bench.percentile(values, 99), 2.98, places=2)
if __name__ == "__main__":
unittest.main()

View File

@@ -3,5 +3,9 @@
| ID | Status | Sprint | Notes | Evidence |
| --- | --- | --- | --- | --- |
| BENCH-DETERMINISM-401-057 | DONE (2025-11-26) | SPRINT_0512_0001_0001_bench | Determinism harness and mock scanner added under `src/Bench/StellaOps.Bench/Determinism`; manifests + sample inputs included. | `src/Bench/StellaOps.Bench/Determinism/results` (generated) |
| BENCH-GRAPH-21-001 | DOING (2025-12-01) | SPRINT_0512_0001_0001_bench | Added interim graph bench harness (`Graph/graph_bench.py`) using synthetic 50k/100k fixtures; measures adjacency build + depth-3 reach; pending overlay schema for final fixture integration. | `src/Bench/StellaOps.Bench/Graph` |
| BENCH-GRAPH-21-002 | DOING (2025-12-01) | SPRINT_0512_0001_0001_bench | Added Graph UI bench scaffold (scenarios JSON + driver + plan) using interim fixtures; awaits overlay schema/UI target for Playwright binding and timing collection. | `src/Bench/StellaOps.Bench/Graph` |
| BENCH-GRAPH-21-001 | DONE (2025-12-02) | SPRINT_0512_0001_0001_bench | Graph viewport/path harness with overlay support using canonical `samples/graph/graph-40k` fixture; results captured under `Graph/results`. | `src/Bench/StellaOps.Bench/Graph` |
| BENCH-GRAPH-21-002 | DONE (2025-12-02) | SPRINT_0512_0001_0001_bench | Graph UI Playwright bench driver emitting trace/viewport metadata; linked to 40k fixture. | `src/Bench/StellaOps.Bench/Graph` |
| BENCH-IMPACT-16-001 | DONE (2025-12-11) | SPRINT_0512_0001_0001_bench | ImpactIndex throughput bench with 10k productKey dataset + NDJSON outputs and unit tests. | `src/Bench/StellaOps.Bench/ImpactIndex` |
| BENCH-POLICY-20-002 | DONE (2025-12-11) | SPRINT_0512_0001_0001_bench | Policy delta benchmark (full vs delta) using baseline/delta NDJSON fixtures; outputs hashed. | `src/Bench/StellaOps.Bench/PolicyDelta` |
| BENCH-SIG-26-001 | DONE (2025-12-11) | SPRINT_0512_0001_0001_bench | Reachability scoring harness with schema hash, 10k/50k fixtures, cache outputs for downstream benches. | `src/Bench/StellaOps.Bench/Signals` |
| BENCH-SIG-26-002 | DONE (2025-12-11) | SPRINT_0512_0001_0001_bench | Policy evaluation cache bench (cold/warm/mixed) consuming reachability caches; outputs hashed. | `src/Bench/StellaOps.Bench/PolicyCache` |