Files
git.stella-ops.org/tests/supply-chain/02-schema-fuzz/run_mutations.py

209 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""Schema-aware deterministic mutation lane for supply-chain fixtures."""
from __future__ import annotations
import argparse
import json
import pathlib
import random
import sys
import time
import unicodedata
from typing import Callable
# "tools" directory located next to this script's own directory (one level up
# from the script's folder, then into "tools").
TOOLS_DIR = pathlib.Path(__file__).resolve().parents[1] / "tools"
# Put TOOLS_DIR at the front of the module search path; the two imports below
# are presumably resolved from there (hence the E402 late-import suppressions).
sys.path.insert(0, str(TOOLS_DIR))
from canonicalize_json import DuplicateKeyError, canonicalize_text, parse_json_strict # noqa: E402
from emit_artifacts import TestCaseResult, record_failure, write_junit # noqa: E402
# Signature shared by every mutator: fixture text in, mutated text out.
MutationFn = Callable[[str], str]
def _truncate_payload(text: str) -> str:
return text[:-1] if text else text
def _append_garbage(text: str) -> str:
return text + " !!!"
def _inject_duplicate_key(text: str) -> str:
if text.lstrip().startswith("{"):
return text.replace("{", '{"bomFormat":"CycloneDX","bomFormat":"CycloneDX",', 1)
return text
def _unicode_normalization_toggle(text: str) -> str:
return unicodedata.normalize("NFD", text)
def _reorder_known_keys(text: str) -> str:
    """Re-serialize a JSON object with its top-level keys in reverse sorted order.

    The text is parsed with ``parse_json_strict``. When the parsed value is not
    a dict, *text* is returned untouched. Otherwise the object is dumped again
    with two-space indentation and non-ASCII characters left unescaped.
    """
    document = parse_json_strict(text)
    if not isinstance(document, dict):
        return text
    descending = {}
    for key in sorted(document, reverse=True):
        descending[key] = document[key]
    return json.dumps(descending, ensure_ascii=False, indent=2)
# Registry of (label, mutator) pairs. main() picks an entry by seeded random
# index, so the order of this list is part of the deterministic replay contract;
# the label is embedded in case ids and mutated-corpus file names.
MUTATORS: list[tuple[str, MutationFn]] = [
    ("truncate", _truncate_payload),
    ("append_garbage", _append_garbage),
    ("duplicate_key", _inject_duplicate_key),
    ("unicode_nfd", _unicode_normalization_toggle),
    ("reorder_keys", _reorder_known_keys),
]
def _load_inputs() -> list[tuple[str, str]]:
root = pathlib.Path(__file__).resolve().parents[1] / "05-corpus" / "fixtures"
files = sorted(path for path in root.rglob("*.json") if "malformed" not in path.parts)
return [(path.name, path.read_text(encoding="utf-8")) for path in files]
def main() -> int:
    """Run the seeded mutation lane and write its artifacts.

    Command-line options:
        --seed          Seed for the mutator-selection RNG (default 20260226).
        --limit         Maximum number of mutations to execute (default 1000).
        --time-seconds  Wall-clock budget for the mutation loop (default 60).
        --output        Directory that receives all artifacts.

    Each iteration takes the next fixture round-robin, picks a mutator with the
    seeded RNG, applies it once and passes the result to ``canonicalize_text``.
    ``DuplicateKeyError`` and ``json.JSONDecodeError`` count as expected
    rejections (the case passes); any other exception counts as a crash, is
    forwarded to ``record_failure`` and fails the case.

    Artifacts written under the output directory: ``report.json``,
    ``junit.xml``, ``repro_playbook.md`` and the first 50 mutated payloads in
    ``corpus/mutated``.

    Returns:
        0 when no crash was recorded, 1 otherwise.

    Raises:
        SystemExit: If no fixture inputs are found.
    """
    parser = argparse.ArgumentParser(description="Run deterministic schema mutation lane.")
    parser.add_argument("--seed", type=int, default=20260226)
    parser.add_argument("--limit", type=int, default=1000)
    parser.add_argument("--time-seconds", type=int, default=60)
    parser.add_argument(
        "--output",
        type=pathlib.Path,
        default=pathlib.Path("out/supply-chain/02-schema-fuzz"),
    )
    args = parser.parse_args()

    rng = random.Random(args.seed)
    output = args.output.resolve()
    output.mkdir(parents=True, exist_ok=True)
    mutated_dir = output / "corpus" / "mutated"
    mutated_dir.mkdir(parents=True, exist_ok=True)

    fixture_inputs = _load_inputs()
    if not fixture_inputs:
        raise SystemExit("No mutation inputs found")

    start = time.perf_counter()
    counts = {
        "accepted": 0,
        "rejected_invalid_json": 0,
        "rejected_duplicate_keys": 0,
        "crash": 0,
    }
    lane_case_results: list[TestCaseResult] = []
    mutation_records: list[dict[str, str | int]] = []
    executed = 0
    while executed < args.limit and (time.perf_counter() - start) < args.time_seconds:
        fixture_name, payload = fixture_inputs[executed % len(fixture_inputs)]
        # Keep randrange (not rng.choice) so the seeded selection sequence stays
        # exactly what earlier runs with the same seed produced.
        mutator_name, mutator = MUTATORS[rng.randrange(0, len(MUTATORS))]
        case_id = f"{executed:05d}-{fixture_name}-{mutator_name}"
        case_start = time.perf_counter()

        # Run the mutator exactly once. If the mutator itself raises, the
        # unmutated payload is what gets recorded; calling the mutator again
        # from an except handler would re-raise and abort the whole lane.
        mutated = payload
        crash_message: str | None = None
        try:
            mutated = mutator(payload)
            canonicalize_text(mutated)
            counts["accepted"] += 1
        except DuplicateKeyError as exc:
            counts["rejected_duplicate_keys"] += 1
            mutation_records.append(
                {"caseId": case_id, "result": "rejected_duplicate_keys", "reason": str(exc)}
            )
        except json.JSONDecodeError as exc:
            counts["rejected_invalid_json"] += 1
            mutation_records.append(
                {"caseId": case_id, "result": "rejected_invalid_json", "reason": str(exc)}
            )
        except Exception as exc:  # noqa: BLE001
            # Anything that is not a clean rejection is a lane crash.
            counts["crash"] += 1
            crash_message = str(exc)
            record_failure(
                lane_output_dir=output,
                case_id=case_id,
                seed=args.seed,
                payload_text=mutated,
                error_class="mutation_lane_crash",
                message=crash_message,
                details={
                    "fixture": fixture_name,
                    "mutator": mutator_name,
                },
                canonical_diff_patch=None,
            )

        # Accepted and cleanly rejected cases pass; only crashes fail.
        # failure_message is supplied only for crashes so passing cases keep
        # whatever default TestCaseResult defines for it.
        result_kwargs = {
            "suite": "02-schema-fuzz",
            "name": case_id,
            "passed": crash_message is None,
            "duration_seconds": time.perf_counter() - case_start,
        }
        if crash_message is not None:
            result_kwargs["failure_message"] = crash_message
        lane_case_results.append(TestCaseResult(**result_kwargs))

        # Persist only the first 50 mutated payloads as a sample corpus.
        if executed < 50:
            mutated_path = mutated_dir / f"{executed:05d}-{mutator_name}.json"
            mutated_path.write_text(mutated, encoding="utf-8", newline="\n")
        executed += 1

    report = {
        "seed": args.seed,
        "executed": executed,
        "limit": args.limit,
        "timeBudgetSeconds": args.time_seconds,
        "durationSeconds": round(time.perf_counter() - start, 4),
        "counts": counts,
        "machineReadableErrorClasses": sorted(
            {
                "invalid_json",
                "duplicate_key",
                "mutation_lane_crash",
            }
        ),
        "records": mutation_records,
    }
    (output / "report.json").write_text(json.dumps(report, sort_keys=True, indent=2) + "\n", encoding="utf-8")
    write_junit(output / "junit.xml", lane_case_results)

    # The replay command uses the executed count as its limit so a run that
    # stopped on the time budget replays the same number of cases.
    repro = (
        "# Repro Playbook\n\n"
        f"- Seed: `{args.seed}`\n"
        f"- Executed mutations: `{executed}`\n"
        "- Replay command:\n"
        f" - `python tests/supply-chain/02-schema-fuzz/run_mutations.py --seed {args.seed} --limit {executed} --time-seconds {args.time_seconds}`\n"
    )
    (output / "repro_playbook.md").write_text(repro, encoding="utf-8", newline="\n")
    return 0 if counts["crash"] == 0 else 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())