#!/usr/bin/env python3
"""Schema-aware deterministic mutation lane for supply-chain fixtures."""

from __future__ import annotations

import argparse
import json
import pathlib
import random
import sys
import time
import unicodedata
from typing import Callable

TOOLS_DIR = pathlib.Path(__file__).resolve().parents[1] / "tools"
sys.path.insert(0, str(TOOLS_DIR))

from canonicalize_json import DuplicateKeyError, canonicalize_text, parse_json_strict  # noqa: E402
from emit_artifacts import TestCaseResult, record_failure, write_junit  # noqa: E402

MutationFn = Callable[[str], str]


def _truncate_payload(text: str) -> str:
    """Return *text* without its last character (unchanged when empty)."""
    return text[:-1] if text else text


def _append_garbage(text: str) -> str:
    """Return *text* with the literal suffix ``" !!!"`` appended."""
    return text + " !!!"


def _inject_duplicate_key(text: str) -> str:
    """Insert a repeated ``bomFormat`` member after the first ``{``.

    Text whose first non-whitespace character is not ``{`` is returned
    unchanged.
    """
    if text.lstrip().startswith("{"):
        return text.replace("{", '{"bomFormat":"CycloneDX","bomFormat":"CycloneDX",', 1)
    return text


def _unicode_normalization_toggle(text: str) -> str:
    """Return *text* converted to Unicode NFD normalization form."""
    return unicodedata.normalize("NFD", text)


def _reorder_known_keys(text: str) -> str:
    """Re-serialize a top-level JSON object with its keys in reverse-sorted order.

    The text is parsed with ``parse_json_strict``; any exception that parser
    raises propagates to the caller. Non-object documents are returned
    unchanged.
    """
    parsed = parse_json_strict(text)
    if isinstance(parsed, dict):
        reordered = {k: parsed[k] for k in sorted(parsed.keys(), reverse=True)}
        return json.dumps(reordered, ensure_ascii=False, indent=2)
    return text


MUTATORS: list[tuple[str, MutationFn]] = [
    ("truncate", _truncate_payload),
    ("append_garbage", _append_garbage),
    ("duplicate_key", _inject_duplicate_key),
    ("unicode_nfd", _unicode_normalization_toggle),
    ("reorder_keys", _reorder_known_keys),
]


def _load_inputs() -> list[tuple[str, str]]:
    """Load every fixture ``*.json`` file as ``(file name, text)`` pairs.

    Files are discovered recursively below ``05-corpus/fixtures`` (a sibling
    of this script's directory) and returned in sorted path order. Anything
    located under a path component named ``malformed`` is skipped.
    """
    root = pathlib.Path(__file__).resolve().parents[1] / "05-corpus" / "fixtures"
    files = sorted(
        path
        for path in root.rglob("*.json")
        # Test components relative to the fixtures root so that an ancestor
        # directory that happens to be called "malformed" does not silently
        # exclude the whole corpus.
        if "malformed" not in path.relative_to(root).parts
    )
    return [(path.name, path.read_text(encoding="utf-8")) for path in files]


def main() -> int:
    """Run the mutation lane and write its artifacts.

    Fixtures are visited round-robin, and a mutator is drawn from ``MUTATORS``
    with a ``random.Random`` seeded from ``--seed``. Each mutated payload is
    passed to ``canonicalize_text`` and the outcome is counted as accepted,
    rejected (duplicate keys / invalid JSON) or crash. The loop stops after
    ``--limit`` cases or once ``--time-seconds`` has elapsed.

    Written below ``--output``: the first 50 mutated payloads under
    ``corpus/mutated``, ``report.json``, ``junit.xml`` and
    ``repro_playbook.md``.

    Returns:
        ``0`` when no case was classified as a crash, otherwise ``1``.

    Raises:
        SystemExit: when no fixture inputs are found.
    """
    parser = argparse.ArgumentParser(description="Run deterministic schema mutation lane.")
    parser.add_argument("--seed", type=int, default=20260226)
    parser.add_argument("--limit", type=int, default=1000)
    parser.add_argument("--time-seconds", type=int, default=60)
    parser.add_argument(
        "--output",
        type=pathlib.Path,
        default=pathlib.Path("out/supply-chain/02-schema-fuzz"),
    )
    args = parser.parse_args()

    rng = random.Random(args.seed)
    output = args.output.resolve()
    output.mkdir(parents=True, exist_ok=True)
    mutated_dir = output / "corpus" / "mutated"
    mutated_dir.mkdir(parents=True, exist_ok=True)

    fixture_inputs = _load_inputs()
    if not fixture_inputs:
        raise SystemExit("No mutation inputs found")

    start = time.perf_counter()
    counts = {
        "accepted": 0,
        "rejected_invalid_json": 0,
        "rejected_duplicate_keys": 0,
        "crash": 0,
    }
    lane_case_results: list[TestCaseResult] = []
    mutation_records: list[dict[str, str | int]] = []

    executed = 0
    while executed < args.limit and (time.perf_counter() - start) < args.time_seconds:
        fixture_name, payload = fixture_inputs[executed % len(fixture_inputs)]
        # Keep randrange (rather than rng.choice) so the seeded selection
        # sequence stays identical to earlier runs of this lane.
        mutator_name, mutator = MUTATORS[rng.randrange(0, len(MUTATORS))]
        case_id = f"{executed:05d}-{fixture_name}-{mutator_name}"
        case_start = time.perf_counter()

        # Bind the payload up front: if the mutator itself raises, the
        # handlers below still have text to persist. Re-running the mutator
        # inside a handler would raise again and abort the whole lane.
        mutated = payload
        failure_message: str | None = None
        try:
            mutated = mutator(payload)
            canonicalize_text(mutated)
            counts["accepted"] += 1
        except DuplicateKeyError as exc:
            counts["rejected_duplicate_keys"] += 1
            mutation_records.append(
                {"caseId": case_id, "result": "rejected_duplicate_keys", "reason": str(exc)}
            )
        except json.JSONDecodeError as exc:
            counts["rejected_invalid_json"] += 1
            mutation_records.append(
                {"caseId": case_id, "result": "rejected_invalid_json", "reason": str(exc)}
            )
        except Exception as exc:  # noqa: BLE001
            # Anything else is unexpected: persist a failure artifact and
            # mark the case as failed instead of stopping the lane.
            counts["crash"] += 1
            failure_message = str(exc)
            record_failure(
                lane_output_dir=output,
                case_id=case_id,
                seed=args.seed,
                payload_text=mutated,
                error_class="mutation_lane_crash",
                message=failure_message,
                details={
                    "fixture": fixture_name,
                    "mutator": mutator_name,
                },
                canonical_diff_patch=None,
            )

        duration = time.perf_counter() - case_start
        if failure_message is None:
            # Accepted and cleanly rejected payloads both count as passing.
            lane_case_results.append(
                TestCaseResult(
                    suite="02-schema-fuzz",
                    name=case_id,
                    passed=True,
                    duration_seconds=duration,
                )
            )
        else:
            lane_case_results.append(
                TestCaseResult(
                    suite="02-schema-fuzz",
                    name=case_id,
                    passed=False,
                    duration_seconds=duration,
                    failure_message=failure_message,
                )
            )

        # Only the first 50 mutated payloads are kept on disk.
        if executed < 50:
            mutated_path = mutated_dir / f"{executed:05d}-{mutator_name}.json"
            mutated_path.write_text(mutated, encoding="utf-8", newline="\n")

        executed += 1

    report = {
        "seed": args.seed,
        "executed": executed,
        "limit": args.limit,
        "timeBudgetSeconds": args.time_seconds,
        "durationSeconds": round(time.perf_counter() - start, 4),
        "counts": counts,
        "machineReadableErrorClasses": sorted(
            {
                "invalid_json",
                "duplicate_key",
                "mutation_lane_crash",
            }
        ),
        "records": mutation_records,
    }
    # newline="\n" matches the other text artifacts written by this lane.
    (output / "report.json").write_text(
        json.dumps(report, sort_keys=True, indent=2) + "\n",
        encoding="utf-8",
        newline="\n",
    )
    write_junit(output / "junit.xml", lane_case_results)

    repro = (
        "# Repro Playbook\n\n"
        f"- Seed: `{args.seed}`\n"
        f"- Executed mutations: `{executed}`\n"
        "- Replay command:\n"
        f" - `python tests/supply-chain/02-schema-fuzz/run_mutations.py --seed {args.seed} --limit {executed} --time-seconds {args.time_seconds}`\n"
    )
    (output / "repro_playbook.md").write_text(repro, encoding="utf-8", newline="\n")

    return 0 if counts["crash"] == 0 else 1


if __name__ == "__main__":
    raise SystemExit(main())