Refactor code structure for improved readability and maintainability; optimize performance in key functions.
This commit is contained in:
47
scripts/corpus/generate-manifest.py
Normal file
47
scripts/corpus/generate-manifest.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate corpus-manifest.json from case directories."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Two directory levels above the directory that contains this script.
ROOT = Path(__file__).resolve().parents[2]
# Directory tree that is searched recursively for case directories.
CORPUS = ROOT.joinpath("bench", "golden-corpus", "categories")
# Destination file for the generated corpus manifest.
OUTPUT = CORPUS.parent.joinpath("corpus-manifest.json")
|
||||
|
||||
|
||||
def sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 8192-byte reads, so
    its contents are never held in memory all at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def main() -> int:
    """Write the corpus manifest describing every case directory under CORPUS.

    A case directory is any directory below ``CORPUS`` (at any depth) that
    contains a ``case-manifest.json`` entry. Cases are listed in sorted path
    order; each entry records the directory name, its ``ROOT``-relative path
    with forward slashes, and the SHA-256 digest of its
    ``case-manifest.json``. The payload also carries a UTC generation
    timestamp and the number of cases, and is written to ``OUTPUT`` as
    indented JSON followed by a newline.

    Returns:
        0 once the manifest has been written.
    """
    manifest_name = "case-manifest.json"

    case_dirs = sorted(
        candidate
        for candidate in CORPUS.rglob("*")
        if candidate.is_dir() and (candidate / manifest_name).exists()
    )

    cases = [
        {
            "id": case_dir.name,
            # as_posix() converts only genuine path separators; a plain
            # str.replace would also rewrite backslashes that are part of a
            # file name on POSIX systems.
            "path": case_dir.relative_to(ROOT).as_posix(),
            "manifestDigest": f"sha256:{sha256(case_dir / manifest_name)}",
        }
        for case_dir in case_dirs
    ]

    payload = {
        "generatedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "caseCount": len(cases),
        "cases": cases,
    }

    # newline="\n" disables platform newline translation so the manifest is
    # written with LF line endings on every operating system.
    with OUTPUT.open("w", encoding="utf-8", newline="\n") as fh:
        fh.write(json.dumps(payload, indent=2) + "\n")
    return 0
|
||||
|
||||
|
||||
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
Reference in New Issue
Block a user