#!/usr/bin/env bash
set -euo pipefail

# Validates the store-aoc-19-005 dataset tarball.
# Usage: ./scripts/concelier/test-store-aoc-19-005-dataset.sh [tarball]
#
# The tarball (default: out/linksets/linksets-stage-backfill.tar.zst) must
# contain linksets.ndjson, advisory_chunks.ndjson and manifest.json.
# manifest.json is read for:
#   records.linksets / records.advisory_chunks      -> expected line counts
#   sha256["linksets.ndjson"] / sha256["advisory_chunks.ndjson"]
#                                                   -> expected SHA-256 digests
# Exit status: 0 when every count and digest matches; 1 (with a message on
# stderr) on a missing tool, missing file, malformed manifest, or mismatch.

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Compare an expected value against the actual one; abort on mismatch.
# Arguments: $1 - label used in the error message (e.g. "linksets count")
#            $2 - expected value (from manifest.json)
#            $3 - actual value (computed from the extracted file)
require_match() {
  local label=$1 expected=$2 actual=$3
  if [[ "${expected}" != "${actual}" ]]; then
    die "${label} mismatch: expected ${expected}, got ${actual}"
  fi
}

command -v tar >/dev/null || die "tar is required"
command -v sha256sum >/dev/null || die "sha256sum is required"

# The embedded manifest reader uses open(..., encoding=...), which needs
# Python 3. Prefer python3 and only fall back to a bare `python`.
python_bin=""
for candidate in python3 python; do
  if command -v "${candidate}" >/dev/null; then
    python_bin="${candidate}"
    break
  fi
done
[[ -n "${python_bin}" ]] || die "python is required"

DATASET="${1:-out/linksets/linksets-stage-backfill.tar.zst}"
[[ -f "${DATASET}" ]] || die "Dataset not found: ${DATASET}"

WORKDIR="$(mktemp -d)"
# ${WORKDIR:?} makes the rm abort instead of expanding to nothing if the
# variable is ever empty.
cleanup() { rm -rf -- "${WORKDIR:?}"; }
trap cleanup EXIT

tar -xf "${DATASET}" -C "${WORKDIR}"

for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  [[ -f "${WORKDIR}/${required}" ]] || die "Missing ${required} in dataset"
done

manifest="${WORKDIR}/manifest.json"

# Parse the manifest once and emit the four expected values, one per line.
# Plain assignment from $(...) keeps the interpreter's exit status visible to
# `set -e`, so a malformed manifest stops the script here.
manifest_values=$(
  "${python_bin}" - "${manifest}" <<'PY'
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as f:
    data = json.load(f)

try:
    values = [
        data["records"]["linksets"],
        data["records"]["advisory_chunks"],
        data["sha256"]["linksets.ndjson"],
        data["sha256"]["advisory_chunks.ndjson"],
    ]
except (KeyError, TypeError) as exc:
    sys.exit("manifest.json is missing required field: %s" % exc)

for value in values:
    print(value)
PY
)

{
  IFS= read -r expected_linksets &&
    IFS= read -r expected_chunks &&
    IFS= read -r expected_linksets_sha &&
    IFS= read -r expected_chunks_sha
} <<<"${manifest_values}" || die "manifest.json did not yield the expected fields"

# wc -l counts newline characters; tr strips the padding some wc builds add.
actual_linksets=$(wc -l <"${WORKDIR}/linksets.ndjson" | tr -d '[:space:]')
actual_chunks=$(wc -l <"${WORKDIR}/advisory_chunks.ndjson" | tr -d '[:space:]')
actual_linksets_sha=$(sha256sum "${WORKDIR}/linksets.ndjson" | awk '{print $1}')
actual_chunks_sha=$(sha256sum "${WORKDIR}/advisory_chunks.ndjson" | awk '{print $1}')

require_match "linksets count" "${expected_linksets}" "${actual_linksets}"
require_match "advisory_chunks count" "${expected_chunks}" "${actual_chunks}"
require_match "linksets sha" "${expected_linksets_sha}" "${actual_linksets_sha}"
require_match "advisory_chunks sha" "${expected_chunks_sha}" "${actual_chunks_sha}"

echo "Dataset validation succeeded:"
echo " linksets: ${actual_linksets}"
echo " advisory_chunks: ${actual_chunks}"
echo " linksets.sha256=${actual_linksets_sha}"
echo " advisory_chunks.sha256=${actual_chunks_sha}"