Files
git.stella-ops.org/devops/tools/concelier/test-store-aoc-19-005-dataset.sh
2025-12-26 18:11:06 +02:00

91 lines
2.8 KiB
Bash

#!/usr/bin/env bash
set -euo pipefail

# Validates the store-aoc-19-005 dataset tarball.
# Usage: ./scripts/concelier/test-store-aoc-19-005-dataset.sh [tarball]
#
# Requires tar, sha256sum and a Python interpreter on PATH.

for tool in tar sha256sum; do
  command -v "${tool}" >/dev/null || { echo "${tool} is required" >&2; exit 1; }
done

# Some systems install the interpreter only as `python3`. Accept that as well
# and expose it under the name `python`, so later `python` invocations in this
# script keep working unchanged.
if ! command -v python >/dev/null; then
  command -v python3 >/dev/null || { echo "python is required" >&2; exit 1; }
  python() { python3 "$@"; }
fi
# The tarball to validate is the first argument; when it is omitted or empty
# the default staging path below is used instead.
DATASET="${1:-out/linksets/linksets-stage-backfill.tar.zst}"

# Nothing can be validated without the archive, so stop right away.
[[ -f "${DATASET}" ]] || {
  echo "Dataset not found: ${DATASET}" >&2
  exit 1
}
# Unpack the dataset into a private scratch directory. The EXIT trap removes
# that directory again on every exit path, success or failure.
WORKDIR=$(mktemp -d)

cleanup() {
  rm -rf "${WORKDIR}"
}

trap cleanup EXIT

tar -x -f "${DATASET}" -C "${WORKDIR}"
# Stop at the first expected member that the archive did not provide.
for required_file in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  [[ -f "${WORKDIR}/${required_file}" ]] && continue
  echo "Missing ${required_file} in dataset" >&2
  exit 1
done
manifest="${WORKDIR}/manifest.json"

# The reader below passes `encoding=` to open(), which Python 2 does not
# accept, so prefer an explicit python3 and only fall back to plain `python`.
if command -v python3 >/dev/null 2>&1; then
  manifest_python=python3
else
  manifest_python=python
fi

#######################################
# Print one value from the dataset manifest.
# Globals:   manifest_python (read) - interpreter used to parse the JSON
# Arguments: $1 - path to manifest.json
#            $2 - top-level section (e.g. records, sha256)
#            $3 - key inside that section
# Outputs:   the value on stdout; a diagnostic on stderr on failure
# Returns:   non-zero when the file cannot be read or parsed, or when the
#            requested entry is absent
#######################################
manifest_field() {
  "${manifest_python}" - "$@" <<'PY'
import json
import sys

path, section, key = sys.argv[1:4]
with open(path, "r", encoding="utf-8") as handle:
    data = json.load(handle)
try:
    print(data[section][key])
except (KeyError, TypeError):
    # Report a missing entry as a one-line error instead of a traceback.
    sys.exit("manifest.json has no %s.%s entry" % (section, key))
PY
}

# Record counts and digests the manifest promises for each payload file.
expected_linksets=$(manifest_field "${manifest}" records linksets)
expected_chunks=$(manifest_field "${manifest}" records advisory_chunks)
expected_linksets_sha=$(manifest_field "${manifest}" sha256 linksets.ndjson)
expected_chunks_sha=$(manifest_field "${manifest}" sha256 advisory_chunks.ndjson)
# Print the number of newline characters in file $1, without padding.
count_lines() {
  wc -l < "$1" | tr -d '[:space:]'
}

# Print only the hex digest that sha256sum reports for file $1.
sha256_of() {
  sha256sum "$1" | awk '{print $1}'
}

# Measure the files that were actually unpacked.
actual_linksets=$(count_lines "${WORKDIR}/linksets.ndjson")
actual_chunks=$(count_lines "${WORKDIR}/advisory_chunks.ndjson")
actual_linksets_sha=$(sha256_of "${WORKDIR}/linksets.ndjson")
actual_chunks_sha=$(sha256_of "${WORKDIR}/advisory_chunks.ndjson")
#######################################
# Abort the script when a measured value differs from the manifest value.
# Arguments: $1 - label used in the error message
#            $2 - expected value
#            $3 - actual value
# Outputs:   a mismatch message on stderr
# Returns:   0 when both values are equal; otherwise exits with status 1
#######################################
require_match() {
  [[ "$2" != "$3" ]] || return 0
  echo "$1 mismatch: expected $2, got $3" >&2
  exit 1
}

# Counts are compared before digests; the first mismatch ends the run.
require_match "linksets count" "${expected_linksets}" "${actual_linksets}"
require_match "advisory_chunks count" "${expected_chunks}" "${actual_chunks}"
require_match "linksets sha" "${expected_linksets_sha}" "${actual_linksets_sha}"
require_match "advisory_chunks sha" "${expected_chunks_sha}" "${actual_chunks_sha}"
# Every check passed: summarise the verified counts and digests on stdout.
printf '%s\n' \
  "Dataset validation succeeded:" \
  " linksets: ${actual_linksets}" \
  " advisory_chunks: ${actual_chunks}" \
  " linksets.sha256=${actual_linksets_sha}" \
  " advisory_chunks.sha256=${actual_chunks_sha}"