#!/usr/bin/env bash
#
# Validates the store-aoc-19-005 dataset tarball.
#
# Extracts the tarball into a temporary directory and checks the record
# counts and SHA-256 digests of linksets.ndjson and advisory_chunks.ndjson
# against the values recorded in manifest.json.
#
# Usage: ./scripts/concelier/test-store-aoc-19-005-dataset.sh [tarball]
#
# Exits 1 with a message on stderr when a required tool is missing.

set -euo pipefail

# The inline manifest readers in this script invoke `python` and pass
# `encoding=` to open(), which only Python 3 accepts. When python3 is
# installed, route every `python` call to it so the script also works on
# hosts where `python` is absent or still points at Python 2.
if command -v python3 >/dev/null; then
  python() { command python3 "$@"; }
fi

# `command -v` also reports shell functions, so the shim above satisfies the
# python requirement.
for tool in tar sha256sum python; do
  command -v "${tool}" >/dev/null || { echo "${tool} is required" >&2; exit 1; }
done
unset tool
# Tarball to validate: the first command-line argument, or the default path
# below when no argument is given.
DATASET="${1:-out/linksets/linksets-stage-backfill.tar.zst}"

# Stop with exit status 1 when the path is not an existing regular file.
[[ -f "${DATASET}" ]] || { echo "Dataset not found: ${DATASET}" >&2; exit 1; }
# Scratch directory that receives the extracted dataset.
WORKDIR="$(mktemp -d)"

# Removes the scratch directory. `--` stops option parsing before the path,
# and `:?` aborts instead of handing `rm -rf` an empty argument should
# WORKDIR ever be unset or empty.
cleanup() { rm -rf -- "${WORKDIR:?}"; }

# Run the cleanup on every exit path, successful or not.
trap cleanup EXIT
# Unpack the archive into the scratch directory.
tar -xf "${DATASET}" -C "${WORKDIR}"

# The archive must contain both NDJSON payloads and the manifest that the
# later checks read; stop at the first file that is missing.
for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  if [[ ! -f "${WORKDIR}/${required}" ]]; then
    echo "Missing ${required} in dataset" >&2
    exit 1
  fi
done
manifest="${WORKDIR}/manifest.json"

#######################################
# Prints one value from a two-level JSON manifest: MANIFEST[SECTION][KEY].
# Arguments:
#   $1 - path to the manifest JSON file
#   $2 - top-level key (for example "records" or "sha256")
#   $3 - key inside that section
# Outputs:
#   the value on stdout, as formatted by Python's print()
# Returns:
#   non-zero when no interpreter is found, the file cannot be read or parsed,
#   or either key is absent (Python's traceback goes to stderr)
#######################################
manifest_field() {
  local interpreter
  # The embedded program passes encoding= to open(), which is Python 3 only,
  # so python3 is preferred over a bare `python`.
  interpreter="$(command -v python3 || command -v python)" || {
    echo "python is required" >&2
    return 1
  }
  "${interpreter}" - "$1" "$2" "$3" <<'PY'
import json, sys

path, section, key = sys.argv[1:4]
with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)
print(data[section][key])
PY
}

# Expected record counts and digests as declared by the manifest.
expected_linksets="$(manifest_field "${manifest}" records linksets)"
expected_chunks="$(manifest_field "${manifest}" records advisory_chunks)"
expected_linksets_sha="$(manifest_field "${manifest}" sha256 linksets.ndjson)"
expected_chunks_sha="$(manifest_field "${manifest}" sha256 advisory_chunks.ndjson)"
# count_lines FILE
# Prints the `wc -l` line count of FILE with all whitespace stripped.
# NOTE: `wc -l` counts newline characters, so a final line that lacks a
# trailing newline is not included in the count.
count_lines() { wc -l < "$1" | tr -d '[:space:]'; }

# sha256_of FILE
# Prints the hex SHA-256 digest of FILE. The file is fed through stdin so
# sha256sum never sees its name: for names containing a backslash or newline
# GNU sha256sum escapes the name and prefixes the output line with "\",
# which would otherwise end up in the extracted digest.
sha256_of() { sha256sum < "$1" | awk '{print $1}'; }

# Values measured from the extracted payload files.
actual_linksets="$(count_lines "${WORKDIR}/linksets.ndjson")"
actual_chunks="$(count_lines "${WORKDIR}/advisory_chunks.ndjson")"
actual_linksets_sha="$(sha256_of "${WORKDIR}/linksets.ndjson")"
actual_chunks_sha="$(sha256_of "${WORKDIR}/advisory_chunks.ndjson")"
#######################################
# Aborts the script when a measured value differs from the manifest value.
# Arguments:
#   $1 - label used in the error message (for example "linksets count")
#   $2 - expected value taken from the manifest
#   $3 - actual value measured from the extracted file
# Outputs:
#   "<label> mismatch: expected <expected>, got <actual>" on stderr
# Returns:
#   0 when the values are identical; otherwise exits the shell with status 1
#######################################
require_match() {
  # The right-hand side is quoted so the comparison is literal, not a glob.
  if [[ "$2" != "$3" ]]; then
    echo "$1 mismatch: expected $2, got $3" >&2
    exit 1
  fi
}

# Counts are checked before digests; the first mismatch ends the run.
require_match "linksets count" "${expected_linksets}" "${actual_linksets}"
require_match "advisory_chunks count" "${expected_chunks}" "${actual_chunks}"
require_match "linksets sha" "${expected_linksets_sha}" "${actual_linksets_sha}"
require_match "advisory_chunks sha" "${expected_chunks_sha}" "${actual_chunks_sha}"
# print_summary
# Writes the success report to stdout: a heading followed by the measured
# record counts and SHA-256 digests, one per line. Reads the actual_*
# variables set earlier in the script.
print_summary() {
  printf '%s\n' \
    "Dataset validation succeeded:" \
    " linksets: ${actual_linksets}" \
    " advisory_chunks: ${actual_chunks}" \
    " linksets.sha256=${actual_linksets_sha}" \
    " advisory_chunks.sha256=${actual_chunks_sha}"
}

print_summary