#!/usr/bin/env bash
set -euo pipefail

# Deterministic dataset builder for STORE-AOC-19-005-DEV.
# Generates linksets-stage-backfill.tar.zst from repo seed data.
#
# Usage:
#   ./scripts/concelier/build-store-aoc-19-005-dataset.sh [output_tarball]
#
# Arguments:
#   output_tarball  Optional path of the archive to write.
#                   Default output: out/linksets/linksets-stage-backfill.tar.zst
#
# Outputs:
#   <output_tarball>         tar archive (zstd-compressed when zstd is available)
#   <output_tarball>.sha256  sha256sum line for the archive (also echoed to stdout)
#
# Exit status: non-zero when a required tool or a seed file is missing, or when
# any step fails (set -e).

# --- Tool checks -------------------------------------------------------------
command -v tar >/dev/null || { echo "tar is required" >&2; exit 1; }
command -v sha256sum >/dev/null || { echo "sha256sum is required" >&2; exit 1; }

# Compression flags handed to tar; stays empty when zstd is not installed.
TAR_COMPRESS=()
if command -v zstd >/dev/null 2>&1; then
  TAR_COMPRESS=(--zstd)
else
  echo "zstd not found; building uncompressed tarball (extension kept for compatibility)" >&2
fi

# --- Paths and constants -----------------------------------------------------
# The repo root is two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
SEED_DIR="${ROOT_DIR}/seed-data/concelier/store-aoc-19-005"
OUT_DIR="${ROOT_DIR}/out/linksets"
OUT_PATH="${1:-${OUT_DIR}/linksets-stage-backfill.tar.zst}"
# Fixed timestamp so repeated runs do not embed the wall-clock time.
GEN_TIME="2025-12-07T00:00:00Z"

# --- Seed validation ---------------------------------------------------------
for seed in linksets.ndjson advisory_chunks.ndjson; do
  if [[ ! -f "${SEED_DIR}/${seed}" ]]; then
    echo "Missing seed file: ${SEED_DIR}/${seed}" >&2
    exit 1
  fi
done

# --- Staging -----------------------------------------------------------------
# Everything is assembled in a throw-away directory that is removed on any exit.
WORKDIR="$(mktemp -d)"
cleanup() { rm -rf -- "${WORKDIR:?}"; }
trap cleanup EXIT

cp "${SEED_DIR}/linksets.ndjson" "${WORKDIR}/linksets.ndjson"
cp "${SEED_DIR}/advisory_chunks.ndjson" "${WORKDIR}/advisory_chunks.ndjson"

# Digests and line counts of the staged copies (wc -l counts newline characters).
linksets_sha=$(sha256sum "${WORKDIR}/linksets.ndjson" | awk '{print $1}')
advisory_sha=$(sha256sum "${WORKDIR}/advisory_chunks.ndjson" | awk '{print $1}')
linksets_count=$(wc -l < "${WORKDIR}/linksets.ndjson" | tr -d '[:space:]')
advisory_count=$(wc -l < "${WORKDIR}/advisory_chunks.ndjson" | tr -d '[:space:]')

# --- Manifest ----------------------------------------------------------------
# NOTE(review): the here-document body was lost in the garbled source this file
# was recovered from. The field names below are reconstructed from the values
# computed above (GEN_TIME, counts, digests) — confirm them against whatever
# consumes manifest.json before relying on this layout.
cat >"${WORKDIR}/manifest.json" <<EOF
{
  "datasetId": "store-aoc-19-005-dev",
  "generatedAt": "${GEN_TIME}",
  "source": "seed-data/concelier/store-aoc-19-005",
  "records": {
    "linksets": ${linksets_count},
    "advisory_chunks": ${advisory_count}
  },
  "sha256": {
    "linksets.ndjson": "${linksets_sha}",
    "advisory_chunks.ndjson": "${advisory_sha}"
  }
}
EOF

# --- Archive -----------------------------------------------------------------
# The default output directory may not exist on a fresh checkout.
mkdir -p "$(dirname "${OUT_PATH}")"

# NOTE(review): the tar invocation was also lost; the flags below (GNU tar) pin
# member order, mtime and ownership so the archive bytes do not depend on when
# or by whom the script is run — confirm against the original command.
# The ${arr[@]+...} form keeps an empty TAR_COMPRESS from tripping `set -u`
# on bash releases older than 4.4.
tar -c ${TAR_COMPRESS[@]+"${TAR_COMPRESS[@]}"} \
  --mtime="${GEN_TIME}" \
  --owner=0 --group=0 --numeric-owner \
  -f "${OUT_PATH}" \
  -C "${WORKDIR}" \
  linksets.ndjson advisory_chunks.ndjson manifest.json

# --- Checksum ----------------------------------------------------------------
sha256sum "${OUT_PATH}" > "${OUT_PATH}.sha256"

echo "Wrote ${OUT_PATH}"
cat "${OUT_PATH}.sha256"