#!/usr/bin/env bash
set -euo pipefail

# Postgres backfill runner for STORE-AOC-19-005-DEV (Link-Not-Merge raw linksets/chunks)
#
# Usage:
#   PGURI=postgres://.../concelier ./scripts/concelier/backfill-store-aoc-19-005.sh /path/to/linksets-stage-backfill.tar.zst
#
# Optional:
#   PGSCHEMA=lnm_raw (default), DRY_RUN=1 to stop after extraction
#
# Assumptions:
#   - Dataset contains ndjson files: linksets.ndjson, advisory_chunks.ndjson, manifest.json
#   - Target staging tables are created by this script if absent:
#       <PGSCHEMA>.linksets_raw(id text primary key, raw jsonb)
#       <PGSCHEMA>.advisory_chunks_raw(id text primary key, raw jsonb)
#
# NOTE(review): the copy of this script under review had been collapsed onto two
# lines and the bodies of its three `psql <<SQL` heredocs (plus the DRY_RUN
# check) were missing. The SQL below is rebuilt from the table shapes documented
# above; confirm it against the intended schema and load semantics before use.

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort early, with a readable message, when a required external tool is absent.
# Arguments: one or more command names.
require_cmd() {
  local tool
  for tool in "$@"; do
    command -v "${tool}" >/dev/null 2>&1 || die "Required command not found: ${tool}"
  done
}

# Remove the scratch directory; installed as the EXIT trap.
cleanup() {
  rm -rf -- "${WORKDIR:?}"
}

# Create the staging schema and both raw tables when they do not exist yet.
# Globals: PGURI, PGSCHEMA (read; PGSCHEMA is interpolated into SQL as-is, so
#          it must be a plain identifier supplied by a trusted operator).
ensure_schema() {
  # ON_ERROR_STOP makes psql exit non-zero on the first failing statement;
  # without it a script fed on stdin reports success even when SQL failed.
  psql "${PGURI}" -v ON_ERROR_STOP=1 <<SQL
create schema if not exists ${PGSCHEMA};
create table if not exists ${PGSCHEMA}.linksets_raw (
  id text primary key,
  raw jsonb
);
create table if not exists ${PGSCHEMA}.advisory_chunks_raw (
  id text primary key,
  raw jsonb
);
SQL
}

# Convert one ndjson file to TSV and load it into a staging table.
# Globals:   PGURI, PGSCHEMA (read)
# Arguments: $1 - unqualified target table name
#            $2 - path to the ndjson file; the TSV is written next to it
# Returns:   non-zero (aborting the script under `set -e`) when jq or psql fails
import_ndjson() {
  local -r table=$1
  local -r ndjson=$2
  local -r tsv="${ndjson%.ndjson}.tsv"

  # @tsv only accepts scalar cells, so the document itself is serialized with
  # tojson (the previous `[._id, .]` made jq fail on every record). jq writes
  # the file directly so that a jq failure is visible to `set -e`; the earlier
  # `cat >file <(jq ...)` form discarded jq's exit status.
  jq -r '
    if ._id == null then
      error("record is missing _id")
    else
      [(._id | tostring), tojson] | @tsv
    end
  ' "${ndjson}" >"${tsv}"

  # jq's @tsv escaping (\t, \n, \r, \\) matches what COPY's default text format
  # decodes, so no FORMAT/DELIMITER options are needed.
  # NOTE(review): rows are staged and upserted so a rerun is idempotent and
  # never deletes existing rows; switch to TRUNCATE + \copy if the table is
  # meant to mirror the dataset exactly.
  psql "${PGURI}" -v ON_ERROR_STOP=1 <<SQL
begin;
create temp table backfill_stage (id text, raw jsonb) on commit drop;
\copy backfill_stage (id, raw) from '${tsv}'
insert into ${PGSCHEMA}.${table} (id, raw)
select id, raw from backfill_stage
on conflict (id) do update set raw = excluded.raw;
commit;
SQL
}

# Entry point.
# Arguments: $1 - path to the dataset tarball (linksets-stage-backfill.tar.zst)
# Environment: PGURI or CONCELIER_PG_URI (required), PGSCHEMA, DRY_RUN
main() {
  DATASET_PATH="${1:-}"
  if [[ -z "${DATASET_PATH}" || ! -f "${DATASET_PATH}" ]]; then
    die "Dataset tarball not found. Provide path to linksets-stage-backfill.tar.zst"
  fi

  PGURI="${PGURI:-${CONCELIER_PG_URI:-}}"
  PGSCHEMA="${PGSCHEMA:-lnm_raw}"
  DRY_RUN="${DRY_RUN:-0}"

  if [[ -z "${PGURI}" ]]; then
    die "PGURI (or CONCELIER_PG_URI) must be set"
  fi

  require_cmd sha256sum tar

  WORKDIR="$(mktemp -d)"
  trap cleanup EXIT

  echo "==> Dataset: ${DATASET_PATH}"
  sha256sum "${DATASET_PATH}"

  echo "==> Extracting to ${WORKDIR}"
  tar -xf "${DATASET_PATH}" -C "${WORKDIR}"

  local required
  for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
    if [[ ! -f "${WORKDIR}/${required}" ]]; then
      die "Missing required file in dataset: ${required}"
    fi
  done

  # NOTE(review): placed here to match the usage text ("stop after
  # extraction"), i.e. before anything touches the database.
  if [[ "${DRY_RUN}" != "0" ]]; then
    echo "DRY_RUN set; stopping after extraction."
    exit 0
  fi

  require_cmd jq psql

  echo "==> Ensuring staging schema/tables exist in Postgres"
  ensure_schema

  echo "==> Importing linksets into ${PGSCHEMA}.linksets_raw"
  import_ndjson linksets_raw "${WORKDIR}/linksets.ndjson"

  echo "==> Importing advisory_chunks into ${PGSCHEMA}.advisory_chunks_raw"
  import_ndjson advisory_chunks_raw "${WORKDIR}/advisory_chunks.ndjson"

  echo "==> Post-import counts"
  psql -tA "${PGURI}" -c "select 'linksets_raw='||count(*) from ${PGSCHEMA}.linksets_raw;"
  psql -tA "${PGURI}" -c "select 'advisory_chunks_raw='||count(*) from ${PGSCHEMA}.advisory_chunks_raw;"

  echo "==> Manifest summary"
  cat "${WORKDIR}/manifest.json"

  echo "Backfill complete."
}

main "$@"