Files
git.stella-ops.org/scripts/concelier/backfill-store-aoc-19-005.sh
StellaOps Bot 11597679ed
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
console-runner-image / build-runner-image (push) Has been cancelled
wine-csp-build / Build Wine CSP Image (push) Has been cancelled
wine-csp-build / Integration Tests (push) Has been cancelled
wine-csp-build / Security Scan (push) Has been cancelled
wine-csp-build / Generate SBOM (push) Has been cancelled
wine-csp-build / Publish Image (push) Has been cancelled
wine-csp-build / Air-Gap Bundle (push) Has been cancelled
wine-csp-build / Test Summary (push) Has been cancelled
feat: Implement BerkeleyDB reader for RPM databases
- Added BerkeleyDbReader class to read and extract RPM header blobs from BerkeleyDB hash databases.
- Implemented methods to detect BerkeleyDB format and extract values, including handling of page sizes and magic numbers.
- Added tests for BerkeleyDbReader to ensure correct functionality and header extraction.

feat: Add Yarn PnP data tests

- Created YarnPnpDataTests to validate package resolution and data loading from Yarn PnP cache.
- Implemented tests for resolved keys, package presence, and loading from cache structure.

test: Add egg-info package fixtures for Python tests

- Created egg-info package fixtures for testing Python analyzers.
- Included PKG-INFO, entry_points.txt, and installed-files.txt for comprehensive coverage.

test: Enhance RPM database reader tests

- Added tests for RpmDatabaseReader to validate fallback to legacy packages when SQLite is missing.
- Implemented helper methods to create legacy package files and RPM headers for testing.

test: Implement dual signing tests

- Added DualSignTests to validate secondary signature addition when configured.
- Created stub implementations for crypto providers and key resolvers to facilitate testing.

chore: Update CI script for Playwright Chromium installation

- Modified ci-console-exports.sh to ensure deterministic Chromium binary installation for console exports tests.
- Added checks for Windows compatibility and environment variable setups for Playwright browsers.
2025-12-07 16:24:45 +02:00

88 lines
2.9 KiB
Bash

#!/usr/bin/env bash
set -euo pipefail
# Postgres backfill runner for STORE-AOC-19-005-DEV (Link-Not-Merge raw linksets/chunks)
#
# Usage:
#   PGURI=postgres://.../concelier ./scripts/concelier/backfill-store-aoc-19-005.sh /path/to/linksets-stage-backfill.tar.zst
# Optional:
#   PGSCHEMA=lnm_raw (default), DRY_RUN=1 to stop after extraction
#
# Assumptions:
# - Dataset contains ndjson files: linksets.ndjson, advisory_chunks.ndjson, manifest.json
# - Target staging tables are created by this script if absent:
#     <schema>.linksets_raw(id text primary key, raw jsonb)
#     <schema>.advisory_chunks_raw(id text primary key, raw jsonb)

DATASET_PATH="${1:-}"
if [[ -z "${DATASET_PATH}" || ! -f "${DATASET_PATH}" ]]; then
  echo "Dataset tarball not found. Provide path to linksets-stage-backfill.tar.zst" >&2
  exit 1
fi

PGURI="${PGURI:-${CONCELIER_PG_URI:-}}"
PGSCHEMA="${PGSCHEMA:-lnm_raw}"
DRY_RUN="${DRY_RUN:-0}"
if [[ -z "${PGURI}" ]]; then
  echo "PGURI (or CONCELIER_PG_URI) must be set" >&2
  exit 1
fi

# ON_ERROR_STOP=1: without it psql exits 0 even when a statement inside a
# heredoc fails, which silently defeats `set -e` for every SQL step below.
PSQL=(psql -v ON_ERROR_STOP=1 "${PGURI}")

WORKDIR="$(mktemp -d)"
cleanup() { rm -rf -- "${WORKDIR}"; }
trap cleanup EXIT

echo "==> Dataset: ${DATASET_PATH}"
sha256sum "${DATASET_PATH}"

echo "==> Extracting to ${WORKDIR}"
tar -xf "${DATASET_PATH}" -C "${WORKDIR}"
for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  if [[ ! -f "${WORKDIR}/${required}" ]]; then
    echo "Missing required file in dataset: ${required}" >&2
    exit 1
  fi
done

echo "==> Ensuring staging schema/tables exist in Postgres"
"${PSQL[@]}" <<SQL
create schema if not exists ${PGSCHEMA};
create table if not exists ${PGSCHEMA}.linksets_raw (
  id text primary key,
  raw jsonb not null
);
create table if not exists ${PGSCHEMA}.advisory_chunks_raw (
  id text primary key,
  raw jsonb not null
);
SQL

if [[ "${DRY_RUN}" != "0" ]]; then
  echo "DRY_RUN=1 set; extraction and schema verification completed, skipping import."
  exit 0
fi

#######################################
# Convert an ndjson file to a two-column TSV (id, raw-json) and bulk-load it.
# Globals:   PSQL (read)
# Arguments: $1 - path to source .ndjson file
#            $2 - fully-qualified target table (schema.table)
# Returns:   non-zero (aborting via set -e) if jq or psql fails
#######################################
import_ndjson() {
  local src="$1" table="$2"
  local tsv="${src%.ndjson}.tsv"
  # The second @tsv column must be a scalar: `tojson` serializes the document
  # to a string (the raw `.` object would make jq fail with
  # "object cannot be tsv-formatted").
  jq -rc '[._id, tojson] | @tsv' "${src}" >"${tsv}"
  # FORMAT text, not csv: @tsv backslash-escapes tabs/newlines/backslashes,
  # which is exactly the escaping COPY's text format decodes. CSV format would
  # keep those escapes as literal characters and mishandle embedded quotes.
  "${PSQL[@]}" <<SQL
TRUNCATE TABLE ${table};
\copy ${table} (id, raw) FROM '${tsv}' WITH (FORMAT text, DELIMITER E'\t');
SQL
}

echo "==> Importing linksets into ${PGSCHEMA}.linksets_raw"
import_ndjson "${WORKDIR}/linksets.ndjson" "${PGSCHEMA}.linksets_raw"

echo "==> Importing advisory_chunks into ${PGSCHEMA}.advisory_chunks_raw"
import_ndjson "${WORKDIR}/advisory_chunks.ndjson" "${PGSCHEMA}.advisory_chunks_raw"

echo "==> Post-import counts"
"${PSQL[@]}" -tA -c "select 'linksets_raw='||count(*) from ${PGSCHEMA}.linksets_raw;"
"${PSQL[@]}" -tA -c "select 'advisory_chunks_raw='||count(*) from ${PGSCHEMA}.advisory_chunks_raw;"

echo "==> Manifest summary"
cat "${WORKDIR}/manifest.json"
echo "Backfill complete."