feat: Implement BerkeleyDB reader for RPM databases

- Added BerkeleyDbReader class to read and extract RPM header blobs from BerkeleyDB hash databases.
- Implemented methods to detect BerkeleyDB format and extract values, including handling of page sizes and magic numbers.
- Added tests for BerkeleyDbReader covering format detection and header extraction (detection sketch below).
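
For context, the detection being tested keys off the BerkeleyDB metadata page: a 4-byte magic at byte offset 12 (0x00061561 for hash databases, the format rpm uses) and the page size at offset 20. A hedged shell sketch of the same probe (path and handling illustrative, not the C# implementation):

# od prints the word in host byte order, so a swapped value signals a
# big-endian database file; the path is the stock rpmdb location.
magic="$(od -An -tx4 -j12 -N4 /var/lib/rpm/Packages | tr -d ' ')"
case "${magic}" in
  00061561) echo "BerkeleyDB hash db (native byte order)" ;;
  61150600) echo "BerkeleyDB hash db (swapped byte order)" ;;
  *)        echo "not a BerkeleyDB hash db (magic=${magic})" ;;
esac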

feat: Add Yarn PnP data tests

- Created YarnPnpDataTests to validate package resolution and data loading from Yarn PnP cache.
- Implemented tests for resolved keys, package presence, and loading from the cache structure (see the sketch below).
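
For reference, the serialized PnP data those tests load (.pnp.data.json) keeps package entries in packageRegistryData as [ident, [[reference, info], ...]] pairs. A hedged jq sketch of walking that structure (field names per Yarn's PnP serialization; file path illustrative):

jq -r '.packageRegistryData[]
       | .[0] as $ident
       | .[1][]
       | "\($ident)@\(.[0]) -> \(.[1].packageLocation)"' .pnp.data.json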

test: Add egg-info package fixtures for Python tests

- Created egg-info package fixtures for testing Python analyzers.
- Included PKG-INFO, entry_points.txt, and installed-files.txt for comprehensive coverage (fixture sketch below).
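
A minimal sketch of the fixture shape (package name and entries are illustrative; the real fixtures are richer):

mkdir -p demo_pkg-1.0.0.egg-info
cat > demo_pkg-1.0.0.egg-info/PKG-INFO <<'EOF'
Metadata-Version: 2.1
Name: demo-pkg
Version: 1.0.0
EOF
cat > demo_pkg-1.0.0.egg-info/entry_points.txt <<'EOF'
[console_scripts]
demo = demo_pkg.cli:main
EOF
printf '%s\n' PKG-INFO entry_points.txt > demo_pkg-1.0.0.egg-info/installed-files.txt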

test: Enhance RPM database reader tests

- Added tests for RpmDatabaseReader to validate the fallback to the legacy BerkeleyDB Packages database when SQLite is missing (probe sketch below).
- Implemented helper methods to create legacy package files and RPM headers for testing.
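
The fallback order mirrors rpm's own on-disk layout; a hedged shell sketch of the probe (stock rpmdb paths, not the reader's API):

rpmdb_dir="/var/lib/rpm"
if [[ -f "${rpmdb_dir}/rpmdb.sqlite" ]]; then
  echo "using sqlite backend: ${rpmdb_dir}/rpmdb.sqlite"
elif [[ -f "${rpmdb_dir}/Packages" ]]; then
  echo "falling back to legacy BerkeleyDB backend: ${rpmdb_dir}/Packages"
else
  echo "no rpm database found under ${rpmdb_dir}" >&2
fi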

test: Implement dual signing tests

- Added DualSignTests to validate secondary signature addition when configured.
- Created stub implementations for crypto providers and key resolvers to facilitate testing.

chore: Update CI script for Playwright Chromium installation

- Modified ci-console-exports.sh to ensure deterministic Chromium binary installation for console exports tests.
- Added checks for Windows compatibility and environment variable setup for Playwright browsers (see the sketch below).
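
In outline, a deterministic install pins the Playwright browser cache to a known path and installs only Chromium there; a hedged sketch (cache path illustrative, not the script's exact contents):

export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-$PWD/.cache/ms-playwright}"
npx playwright install chromium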
StellaOps Bot committed 2025-12-07 16:24:45 +02:00
parent e3f28a21ab, commit 11597679ed
199 changed files with 9809 additions and 4404 deletions

scripts/concelier/backfill-store-aoc-19-005.sh

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
set -euo pipefail
# Postgres backfill runner for STORE-AOC-19-005-DEV (Link-Not-Merge raw linksets/chunks)
# Usage:
# PGURI=postgres://.../concelier ./scripts/concelier/backfill-store-aoc-19-005.sh /path/to/linksets-stage-backfill.tar.zst
# Optional:
# PGSCHEMA=lnm_raw (default), DRY_RUN=1 to stop after extraction
#
# Assumptions:
# - Dataset contains ndjson files: linksets.ndjson, advisory_chunks.ndjson, manifest.json
# - Target staging tables are created by this script if absent:
# <schema>.linksets_raw(id text primary key, raw jsonb)
# <schema>.advisory_chunks_raw(id text primary key, raw jsonb)
command -v jq >/dev/null || { echo "jq is required" >&2; exit 1; }
DATASET_PATH="${1:-}"
if [[ -z "${DATASET_PATH}" || ! -f "${DATASET_PATH}" ]]; then
  echo "Dataset tarball not found. Provide path to linksets-stage-backfill.tar.zst" >&2
  exit 1
fi
PGURI="${PGURI:-${CONCELIER_PG_URI:-}}"
PGSCHEMA="${PGSCHEMA:-lnm_raw}"
DRY_RUN="${DRY_RUN:-0}"
if [[ -z "${PGURI}" ]]; then
  echo "PGURI (or CONCELIER_PG_URI) must be set" >&2
  exit 1
fi
WORKDIR="$(mktemp -d)"
cleanup() { rm -rf "${WORKDIR}"; }
trap cleanup EXIT
echo "==> Dataset: ${DATASET_PATH}"
sha256sum "${DATASET_PATH}"
echo "==> Extracting to ${WORKDIR}"
tar -xf "${DATASET_PATH}" -C "${WORKDIR}"
for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  if [[ ! -f "${WORKDIR}/${required}" ]]; then
    echo "Missing required file in dataset: ${required}" >&2
    exit 1
  fi
done
echo "==> Ensuring staging schema/tables exist in Postgres"
psql "${PGURI}" <<SQL
create schema if not exists ${PGSCHEMA};
create table if not exists ${PGSCHEMA}.linksets_raw (
  id text primary key,
  raw jsonb not null
);
create table if not exists ${PGSCHEMA}.advisory_chunks_raw (
  id text primary key,
  raw jsonb not null
);
SQL
if [[ "${DRY_RUN}" != "0" ]]; then
  echo "DRY_RUN set; extraction and schema verification completed, skipping import."
  exit 0
fi
echo "==> Importing linksets into ${PGSCHEMA}.linksets_raw"
# jq's @tsv escaping (\t, \n, \\) matches Postgres text-format input, so load
# with FORMAT text; csv would keep those escapes literal and corrupt the JSON.
jq -rc '[(._id | tostring), tojson] | @tsv' "${WORKDIR}/linksets.ndjson" > "${WORKDIR}/linksets.tsv"
psql "${PGURI}" <<SQL
TRUNCATE TABLE ${PGSCHEMA}.linksets_raw;
\copy ${PGSCHEMA}.linksets_raw (id, raw) FROM '${WORKDIR}/linksets.tsv' WITH (FORMAT text);
SQL
echo "==> Importing advisory_chunks into ${PGSCHEMA}.advisory_chunks_raw"
jq -rc '[(._id | tostring), tojson] | @tsv' "${WORKDIR}/advisory_chunks.ndjson" > "${WORKDIR}/advisory_chunks.tsv"
psql "${PGURI}" <<SQL
TRUNCATE TABLE ${PGSCHEMA}.advisory_chunks_raw;
\copy ${PGSCHEMA}.advisory_chunks_raw (id, raw) FROM '${WORKDIR}/advisory_chunks.tsv' WITH (FORMAT text);
SQL
echo "==> Post-import counts"
psql -tA "${PGURI}" -c "select 'linksets_raw='||count(*) from ${PGSCHEMA}.linksets_raw;"
psql -tA "${PGURI}" -c "select 'advisory_chunks_raw='||count(*) from ${PGSCHEMA}.advisory_chunks_raw;"
echo "==> Manifest summary"
cat "${WORKDIR}/manifest.json"
echo "Backfill complete."

scripts/concelier/build-store-aoc-19-005-dataset.sh

@@ -0,0 +1,74 @@
#!/usr/bin/env bash
set -euo pipefail
# Deterministic dataset builder for STORE-AOC-19-005-DEV.
# Generates linksets-stage-backfill.tar.zst from repo seed data.
# Usage:
# ./scripts/concelier/build-store-aoc-19-005-dataset.sh [output_tarball]
# Default output: out/linksets/linksets-stage-backfill.tar.zst
command -v tar >/dev/null || { echo "tar is required" >&2; exit 1; }
command -v sha256sum >/dev/null || { echo "sha256sum is required" >&2; exit 1; }
TAR_COMPRESS=()
if command -v zstd >/dev/null 2>&1; then
  TAR_COMPRESS=(--zstd)
else
  echo "zstd not found; building uncompressed tarball (extension kept for compatibility)" >&2
fi
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
SEED_DIR="${ROOT_DIR}/seed-data/concelier/store-aoc-19-005"
OUT_DIR="${ROOT_DIR}/out/linksets"
OUT_PATH="${1:-${OUT_DIR}/linksets-stage-backfill.tar.zst}"
GEN_TIME="2025-12-07T00:00:00Z"
for seed in linksets.ndjson advisory_chunks.ndjson; do
  if [[ ! -f "${SEED_DIR}/${seed}" ]]; then
    echo "Missing seed file: ${SEED_DIR}/${seed}" >&2
    exit 1
  fi
done
WORKDIR="$(mktemp -d)"
cleanup() { rm -rf "${WORKDIR}"; }
trap cleanup EXIT
cp "${SEED_DIR}/linksets.ndjson" "${WORKDIR}/linksets.ndjson"
cp "${SEED_DIR}/advisory_chunks.ndjson" "${WORKDIR}/advisory_chunks.ndjson"
linksets_sha=$(sha256sum "${WORKDIR}/linksets.ndjson" | awk '{print $1}')
advisory_sha=$(sha256sum "${WORKDIR}/advisory_chunks.ndjson" | awk '{print $1}')
linksets_count=$(wc -l < "${WORKDIR}/linksets.ndjson" | tr -d '[:space:]')
advisory_count=$(wc -l < "${WORKDIR}/advisory_chunks.ndjson" | tr -d '[:space:]')
cat >"${WORKDIR}/manifest.json" <<EOF
{
  "datasetId": "store-aoc-19-005-dev",
  "generatedAt": "${GEN_TIME}",
  "source": "seed-data/concelier/store-aoc-19-005",
  "records": {
    "linksets": ${linksets_count},
    "advisory_chunks": ${advisory_count}
  },
  "sha256": {
    "linksets.ndjson": "${linksets_sha}",
    "advisory_chunks.ndjson": "${advisory_sha}"
  }
}
EOF
mkdir -p "${OUT_DIR}"
tar "${TAR_COMPRESS[@]}" \
  --format=ustar \
  --mtime='1970-01-01 00:00:00Z' \
  --owner=0 --group=0 --numeric-owner \
  -cf "${OUT_PATH}" \
  -C "${WORKDIR}" \
  linksets.ndjson advisory_chunks.ndjson manifest.json
sha256sum "${OUT_PATH}" > "${OUT_PATH}.sha256"
echo "Wrote ${OUT_PATH}"
cat "${OUT_PATH}.sha256"
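
Because the tarball is written with a fixed mtime, ustar format, and numeric owner/group 0, two builds from the same seed files should be byte-identical, assuming the same tar and zstd versions. A quick determinism check (output paths illustrative):

./scripts/concelier/build-store-aoc-19-005-dataset.sh /tmp/dataset-a.tar.zst
./scripts/concelier/build-store-aoc-19-005-dataset.sh /tmp/dataset-b.tar.zst
sha256sum /tmp/dataset-a.tar.zst /tmp/dataset-b.tar.zst   # digests should match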

scripts/concelier/export-linksets-tarball.sh

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
set -euo pipefail
# Export Concelier linksets/advisory_chunks from Postgres to a tar.zst bundle.
# Usage:
# PGURI=postgres://user:pass@host:5432/db \
# ./scripts/concelier/export-linksets-tarball.sh out/linksets/linksets-stage-backfill.tar.zst
#
# Optional env:
# PGSCHEMA=public # schema that owns linksets/advisory_chunks
# LINKSETS_TABLE=linksets # table name for linksets
# CHUNKS_TABLE=advisory_chunks # table name for advisory chunks
# TMPDIR=/tmp/export-linksets # working directory (defaults to mktemp)
command -v jq >/dev/null || { echo "jq is required" >&2; exit 1; }
command -v zstd >/dev/null || { echo "zstd is required" >&2; exit 1; }
command -v sha256sum >/dev/null || { echo "sha256sum is required" >&2; exit 1; }
TARGET="${1:-}"
if [[ -z "${TARGET}" ]]; then
  echo "Usage: PGURI=... $0 out/linksets/linksets-stage-backfill.tar.zst" >&2
  exit 1
fi
if [[ -z "${PGURI:-}" ]]; then
  echo "PGURI environment variable is required (postgres://...)" >&2
  exit 1
fi
PGSCHEMA="${PGSCHEMA:-public}"
LINKSETS_TABLE="${LINKSETS_TABLE:-linksets}"
CHUNKS_TABLE="${CHUNKS_TABLE:-advisory_chunks}"
WORKDIR="${TMPDIR:-$(mktemp -d)}"
mkdir -p "${WORKDIR}"
OUTDIR="$(dirname "${TARGET}")"
mkdir -p "${OUTDIR}"
echo "==> Exporting linksets from ${PGSCHEMA}.${LINKSETS_TABLE}"
psql "${PGURI}" -c "\copy (select row_to_json(t) from ${PGSCHEMA}.${LINKSETS_TABLE} t) to '${WORKDIR}/linksets.ndjson'"
echo "==> Exporting advisory_chunks from ${PGSCHEMA}.${CHUNKS_TABLE}"
psql "${PGURI}" -c "\copy (select row_to_json(t) from ${PGSCHEMA}.${CHUNKS_TABLE} t) to '${WORKDIR}/advisory_chunks.ndjson'"
LINKSETS_COUNT="$(wc -l < "${WORKDIR}/linksets.ndjson")"
CHUNKS_COUNT="$(wc -l < "${WORKDIR}/advisory_chunks.ndjson")"
echo "==> Writing manifest.json"
jq -n --argjson linksets "${LINKSETS_COUNT}" --argjson advisory_chunks "${CHUNKS_COUNT}" \
'{linksets: $linksets, advisory_chunks: $advisory_chunks}' \
> "${WORKDIR}/manifest.json"
echo "==> Building tarball ${TARGET}"
tar -I "zstd -19" -cf "${TARGET}" -C "${WORKDIR}" linksets.ndjson advisory_chunks.ndjson manifest.json
echo "==> SHA-256"
sha256sum "${TARGET}"
echo "Done. Workdir: ${WORKDIR}"

scripts/concelier/test-store-aoc-19-005-dataset.sh

@@ -0,0 +1,90 @@
#!/usr/bin/env bash
set -euo pipefail
# Validates the store-aoc-19-005 dataset tarball.
# Usage: ./scripts/concelier/test-store-aoc-19-005-dataset.sh [tarball]
command -v tar >/dev/null || { echo "tar is required" >&2; exit 1; }
command -v sha256sum >/dev/null || { echo "sha256sum is required" >&2; exit 1; }
command -v python >/dev/null || { echo "python is required" >&2; exit 1; }
DATASET="${1:-out/linksets/linksets-stage-backfill.tar.zst}"
if [[ ! -f "${DATASET}" ]]; then
  echo "Dataset not found: ${DATASET}" >&2
  exit 1
fi
WORKDIR="$(mktemp -d)"
cleanup() { rm -rf "${WORKDIR}"; }
trap cleanup EXIT
tar -xf "${DATASET}" -C "${WORKDIR}"
for required in linksets.ndjson advisory_chunks.ndjson manifest.json; do
  if [[ ! -f "${WORKDIR}/${required}" ]]; then
    echo "Missing ${required} in dataset" >&2
    exit 1
  fi
done
manifest="${WORKDIR}/manifest.json"
# Read the expected counts and digests from the manifest in one pass.
read -r expected_linksets expected_chunks expected_linksets_sha expected_chunks_sha < <(
  python - "${manifest}" <<'PY'
import json, sys
with open(sys.argv[1], "r", encoding="utf-8") as f:
    data = json.load(f)
print(data["records"]["linksets"],
      data["records"]["advisory_chunks"],
      data["sha256"]["linksets.ndjson"],
      data["sha256"]["advisory_chunks.ndjson"])
PY
)
actual_linksets=$(wc -l < "${WORKDIR}/linksets.ndjson" | tr -d '[:space:]')
actual_chunks=$(wc -l < "${WORKDIR}/advisory_chunks.ndjson" | tr -d '[:space:]')
actual_linksets_sha=$(sha256sum "${WORKDIR}/linksets.ndjson" | awk '{print $1}')
actual_chunks_sha=$(sha256sum "${WORKDIR}/advisory_chunks.ndjson" | awk '{print $1}')
if [[ "${expected_linksets}" != "${actual_linksets}" ]]; then
  echo "linksets count mismatch: expected ${expected_linksets}, got ${actual_linksets}" >&2
  exit 1
fi
if [[ "${expected_chunks}" != "${actual_chunks}" ]]; then
  echo "advisory_chunks count mismatch: expected ${expected_chunks}, got ${actual_chunks}" >&2
  exit 1
fi
if [[ "${expected_linksets_sha}" != "${actual_linksets_sha}" ]]; then
  echo "linksets sha mismatch: expected ${expected_linksets_sha}, got ${actual_linksets_sha}" >&2
  exit 1
fi
if [[ "${expected_chunks_sha}" != "${actual_chunks_sha}" ]]; then
  echo "advisory_chunks sha mismatch: expected ${expected_chunks_sha}, got ${actual_chunks_sha}" >&2
  exit 1
fi
echo "Dataset validation succeeded:"
echo " linksets: ${actual_linksets}"
echo " advisory_chunks: ${actual_chunks}"
echo " linksets.sha256=${actual_linksets_sha}"
echo " advisory_chunks.sha256=${actual_chunks_sha}"