Files
git.stella-ops.org/devops/docker/repro-builders/rhel/scripts/extract-functions.sh
StellaOps Bot e6c47c8f50 save progress
2025-12-28 23:49:56 +02:00

74 lines
2.5 KiB
Bash

#!/bin/bash
# RHEL Function Extraction Script
# Sprint: SPRINT_1227_0002_0001 (Reproducible Builders)
#
# Extracts function-level fingerprints from RPM packages
set -euo pipefail
RPM_PATH="${1:-}"
OUTPUT_DIR="${2:-/build/fingerprints}"
[[ -z "${RPM_PATH}" ]] && { echo "Usage: $0 <rpm_path> [output_dir]"; exit 1; }
[[ ! -f "${RPM_PATH}" ]] && { echo "RPM not found: ${RPM_PATH}"; exit 1; }
mkdir -p "${OUTPUT_DIR}"
RPM_NAME=$(rpm -qp --qf '%{NAME}' "${RPM_PATH}" 2>/dev/null)
RPM_VERSION=$(rpm -qp --qf '%{VERSION}-%{RELEASE}' "${RPM_PATH}" 2>/dev/null)
WORK_DIR=$(mktemp -d)
trap "rm -rf ${WORK_DIR}" EXIT
cd "${WORK_DIR}"
# Extract RPM contents
rpm2cpio "${RPM_PATH}" | cpio -idmv 2>/dev/null
# Find ELF binaries
find . -type f -exec file {} \; | grep -E 'ELF.*(executable|shared object)' | cut -d: -f1 | while read -r binary; do
BINARY_NAME=$(basename "${binary}")
BINARY_PATH="${binary#./}"
# Get build-id if present
BUILD_ID=$(readelf -n "${binary}" 2>/dev/null | grep 'Build ID:' | awk '{print $3}' || echo "")
# Extract function symbols
OUTPUT_FILE="${OUTPUT_DIR}/${RPM_NAME}_${BINARY_NAME}.json"
{
echo "{"
echo " \"package\": \"${RPM_NAME}\","
echo " \"version\": \"${RPM_VERSION}\","
echo " \"binary\": \"${BINARY_PATH}\","
echo " \"build_id\": \"${BUILD_ID}\","
echo " \"extracted_at\": \"$(date -u '+%Y-%m-%dT%H:%M:%SZ')\","
echo " \"functions\": ["
# Extract function addresses and sizes using nm and objdump
FIRST=true
nm -S --defined-only "${binary}" 2>/dev/null | grep -E '^[0-9a-f]+ [0-9a-f]+ [Tt]' | while read -r addr size type name; do
if [[ "${FIRST}" == "true" ]]; then
FIRST=false
else
echo ","
fi
# Calculate function hash from disassembly
FUNC_HASH=$(objdump -d --start-address=0x${addr} --stop-address=$((0x${addr} + 0x${size})) "${binary}" 2>/dev/null | \
grep -E '^\s+[0-9a-f]+:' | awk '{$1=""; print}' | sha256sum | cut -d' ' -f1)
printf ' {"name": "%s", "address": "0x%s", "size": %d, "hash": "%s"}' \
"${name}" "${addr}" "$((0x${size}))" "${FUNC_HASH}"
done || true
echo ""
echo " ]"
echo "}"
} > "${OUTPUT_FILE}"
echo "Extracted: ${OUTPUT_FILE}"
done
echo "Function extraction complete for: ${RPM_NAME}"