#!/bin/bash
# RHEL Function Extraction Script
# Sprint: SPRINT_1227_0002_0001 (Reproducible Builders)
#
# Extracts function-level fingerprints from RPM packages.
#
# Usage: <script> <rpm_path> [output_dir]
#   rpm_path    RPM file to inspect (required).
#   output_dir  Directory the JSON reports are written to
#               (default: /build/fingerprints).
#
# For each ELF executable or shared object unpacked from the RPM, one file
# named <output_dir>/<package>_<binary basename>.json is written. It records
# the package name and version, the binary's path and build-id, and, for
# every defined text symbol reported by nm, its name, address, size and a
# SHA-256 computed over its objdump disassembly.
#
# Requires: rpm, rpm2cpio, cpio, file, readelf, nm, objdump, sha256sum.

set -euo pipefail

# Regex applied to the `file -b` description to select binaries to process.
readonly ELF_PATTERN='ELF.*(executable|shared object)'

# Scratch directory holding the unpacked RPM. Global so the EXIT trap can
# see it; assigned in main.
WORK_DIR=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory. Installed as the EXIT trap only after
# WORK_DIR has been created.
cleanup() {
  rm -rf -- "${WORK_DIR}"
}

# Escape backslashes and double quotes so $1 can be placed inside a JSON
# string literal. Control characters are not handled.
json_escape() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  printf '%s' "${value}"
}

# Emit one JSON object per defined text symbol (nm type T or t) of a binary.
# Arguments: $1 - path to the binary.
# Outputs:   comma-separated JSON objects on stdout, without a trailing
#            newline; nothing at all when nm reports no sized text symbols.
emit_functions() {
  local binary=$1
  local first=true
  local addr size _type name func_hash

  while read -r addr size _type name; do
    # Separate entries with a comma placed after the previous object.
    if [[ "${first}" == "true" ]]; then
      first=false
    else
      echo ","
    fi

    # Hash the function's disassembly with the leading address column of
    # each line blanked out. A failing stage (e.g. no disassembly lines)
    # must not abort the run, hence the trailing `|| true`; the hash of
    # whatever reached sha256sum is still recorded.
    func_hash=$(
      objdump -d --start-address="0x${addr}" \
        --stop-address="$((0x${addr} + 0x${size}))" "${binary}" 2>/dev/null \
        | grep -E '^\s+[0-9a-f]+:' \
        | awk '{$1=""; print}' \
        | sha256sum \
        | cut -d' ' -f1
    ) || true

    printf '    {"name": "%s", "address": "0x%s", "size": %d, "hash": "%s"}' \
      "$(json_escape "${name}")" "${addr}" "$((0x${size}))" "${func_hash}"
  done < <(nm -S --defined-only "${binary}" 2>/dev/null \
    | grep -E '^[0-9a-f]+ [0-9a-f]+ [Tt]')
}

# Write the JSON fingerprint report for one extracted binary.
# Globals:   OUTPUT_DIR, RPM_NAME, RPM_VERSION (read).
# Arguments: $1 - binary path relative to the scratch directory ("./...").
# Outputs:   the report file, plus an "Extracted:" line on stdout.
write_fingerprint() {
  local binary=$1
  local binary_name=${binary##*/}
  local binary_path=${binary#./}
  local build_id output_file

  # Empty when the binary carries no build-id note or readelf fails.
  build_id=$(readelf -n "${binary}" 2>/dev/null \
    | awk '/Build ID:/ {print $3}' || true)

  # NOTE: binaries sharing a basename within one package map to the same
  # report file; the one processed last wins.
  output_file="${OUTPUT_DIR}/${RPM_NAME}_${binary_name}.json"

  {
    printf '{\n'
    printf '  "package": "%s",\n' "$(json_escape "${RPM_NAME}")"
    printf '  "version": "%s",\n' "$(json_escape "${RPM_VERSION}")"
    printf '  "binary": "%s",\n' "$(json_escape "${binary_path}")"
    printf '  "build_id": "%s",\n' "${build_id}"
    printf '  "extracted_at": "%s",\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
    printf '  "functions": [\n'
    emit_functions "${binary}"
    printf '\n'
    printf '  ]\n'
    printf '}\n'
  } > "${output_file}"

  echo "Extracted: ${output_file}"
}

# Entry point: validate arguments, unpack the RPM into a scratch directory
# and fingerprint every ELF executable / shared object found in it.
main() {
  RPM_PATH="${1:-}"
  OUTPUT_DIR="${2:-/build/fingerprints}"

  [[ -n "${RPM_PATH}" ]] || die "Usage: $0 <rpm_path> [output_dir]"
  [[ -f "${RPM_PATH}" ]] || die "RPM not found: ${RPM_PATH}"

  mkdir -p "${OUTPUT_DIR}"

  # The script changes into the scratch directory below, so relative paths
  # must be anchored to the caller's working directory first.
  [[ "${RPM_PATH}" == /* ]] || RPM_PATH="${PWD}/${RPM_PATH}"
  [[ "${OUTPUT_DIR}" == /* ]] || OUTPUT_DIR="${PWD}/${OUTPUT_DIR}"

  # stderr is discarded to hide rpm's warnings; a real failure is reported
  # explicitly instead of exiting silently.
  RPM_NAME=$(rpm -qp --qf '%{NAME}' "${RPM_PATH}" 2>/dev/null) \
    || die "Cannot read package name from: ${RPM_PATH}"
  RPM_VERSION=$(rpm -qp --qf '%{VERSION}-%{RELEASE}' "${RPM_PATH}" 2>/dev/null) \
    || die "Cannot read package version from: ${RPM_PATH}"

  WORK_DIR=$(mktemp -d)
  trap cleanup EXIT
  cd "${WORK_DIR}"

  # Extract RPM contents (cpio's file listing and block count go to stderr).
  rpm2cpio "${RPM_PATH}" | cpio -idmv 2>/dev/null

  # Walk the payload NUL-delimited so any file name is handled, and classify
  # with `file -b`, which prints only the description (no "name:" prefix).
  # Reading from process substitution means a package without ELF files
  # simply produces no reports instead of failing the script.
  local binary file_type
  while IFS= read -r -d '' binary; do
    file_type=$(file -b "${binary}") || continue
    [[ "${file_type}" =~ ${ELF_PATTERN} ]] || continue
    write_fingerprint "${binary}"
  done < <(find . -type f -print0)

  echo "Function extraction complete for: ${RPM_NAME}"
}

main "$@"