#!/bin/bash
# RHEL Function Extraction Script
# Sprint: SPRINT_1227_0002_0001 (Reproducible Builders)
#
# Extracts function-level fingerprints from RPM packages.
#
# Usage: <script> <rpm_path> [output_dir]
#   rpm_path    RPM file to analyse (required).
#   output_dir  Directory receiving the JSON reports
#               (default: /build/fingerprints).
#
# The RPM is unpacked into a temporary directory. For every regular file that
# `file` reports as an ELF executable or shared object, one report named
# <package>_<binary basename>.json is written to output_dir. Each report lists
# the defined text symbols reported by `nm -S` (type T/t) with address, size
# and a SHA-256 of the objdump disassembly of that address range.
#
# NOTE: two binaries with the same basename in different directories map to
# the same report file; the one processed last wins.
#
# Exit status: 0 on success, 1 on usage or setup errors.

set -euo pipefail

RPM_PATH="${1:-}"
OUTPUT_DIR="${2:-/build/fingerprints}"
RPM_NAME=""
RPM_VERSION=""
WORK_DIR=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the temporary extraction directory (installed as the EXIT trap).
cleanup() {
  if [[ -n "${WORK_DIR}" ]]; then
    rm -rf -- "${WORK_DIR}"
  fi
}

# Escape a string for use inside a double-quoted JSON value.
# Handles backslash, double quote, newline, tab and carriage return; other
# control characters are passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\t'/\\t}
  s=${s//$'\r'/\\r}
  printf '%s' "${s}"
}

# Emit the comma-separated JSON objects describing the functions of a binary.
# Arguments: $1 - path to the ELF file
# Outputs:   zero or more JSON objects on stdout, without a trailing newline
emit_functions() {
  local binary=$1
  local -r hex_re='^[0-9a-f]+$'
  local addr size sym_type name stop_addr func_hash
  local separator=""

  # nm failures (e.g. fully stripped binaries) simply yield no functions.
  while read -r addr size sym_type name; do
    # nm -S omits the size column for some symbols; such lines shift the
    # fields, so require two hex columns AND an exact T/t type column.
    [[ "${addr}" =~ ${hex_re} && "${size}" =~ ${hex_re} ]] || continue
    [[ "${sym_type}" == [Tt] ]] || continue

    # Format as hex so addresses with the top bit set stay valid for objdump.
    printf -v stop_addr '0x%x' "$((0x${addr} + 0x${size}))"

    # Hash the disassembly lines with the leading address column blanked.
    # A failing objdump is tolerated: the hash is then that of empty input.
    func_hash=$(
      objdump -d --start-address="0x${addr}" --stop-address="${stop_addr}" \
        "${binary}" 2>/dev/null \
        | awk '/^[[:space:]]+[0-9a-f]+:/ {$1=""; print}' \
        | sha256sum \
        | cut -d' ' -f1
    ) || true

    printf '%s' "${separator}"
    separator=$',\n'

    printf ' {"name": "%s", "address": "0x%s", "size": %d, "hash": "%s"}' \
      "$(json_escape "${name}")" "${addr}" "$((0x${size}))" "${func_hash}"
  done < <(nm -S --defined-only "${binary}" 2>/dev/null)

  return 0
}

# Write the JSON fingerprint report for one ELF binary.
# Globals:   OUTPUT_DIR, RPM_NAME, RPM_VERSION (read)
# Arguments: $1 - path to the binary, relative to the extraction directory
# Outputs:   writes the report file and prints its path on stdout
process_binary() {
  local binary=$1
  local binary_name="${binary##*/}"
  local binary_path="${binary#./}"
  local build_id output_file extracted_at

  # Get build-id if present; a missing note or readelf error yields "".
  build_id=$(
    readelf -n "${binary}" 2>/dev/null \
      | awk '/Build ID:/ {print $3}' || true
  )

  output_file="${OUTPUT_DIR}/${RPM_NAME}_${binary_name}.json"
  extracted_at=$(date -u '+%Y-%m-%dT%H:%M:%SZ')

  {
    printf '{\n'
    printf ' "package": "%s",\n' "$(json_escape "${RPM_NAME}")"
    printf ' "version": "%s",\n' "$(json_escape "${RPM_VERSION}")"
    printf ' "binary": "%s",\n' "$(json_escape "${binary_path}")"
    printf ' "build_id": "%s",\n' "$(json_escape "${build_id}")"
    printf ' "extracted_at": "%s",\n' "${extracted_at}"
    printf ' "functions": [\n'
    emit_functions "${binary}"
    printf '\n ]\n}\n'
  } > "${output_file}"

  echo "Extracted: ${output_file}"
}

main() {
  local -r elf_re='ELF.*(executable|shared object)'
  local rpm_dir binary desc

  [[ -n "${RPM_PATH}" ]] || die "Usage: $0 <rpm_path> [output_dir]"
  [[ -f "${RPM_PATH}" ]] || die "RPM not found: ${RPM_PATH}"

  mkdir -p -- "${OUTPUT_DIR}"

  # Resolve both paths to absolute form now: the script changes into a
  # temporary directory below, which would invalidate relative paths.
  rpm_dir=$(cd -- "$(dirname -- "${RPM_PATH}")" && pwd)
  RPM_PATH="${rpm_dir%/}/$(basename -- "${RPM_PATH}")"
  OUTPUT_DIR=$(cd -- "${OUTPUT_DIR}" && pwd)

  # stderr is dropped on purpose: rpm prints signature/key warnings there.
  RPM_NAME=$(rpm -qp --qf '%{NAME}' "${RPM_PATH}" 2>/dev/null) \
    || die "Failed to read package name from: ${RPM_PATH}"
  RPM_VERSION=$(rpm -qp --qf '%{VERSION}-%{RELEASE}' "${RPM_PATH}" 2>/dev/null) \
    || die "Failed to read package version from: ${RPM_PATH}"

  WORK_DIR=$(mktemp -d) || die "mktemp failed"
  trap cleanup EXIT

  cd "${WORK_DIR}"

  # Extract RPM contents
  rpm2cpio "${RPM_PATH}" | cpio -idm --quiet

  # Find ELF binaries. Paths are NUL-delimited and read on fd 3 so unusual
  # file names survive and the loop body cannot consume the list via stdin.
  # A package without any ELF file is not an error.
  while IFS= read -r -d '' binary <&3; do
    desc=$(file -b "${binary}" 2>/dev/null) || continue
    [[ "${desc}" =~ ${elf_re} ]] || continue
    process_binary "${binary}"
  done 3< <(find . -type f -print0)

  echo "Function extraction complete for: ${RPM_NAME}"
}

main "$@"