Files
git.stella-ops.org/devops/docker/repro-builders/alpine/scripts/extract-functions.sh
StellaOps Bot cec4265a40 save progress
2025-12-28 01:40:52 +02:00

72 lines
2.2 KiB
Bash

#!/bin/sh
# Extract function fingerprints from ELF binaries
# Outputs JSON array with function name, offset, size, and hashes
#
# Usage: extract-functions.sh <directory>
#
# Dependencies: objdump, readelf, sha256sum, jq
set -eu
DIR="${1:-.}"
extract_functions_from_binary() {
local binary="$1"
# Skip non-ELF files
file "$binary" | grep -q "ELF" || return 0
# Get function symbols
objdump -t "$binary" 2>/dev/null | \
awk '/\.text.*[0-9a-f]+.*F/ {
# Fields: addr flags section size name
gsub(/\*.*\*/, "", $1) # Clean address
if ($5 != "" && $4 != "00000000" && $4 != "0000000000000000") {
printf "%s %s %s\n", $1, $4, $NF
}
}' | while read -r offset size name; do
# Skip compiler-generated symbols
case "$name" in
__*|_GLOBAL_*|.plt*|.text*|frame_dummy|register_tm_clones|deregister_tm_clones)
continue
;;
esac
# Convert hex size to decimal
dec_size=$((16#$size))
# Skip tiny functions (likely padding)
[ "$dec_size" -lt 16 ] && continue
# Extract function bytes and compute hash
# Using objdump to get disassembly and hash the opcodes
local hash=$(objdump -d --start-address="0x$offset" --stop-address="0x$((16#$offset + dec_size))" "$binary" 2>/dev/null | \
grep "^[[:space:]]*[0-9a-f]*:" | \
awk '{for(i=2;i<=NF;i++){if($i~/^[0-9a-f]{2}$/){printf "%s", $i}}}' | \
sha256sum | cut -d' ' -f1)
# Output JSON object
printf '{"name":"%s","offset":"0x%s","size":%d,"hash":"%s"}\n' \
"$name" "$offset" "$dec_size" "${hash:-unknown}"
done
}
# Find all ELF binaries in directory
echo "["
first=true
find "$DIR" -type f -executable 2>/dev/null | while read -r binary; do
# Check if ELF
file "$binary" 2>/dev/null | grep -q "ELF" || continue
extract_functions_from_binary "$binary" | while read -r json; do
[ -z "$json" ] && continue
if [ "$first" = "true" ]; then
first=false
else
echo ","
fi
echo "$json"
done
done
echo "]"