#!/bin/sh
# Extract function fingerprints from ELF binaries.
#
# Prints a JSON array on stdout with one object per function:
#   {"name":"...","offset":"0x<hex>","size":<bytes>,"hash":"<sha256>"}
# Objects are separated by a line containing a single ",".
#
# Usage: extract-functions.sh [DIR]
#   DIR  directory searched recursively for executable files (default: .)
#
# Dependencies: find (with -executable), file, objdump, awk, grep, sed,
#               sha256sum, cut

set -eu

# Convert a hexadecimal string (without "0x" prefix) to decimal.
# Arguments: $1 - hex digits
# Outputs:   decimal value on stdout
# Uses printf instead of the bash-only $((16#...)) so it works under /bin/sh.
hex_to_dec() {
  printf '%d\n' "0x$1"
}

# Escape a string for use inside a JSON double-quoted string.
# Arguments: $1 - raw string
# Outputs:   string with backslashes and double quotes escaped
# Control characters are not handled.
json_escape() {
  printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# Filter `objdump -t` output (stdin) down to function symbols in .text*
# sections. Emits "address size name" (address and size in hex) per symbol.
#
# objdump prints: address, a block of flag characters (which awk splits into
# a varying number of fields), the section name, then size and name. The size
# is therefore located relative to the section field, not at a fixed index.
filter_function_symbols() {
  awk '
    {
      sec = 0
      for (i = 2; i < NF; i++) {
        if ($i ~ /^\.text/) { sec = i; break }
      }
      # Need at least one flag field before the section and both a size
      # and a name after it.
      if (sec < 3 || sec + 1 >= NF) next
      if ($1 !~ /^[0-9a-fA-F]+$/) next

      is_func = 0
      for (i = 2; i < sec; i++) {
        if ($i ~ /F/) is_func = 1
      }
      if (!is_func) next

      size = $(sec + 1)
      # Skip malformed and zero-sized entries.
      if (size !~ /^[0-9a-fA-F]+$/ || size ~ /^0+$/) next

      print $1, size, $NF
    }'
}

# Filter `objdump -d` output (stdin) down to the raw instruction bytes,
# printed as one continuous hex string without a trailing newline.
#
# Instruction lines are "<addr>:<TAB><bytes><TAB><mnemonic...>"; taking the
# second tab-separated column avoids mistaking operands for opcode bytes and
# also handles targets that print opcodes as multi-byte words.
extract_opcode_hex() {
  awk -F'\t' '
    NF >= 2 && $1 ~ /^[ ]*[0-9a-fA-F]+:$/ {
      gsub(/[^0-9a-fA-F]/, "", $2)
      printf "%s", $2
    }'
}

# Join JSON object lines (stdin) with a "," line between consecutive
# objects; blank lines are dropped. Done in a single process so the
# "first element" state is not lost in pipeline subshells.
join_json_lines() {
  awk 'NF { if (seen++) print ","; print }'
}

# Succeeds if the given path is an ELF file according to file(1).
# Arguments: $1 - path
# -b omits the file name from the output so a path containing "ELF"
# cannot produce a false positive.
is_elf() {
  file -b -- "$1" 2>/dev/null | grep -q "ELF"
}

# Print one JSON object per line for each function found in a binary.
# Arguments: $1 - path to the binary
# Outputs:   JSON objects on stdout (nothing for non-ELF files)
# Returns:   0 for non-ELF input; otherwise the status of the symbol loop
# The body runs in a subshell so its variables do not leak to the caller
# (POSIX sh has no `local`).
extract_functions_from_binary() (
  binary=$1

  # Skip non-ELF files
  is_elf "$binary" || exit 0

  objdump -t "$binary" 2>/dev/null | filter_function_symbols |
    while read -r offset size name; do
      # Skip compiler-generated symbols
      case "$name" in
        __*|_GLOBAL_*|.plt*|.text*|frame_dummy|register_tm_clones|deregister_tm_clones)
          continue
          ;;
      esac

      dec_size=$(hex_to_dec "$size") || continue

      # Skip tiny functions (likely padding)
      if [ "$dec_size" -lt 16 ]; then
        continue
      fi

      # Disassemble exactly [offset, offset + size); the stop address is
      # rendered in hex so the 0x prefix is truthful.
      dec_start=$(hex_to_dec "$offset") || continue
      stop=$(printf '0x%x' "$((dec_start + dec_size))")

      opcodes=$(objdump -d --start-address="0x$offset" \
        --stop-address="$stop" "$binary" 2>/dev/null | extract_opcode_hex)

      # Hash the opcode bytes; report "unknown" when none could be read
      # rather than the digest of empty input.
      if [ -n "$opcodes" ]; then
        hash=$(printf '%s' "$opcodes" | sha256sum | cut -d' ' -f1)
      else
        hash=unknown
      fi

      printf '{"name":"%s","offset":"0x%s","size":%d,"hash":"%s"}\n' \
        "$(json_escape "$name")" "$offset" "$dec_size" "$hash"
    done
)

# Scan a directory for executable ELF files and print the JSON array.
# Arguments: $1 - directory to scan (default: current directory)
main() {
  scan_dir="${1:-.}"

  echo "["
  # Errors from find (e.g. unreadable subdirectories) are deliberately
  # silenced so they do not interleave with the JSON output.
  find "$scan_dir" -type f -executable 2>/dev/null |
    while IFS= read -r candidate; do
      extract_functions_from_binary "$candidate"
    done |
    join_json_lines
  echo "]"
}

main "$@"