save progress

This commit is contained in:
StellaOps Bot
2025-12-28 01:40:35 +02:00
parent 3bfbbae115
commit cec4265a40
694 changed files with 88052 additions and 24718 deletions

View File

@@ -0,0 +1,62 @@
# Alpine Reproducible Builder
# Creates deterministic builds of Alpine packages for fingerprint diffing
#
# Usage:
#   docker build -t repro-builder-alpine:3.20 --build-arg RELEASE=3.20 .
#   docker run -v ./output:/output repro-builder-alpine:3.20 build openssl 3.0.7-r0
ARG RELEASE=3.20
FROM alpine:${RELEASE}

# Re-declared after FROM so the value is visible inside the build stage;
# exposed to the scripts at runtime as ALPINE_RELEASE.
ARG RELEASE
ENV ALPINE_RELEASE=${RELEASE}

# Install build tools and dependencies.
# jq is needed by build.sh: its "diff" command assembles the report with jq.
RUN apk add --no-cache \
        alpine-sdk \
        abuild \
        sudo \
        git \
        curl \
        binutils \
        elfutils \
        coreutils \
        tar \
        gzip \
        xz \
        patch \
        diffutils \
        file \
        jq \
    && rm -rf /var/cache/apk/*

# Create build user (abuild requires non-root).
# /build and /output are created here, while still root, and handed to the
# build user so the scripts can write to them without relying on how the
# image builder assigns ownership for WORKDIR/VOLUME directories.
RUN adduser -D -G abuild builder \
    && echo "builder ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers \
    && mkdir -p /var/cache/distfiles /build /output \
    && chown -R builder:abuild /var/cache/distfiles /build /output

# Setup abuild
USER builder
WORKDIR /home/builder

# Generate abuild keys
RUN abuild-keygen -a -i -n

# Copy normalization and build scripts
COPY --chown=builder:abuild scripts/normalize.sh /usr/local/bin/normalize.sh
COPY --chown=builder:abuild scripts/build.sh /usr/local/bin/build.sh
COPY --chown=builder:abuild scripts/extract-functions.sh /usr/local/bin/extract-functions.sh
RUN chmod +x /usr/local/bin/*.sh

# Environment for reproducibility
ENV TZ=UTC
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

# Build output directory
VOLUME /output
WORKDIR /build
ENTRYPOINT ["/usr/local/bin/build.sh"]
CMD ["--help"]

View File

@@ -0,0 +1,226 @@
#!/bin/sh
# Reproducible build driver for Alpine packages.
# Produces packages under deterministic settings so that function
# fingerprints can be generated from them.
#
# Usage: build.sh [build|diff] <package> <version> [patch_url...]
#
# Examples:
#   build.sh build openssl 3.0.7-r0
#   build.sh diff openssl 3.0.7-r0 3.0.8-r0
#   build.sh build openssl 3.0.7-r0 https://patch.url/CVE-2023-1234.patch

# Abort on any unhandled failure and on use of an unset variable.
set -e
set -u

# Positional arguments; a missing command falls back to "help".
COMMAND=${1:-help}
PACKAGE=${2:-}
VERSION=${3:-}

# Where results are written; a non-empty OUTPUT_DIR from the environment wins.
: "${OUTPUT_DIR:=/output}"
# Write one diagnostic line to stderr, prefixed with the current UTC time
# in the form [YYYY-MM-DDTHH:MM:SSZ]. All arguments are joined into the line.
log() {
  local stamp
  # "|| true": a failing date must not abort the caller, it only leaves
  # the prefix empty.
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ) || true
  echo "[$stamp] $*" >&2
}
# Print the usage text to stdout.
# The heredoc delimiter is quoted because the text contains nothing to expand.
show_help() {
  cat <<'USAGE'
Alpine Reproducible Builder
Usage:
build.sh build <package> <version> [patch_urls...]
Build a package with reproducible settings
build.sh diff <package> <vuln_version> <patched_version>
Build two versions and compute fingerprint diff
build.sh --help
Show this help message
Environment:
SOURCE_DATE_EPOCH Override timestamp (extracted from APKBUILD if not set)
OUTPUT_DIR Output directory (default: /output)
CFLAGS Additional compiler flags
LDFLAGS Additional linker flags
Examples:
build.sh build openssl 3.0.7-r0
build.sh build curl 8.1.0-r0 https://patch/CVE-2023-1234.patch
build.sh diff openssl 3.0.7-r0 3.0.8-r0
USAGE
}
# Export the environment used to make a package build deterministic.
#
# Arguments:
#   $1 - package name, used to locate its APKBUILD under ./aports
#   $2 - package version (accepted for call-site symmetry; not used here)
# Globals:
#   SOURCE_DATE_EPOCH   respected when already set, otherwise derived
#   CFLAGS, CXXFLAGS    extended once with the reproducibility flags
#   LDFLAGS, LC_ALL, TZ exported
#   REPRO_FLAGS_APPLIED internal guard so flags are appended only once
# Must be called from the directory that contains the aports/ checkout,
# because the APKBUILD is looked up by relative path.
setup_reproducible_env() {
  local pkg="$1"
  local repo apkbuild epoch

  if [ -z "${SOURCE_DATE_EPOCH:-}" ]; then
    # Search the same repositories fetch_source does, not just "main".
    apkbuild=""
    for repo in main community testing; do
      if [ -f "aports/$repo/$pkg/APKBUILD" ]; then
        apkbuild="aports/$repo/$pkg/APKBUILD"
        break
      fi
    done

    epoch=""
    if [ -n "$apkbuild" ]; then
      # Prefer the commit time: the mtime of a freshly cloned file is the
      # time of the clone, which differs on every run.
      epoch=$(git -C aports log -1 --format=%ct -- "${apkbuild#aports/}" 2>/dev/null) || epoch=""
      if [ -z "$epoch" ]; then
        # Not a git checkout (or git unavailable): fall back to the mtime.
        epoch=$(stat -c %Y "$apkbuild" 2>/dev/null) || epoch=""
      fi
    fi
    if [ -z "$epoch" ]; then
      # Last resort; this value is NOT reproducible across runs.
      epoch=$(date +%s)
    fi

    SOURCE_DATE_EPOCH="$epoch"
    export SOURCE_DATE_EPOCH
  fi
  log "SOURCE_DATE_EPOCH=$SOURCE_DATE_EPOCH"

  # Reproducible compiler flags. Appended only on the first call so that
  # building two versions in one process (diff mode) does not stack
  # duplicate flags or a second prefix map taken from a different cwd.
  if [ -z "${REPRO_FLAGS_APPLIED:-}" ]; then
    export CFLAGS="${CFLAGS:-} -fno-record-gcc-switches -fdebug-prefix-map=$(pwd)=/build"
    export CXXFLAGS="${CXXFLAGS:-} ${CFLAGS}"
    REPRO_FLAGS_APPLIED=1
  fi
  export LDFLAGS="${LDFLAGS:-}"

  # Locale for deterministic sorting
  export LC_ALL=C.UTF-8
  export TZ=UTC
}
# Obtain the aports tree if needed, locate a package in it, then fetch and
# unpack the package sources with abuild.
#
# Arguments:
#   $1 - package name
#   $2 - expected version in Alpine form (<pkgver>-r<pkgrel>)
# Globals:
#   ALPINE_RELEASE (read) - selects the aports branch for a fresh clone
# Side effects:
#   May create ./aports; on success the working directory is left inside
#   the package directory (callers run abuild from there).
# Returns:
#   non-zero when the clone, the lookup or an abuild step fails.
fetch_source() {
  local pkg="$1"
  local ver="$2"
  local branch repo pkg_dir found_ver

  log "Fetching source for $pkg-$ver"

  # Clone aports if needed
  if [ ! -d "aports" ]; then
    # Stable releases are maintained on "<major>.<minor>-stable" branches;
    # anything else (unset, "edge", "latest", ...) uses master.
    case "${ALPINE_RELEASE:-}" in
      [0-9]*.[0-9]*)
        branch="$(printf '%s\n' "$ALPINE_RELEASE" | cut -d. -f1,2)-stable"
        ;;
      *)
        branch="master"
        ;;
    esac
    git clone --depth 1 --branch "$branch" \
      https://gitlab.alpinelinux.org/alpine/aports.git || return 1
  fi

  # Find package
  pkg_dir=""
  for repo in main community testing; do
    if [ -d "aports/$repo/$pkg" ]; then
      pkg_dir="aports/$repo/$pkg"
      break
    fi
  done
  if [ -z "$pkg_dir" ]; then
    log "ERROR: Package $pkg not found in aports"
    return 1
  fi

  cd "$pkg_dir" || return 1

  # The shallow checkout only contains whatever version the branch head
  # carries. Make a mismatch with the requested version visible instead of
  # silently building (and labelling) something else.
  found_ver="$(sed -n 's/^pkgver=//p' APKBUILD 2>/dev/null | head -n 1 | tr -d "\"'")"
  found_ver="$found_ver-r$(sed -n 's/^pkgrel=//p' APKBUILD 2>/dev/null | head -n 1 | tr -d "\"'")"
  if [ "$found_ver" != "$ver" ]; then
    log "WARNING: requested $pkg-$ver but the aports checkout provides $pkg-$found_ver"
  fi

  abuild fetch || return 1
  abuild unpack || return 1
}
# Download each patch URL and apply it to a source tree.
#
# Arguments:
#   $1    - directory the patches are applied in (passed to patch -d)
#   $2... - patch URLs, applied in order with -p1
# Returns:
#   non-zero as soon as a download fails or a patch does not apply.
apply_patches() {
  local src_dir="$1"
  shift
  local patch_url patch_file

  for patch_url in "$@"; do
    log "Applying patch: $patch_url"

    # Download to a file first: piping curl straight into patch hides a
    # failed download, and without -f an HTTP error page would be handed
    # to patch as if it were the patch.
    patch_file=$(mktemp) || return 1
    if ! curl -fsSL "$patch_url" > "$patch_file"; then
      log "ERROR: Failed to download patch: $patch_url"
      rm -f "$patch_file"
      return 1
    fi

    if ! patch -d "$src_dir" -p1 < "$patch_file"; then
      log "ERROR: Patch did not apply: $patch_url"
      rm -f "$patch_file"
      return 1
    fi
    rm -f "$patch_file"
  done
}
# Build one package version and collect its artifacts and fingerprints.
#
# Arguments:
#   $1    - package name
#   $2    - package version (used to name the output directory)
#   $3... - optional patch URLs applied to the unpacked sources
# Globals (read):
#   OUTPUT_DIR - results go to $OUTPUT_DIR/<package>-<version>
#   BUILD_ROOT - directory holding the aports checkout (default: /build)
#   REPODEST   - where abuild stores packages (default: ~/packages)
# Returns:
#   non-zero when the sources cannot be prepared or no .apk is produced.
# NOTE(review): "abuild -r" may unpack the sources again and discard the
# applied patches - confirm against abuild's behaviour.
build_package() {
  local pkg="$1"
  local ver="$2"
  shift 2 # "$@" now holds only the patch URLs, one word per URL

  local build_root="${BUILD_ROOT:-/build}"
  local out_dir="$OUTPUT_DIR/$pkg-$ver"
  local repo_dest pkg_dir candidate src_dir apk apk_name found

  repo_dest="${REPODEST:-}"
  if [ -z "$repo_dest" ]; then
    repo_dest=~/packages
  fi

  log "Building $pkg-$ver"

  # Always start from the build root: a previous build leaves the working
  # directory inside its package directory.
  cd "$build_root" || return 1
  fetch_source "$pkg" "$ver"
  pkg_dir=$(pwd) # fetch_source leaves us inside the package directory

  # Set up the environment only after the sources exist, and from the
  # build root, so the APKBUILD lookup (relative to ./aports) can succeed.
  cd "$build_root" || return 1
  setup_reproducible_env "$pkg" "$ver"
  cd "$pkg_dir" || return 1

  if [ "$#" -gt 0 ]; then
    # Resolve the unpacked source directory; a quoted glob would be passed
    # on literally and never match.
    src_dir=""
    for candidate in src/"$pkg"-*; do
      if [ -d "$candidate" ]; then
        src_dir="$candidate"
        break
      fi
    done
    if [ -z "$src_dir" ]; then
      log "ERROR: No unpacked source directory matching src/$pkg-*"
      return 1
    fi
    apply_patches "$src_dir" "$@"
  fi

  # Build with reproducible settings
  abuild -r

  # Copy output. Packages are searched recursively because they are stored
  # in nested <repo>/<arch> directories below the package destination.
  mkdir -p "$out_dir"
  if [ -d "$repo_dest" ]; then
    find "$repo_dest" -type f -name '*.apk' -exec cp {} "$out_dir/" \;
  fi

  # Extract binaries and fingerprints
  found=0
  for apk in "$out_dir"/*.apk; do
    [ -f "$apk" ] || continue
    found=1
    apk_name=$(basename "$apk" .apk)
    mkdir -p "$out_dir/extracted/$apk_name"
    tar -xzf "$apk" -C "$out_dir/extracted/$apk_name"
    # Extract function fingerprints
    /usr/local/bin/extract-functions.sh "$out_dir/extracted/$apk_name" \
      > "$out_dir/$apk_name.functions.json"
  done

  if [ "$found" -eq 0 ]; then
    log "ERROR: No .apk files found under $repo_dest"
    return 1
  fi

  log "Build complete: $out_dir"
}
# Build two versions of a package and write a JSON report comparing their
# function fingerprints.
#
# Arguments:
#   $1 - package name
#   $2 - vulnerable version
#   $3 - patched version
# Globals (read):
#   OUTPUT_DIR - the report is written to
#                $OUTPUT_DIR/<pkg>-diff-<vuln>-vs-<patched>.json
# Returns:
#   non-zero when a build fails or the report cannot be generated.
diff_versions() {
  local pkg="$1"
  local vuln_ver="$2"
  local patched_ver="$3"
  local diff_out vuln_all patched_all status

  log "Building and diffing $pkg: $vuln_ver vs $patched_ver"

  # Build vulnerable version
  build_package "$pkg" "$vuln_ver"
  # Build patched version
  build_package "$pkg" "$patched_ver"

  # Compute diff
  diff_out="$OUTPUT_DIR/$pkg-diff-$vuln_ver-vs-$patched_ver.json"

  # A build may produce several *.functions.json files (one per .apk).
  # Merge them per version first; slurping all files together would compare
  # the first two files instead of the two versions.
  vuln_all=$(mktemp) || return 1
  patched_all=$(mktemp) || {
    rm -f "$vuln_all"
    return 1
  }

  status=0
  # Values are passed with --arg instead of being spliced into the program
  # text, so characters such as quotes cannot break the filter.
  jq -s 'add // []' "$OUTPUT_DIR/$pkg-$vuln_ver"/*.functions.json > "$vuln_all" \
    && jq -s 'add // []' "$OUTPUT_DIR/$pkg-$patched_ver"/*.functions.json > "$patched_all" \
    && jq -n \
      --arg package "$pkg" \
      --arg vuln_ver "$vuln_ver" \
      --arg patched_ver "$patched_ver" \
      --slurpfile vuln "$vuln_all" \
      --slurpfile patched "$patched_all" '
        $vuln[0] as $v |
        $patched[0] as $p |
        ($v | map(.name)) as $vnames |
        ($p | map(.name)) as $pnames |
        {
          package: $package,
          vulnerable_version: $vuln_ver,
          patched_version: $patched_ver,
          vulnerable_functions: ($v | length),
          patched_functions: ($p | length),
          added: [$p[] | select(.name as $n | ($vnames | index($n)) == null)],
          removed: [$v[] | select(.name as $n | ($pnames | index($n)) == null)],
          modified: [
            $v[] | .name as $n | .hash as $h |
            ($p[] | select(.name == $n and .hash != $h)) |
            {name: $n, vuln_hash: $h, patched_hash: .hash}
          ]
        }
      ' > "$diff_out" \
    || status=$?

  rm -f "$vuln_all" "$patched_all"

  if [ "$status" -ne 0 ]; then
    log "ERROR: Failed to compute fingerprint diff for $pkg"
    return "$status"
  fi
  log "Diff complete: $diff_out"
}
# Command dispatch: route COMMAND to its handler. PACKAGE and VERSION were
# taken from the positional arguments; any remaining arguments of "build"
# are patch URLs.
case "$COMMAND" in
  build)
    if [ -z "$PACKAGE" ] || [ -z "$VERSION" ]; then
      log "ERROR: Package and version required"
      show_help
      exit 1
    fi
    # Drop command, package AND version so that "$@" holds only patch
    # URLs. Shifting by two would leave the version behind and pass it on
    # as if it were the first patch URL.
    shift 3
    build_package "$PACKAGE" "$VERSION" "$@"
    ;;
  diff)
    PATCHED_VERSION="${4:-}"
    if [ -z "$PACKAGE" ] || [ -z "$VERSION" ] || [ -z "$PATCHED_VERSION" ]; then
      log "ERROR: Package, vulnerable version, and patched version required"
      show_help
      exit 1
    fi
    diff_versions "$PACKAGE" "$VERSION" "$PATCHED_VERSION"
    ;;
  --help | help)
    show_help
    ;;
  *)
    log "ERROR: Unknown command: $COMMAND"
    show_help
    exit 1
    ;;
esac

View File

@@ -0,0 +1,71 @@
#!/bin/sh
# Emit function fingerprints for the ELF binaries found under a directory.
# The result is printed as a JSON array; each element carries a function's
# name, offset, size and hash.
#
# Usage: extract-functions.sh <directory>
#
# Tools used include: file, find, objdump, awk, grep, sha256sum, cut

# Abort on any unhandled failure and on use of an unset variable.
set -e
set -u

# Directory to scan; the current directory when no argument is given.
DIR=${1:-.}
# Print one JSON object per function symbol found in an ELF file.
#
# Arguments:
#   $1 - path of the file to inspect
# Output (stdout), one line per function:
#   {"name":...,"offset":"0x<hex>","size":<decimal>,"hash":"<sha256>"}
#   The hash covers the instruction bytes objdump prints for the
#   function's address range.
# Non-ELF files produce no output. Symbols smaller than 16 bytes and
# compiler-generated symbols are skipped.
extract_functions_from_binary() {
  local binary="$1"
  local offset size name dec_size end_addr fn_hash

  # Skip non-ELF files
  file "$binary" | grep -q "ELF" || return 0

  # Symbol table lines look like:
  #   <addr> <flags...> <section> <size> <name>
  # The flags occupy a variable number of whitespace-separated fields, so
  # the section is located by name instead of by fixed position. A symbol
  # is a function when one of its flag fields contains "F".
  objdump -t "$binary" 2>/dev/null \
    | awk '
        {
          sec = 0
          for (i = 2; i <= NF; i++) {
            if ($i ~ /^\.text/) { sec = i; break }
          }
          if (sec == 0 || sec + 2 > NF) next

          is_func = 0
          for (i = 2; i < sec; i++) {
            if ($i ~ /F/) is_func = 1
          }
          if (!is_func) next

          addr = $1
          size = $(sec + 1)
          if (addr !~ /^[0-9a-f]+$/ || size !~ /^[0-9a-f]+$/) next
          if (size ~ /^0+$/) next

          printf "%s %s %s\n", addr, size, $NF
        }' \
    | while read -r offset size name; do
        # Skip compiler-generated symbols
        case "$name" in
          __* | _GLOBAL_* | .plt* | .text* | frame_dummy | register_tm_clones | deregister_tm_clones)
            continue
            ;;
        esac

        # Convert hex size to decimal. The 0x prefix is understood by
        # POSIX shell arithmetic; the base#value form is not.
        dec_size=$((0x$size))

        # Skip tiny functions (likely padding)
        if [ "$dec_size" -lt 16 ]; then
          continue
        fi

        # The arithmetic result is decimal, so format it as hex explicitly
        # before handing it to objdump with a 0x prefix.
        end_addr=$(printf '0x%x' "$((0x$offset + dec_size))")

        # Disassembly lines are "<addr>:<TAB><hex bytes><TAB><instruction>";
        # hash only the byte column.
        fn_hash=$(objdump -d --start-address="0x$offset" --stop-address="$end_addr" "$binary" 2>/dev/null \
          | awk -F'\t' '$1 ~ /^[ \t]*[0-9a-f]+:$/ { gsub(/[ \t]/, "", $2); printf "%s", $2 }' \
          | sha256sum | cut -d' ' -f1)

        # Output JSON object
        printf '{"name":"%s","offset":"0x%s","size":%d,"hash":"%s"}\n' \
          "$name" "$offset" "$dec_size" "${fn_hash:-unknown}"
      done
}
# Print a single JSON array holding the fingerprints of every executable
# ELF file below a directory.
#
# Arguments:
#   $1 - directory to scan
# The separators are added by one awk process at the end of the pipeline.
# A "first element" flag kept in a shell variable does not work here: each
# loop of a pipeline runs in its own subshell, so the flag would reset for
# every binary and the commas between binaries would be missing.
emit_function_fingerprints() {
  local binary

  echo "["
  # Sorted so the element order does not depend on directory order.
  find "$1" -type f -executable 2>/dev/null \
    | LC_ALL=C sort \
    | while IFS= read -r binary; do
        # Check if ELF
        file "$binary" 2>/dev/null | grep -q "ELF" || continue
        # Best effort: a binary that cannot be processed is skipped
        # rather than aborting the whole scan.
        extract_functions_from_binary "$binary" || true
      done \
    | awk 'NF { if (seen++) print ","; print }'
  echo "]"
}

# Find all ELF binaries in directory
emit_function_fingerprints "$DIR"

View File

@@ -0,0 +1,65 @@
#!/bin/sh
# Normalization helpers for reproducible builds.
# Intended to remove non-deterministic content from build artifacts.
#
# Usage: normalize.sh <directory>

# Abort on any unhandled failure and on use of an unset variable.
set -e
set -u

# Directory to process; the current directory when no argument is given.
DIR=${1:-.}
# Write a "[normalize]"-prefixed diagnostic line to stderr.
# All arguments are joined into the message.
log() {
  >&2 echo "[normalize]" "$*"
}
# Placeholder step for __DATE__ / __TIME__ macro timestamps.
# It only logs: per the original note, modern GCC already handles these
# through SOURCE_DATE_EPOCH, so nothing is rewritten here.
strip_date_time() {
  log "Stripping date/time macros..."
}
# Placeholder step for build-path normalization.
# It only logs: per the original note, paths are handled at compile time
# by -fdebug-prefix-map.
normalize_paths() {
  log "Normalizing build paths..."
}
# Rewrite every static archive (*.a) under $DIR in deterministic mode.
#
# Globals (read):
#   DIR - directory tree to scan
# Each archive is rewritten in place by objcopy. Creating a new archive
# with "ar r <new> <old>" is not an option: that stores the old archive as
# a single member of the new one instead of repacking its members.
# Best effort: when objcopy is missing the step is skipped, and an archive
# that cannot be rewritten is reported and left as it is.
normalize_archives() {
  local archive

  log "Normalizing ar archives..."

  if ! command -v objcopy >/dev/null 2>&1; then
    log "WARNING: objcopy not found; skipping archive normalization"
    return 0
  fi

  find "$DIR" -type f -name "*.a" | while IFS= read -r archive; do
    if ! objcopy --enable-deterministic-archives "$archive" 2>/dev/null; then
      log "WARNING: could not normalize $archive"
    fi
  done
}
# Walk the object files, shared libraries and executables under $DIR and
# select the ELF ones. No file is modified at present: the actions below
# are notes only.
strip_debug_timestamps() {
  log "Stripping debug timestamps..."

  find "$DIR" -type f \
    \( -name "*.o" -o -name "*.so" -o -name "*.so.*" -o -executable \) \
    | while read -r candidate; do
        # Only ELF files are of interest.
        if ! file "$candidate" 2>/dev/null | grep -q "ELF"; then
          continue
        fi
        # Build-id removal is disabled (it is regenerated if needed):
        # objcopy --remove-section=.note.gnu.build-id "$candidate" 2>/dev/null || true
        # DWARF timestamps are typically covered by SOURCE_DATE_EPOCH.
      done
}
# Placeholder step for tar archives; it only logs.
# Recommended flags when creating tars:
#   tar --sort=name --mtime="@${SOURCE_DATE_EPOCH}" --owner=0 --group=0 --numeric-owner
normalize_tars() {
  log "Normalizing tar archives..."
}
# Run all normalizations.
# Every step defined in this script is invoked, including strip_date_time
# and normalize_tars, which were defined but never called before.
for normalization_step in \
  strip_date_time \
  normalize_paths \
  normalize_archives \
  strip_debug_timestamps \
  normalize_tars; do
  "$normalization_step"
done
log "Normalization complete"