# File: git.stella-ops.org/datasets/reachability/obfuscation-test-set.yaml
# Snapshot: 2026-01-20 00:45:38 +02:00 (148 lines, 3.8 KiB, YAML)

# Obfuscation Test Set (MLEM-008)
# Ground-truth pairs for obfuscation resilience testing
test_cases:
# Each entry pairs a reference function (`original`) with an obfuscated build
# of it (`obfuscated`) and records whether a match is expected.
# NOTE(review): nesting is reconstructed from key names (the flat form repeats
# `binary` within one entry); confirm `expected_match` and `difficulty` are
# case-level keys.

# gt-0018: control flow flattening (ollvm) applied to SSL_read.
- id: gt-0018
  name: "Control Flow Flattening - OpenSSL"
  description: "OpenSSL function with control flow flattening obfuscation"
  original:
    library: openssl
    version: "3.0.12"
    function: SSL_read
    binary: libssl.so.3
  obfuscated:
    technique: control_flow_flattening
    tool: ollvm
    binary: libssl_obf.so.3
  expected_match: true
  difficulty: medium
# gt-0019: instruction substitution (ollvm) applied to zlib's inflate.
- id: gt-0019
  name: "Instruction Substitution - zlib"
  description: "zlib function with instruction substitution"
  original:
    library: zlib
    version: "1.3"
    function: inflate
    binary: libz.so.1.3
  obfuscated:
    technique: instruction_substitution
    tool: ollvm
    binary: libz_obf.so.1.3
  expected_match: true
  difficulty: easy
# gt-0020: bogus control flow (ollvm) applied to EVP_DigestFinal_ex
# in libcrypto.so.3.
- id: gt-0020
  name: "Bogus Control Flow - libcrypto"
  description: "libcrypto function with bogus control flow insertion"
  original:
    library: openssl
    version: "3.0.12"
    function: EVP_DigestFinal_ex
    binary: libcrypto.so.3
  obfuscated:
    technique: bogus_control_flow
    tool: ollvm
    binary: libcrypto_obf.so.3
  expected_match: true
  difficulty: medium
# gt-0021: dead code insertion (custom tool) applied to xmlParseDocument.
- id: gt-0021
  name: "Dead Code Insertion - libxml2"
  description: "libxml2 parser with dead code insertion"
  original:
    library: libxml2
    version: "2.12.4"
    function: xmlParseDocument
    binary: libxml2.so.2
  obfuscated:
    technique: dead_code_insertion
    tool: custom
    binary: libxml2_obf.so.2
  expected_match: true
  difficulty: easy
# gt-0022: register reassignment (custom tool) applied to SSL_connect.
- id: gt-0022
  name: "Register Reassignment - OpenSSL"
  description: "OpenSSL function with register reassignment"
  original:
    library: openssl
    version: "3.0.12"
    function: SSL_connect
    binary: libssl.so.3
  obfuscated:
    technique: register_reassignment
    tool: custom
    binary: libssl_regobf.so.3
  expected_match: true
  difficulty: easy
# gt-0023: several techniques stacked (tigress) on SSL_write; the individual
# techniques are listed under `techniques_applied`.
- id: gt-0023
  name: "Combined Obfuscation - Heavy"
  description: "Heavily obfuscated function with multiple techniques"
  original:
    library: openssl
    version: "3.0.12"
    function: SSL_write
    binary: libssl.so.3
  obfuscated:
    technique: combined
    techniques_applied:
    - control_flow_flattening
    - instruction_substitution
    - bogus_control_flow
    - string_encryption
    tool: tigress
    binary: libssl_heavy.so.3
  expected_match: true
  difficulty: hard
# gt-0024: VM-based virtualization (vmprotect) applied to AES_encrypt.
# This is the only case in the set where no match is expected.
- id: gt-0024
  name: "Virtualization Obfuscation"
  description: "Function protected with VM-based virtualization"
  original:
    library: openssl
    version: "3.0.12"
    function: AES_encrypt
    binary: libcrypto.so.3
  obfuscated:
    technique: virtualization
    tool: vmprotect
    binary: libcrypto_vm.so.3
  expected_match: false  # Known limitation - VM obfuscation is hard
  difficulty: extreme
# gt-0025: anti-decompilation tricks (custom tool) applied to zlib's compress;
# the individual tricks are listed under `tricks`.
- id: gt-0025
  name: "Anti-Decompilation"
  description: "Function with anti-decompilation tricks"
  original:
    library: zlib
    version: "1.3"
    function: compress
    binary: libz.so.1.3
  obfuscated:
    technique: anti_decompile
    tricks:
    - overlapping_instructions
    - stack_pointer_abuse
    - indirect_jumps
    tool: custom
    binary: libz_antidec.so.1.3
  expected_match: true
  difficulty: hard
# Summary information about the test set as a whole.
metadata:
  version: "1.0"
  created: "2026-01-19"
  description: "Obfuscation test set for ML embedding validation (MLEM-008)"
  # Number of entries under `test_cases`; keep in sync when adding cases.
  total_cases: 8
  # Count of cases per `difficulty` value; the counts sum to `total_cases`.
  difficulty_distribution:
    easy: 3
    medium: 2
    hard: 2
    extreme: 1
  # NOTE(review): nesting reconstructed from flattened text; confirm that
  # `validation_targets` belongs under `metadata` rather than at top level.
  validation_targets:
    accuracy_improvement: "+10% on obfuscated vs baseline"
    false_positive_rate: "< 2%"
    latency_impact: "< 50ms per function"