save progress
This commit is contained in:
220
devops/docker/corpus/scripts/init-test-data.sql
Normal file
220
devops/docker/corpus/scripts/init-test-data.sql
Normal file
@@ -0,0 +1,220 @@
|
||||
-- =============================================================================
|
||||
-- CORPUS TEST DATA - Minimal corpus for integration testing
|
||||
-- Copyright (c) StellaOps. All rights reserved.
|
||||
-- Licensed under AGPL-3.0-or-later.
|
||||
-- =============================================================================
|
||||
|
||||
-- Select the tenant for the rest of this session. None of the INSERTs below
-- supply tenant_id explicitly, yet most of their ON CONFLICT targets include it.
-- NOTE(review): presumably tenant_id defaults from this setting -- confirm
-- against the corpus schema.
SET SESSION app.tenant_id TO 'test-tenant';
|
||||
|
||||
-- =============================================================================
|
||||
-- LIBRARIES
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed the library catalogue with fixed ids so that later statements can
-- reference them. A row whose (tenant_id, name) already exists is skipped.
INSERT INTO corpus.libraries (
    id,
    name,
    description,
    homepage_url,
    source_repo
)
VALUES
    ('a0000001-0000-0000-0000-000000000001', 'glibc',   'GNU C Library',                 'https://www.gnu.org/software/libc/', 'https://sourceware.org/git/glibc.git'),
    ('a0000001-0000-0000-0000-000000000002', 'openssl', 'OpenSSL cryptographic library', 'https://www.openssl.org/',           'https://github.com/openssl/openssl.git'),
    ('a0000001-0000-0000-0000-000000000003', 'zlib',    'zlib compression library',      'https://zlib.net/',                  'https://github.com/madler/zlib.git'),
    ('a0000001-0000-0000-0000-000000000004', 'curl',    'libcurl transfer library',      'https://curl.se/',                   'https://github.com/curl/curl.git'),
    ('a0000001-0000-0000-0000-000000000005', 'sqlite',  'SQLite database engine',        'https://sqlite.org/',                'https://sqlite.org/src')
ON CONFLICT (tenant_id, name) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- LIBRARY VERSIONS (glibc, OpenSSL, zlib)
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed released versions for glibc, OpenSSL and zlib. library_id points at the
-- fixed ids used in the corpus.libraries seed. A row whose
-- (tenant_id, library_id, version) already exists is skipped.
INSERT INTO corpus.library_versions (
    id,
    library_id,
    version,
    release_date,
    is_security_release
)
VALUES
    -- glibc (library ...0001)
    ('b0000001-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000001', '2.17',   '2012-12-25', FALSE),
    ('b0000001-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000001', '2.28',   '2018-08-01', FALSE),
    ('b0000001-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000001', '2.31',   '2020-02-01', FALSE),
    ('b0000001-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000001', '2.35',   '2022-02-03', FALSE),
    ('b0000001-0000-0000-0000-000000000005', 'a0000001-0000-0000-0000-000000000001', '2.38',   '2023-07-31', FALSE),
    -- OpenSSL (library ...0002)
    ('b0000002-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000002', '1.0.2u', '2019-12-20', TRUE),
    ('b0000002-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000002', '1.1.1w', '2023-09-11', TRUE),
    ('b0000002-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000002', '3.0.12', '2023-10-24', TRUE),
    ('b0000002-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000002', '3.1.4',  '2023-10-24', TRUE),
    -- zlib (library ...0003)
    ('b0000003-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000003', '1.2.11', '2017-01-15', FALSE),
    ('b0000003-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000003', '1.2.13', '2022-10-13', TRUE),
    ('b0000003-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000003', '1.3.1',  '2024-01-22', FALSE)
ON CONFLICT (tenant_id, library_id, version) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- BUILD VARIANTS
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed compiled variants (architecture / toolchain combinations) for three of
-- the seeded library versions. The binary_sha256 values are 64-hex-digit
-- placeholder strings. A row matching the conflict target is skipped.
INSERT INTO corpus.build_variants (
    id,
    library_version_id,
    architecture,
    abi,
    compiler,
    compiler_version,
    optimization_level,
    binary_sha256
)
VALUES
    -- glibc 2.31 (version ...b0000001-...0003)
    ('c0000001-0000-0000-0000-000000000001', 'b0000001-0000-0000-0000-000000000003', 'x86_64',  'gnu', 'gcc', '9.3.0',  'O2', 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2'),
    ('c0000001-0000-0000-0000-000000000002', 'b0000001-0000-0000-0000-000000000003', 'aarch64', 'gnu', 'gcc', '9.3.0',  'O2', 'b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3'),
    ('c0000001-0000-0000-0000-000000000003', 'b0000001-0000-0000-0000-000000000003', 'armhf',   'gnu', 'gcc', '9.3.0',  'O2', 'c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'),
    -- glibc 2.35 (version ...b0000001-...0004)
    ('c0000002-0000-0000-0000-000000000001', 'b0000001-0000-0000-0000-000000000004', 'x86_64',  'gnu', 'gcc', '11.2.0', 'O2', 'd4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5'),
    ('c0000002-0000-0000-0000-000000000002', 'b0000001-0000-0000-0000-000000000004', 'aarch64', 'gnu', 'gcc', '11.2.0', 'O2', 'e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6'),
    -- OpenSSL 3.0.12 (version ...b0000002-...0003)
    ('c0000003-0000-0000-0000-000000000001', 'b0000002-0000-0000-0000-000000000003', 'x86_64',  'gnu', 'gcc', '11.2.0', 'O2', 'f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1'),
    ('c0000003-0000-0000-0000-000000000002', 'b0000002-0000-0000-0000-000000000003', 'aarch64', 'gnu', 'gcc', '11.2.0', 'O2', 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b3')
ON CONFLICT (tenant_id, library_version_id, architecture, abi, compiler, optimization_level) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- FUNCTIONS (Sample functions from glibc and OpenSSL)
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed sample functions for three x86_64 build variants: glibc 2.31,
-- glibc 2.35 and OpenSSL 3.0.12. In this data set name and demangled_name are
-- always equal and every function is marked exported.
-- A row whose (tenant_id, build_variant_id, name, address) already exists is
-- skipped.
INSERT INTO corpus.functions (
    id,
    build_variant_id,
    name,
    demangled_name,
    address,
    size_bytes,
    is_exported
)
VALUES
    -- glibc 2.31 x86_64 (variant c0000001-...0001)
    ('d0000001-0000-0000-0000-000000000001', 'c0000001-0000-0000-0000-000000000001', 'memcpy',  'memcpy',  140000, 256,  TRUE),
    ('d0000001-0000-0000-0000-000000000002', 'c0000001-0000-0000-0000-000000000001', 'memset',  'memset',  140256, 192,  TRUE),
    ('d0000001-0000-0000-0000-000000000003', 'c0000001-0000-0000-0000-000000000001', 'strlen',  'strlen',  140448, 128,  TRUE),
    ('d0000001-0000-0000-0000-000000000004', 'c0000001-0000-0000-0000-000000000001', 'strcmp',  'strcmp',  140576, 160,  TRUE),
    ('d0000001-0000-0000-0000-000000000005', 'c0000001-0000-0000-0000-000000000001', 'strcpy',  'strcpy',  140736, 144,  TRUE),
    ('d0000001-0000-0000-0000-000000000006', 'c0000001-0000-0000-0000-000000000001', 'malloc',  'malloc',  150000, 512,  TRUE),
    ('d0000001-0000-0000-0000-000000000007', 'c0000001-0000-0000-0000-000000000001', 'free',    'free',    150512, 384,  TRUE),
    ('d0000001-0000-0000-0000-000000000008', 'c0000001-0000-0000-0000-000000000001', 'realloc', 'realloc', 150896, 448,  TRUE),
    ('d0000001-0000-0000-0000-000000000009', 'c0000001-0000-0000-0000-000000000001', 'printf',  'printf',  160000, 1024, TRUE),
    ('d0000001-0000-0000-0000-000000000010', 'c0000001-0000-0000-0000-000000000001', 'sprintf', 'sprintf', 161024, 896,  TRUE),
    -- glibc 2.35 x86_64 (variant c0000002-...0001): a subset of the names
    -- above, seeded with different addresses and sizes
    ('d0000002-0000-0000-0000-000000000001', 'c0000002-0000-0000-0000-000000000001', 'memcpy',  'memcpy',  145000, 280,  TRUE),
    ('d0000002-0000-0000-0000-000000000002', 'c0000002-0000-0000-0000-000000000001', 'memset',  'memset',  145280, 208,  TRUE),
    ('d0000002-0000-0000-0000-000000000003', 'c0000002-0000-0000-0000-000000000001', 'strlen',  'strlen',  145488, 144,  TRUE),
    ('d0000002-0000-0000-0000-000000000004', 'c0000002-0000-0000-0000-000000000001', 'strcmp',  'strcmp',  145632, 176,  TRUE),
    ('d0000002-0000-0000-0000-000000000005', 'c0000002-0000-0000-0000-000000000001', 'strcpy',  'strcpy',  145808, 160,  TRUE),
    ('d0000002-0000-0000-0000-000000000006', 'c0000002-0000-0000-0000-000000000001', 'malloc',  'malloc',  155000, 544,  TRUE),
    ('d0000002-0000-0000-0000-000000000007', 'c0000002-0000-0000-0000-000000000001', 'free',    'free',    155544, 400,  TRUE),
    -- OpenSSL 3.0.12 x86_64 (variant c0000003-...0001)
    ('d0000003-0000-0000-0000-000000000001', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestInit_ex',  'EVP_DigestInit_ex',  200000, 320,  TRUE),
    ('d0000003-0000-0000-0000-000000000002', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestUpdate',   'EVP_DigestUpdate',   200320, 256,  TRUE),
    ('d0000003-0000-0000-0000-000000000003', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestFinal_ex', 'EVP_DigestFinal_ex', 200576, 288,  TRUE),
    ('d0000003-0000-0000-0000-000000000004', 'c0000003-0000-0000-0000-000000000001', 'EVP_EncryptInit_ex', 'EVP_EncryptInit_ex', 201000, 384,  TRUE),
    ('d0000003-0000-0000-0000-000000000005', 'c0000003-0000-0000-0000-000000000001', 'EVP_DecryptInit_ex', 'EVP_DecryptInit_ex', 201384, 384,  TRUE),
    ('d0000003-0000-0000-0000-000000000006', 'c0000003-0000-0000-0000-000000000001', 'SSL_CTX_new',        'SSL_CTX_new',        300000, 512,  TRUE),
    ('d0000003-0000-0000-0000-000000000007', 'c0000003-0000-0000-0000-000000000001', 'SSL_new',            'SSL_new',            300512, 384,  TRUE),
    ('d0000003-0000-0000-0000-000000000008', 'c0000003-0000-0000-0000-000000000001', 'SSL_connect',        'SSL_connect',        300896, 1024, TRUE)
ON CONFLICT (tenant_id, build_variant_id, name, address) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- FINGERPRINTS (Simulated semantic fingerprints)
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed simulated fingerprints for a handful of the seeded functions.
-- Each fingerprint value is a 32-byte blob decoded from a 64-digit hex
-- placeholder; metadata is a small JSON document describing the graph or
-- basic-block shape. A row whose (tenant_id, function_id, algorithm) already
-- exists is skipped.
INSERT INTO corpus.fingerprints (
    id,
    function_id,
    algorithm,
    fingerprint,
    metadata
)
VALUES
    -- memcpy, glibc 2.31: one row per algorithm
    (
        'e0000001-0000-0000-0000-000000000001',
        'd0000001-0000-0000-0000-000000000001',
        'semantic_ksg',
        decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60001', 'hex'),
        CAST('{"node_count": 45, "edge_count": 72, "api_calls": ["memcpy_internal"], "complexity": 8}' AS jsonb)
    ),
    (
        'e0000001-0000-0000-0000-000000000002',
        'd0000001-0000-0000-0000-000000000001',
        'instruction_bb',
        decode('b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a10001', 'hex'),
        CAST('{"bb_count": 8, "instruction_count": 64}' AS jsonb)
    ),
    -- memcpy, glibc 2.35: fingerprint differs from the 2.31 one only in its
    -- trailing bytes
    (
        'e0000002-0000-0000-0000-000000000001',
        'd0000002-0000-0000-0000-000000000001',
        'semantic_ksg',
        decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60002', 'hex'),
        CAST('{"node_count": 48, "edge_count": 76, "api_calls": ["memcpy_internal"], "complexity": 9}' AS jsonb)
    ),
    -- memset, glibc 2.31
    (
        'e0000003-0000-0000-0000-000000000001',
        'd0000001-0000-0000-0000-000000000002',
        'semantic_ksg',
        decode('c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b20001', 'hex'),
        CAST('{"node_count": 32, "edge_count": 48, "api_calls": [], "complexity": 5}' AS jsonb)
    ),
    -- strlen, glibc 2.31
    (
        'e0000004-0000-0000-0000-000000000001',
        'd0000001-0000-0000-0000-000000000003',
        'semantic_ksg',
        decode('d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c30001', 'hex'),
        CAST('{"node_count": 24, "edge_count": 32, "api_calls": [], "complexity": 4}' AS jsonb)
    ),
    -- malloc, glibc 2.31
    (
        'e0000005-0000-0000-0000-000000000001',
        'd0000001-0000-0000-0000-000000000006',
        'semantic_ksg',
        decode('e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d40001', 'hex'),
        CAST('{"node_count": 128, "edge_count": 256, "api_calls": ["sbrk", "mmap"], "complexity": 24}' AS jsonb)
    ),
    -- EVP_DigestInit_ex, OpenSSL 3.0.12
    (
        'e0000006-0000-0000-0000-000000000001',
        'd0000003-0000-0000-0000-000000000001',
        'semantic_ksg',
        decode('f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e50001', 'hex'),
        CAST('{"node_count": 56, "edge_count": 84, "api_calls": ["OPENSSL_init_crypto"], "complexity": 12}' AS jsonb)
    ),
    -- SSL_CTX_new, OpenSSL 3.0.12
    (
        'e0000007-0000-0000-0000-000000000001',
        'd0000003-0000-0000-0000-000000000006',
        'semantic_ksg',
        decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60003', 'hex'),
        CAST('{"node_count": 96, "edge_count": 144, "api_calls": ["CRYPTO_malloc", "SSL_CTX_set_options"], "complexity": 18}' AS jsonb)
    )
ON CONFLICT (tenant_id, function_id, algorithm) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- FUNCTION CLUSTERS
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed one cluster per canonical function name: four for glibc and two for
-- OpenSSL. A row whose (tenant_id, library_id, canonical_name) already exists
-- is skipped.
INSERT INTO corpus.function_clusters (
    id,
    library_id,
    canonical_name,
    description
)
VALUES
    -- glibc (library ...0001)
    ('f0000001-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000001', 'memcpy', 'Memory copy function across glibc versions'),
    ('f0000001-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000001', 'memset', 'Memory set function across glibc versions'),
    ('f0000001-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000001', 'strlen', 'String length function across glibc versions'),
    ('f0000001-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000001', 'malloc', 'Memory allocation function across glibc versions'),
    -- OpenSSL (library ...0002)
    ('f0000002-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000002', 'EVP_DigestInit_ex', 'EVP digest initialization across OpenSSL versions'),
    ('f0000002-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000002', 'SSL_CTX_new',       'SSL context creation across OpenSSL versions')
ON CONFLICT (tenant_id, library_id, canonical_name) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- CLUSTER MEMBERS
|
||||
-- =============================================================================
|
||||
|
||||
-- Attach the glibc 2.31 and glibc 2.35 copies of each function to its cluster.
-- The 2.31 row carries similarity 1.0 and the 2.35 row a slightly lower score.
-- No conflict target is named, so a clash on any unique constraint skips the
-- row. The two OpenSSL clusters receive no members here.
INSERT INTO corpus.cluster_members (
    cluster_id,
    function_id,
    similarity_to_centroid
)
VALUES
    -- memcpy
    ('f0000001-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000001', 1.0),
    ('f0000001-0000-0000-0000-000000000001', 'd0000002-0000-0000-0000-000000000001', 0.95),
    -- memset
    ('f0000001-0000-0000-0000-000000000002', 'd0000001-0000-0000-0000-000000000002', 1.0),
    ('f0000001-0000-0000-0000-000000000002', 'd0000002-0000-0000-0000-000000000002', 0.92),
    -- strlen
    ('f0000001-0000-0000-0000-000000000003', 'd0000001-0000-0000-0000-000000000003', 1.0),
    ('f0000001-0000-0000-0000-000000000003', 'd0000002-0000-0000-0000-000000000003', 0.94),
    -- malloc
    ('f0000001-0000-0000-0000-000000000004', 'd0000001-0000-0000-0000-000000000006', 1.0),
    ('f0000001-0000-0000-0000-000000000004', 'd0000002-0000-0000-0000-000000000006', 0.88)
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- CVE ASSOCIATIONS
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed function-to-CVE links so tests have rows with each evidence_type used
-- here ('advisory' and 'commit'). All rows are in the 'fixed' state.
--
-- NOTE(review): the links are structural samples rather than accurate
-- vulnerability data. CVE-2022-0778 is an infinite loop in OpenSSL
-- BN_mod_sqrt and CVE-2023-0286 concerns X509 certificate handling, but the
-- corpus seeds neither function, so the rows attach to the EVP digest and
-- SSL functions instead. Likewise CVE-2021-3999 (glibc getcwd) has no row
-- because getcwd is not part of the seeded functions.
--
-- A row whose (tenant_id, function_id, cve_id) already exists is skipped.
INSERT INTO corpus.function_cves (
    function_id,
    cve_id,
    affected_state,
    confidence,
    evidence_type
)
VALUES
    -- CVE-2022-0778 -> EVP_DigestInit_ex, EVP_DigestUpdate
    ('d0000003-0000-0000-0000-000000000001', 'CVE-2022-0778', 'fixed', 0.95, 'advisory'),
    ('d0000003-0000-0000-0000-000000000002', 'CVE-2022-0778', 'fixed', 0.95, 'advisory'),
    -- CVE-2023-0286 -> SSL_CTX_new, SSL_new
    ('d0000003-0000-0000-0000-000000000006', 'CVE-2023-0286', 'fixed', 0.90, 'commit'),
    ('d0000003-0000-0000-0000-000000000007', 'CVE-2023-0286', 'fixed', 0.90, 'commit')
ON CONFLICT (tenant_id, function_id, cve_id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- INGESTION LOG
|
||||
-- =============================================================================
|
||||
|
||||
-- Seed two completed full-ingest jobs, one for glibc and one for OpenSSL.
-- Timestamps are relative to now(), so they differ on every run: the glibc job
-- started one day ago and ran five minutes; the OpenSSL job started twelve
-- hours ago and ran three minutes.
-- NOTE(review): functions_indexed is 10 for glibc although 17 glibc functions
-- are seeded across two build variants -- confirm whether tests rely on 10.
-- No conflict target is named, so a clash on any unique constraint skips the
-- row.
INSERT INTO corpus.ingestion_jobs (
    id,
    library_id,
    job_type,
    status,
    functions_indexed,
    started_at,
    completed_at
)
VALUES
    (
        '99000001-0000-0000-0000-000000000001',
        'a0000001-0000-0000-0000-000000000001',
        'full_ingest',
        'completed',
        10,
        now() - INTERVAL '1 day',
        now() - INTERVAL '1 day' + INTERVAL '5 minutes'
    ),
    (
        '99000001-0000-0000-0000-000000000002',
        'a0000001-0000-0000-0000-000000000002',
        'full_ingest',
        'completed',
        8,
        now() - INTERVAL '12 hours',
        now() - INTERVAL '12 hours' + INTERVAL '3 minutes'
    )
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- SUMMARY
|
||||
-- =============================================================================
|
||||
|
||||
-- Emit a NOTICE summary of table sizes once seeding is done.
-- The counts are unfiltered: they cover every row the session can see in each
-- table, not only the rows inserted by this script.
DO $$
DECLARE
    n_libraries    INT;
    n_versions     INT;
    n_functions    INT;
    n_fingerprints INT;
BEGIN
    n_libraries    := (SELECT COUNT(*) FROM corpus.libraries);
    n_versions     := (SELECT COUNT(*) FROM corpus.library_versions);
    n_functions    := (SELECT COUNT(*) FROM corpus.functions);
    n_fingerprints := (SELECT COUNT(*) FROM corpus.fingerprints);

    RAISE NOTICE 'Corpus test data initialized:';
    RAISE NOTICE ' Libraries: %', n_libraries;
    RAISE NOTICE ' Versions: %', n_versions;
    RAISE NOTICE ' Functions: %', n_functions;
    RAISE NOTICE ' Fingerprints: %', n_fingerprints;
END $$;
|
||||
Reference in New Issue
Block a user