-- Stella Ops Analytics Schema (PostgreSQL) -- System of record: PostgreSQL -- Purpose: Star-schema analytics layer for SBOM and attestation data -- Sprint: 20260120_030 -- Version: 1.0.0 BEGIN; -- ============================================================================= -- EXTENSIONS -- ============================================================================= CREATE EXTENSION IF NOT EXISTS pgcrypto; -- ============================================================================= -- SCHEMA -- ============================================================================= CREATE SCHEMA IF NOT EXISTS analytics; COMMENT ON SCHEMA analytics IS 'Analytics star-schema for SBOM, attestation, and vulnerability data'; -- ============================================================================= -- VERSION TRACKING -- ============================================================================= CREATE TABLE IF NOT EXISTS analytics.schema_version ( version TEXT PRIMARY KEY, applied_at TIMESTAMPTZ NOT NULL DEFAULT now(), description TEXT ); INSERT INTO analytics.schema_version (version, description) VALUES ('1.0.0', 'Initial analytics schema - SBOM analytics lake') ON CONFLICT DO NOTHING; -- ============================================================================= -- ENUMS -- ============================================================================= DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'analytics_component_type') THEN CREATE TYPE analytics_component_type AS ENUM ( 'library', 'application', 'container', 'framework', 'operating-system', 'device', 'firmware', 'file' ); END IF; IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'analytics_license_category') THEN CREATE TYPE analytics_license_category AS ENUM ( 'permissive', 'copyleft-weak', 'copyleft-strong', 'proprietary', 'unknown' ); END IF; IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'analytics_severity') THEN CREATE TYPE analytics_severity AS ENUM ( 'critical', 'high', 'medium', 'low', 'none', 'unknown' ); END IF; IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'analytics_attestation_type') THEN CREATE TYPE analytics_attestation_type AS ENUM ( 'provenance', 'sbom', 'vex', 'build', 'scan', 'policy' ); END IF; END $$; -- ============================================================================= -- NORMALIZATION FUNCTIONS -- ============================================================================= -- Normalize supplier names for consistent grouping CREATE OR REPLACE FUNCTION analytics.normalize_supplier(raw_supplier TEXT) RETURNS TEXT AS $$ BEGIN IF raw_supplier IS NULL OR raw_supplier = '' THEN RETURN NULL; END IF; -- Lowercase, trim, remove common suffixes, normalize whitespace RETURN LOWER(TRIM( REGEXP_REPLACE( REGEXP_REPLACE(raw_supplier, '\s+(Inc\.?|LLC|Ltd\.?|Corp\.?|GmbH|B\.V\.|S\.A\.|PLC|Co\.)$', '', 'i'), '\s+', ' ', 'g' ) )); END; $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; COMMENT ON FUNCTION analytics.normalize_supplier IS 'Normalize supplier names by removing legal suffixes and standardizing case/whitespace'; -- Categorize SPDX license expressions CREATE OR REPLACE FUNCTION analytics.categorize_license(license_expr TEXT) RETURNS analytics_license_category AS $$ BEGIN IF license_expr IS NULL OR license_expr = '' THEN RETURN 'unknown'; END IF; -- Strong copyleft (without exceptions) IF license_expr ~* '(^GPL-[23]|AGPL|OSL|SSPL|EUPL|RPL|QPL|Sleepycat)' AND license_expr !~* 'WITH.*exception|WITH.*linking.*exception|WITH.*classpath.*exception' THEN RETURN 'copyleft-strong'; END IF; -- Weak copyleft IF license_expr ~* '(LGPL|MPL|EPL|CPL|CDDL|Artistic|MS-RL|APSL|IPL|SPL)' THEN RETURN 'copyleft-weak'; END IF; -- Permissive licenses IF license_expr ~* '(MIT|Apache|BSD|ISC|Zlib|Unlicense|CC0|WTFPL|0BSD|PostgreSQL|X11|Beerware|FTL|HPND|NTP|UPL)' THEN RETURN 'permissive'; END IF; -- Proprietary indicators IF license_expr ~* '(proprietary|commercial|all.rights.reserved|see.license|custom|confidential)' THEN RETURN 'proprietary'; END IF; -- Check for GPL with exceptions (treat as weak copyleft) IF license_expr ~* 'GPL.*WITH.*exception' THEN RETURN 'copyleft-weak'; END IF; RETURN 'unknown'; END; $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; COMMENT ON FUNCTION analytics.categorize_license IS 'Categorize SPDX license expressions into risk categories'; -- Extract PURL components CREATE OR REPLACE FUNCTION analytics.parse_purl(purl TEXT) RETURNS TABLE (purl_type TEXT, purl_namespace TEXT, purl_name TEXT, purl_version TEXT) AS $$ DECLARE matches TEXT[]; BEGIN -- Pattern: pkg:type/namespace/name@version or pkg:type/name@version IF purl IS NULL OR purl = '' THEN RETURN QUERY SELECT NULL::TEXT, NULL::TEXT, NULL::TEXT, NULL::TEXT; RETURN; END IF; -- Extract type purl_type := SUBSTRING(purl FROM 'pkg:([^/]+)/'); -- Extract version if present purl_version := SUBSTRING(purl FROM '@([^?#]+)'); -- Remove version and qualifiers for name extraction DECLARE name_part TEXT := REGEXP_REPLACE(purl, '@[^?#]+', ''); BEGIN name_part := REGEXP_REPLACE(name_part, '\?.*$', ''); name_part := REGEXP_REPLACE(name_part, '#.*$', ''); name_part := REGEXP_REPLACE(name_part, '^pkg:[^/]+/', ''); -- Check if there's a namespace IF name_part ~ '/' THEN purl_namespace := SUBSTRING(name_part FROM '^([^/]+)/'); purl_name := SUBSTRING(name_part FROM '/([^/]+)$'); ELSE purl_namespace := NULL; purl_name := name_part; END IF; END; RETURN QUERY SELECT purl_type, purl_namespace, purl_name, purl_version; END; $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; COMMENT ON FUNCTION analytics.parse_purl IS 'Parse Package URL into components (type, namespace, name, version)'; -- ============================================================================= -- CORE TABLES -- ============================================================================= -- Unified component registry (dimension table) CREATE TABLE IF NOT EXISTS analytics.components ( component_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- Canonical identifiers purl TEXT NOT NULL, -- Package URL (canonical identifier) purl_type TEXT NOT NULL, -- Extracted: maven, npm, pypi, golang, etc. purl_namespace TEXT, -- Extracted: group/org/scope purl_name TEXT NOT NULL, -- Extracted: package name purl_version TEXT, -- Extracted: version hash_sha256 TEXT, -- Content hash for deduplication -- Display fields name TEXT NOT NULL, -- Display name version TEXT, -- Display version description TEXT, -- Classification component_type analytics_component_type NOT NULL DEFAULT 'library', -- Supplier/maintainer supplier TEXT, -- Raw supplier name supplier_normalized TEXT, -- Normalized for grouping -- Licensing license_declared TEXT, -- Raw license string from SBOM license_concluded TEXT, -- Concluded SPDX expression license_category analytics_license_category DEFAULT 'unknown', -- Additional identifiers cpe TEXT, -- CPE identifier if available -- Usage metrics first_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(), last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(), sbom_count INT NOT NULL DEFAULT 1, -- Number of SBOMs containing this artifact_count INT NOT NULL DEFAULT 1, -- Number of artifacts containing this -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), UNIQUE (purl, hash_sha256) ); CREATE INDEX IF NOT EXISTS ix_components_purl ON analytics.components (purl); CREATE INDEX IF NOT EXISTS ix_components_purl_type ON analytics.components (purl_type); CREATE INDEX IF NOT EXISTS ix_components_supplier ON analytics.components (supplier_normalized); CREATE INDEX IF NOT EXISTS ix_components_license ON analytics.components (license_category, license_concluded); CREATE INDEX IF NOT EXISTS ix_components_type ON analytics.components (component_type); CREATE INDEX IF NOT EXISTS ix_components_hash ON analytics.components (hash_sha256) WHERE hash_sha256 IS NOT NULL; CREATE INDEX IF NOT EXISTS ix_components_last_seen ON analytics.components (last_seen_at DESC); COMMENT ON TABLE analytics.components IS 'Unified component registry - canonical source for all SBOM components'; -- Artifacts (container images, applications) - dimension table CREATE TABLE IF NOT EXISTS analytics.artifacts ( artifact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- Identity artifact_type TEXT NOT NULL, -- container, application, library, firmware name TEXT NOT NULL, -- Image/app name version TEXT, -- Tag/version digest TEXT, -- SHA256 digest purl TEXT, -- Package URL if applicable -- Source source_repo TEXT, -- Git repo URL source_ref TEXT, -- Git ref (branch/tag/commit) registry TEXT, -- Container registry -- Organization environment TEXT, -- dev, stage, prod team TEXT, -- Owning team service TEXT, -- Service name deployed_at TIMESTAMPTZ, -- Last deployment timestamp -- SBOM metadata sbom_digest TEXT, -- SHA256 of associated SBOM sbom_format TEXT, -- cyclonedx, spdx sbom_spec_version TEXT, -- 1.7, 3.0, etc. -- Pre-computed counts component_count INT DEFAULT 0, -- Number of components in SBOM vulnerability_count INT DEFAULT 0, -- Total vulns (pre-VEX) critical_count INT DEFAULT 0, -- Critical severity vulns high_count INT DEFAULT 0, -- High severity vulns medium_count INT DEFAULT 0, -- Medium severity vulns low_count INT DEFAULT 0, -- Low severity vulns -- Attestation status provenance_attested BOOLEAN DEFAULT FALSE, slsa_level INT, -- 0-4 -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), UNIQUE (digest) ); CREATE INDEX IF NOT EXISTS ix_artifacts_name_version ON analytics.artifacts (name, version); CREATE INDEX IF NOT EXISTS ix_artifacts_environment ON analytics.artifacts (environment); CREATE INDEX IF NOT EXISTS ix_artifacts_team ON analytics.artifacts (team); CREATE INDEX IF NOT EXISTS ix_artifacts_deployed ON analytics.artifacts (deployed_at DESC); CREATE INDEX IF NOT EXISTS ix_artifacts_digest ON analytics.artifacts (digest); CREATE INDEX IF NOT EXISTS ix_artifacts_service ON analytics.artifacts (service); COMMENT ON TABLE analytics.artifacts IS 'Container images and applications with SBOM and attestation metadata'; -- Artifact-component bridge (fact table) CREATE TABLE IF NOT EXISTS analytics.artifact_components ( artifact_id UUID NOT NULL REFERENCES analytics.artifacts(artifact_id) ON DELETE CASCADE, component_id UUID NOT NULL REFERENCES analytics.components(component_id) ON DELETE CASCADE, -- SBOM reference bom_ref TEXT, -- Original bom-ref for round-trips -- Dependency metadata scope TEXT, -- required, optional, excluded dependency_path TEXT[], -- Path from root (for transitive deps) depth INT DEFAULT 0, -- Dependency depth (0=direct) introduced_via TEXT, -- Direct dependency that introduced this -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), PRIMARY KEY (artifact_id, component_id) ); CREATE INDEX IF NOT EXISTS ix_artifact_components_component ON analytics.artifact_components (component_id); CREATE INDEX IF NOT EXISTS ix_artifact_components_depth ON analytics.artifact_components (depth); COMMENT ON TABLE analytics.artifact_components IS 'Bridge table linking artifacts to their SBOM components'; -- Component-vulnerability bridge (fact table) CREATE TABLE IF NOT EXISTS analytics.component_vulns ( component_id UUID NOT NULL REFERENCES analytics.components(component_id) ON DELETE CASCADE, vuln_id TEXT NOT NULL, -- CVE-YYYY-NNNNN or GHSA-xxxx-xxxx-xxxx -- Source source TEXT NOT NULL, -- nvd, ghsa, osv, vendor -- Severity severity analytics_severity NOT NULL, cvss_score NUMERIC(3,1), -- 0.0-10.0 cvss_vector TEXT, -- CVSS vector string -- Exploitability epss_score NUMERIC(5,4), -- 0.0000-1.0000 kev_listed BOOLEAN DEFAULT FALSE, -- CISA KEV -- Applicability affects BOOLEAN NOT NULL DEFAULT TRUE, -- Does this vuln affect this component? affected_versions TEXT, -- Version range expression -- Remediation fixed_version TEXT, -- First fixed version fix_available BOOLEAN DEFAULT FALSE, -- Provenance introduced_via TEXT, -- How vulnerability was introduced published_at TIMESTAMPTZ, -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), PRIMARY KEY (component_id, vuln_id) ); CREATE INDEX IF NOT EXISTS ix_component_vulns_vuln ON analytics.component_vulns (vuln_id); CREATE INDEX IF NOT EXISTS ix_component_vulns_severity ON analytics.component_vulns (severity, cvss_score DESC); CREATE INDEX IF NOT EXISTS ix_component_vulns_fixable ON analytics.component_vulns (fix_available) WHERE fix_available = TRUE; CREATE INDEX IF NOT EXISTS ix_component_vulns_kev ON analytics.component_vulns (kev_listed) WHERE kev_listed = TRUE; CREATE INDEX IF NOT EXISTS ix_component_vulns_epss ON analytics.component_vulns (epss_score DESC) WHERE epss_score IS NOT NULL; COMMENT ON TABLE analytics.component_vulns IS 'Component-to-vulnerability mapping with severity and remediation data'; -- Attestations analytics table CREATE TABLE IF NOT EXISTS analytics.attestations ( attestation_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), artifact_id UUID REFERENCES analytics.artifacts(artifact_id) ON DELETE SET NULL, -- Predicate predicate_type analytics_attestation_type NOT NULL, predicate_uri TEXT NOT NULL, -- Full predicate type URI -- Issuer issuer TEXT, -- Who signed issuer_normalized TEXT, -- Normalized issuer -- Build metadata builder_id TEXT, -- Build system identifier slsa_level INT, -- SLSA conformance level (0-4) -- DSSE envelope dsse_payload_hash TEXT NOT NULL, -- SHA256 of payload dsse_sig_algorithm TEXT, -- Signature algorithm -- Transparency log rekor_log_id TEXT, -- Transparency log ID rekor_log_index BIGINT, -- Log index -- Timestamps statement_time TIMESTAMPTZ, -- When statement was made -- Verification verified BOOLEAN DEFAULT FALSE, -- Signature verified verification_time TIMESTAMPTZ, -- Build provenance fields materials_hash TEXT, -- Hash of build materials source_uri TEXT, -- Source code URI workflow_ref TEXT, -- CI workflow reference -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), UNIQUE (dsse_payload_hash) ); CREATE INDEX IF NOT EXISTS ix_attestations_artifact ON analytics.attestations (artifact_id); CREATE INDEX IF NOT EXISTS ix_attestations_type ON analytics.attestations (predicate_type); CREATE INDEX IF NOT EXISTS ix_attestations_issuer ON analytics.attestations (issuer_normalized); CREATE INDEX IF NOT EXISTS ix_attestations_rekor ON analytics.attestations (rekor_log_id) WHERE rekor_log_id IS NOT NULL; CREATE INDEX IF NOT EXISTS ix_attestations_slsa ON analytics.attestations (slsa_level) WHERE slsa_level IS NOT NULL; COMMENT ON TABLE analytics.attestations IS 'Attestation metadata for analytics (provenance, SBOM, VEX attestations)'; -- VEX overrides (fact table) CREATE TABLE IF NOT EXISTS analytics.vex_overrides ( override_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), attestation_id UUID REFERENCES analytics.attestations(attestation_id) ON DELETE SET NULL, artifact_id UUID REFERENCES analytics.artifacts(artifact_id) ON DELETE CASCADE, -- Vulnerability vuln_id TEXT NOT NULL, component_purl TEXT, -- Optional: specific component -- Status status TEXT NOT NULL, -- not_affected, affected, fixed, under_investigation -- Justification justification TEXT, -- Justification category (CycloneDX/OpenVEX) justification_detail TEXT, -- Human-readable detail impact TEXT, -- Impact statement action_statement TEXT, -- Recommended action -- Decision metadata operator_id TEXT, -- Who made the decision confidence NUMERIC(3,2), -- 0.00-1.00 -- Validity valid_from TIMESTAMPTZ NOT NULL DEFAULT now(), valid_until TIMESTAMPTZ, -- Expiration -- Review tracking last_reviewed TIMESTAMPTZ, review_count INT DEFAULT 1, -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), updated_at TIMESTAMPTZ NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS ix_vex_overrides_artifact_vuln ON analytics.vex_overrides (artifact_id, vuln_id); CREATE INDEX IF NOT EXISTS ix_vex_overrides_vuln ON analytics.vex_overrides (vuln_id); CREATE INDEX IF NOT EXISTS ix_vex_overrides_status ON analytics.vex_overrides (status); CREATE INDEX IF NOT EXISTS ix_vex_overrides_active ON analytics.vex_overrides (artifact_id, vuln_id) WHERE valid_until IS NULL OR valid_until > now(); COMMENT ON TABLE analytics.vex_overrides IS 'VEX status overrides with justifications and validity periods'; -- ============================================================================= -- RAW PAYLOAD AUDIT TABLES -- ============================================================================= -- Raw SBOM storage for audit trail CREATE TABLE IF NOT EXISTS analytics.raw_sboms ( sbom_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), artifact_id UUID REFERENCES analytics.artifacts(artifact_id) ON DELETE SET NULL, -- Format format TEXT NOT NULL, -- cyclonedx, spdx spec_version TEXT NOT NULL, -- 1.7, 3.0.1, etc. -- Content content_hash TEXT NOT NULL UNIQUE, -- SHA256 of raw content content_size BIGINT NOT NULL, storage_uri TEXT NOT NULL, -- Object storage path -- Pipeline metadata ingest_version TEXT NOT NULL, -- Pipeline version schema_version TEXT NOT NULL, -- Schema version at ingest -- Audit ingested_at TIMESTAMPTZ NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS ix_raw_sboms_artifact ON analytics.raw_sboms (artifact_id); CREATE INDEX IF NOT EXISTS ix_raw_sboms_hash ON analytics.raw_sboms (content_hash); COMMENT ON TABLE analytics.raw_sboms IS 'Raw SBOM payloads for audit trail and reprocessing'; -- Raw attestation storage CREATE TABLE IF NOT EXISTS analytics.raw_attestations ( raw_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), attestation_id UUID REFERENCES analytics.attestations(attestation_id) ON DELETE SET NULL, -- Content content_hash TEXT NOT NULL UNIQUE, content_size BIGINT NOT NULL, storage_uri TEXT NOT NULL, -- Pipeline metadata ingest_version TEXT NOT NULL, schema_version TEXT NOT NULL, -- Audit ingested_at TIMESTAMPTZ NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS ix_raw_attestations_attestation ON analytics.raw_attestations (attestation_id); CREATE INDEX IF NOT EXISTS ix_raw_attestations_hash ON analytics.raw_attestations (content_hash); COMMENT ON TABLE analytics.raw_attestations IS 'Raw attestation payloads (DSSE envelopes) for audit trail'; -- ============================================================================= -- TIME-SERIES ROLLUP TABLES -- ============================================================================= -- Daily vulnerability counts CREATE TABLE IF NOT EXISTS analytics.daily_vulnerability_counts ( snapshot_date DATE NOT NULL, environment TEXT NOT NULL, team TEXT, severity analytics_severity NOT NULL, -- Counts total_vulns INT NOT NULL, fixable_vulns INT NOT NULL, vex_mitigated INT NOT NULL, kev_vulns INT NOT NULL, unique_cves INT NOT NULL, affected_artifacts INT NOT NULL, affected_components INT NOT NULL, -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), PRIMARY KEY (snapshot_date, environment, COALESCE(team, ''), severity) ); CREATE INDEX IF NOT EXISTS ix_daily_vuln_counts_date ON analytics.daily_vulnerability_counts (snapshot_date DESC); CREATE INDEX IF NOT EXISTS ix_daily_vuln_counts_env ON analytics.daily_vulnerability_counts (environment, snapshot_date DESC); COMMENT ON TABLE analytics.daily_vulnerability_counts IS 'Daily vulnerability count rollups for trend analysis'; -- Daily component counts CREATE TABLE IF NOT EXISTS analytics.daily_component_counts ( snapshot_date DATE NOT NULL, environment TEXT NOT NULL, team TEXT, license_category analytics_license_category NOT NULL, component_type analytics_component_type NOT NULL, -- Counts total_components INT NOT NULL, unique_suppliers INT NOT NULL, -- Audit created_at TIMESTAMPTZ NOT NULL DEFAULT now(), PRIMARY KEY (snapshot_date, environment, COALESCE(team, ''), license_category, component_type) ); CREATE INDEX IF NOT EXISTS ix_daily_comp_counts_date ON analytics.daily_component_counts (snapshot_date DESC); COMMENT ON TABLE analytics.daily_component_counts IS 'Daily component count rollups by license and type'; -- ============================================================================= -- MATERIALIZED VIEWS -- ============================================================================= -- Supplier concentration CREATE MATERIALIZED VIEW IF NOT EXISTS analytics.mv_supplier_concentration AS SELECT c.supplier_normalized AS supplier, COUNT(DISTINCT c.component_id) AS component_count, COUNT(DISTINCT ac.artifact_id) AS artifact_count, COUNT(DISTINCT a.team) AS team_count, ARRAY_AGG(DISTINCT a.environment) FILTER (WHERE a.environment IS NOT NULL) AS environments, SUM(CASE WHEN cv.severity = 'critical' THEN 1 ELSE 0 END) AS critical_vuln_count, SUM(CASE WHEN cv.severity = 'high' THEN 1 ELSE 0 END) AS high_vuln_count, MAX(c.last_seen_at) AS last_seen_at FROM analytics.components c LEFT JOIN analytics.artifact_components ac ON ac.component_id = c.component_id LEFT JOIN analytics.artifacts a ON a.artifact_id = ac.artifact_id LEFT JOIN analytics.component_vulns cv ON cv.component_id = c.component_id AND cv.affects = TRUE WHERE c.supplier_normalized IS NOT NULL GROUP BY c.supplier_normalized WITH DATA; CREATE UNIQUE INDEX IF NOT EXISTS ix_mv_supplier_concentration_supplier ON analytics.mv_supplier_concentration (supplier); COMMENT ON MATERIALIZED VIEW analytics.mv_supplier_concentration IS 'Pre-computed supplier concentration metrics'; -- License distribution CREATE MATERIALIZED VIEW IF NOT EXISTS analytics.mv_license_distribution AS SELECT c.license_concluded, c.license_category, COUNT(*) AS component_count, COUNT(DISTINCT ac.artifact_id) AS artifact_count, ARRAY_AGG(DISTINCT c.purl_type) FILTER (WHERE c.purl_type IS NOT NULL) AS ecosystems FROM analytics.components c LEFT JOIN analytics.artifact_components ac ON ac.component_id = c.component_id GROUP BY c.license_concluded, c.license_category WITH DATA; CREATE UNIQUE INDEX IF NOT EXISTS ix_mv_license_distribution_license ON analytics.mv_license_distribution (COALESCE(license_concluded, ''), license_category); COMMENT ON MATERIALIZED VIEW analytics.mv_license_distribution IS 'Pre-computed license distribution metrics'; -- Vulnerability exposure adjusted by VEX CREATE MATERIALIZED VIEW IF NOT EXISTS analytics.mv_vuln_exposure AS SELECT cv.vuln_id, cv.severity, cv.cvss_score, cv.epss_score, cv.kev_listed, cv.fix_available, COUNT(DISTINCT cv.component_id) AS raw_component_count, COUNT(DISTINCT ac.artifact_id) AS raw_artifact_count, COUNT(DISTINCT cv.component_id) FILTER ( WHERE NOT EXISTS ( SELECT 1 FROM analytics.vex_overrides vo WHERE vo.artifact_id = ac.artifact_id AND vo.vuln_id = cv.vuln_id AND vo.status = 'not_affected' AND (vo.valid_until IS NULL OR vo.valid_until > now()) ) ) AS effective_component_count, COUNT(DISTINCT ac.artifact_id) FILTER ( WHERE NOT EXISTS ( SELECT 1 FROM analytics.vex_overrides vo WHERE vo.artifact_id = ac.artifact_id AND vo.vuln_id = cv.vuln_id AND vo.status = 'not_affected' AND (vo.valid_until IS NULL OR vo.valid_until > now()) ) ) AS effective_artifact_count FROM analytics.component_vulns cv JOIN analytics.artifact_components ac ON ac.component_id = cv.component_id WHERE cv.affects = TRUE GROUP BY cv.vuln_id, cv.severity, cv.cvss_score, cv.epss_score, cv.kev_listed, cv.fix_available WITH DATA; CREATE UNIQUE INDEX IF NOT EXISTS ix_mv_vuln_exposure_vuln ON analytics.mv_vuln_exposure (vuln_id); COMMENT ON MATERIALIZED VIEW analytics.mv_vuln_exposure IS 'CVE exposure with VEX-adjusted impact counts'; -- Attestation coverage by environment/team CREATE MATERIALIZED VIEW IF NOT EXISTS analytics.mv_attestation_coverage AS SELECT a.environment, a.team, COUNT(*) AS total_artifacts, COUNT(*) FILTER (WHERE a.provenance_attested = TRUE) AS with_provenance, COUNT(*) FILTER (WHERE EXISTS ( SELECT 1 FROM analytics.attestations att WHERE att.artifact_id = a.artifact_id AND att.predicate_type = 'sbom' )) AS with_sbom_attestation, COUNT(*) FILTER (WHERE EXISTS ( SELECT 1 FROM analytics.attestations att WHERE att.artifact_id = a.artifact_id AND att.predicate_type = 'vex' )) AS with_vex_attestation, COUNT(*) FILTER (WHERE a.slsa_level >= 2) AS slsa_level_2_plus, COUNT(*) FILTER (WHERE a.slsa_level >= 3) AS slsa_level_3_plus, ROUND(100.0 * COUNT(*) FILTER (WHERE a.provenance_attested = TRUE) / NULLIF(COUNT(*), 0), 1) AS provenance_pct, ROUND(100.0 * COUNT(*) FILTER (WHERE a.slsa_level >= 2) / NULLIF(COUNT(*), 0), 1) AS slsa2_pct FROM analytics.artifacts a GROUP BY a.environment, a.team WITH DATA; CREATE UNIQUE INDEX IF NOT EXISTS ix_mv_attestation_coverage_env_team ON analytics.mv_attestation_coverage (COALESCE(environment, ''), COALESCE(team, '')); COMMENT ON MATERIALIZED VIEW analytics.mv_attestation_coverage IS 'Attestation coverage percentages by environment and team'; -- ============================================================================= -- STORED PROCEDURES FOR DAY-1 QUERIES -- ============================================================================= -- Top suppliers by component count CREATE OR REPLACE FUNCTION analytics.sp_top_suppliers(p_limit INT DEFAULT 20) RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT supplier, component_count, artifact_count, team_count, critical_vuln_count, high_vuln_count, environments FROM analytics.mv_supplier_concentration ORDER BY component_count DESC LIMIT p_limit ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_top_suppliers IS 'Get top suppliers by component count for supply chain risk analysis'; -- License distribution heatmap CREATE OR REPLACE FUNCTION analytics.sp_license_heatmap() RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT license_category, license_concluded, component_count, artifact_count, ecosystems FROM analytics.mv_license_distribution ORDER BY component_count DESC ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_license_heatmap IS 'Get license distribution for compliance heatmap'; -- CVE exposure adjusted by VEX CREATE OR REPLACE FUNCTION analytics.sp_vuln_exposure( p_environment TEXT DEFAULT NULL, p_min_severity TEXT DEFAULT 'low' ) RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT vuln_id, severity::TEXT, cvss_score, epss_score, kev_listed, fix_available, raw_component_count, raw_artifact_count, effective_component_count, effective_artifact_count, raw_artifact_count - effective_artifact_count AS vex_mitigated FROM analytics.mv_vuln_exposure WHERE effective_artifact_count > 0 AND severity::TEXT >= p_min_severity ORDER BY CASE severity WHEN 'critical' THEN 1 WHEN 'high' THEN 2 WHEN 'medium' THEN 3 WHEN 'low' THEN 4 ELSE 5 END, effective_artifact_count DESC LIMIT 50 ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_vuln_exposure IS 'Get CVE exposure with VEX-adjusted counts'; -- Fixable backlog CREATE OR REPLACE FUNCTION analytics.sp_fixable_backlog(p_environment TEXT DEFAULT NULL) RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT a.name AS service, a.environment, c.name AS component, c.version, cv.vuln_id, cv.severity::TEXT, cv.fixed_version FROM analytics.component_vulns cv JOIN analytics.components c ON c.component_id = cv.component_id JOIN analytics.artifact_components ac ON ac.component_id = c.component_id JOIN analytics.artifacts a ON a.artifact_id = ac.artifact_id LEFT JOIN analytics.vex_overrides vo ON vo.artifact_id = a.artifact_id AND vo.vuln_id = cv.vuln_id AND vo.status = 'not_affected' AND (vo.valid_until IS NULL OR vo.valid_until > now()) WHERE cv.affects = TRUE AND cv.fix_available = TRUE AND vo.override_id IS NULL AND (p_environment IS NULL OR a.environment = p_environment) ORDER BY CASE cv.severity WHEN 'critical' THEN 1 WHEN 'high' THEN 2 ELSE 3 END, a.name LIMIT 100 ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_fixable_backlog IS 'Get vulnerabilities with available fixes that are not VEX-mitigated'; -- Attestation coverage gaps CREATE OR REPLACE FUNCTION analytics.sp_attestation_gaps(p_environment TEXT DEFAULT NULL) RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT environment, team, total_artifacts, with_provenance, provenance_pct, slsa_level_2_plus, slsa2_pct, total_artifacts - with_provenance AS missing_provenance FROM analytics.mv_attestation_coverage WHERE (p_environment IS NULL OR environment = p_environment) ORDER BY provenance_pct ASC ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_attestation_gaps IS 'Get attestation coverage gaps by environment/team'; -- MTTR by severity (simplified - requires proper remediation tracking) CREATE OR REPLACE FUNCTION analytics.sp_mttr_by_severity(p_days INT DEFAULT 90) RETURNS JSON AS $$ BEGIN RETURN ( SELECT json_agg(row_to_json(t)) FROM ( SELECT severity::TEXT, COUNT(*) AS total_vulns, AVG(EXTRACT(EPOCH FROM (vo.valid_from - cv.published_at)) / 86400)::NUMERIC(10,2) AS avg_days_to_mitigate FROM analytics.component_vulns cv JOIN analytics.vex_overrides vo ON vo.vuln_id = cv.vuln_id AND vo.status = 'not_affected' WHERE cv.published_at >= now() - (p_days || ' days')::INTERVAL AND cv.published_at IS NOT NULL GROUP BY severity ORDER BY CASE severity WHEN 'critical' THEN 1 WHEN 'high' THEN 2 WHEN 'medium' THEN 3 ELSE 4 END ) t ); END; $$ LANGUAGE plpgsql STABLE; COMMENT ON FUNCTION analytics.sp_mttr_by_severity IS 'Get mean time to remediate by severity (last N days)'; -- ============================================================================= -- REFRESH PROCEDURES -- ============================================================================= -- Refresh all materialized views CREATE OR REPLACE FUNCTION analytics.refresh_all_views() RETURNS VOID AS $$ BEGIN REFRESH MATERIALIZED VIEW CONCURRENTLY analytics.mv_supplier_concentration; REFRESH MATERIALIZED VIEW CONCURRENTLY analytics.mv_license_distribution; REFRESH MATERIALIZED VIEW CONCURRENTLY analytics.mv_vuln_exposure; REFRESH MATERIALIZED VIEW CONCURRENTLY analytics.mv_attestation_coverage; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION analytics.refresh_all_views IS 'Refresh all analytics materialized views (run daily)'; -- Daily rollup procedure CREATE OR REPLACE FUNCTION analytics.compute_daily_rollups(p_date DATE DEFAULT CURRENT_DATE) RETURNS VOID AS $$ BEGIN -- Vulnerability counts INSERT INTO analytics.daily_vulnerability_counts ( snapshot_date, environment, team, severity, total_vulns, fixable_vulns, vex_mitigated, kev_vulns, unique_cves, affected_artifacts, affected_components ) SELECT p_date, a.environment, a.team, cv.severity, COUNT(*) AS total_vulns, COUNT(*) FILTER (WHERE cv.fix_available = TRUE) AS fixable_vulns, COUNT(*) FILTER (WHERE EXISTS ( SELECT 1 FROM analytics.vex_overrides vo WHERE vo.artifact_id = a.artifact_id AND vo.vuln_id = cv.vuln_id AND vo.status = 'not_affected' )) AS vex_mitigated, COUNT(*) FILTER (WHERE cv.kev_listed = TRUE) AS kev_vulns, COUNT(DISTINCT cv.vuln_id) AS unique_cves, COUNT(DISTINCT a.artifact_id) AS affected_artifacts, COUNT(DISTINCT cv.component_id) AS affected_components FROM analytics.artifacts a JOIN analytics.artifact_components ac ON ac.artifact_id = a.artifact_id JOIN analytics.component_vulns cv ON cv.component_id = ac.component_id AND cv.affects = TRUE GROUP BY a.environment, a.team, cv.severity ON CONFLICT (snapshot_date, environment, COALESCE(team, ''), severity) DO UPDATE SET total_vulns = EXCLUDED.total_vulns, fixable_vulns = EXCLUDED.fixable_vulns, vex_mitigated = EXCLUDED.vex_mitigated, kev_vulns = EXCLUDED.kev_vulns, unique_cves = EXCLUDED.unique_cves, affected_artifacts = EXCLUDED.affected_artifacts, affected_components = EXCLUDED.affected_components, created_at = now(); -- Component counts INSERT INTO analytics.daily_component_counts ( snapshot_date, environment, team, license_category, component_type, total_components, unique_suppliers ) SELECT p_date, a.environment, a.team, c.license_category, c.component_type, COUNT(DISTINCT c.component_id) AS total_components, COUNT(DISTINCT c.supplier_normalized) AS unique_suppliers FROM analytics.artifacts a JOIN analytics.artifact_components ac ON ac.artifact_id = a.artifact_id JOIN analytics.components c ON c.component_id = ac.component_id GROUP BY a.environment, a.team, c.license_category, c.component_type ON CONFLICT (snapshot_date, environment, COALESCE(team, ''), license_category, component_type) DO UPDATE SET total_components = EXCLUDED.total_components, unique_suppliers = EXCLUDED.unique_suppliers, created_at = now(); END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION analytics.compute_daily_rollups IS 'Compute daily vulnerability and component rollups for trend analysis'; COMMIT;