Add unit tests for SBOM ingestion and transformation
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implement `SbomIngestServiceCollectionExtensionsTests` to verify that the SBOM ingestion pipeline exports snapshots correctly.
- Create `SbomIngestTransformerTests` to ensure the transformation produces the expected nodes and edges, including deduplication of license nodes and normalization of timestamps (sketched below).
- Add `SbomSnapshotExporterTests` to cover export of the manifest, adjacency, nodes, and edges.
- Introduce `VexOverlayTransformerTests` to validate the transformation of VEX nodes and edges.
- Set up the test project file with the necessary dependencies and configuration.
- Include JSON fixture files for testing.
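The tests themselves are C#/xUnit classes in this commit; as a language-neutral illustration only, here is a minimal Python sketch (all names hypothetical, not taken from the test project) of the two transformer invariants called out above: duplicate license nodes collapse to a single node, and timestamps normalize to a canonical UTC form.

from datetime import datetime, timezone

def dedupe_license_nodes(nodes: list[dict]) -> list[dict]:
    # Keep the first node per license identifier, drop repeats.
    seen: set[str] = set()
    unique = []
    for node in nodes:
        if node["license_id"] not in seen:
            seen.add(node["license_id"])
            unique.append(node)
    return unique

def normalize_timestamp(value: str) -> str:
    # Accept any ISO-8601 offset and render a canonical UTC "Z" form.
    parsed = datetime.fromisoformat(value)
    return parsed.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

assert len(dedupe_license_nodes([{"license_id": "MIT"}, {"license_id": "MIT"}])) == 1
assert normalize_timestamp("2024-05-01T12:00:00+02:00") == "2024-05-01T10:00:00Z"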
scripts/kisa_capture_html.py (new file, +75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Download KISA/KNVD advisory HTML pages for offline analysis."""

from __future__ import annotations

import argparse
import datetime as dt
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from urllib.error import URLError
from urllib.parse import parse_qs, urlsplit
from urllib.request import Request, urlopen

FEED_URL = "https://knvd.krcert.or.kr/rss/securityInfo.do"
USER_AGENT = "Mozilla/5.0 (compatible; StellaOpsOffline/1.0)"


def fetch(url: str) -> bytes:
    """Fetch a URL with the offline-capture user agent and a 15 s timeout."""
    req = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(req, timeout=15) as resp:
        return resp.read()


def iter_idxs(feed_xml: bytes) -> list[tuple[str, str]]:
    """Extract (IDX, title) pairs from the KNVD RSS feed."""
    root = ET.fromstring(feed_xml)
    items = []
    for item in root.findall(".//item"):
        title = (item.findtext("title") or "").strip()
        link = item.findtext("link") or ""
        # Advisory links carry the identifier in the IDX query parameter.
        idx = parse_qs(urlsplit(link).query).get("IDX", [None])[0]
        if idx:
            items.append((idx, title))
    return items


def capture(idx: str, title: str, out_dir: Path) -> Path:
    """Download one advisory page and store it as <IDX>.html."""
    url = f"https://knvd.krcert.or.kr/detailDos.do?IDX={idx}"
    html = fetch(url)
    target = out_dir / f"{idx}.html"
    target.write_bytes(html)
    print(f"saved {target} ({title})")
    return target


def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=Path("seed-data/kisa/html"))
    parser.add_argument("--limit", type=int, default=10, help="Maximum advisories to download")
    args = parser.parse_args()

    args.out.mkdir(parents=True, exist_ok=True)

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC timestamp.
    print(f"[{dt.datetime.now(dt.timezone.utc):%Y-%m-%d %H:%M:%S}Z] fetching RSS feed…")
    try:
        feed = fetch(FEED_URL)
    except URLError as exc:  # HTTPError is a subclass of URLError
        print("RSS fetch failed:", exc, file=sys.stderr)
        return 1

    items = iter_idxs(feed)[: args.limit]
    if not items:
        print("No advisories found in feed", file=sys.stderr)
        return 1

    # Capture failures are reported but do not abort the remaining downloads.
    for idx, title in items:
        try:
            capture(idx, title, args.out)
        except URLError as exc:
            print(f"failed {idx}: {exc}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
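A quick way to sanity-check the feed parsing offline is to run iter_idxs against an inline RSS fixture. A minimal sketch, assuming the script is importable as kisa_capture_html (e.g. run from the scripts/ directory); the fixture mimics a single KNVD item:

from kisa_capture_html import iter_idxs

# Hypothetical fixture; the real feed carries many <item> entries.
FIXTURE = b"""<rss><channel><item>
<title>Sample advisory</title>
<link>https://knvd.krcert.or.kr/detailDos.do?IDX=12345</link>
</item></channel></rss>"""

assert iter_idxs(FIXTURE) == [("12345", "Sample advisory")]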