Add unit tests for SBOM ingestion and transformation
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implement `SbomIngestServiceCollectionExtensionsTests` to verify that the SBOM ingestion pipeline exports snapshots correctly.
- Create `SbomIngestTransformerTests` to ensure the transformation produces the expected nodes and edges, including deduplication of license nodes and normalization of timestamps (sketched below).
- Add `SbomSnapshotExporterTests` to cover export of the manifest, adjacency, nodes, and edges.
- Introduce `VexOverlayTransformerTests` to validate the transformation of VEX nodes and edges.
- Set up the test project file with the necessary dependencies and configuration.
- Include JSON fixture files for testing.
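The tests themselves are C#/xUnit classes in this commit; as a language-neutral illustration only, here is a minimal Python sketch (all names hypothetical, not taken from the test project) of the two transformer invariants called out above: duplicate license nodes collapse to a single node, and timestamps normalize to a canonical UTC form.

from datetime import datetime, timezone

def dedupe_license_nodes(nodes: list[dict]) -> list[dict]:
    # Keep the first node per license identifier, drop repeats.
    seen: set[str] = set()
    unique = []
    for node in nodes:
        if node["license_id"] not in seen:
            seen.add(node["license_id"])
            unique.append(node)
    return unique

def normalize_timestamp(value: str) -> str:
    # Accept any ISO-8601 offset and render a canonical UTC "Z" form.
    parsed = datetime.fromisoformat(value)
    return parsed.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

assert len(dedupe_license_nodes([{"license_id": "MIT"}, {"license_id": "MIT"}])) == 1
assert normalize_timestamp("2024-05-01T12:00:00+02:00") == "2024-05-01T10:00:00Z"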
scripts/kisa_capture_html.py (new file, +75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Download KISA/KNVD advisory HTML pages for offline analysis."""

from __future__ import annotations

import argparse
import datetime as dt
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from urllib.error import URLError
from urllib.parse import parse_qs, urlsplit
from urllib.request import Request, urlopen

FEED_URL = "https://knvd.krcert.or.kr/rss/securityInfo.do"
USER_AGENT = "Mozilla/5.0 (compatible; StellaOpsOffline/1.0)"


def fetch(url: str) -> bytes:
    """Fetch a URL with the offline-capture user agent and a 15 s timeout."""
    req = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(req, timeout=15) as resp:
        return resp.read()


def iter_idxs(feed_xml: bytes) -> list[tuple[str, str]]:
    """Extract (IDX, title) pairs from the KNVD RSS feed."""
    root = ET.fromstring(feed_xml)
    items = []
    for item in root.findall(".//item"):
        title = (item.findtext("title") or "").strip()
        link = item.findtext("link") or ""
        # Advisory links carry the identifier in the IDX query parameter.
        idx = parse_qs(urlsplit(link).query).get("IDX", [None])[0]
        if idx:
            items.append((idx, title))
    return items


def capture(idx: str, title: str, out_dir: Path) -> Path:
    """Download one advisory page and store it as <IDX>.html."""
    url = f"https://knvd.krcert.or.kr/detailDos.do?IDX={idx}"
    html = fetch(url)
    target = out_dir / f"{idx}.html"
    target.write_bytes(html)
    print(f"saved {target} ({title})")
    return target


def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=Path("seed-data/kisa/html"))
    parser.add_argument("--limit", type=int, default=10, help="Maximum advisories to download")
    args = parser.parse_args()

    args.out.mkdir(parents=True, exist_ok=True)

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC timestamp.
    print(f"[{dt.datetime.now(dt.timezone.utc):%Y-%m-%d %H:%M:%S}Z] fetching RSS feed…")
    try:
        feed = fetch(FEED_URL)
    except URLError as exc:  # HTTPError is a subclass of URLError
        print("RSS fetch failed:", exc, file=sys.stderr)
        return 1

    items = iter_idxs(feed)[: args.limit]
    if not items:
        print("No advisories found in feed", file=sys.stderr)
        return 1

    # Capture failures are reported but do not abort the remaining downloads.
    for idx, title in items:
        try:
            capture(idx, title, args.out)
        except URLError as exc:
            print(f"failed {idx}: {exc}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
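A quick way to sanity-check the feed parsing offline is to run iter_idxs against an inline RSS fixture. A minimal sketch, assuming the script is importable as kisa_capture_html (e.g. run from the scripts/ directory); the fixture mimics a single KNVD item:

from kisa_capture_html import iter_idxs

# Hypothetical fixture; the real feed carries many <item> entries.
FIXTURE = b"""<rss><channel><item>
<title>Sample advisory</title>
<link>https://knvd.krcert.or.kr/detailDos.do?IDX=12345</link>
</item></channel></rss>"""

assert iter_idxs(FIXTURE) == [("12345", "Sample advisory")]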