#!/usr/bin/env python3
"""Download KISA/KNVD advisory HTML pages for offline analysis."""
from __future__ import annotations

import argparse
import datetime as dt
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from urllib.error import HTTPError, URLError
from urllib.parse import parse_qs, urlsplit
from urllib.request import Request, urlopen

FEED_URL = "https://knvd.krcert.or.kr/rss/securityInfo.do"
USER_AGENT = "Mozilla/5.0 (compatible; StellaOpsOffline/1.0)"


def fetch(url: str) -> bytes:
    """Fetch *url* with the offline-analysis User-Agent and return the raw body.

    Raises:
        urllib.error.URLError / HTTPError: on network or HTTP failure.
    """
    req = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(req, timeout=15) as resp:
        return resp.read()


def iter_idxs(feed_xml: bytes) -> list[tuple[str, str]]:
    """Parse RSS *feed_xml* and return ``(IDX, title)`` pairs per advisory.

    Items whose ``<link>`` carries no ``IDX`` query parameter are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: if *feed_xml* is not well-formed XML.
    """
    root = ET.fromstring(feed_xml)
    items: list[tuple[str, str]] = []
    for item in root.findall(".//item"):
        title = (item.findtext("title") or "").strip()
        link = item.findtext("link") or ""
        idx = parse_qs(urlsplit(link).query).get("IDX", [None])[0]
        if idx:
            items.append((idx, title))
    return items


def capture(idx: str, title: str, out_dir: Path) -> Path:
    """Download one advisory detail page and save it as ``<out_dir>/<idx>.html``.

    Returns the path of the saved file.
    """
    url = f"https://knvd.krcert.or.kr/detailDos.do?IDX={idx}"
    html = fetch(url)
    target = out_dir / f"{idx}.html"
    target.write_bytes(html)
    print(f"saved {target} ({title})")
    return target


def main() -> int:
    """CLI entry point; returns a process exit code (0 on success)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=Path("seed-data/kisa/html"))
    parser.add_argument("--limit", type=int, default=10,
                        help="Maximum advisories to download")
    args = parser.parse_args()
    args.out.mkdir(parents=True, exist_ok=True)

    # datetime.utcnow() is deprecated (3.12+) and naive; use an aware UTC time.
    # Same "%Y-%m-%d %H:%M:%S" rendering as before.
    now = dt.datetime.now(dt.timezone.utc)
    print(f"[{now:%Y-%m-%d %H:%M:%S}Z] fetching RSS feed…")
    try:
        feed = fetch(FEED_URL)
    except (URLError, HTTPError) as exc:  # HTTPError subclasses URLError
        print("RSS fetch failed:", exc, file=sys.stderr)
        return 1

    try:
        items = iter_idxs(feed)[: args.limit]
    except ET.ParseError as exc:
        # Previously a malformed feed crashed with a traceback; fail cleanly
        # in the same stderr/exit-code style as the fetch failure above.
        print("RSS parse failed:", exc, file=sys.stderr)
        return 1
    if not items:
        print("No advisories found in feed", file=sys.stderr)
        return 1
    for idx, title in items:
        try:
            capture(idx, title, args.out)
        except (URLError, HTTPError) as exc:
            # Best-effort per advisory: report and continue with the rest.
            print(f"failed {idx}: {exc}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())