up
This commit is contained in:
164
ops/wine-csp/fetch-cryptopro.py
Normal file
164
ops/wine-csp/fetch-cryptopro.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
CryptoPro crawler (metadata only by default).
|
||||
Fetches https://cryptopro.ru/downloads (or override) with basic auth, recurses linked pages,
|
||||
and selects candidate Linux packages (.deb/.rpm/.tar.gz/.tgz/.run) or MSI as fallback.
|
||||
|
||||
Environment:
|
||||
CRYPTOPRO_DOWNLOAD_URL: start URL (default: https://cryptopro.ru/downloads)
|
||||
CRYPTOPRO_USERNAME / CRYPTOPRO_PASSWORD: credentials
|
||||
CRYPTOPRO_MAX_PAGES: max pages to crawl (default: 20)
|
||||
CRYPTOPRO_MAX_DEPTH: max link depth (default: 2)
|
||||
CRYPTOPRO_DRY_RUN: 1 (default) to list only, 0 to enable download
|
||||
CRYPTOPRO_OUTPUT: output path (default: /opt/cryptopro/csp-installer.bin)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import html.parser
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from collections import deque
|
||||
|
||||
# Headers sent with every request so the crawler identifies itself to the server.
SESSION_HEADERS = {
    "User-Agent": "StellaOps-CryptoPro-Crawler/1.0 (+https://stella-ops.org)",
}

# A URL counts as a package candidate when a known extension is followed by the
# end of the string, a query string ("?") or a fragment ("#"). Accepting "#"
# keeps links such as "pkg.deb#sha256=..." from being missed.
LINUX_PATTERNS = re.compile(r"\.(deb|rpm|tar\.gz|tgz|run)(?:$|[?#])", re.IGNORECASE)
MSI_PATTERN = re.compile(r"\.msi(?:$|[?#])", re.IGNORECASE)
|
||||
|
||||
|
||||
def log(msg: str) -> None:
    """Emit *msg* followed by a newline on stdout and flush right away."""
    print(msg, file=sys.stdout, flush=True)
|
||||
|
||||
|
||||
def warn(msg: str) -> None:
    """Emit *msg* on stderr with a ``[WARN] `` prefix and flush right away."""
    print("[WARN] " + msg, file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
class LinkParser(html.parser.HTMLParser):
    """HTML parser that records the ``href`` of every ``<a>`` start tag.

    After ``feed()``, ``links`` holds the collected values in document order,
    exactly as written in the markup (they are not resolved against any base).
    """

    def __init__(self):
        super().__init__()
        # Raw href values, in the order the anchors were encountered.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Collect the non-empty ``href`` of an anchor; ignore all other tags."""
        if tag == "a":
            # dict() keeps the last value when an attribute is repeated.
            target = dict(attrs).get("href")
            if target:
                self.links.append(target)
|
||||
|
||||
|
||||
def fetch(url: str, auth_handler) -> tuple[bytes, list[str]]:
    """Download *url* and extract the anchor links found in its body.

    Args:
        url: Absolute URL to request.
        auth_handler: A urllib handler (e.g. ``HTTPBasicAuthHandler``) that is
            installed into the opener used for this request.

    Returns:
        ``(data, links)`` where ``data`` is the raw response body as bytes and
        ``links`` are the ``href`` values of its ``<a>`` tags, unresolved.

    Raises:
        Any exception raised by ``opener.open`` or ``read`` (for example
        ``urllib.error.URLError``) propagates to the caller.
    """
    opener = urllib.request.build_opener(auth_handler)
    req = urllib.request.Request(url, headers=SESSION_HEADERS)
    with opener.open(req, timeout=30) as resp:
        data = resp.read()
        # Honour the charset declared in Content-Type; default to UTF-8.
        charset = resp.headers.get_content_charset() or "utf-8"

    try:
        text = data.decode(charset, errors="ignore")
    except LookupError:
        # The server declared a codec Python does not know.
        text = data.decode("utf-8", errors="ignore")

    parser = LinkParser()
    parser.feed(text)
    return data, parser.links
|
||||
|
||||
|
||||
def resolve_links(base: str, links: list[str]) -> list[str]:
    """Turn raw ``href`` values into absolute URLs relative to *base*.

    Links that cannot be fetched as pages are dropped: pure fragments
    (``#...``), empty/whitespace-only values, and ``mailto:``, ``javascript:``,
    ``tel:`` and ``data:`` links (matched case-insensitively). Fragments are
    removed from the result so that ``page#a`` and ``page#b`` resolve to the
    same URL. Hrefs that ``urljoin`` rejects as malformed are skipped.

    Args:
        base: URL of the page the links were found on.
        links: Raw ``href`` values as they appear in the markup.

    Returns:
        Absolute URLs in input order; duplicates are preserved.
    """
    skip_prefixes = ("mailto:", "javascript:", "tel:", "data:")
    resolved = []
    for href in links:
        href = href.strip()
        if not href or href.startswith("#"):
            continue
        if href.lower().startswith(skip_prefixes):
            continue
        try:
            joined = urllib.parse.urljoin(base, href)
        except ValueError:
            # e.g. an unbalanced "[" in the host part; one bad link must not
            # abort the whole crawl.
            continue
        absolute, _fragment = urllib.parse.urldefrag(joined)
        resolved.append(absolute)
    return resolved
|
||||
|
||||
|
||||
def choose_candidates(urls: list[str]) -> tuple[list[str], list[str]]:
    """Split *urls* into Linux package links and MSI links.

    A URL matching ``LINUX_PATTERNS`` is classified as Linux; a URL that does
    not but matches ``MSI_PATTERN`` is classified as MSI; anything else is
    dropped.

    Returns:
        ``(linux, msi)``; each list is de-duplicated and sorted so the result
        does not depend on the order the links were discovered in.
    """
    linux_hits = {u for u in urls if LINUX_PATTERNS.search(u)}
    # Linux classification wins when a URL matches both patterns.
    msi_hits = {u for u in urls if MSI_PATTERN.search(u)} - linux_hits
    return sorted(linux_hits), sorted(msi_hits)
|
||||
|
||||
|
||||
def download(url: str, output_path: str, auth_handler) -> int:
    """Download *url* to *output_path* and return the number of bytes written.

    The body is streamed in chunks into ``<output_path>.part`` and moved into
    place only after the transfer finished, so installers are never held in
    memory as a whole and a failed transfer never leaves a truncated file at
    *output_path*.

    Args:
        url: Absolute URL of the file to download.
        output_path: Destination file; its directory must already exist.
        auth_handler: A urllib handler installed into the opener for the request.

    Returns:
        Size of the file at *output_path* in bytes.

    Raises:
        Any exception from the request or from file I/O propagates after the
        partial file has been removed.
    """
    opener = urllib.request.build_opener(auth_handler)
    req = urllib.request.Request(url, headers=SESSION_HEADERS)
    partial_path = output_path + ".part"
    try:
        with opener.open(req, timeout=60) as resp, open(partial_path, "wb") as f:
            while chunk := resp.read(1024 * 1024):
                f.write(chunk)
        os.replace(partial_path, output_path)
    except BaseException:
        # Clean up on any failure (including Ctrl-C), then re-raise unchanged.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # the partial file was never created or is already gone
        raise
    return os.path.getsize(output_path)
|
||||
|
||||
|
||||
def _crawl(start_url: str, auth_handler, max_pages: int, max_depth: int) -> list[str]:
    """Breadth-first crawl from *start_url*; return every resolved link seen.

    At most *max_pages* pages are fetched successfully and links are followed
    at most *max_depth* levels below the start page. Pages that fail to load
    are reported with ``warn`` and skipped.
    """
    seen = set()
    queue = deque([(start_url, 0)])
    crawled = 0
    all_links = []

    while queue and crawled < max_pages:
        url, depth = queue.popleft()
        if url in seen or depth > max_depth:
            continue
        seen.add(url)
        try:
            data, links = fetch(url, auth_handler)
        except Exception as ex:  # noqa: BLE001 - one bad page must not stop the crawl
            warn(f"[crawl] failed {url}: {ex}")
            continue
        crawled += 1
        log(f"[crawl] {url} ({len(data)} bytes, depth={depth}, links={len(links)})")

        resolved = resolve_links(url, links)
        all_links.extend(resolved)
        if depth >= max_depth:
            continue
        for child in resolved:
            if child in seen:
                continue
            if LINUX_PATTERNS.search(child) or MSI_PATTERN.search(child):
                # An installer, not a page: record it as a candidate (above)
                # but never fetch it while crawling, otherwise dry-run mode
                # would download whole packages and burn the page budget.
                continue
            queue.append((child, depth + 1))

    return all_links


def main() -> int:
    """Crawl the download site, list candidate installers and optionally fetch one.

    Configuration is read from the environment:
        CRYPTOPRO_DOWNLOAD_URL: start URL (default: https://cryptopro.ru/downloads)
        CRYPTOPRO_USERNAME / CRYPTOPRO_PASSWORD: basic-auth credentials; when
            either is missing the crawl runs without credentials
        CRYPTOPRO_MAX_PAGES: max pages to crawl (default: 20)
        CRYPTOPRO_MAX_DEPTH: max link depth (default: 2)
        CRYPTOPRO_DRY_RUN: anything but "0" lists candidates only (default: 1)
        CRYPTOPRO_OUTPUT: output path (default: /opt/cryptopro/csp-installer.bin)

    Returns:
        Process exit code: 0 on success (including dry-run), 1 when no
        candidate was found or the download failed.
    """
    start_url = os.environ.get("CRYPTOPRO_DOWNLOAD_URL", "https://cryptopro.ru/downloads")
    # Credentials come only from the environment; none are embedded in source.
    username = os.environ.get("CRYPTOPRO_USERNAME", "")
    password = os.environ.get("CRYPTOPRO_PASSWORD", "")
    max_pages = int(os.environ.get("CRYPTOPRO_MAX_PAGES", "20"))
    max_depth = int(os.environ.get("CRYPTOPRO_MAX_DEPTH", "2"))
    dry_run = os.environ.get("CRYPTOPRO_DRY_RUN", "1") != "0"
    output_path = os.environ.get("CRYPTOPRO_OUTPUT", "/opt/cryptopro/csp-installer.bin")

    passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    if username and password:
        # Registered for start_url only, so credentials are offered only to
        # URLs under that prefix.
        passman.add_password(None, start_url, username, password)
    else:
        warn("CRYPTOPRO_USERNAME/CRYPTOPRO_PASSWORD not set; crawling without credentials.")
    auth_handler = urllib.request.HTTPBasicAuthHandler(passman)

    all_links = _crawl(start_url, auth_handler, max_pages, max_depth)

    linux, msi = choose_candidates(all_links)
    log(f"[crawl] Linux candidates: {len(linux)}; MSI candidates: {len(msi)}")
    if dry_run:
        log("[crawl] Dry-run mode: not downloading. Set CRYPTOPRO_DRY_RUN=0 and CRYPTOPRO_OUTPUT to enable download.")
        for idx, link in enumerate(linux[:10], 1):
            log(f" [linux {idx}] {link}")
        for idx, link in enumerate(msi[:5], 1):
            log(f" [msi {idx}] {link}")
        return 0

    # Prefer a Linux package; fall back to an MSI.
    if linux:
        target = linux[0]
    elif msi:
        target = msi[0]
    else:
        warn("No candidate downloads found.")
        return 1

    # Create the directory only once there is something to download.
    # dirname() is "" for a bare file name, and makedirs("") would raise.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    log(f"[download] Fetching {target} -> {output_path}")
    try:
        size = download(target, output_path, auth_handler)
    except Exception as ex:  # noqa: BLE001 - report and exit non-zero instead of a traceback
        warn(f"[download] failed {target}: {ex}")
        return 1
    log(f"[download] Complete, size={size} bytes")
    return 0
|
||||
|
||||
|
||||
# Script entry point: the return value of main() becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user