feat(docs): Add comprehensive documentation for Vexer, Vulnerability Explorer, and Zastava modules
- Introduced AGENTS.md, README.md, TASKS.md, and implementation_plan.md for Vexer, detailing mission, responsibilities, key components, and operational notes. - Established similar documentation structure for Vulnerability Explorer and Zastava modules, including their respective workflows, integrations, and observability notes. - Created risk scoring profiles documentation outlining the core workflow, factor model, governance, and deliverables. - Ensured all modules adhere to the Aggregation-Only Contract and maintain determinism and provenance in outputs.
This commit is contained in:
		
							
								
								
									
										155
									
								
								docs/modules/scanner/operations/analyzers-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								docs/modules/scanner/operations/analyzers-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,155 @@ | ||||
| { | ||||
|   "title": "StellaOps Scanner Analyzer Benchmarks", | ||||
|   "uid": "scanner-analyzer-bench", | ||||
|   "schemaVersion": 38, | ||||
|   "version": 1, | ||||
|   "editable": true, | ||||
|   "timezone": "", | ||||
|   "graphTooltip": 0, | ||||
|   "time": { | ||||
|     "from": "now-24h", | ||||
|     "to": "now" | ||||
|   }, | ||||
|   "templating": { | ||||
|     "list": [ | ||||
|       { | ||||
|         "name": "datasource", | ||||
|         "type": "datasource", | ||||
|         "query": "prometheus", | ||||
|         "refresh": 1, | ||||
|         "hide": 0, | ||||
|         "current": {} | ||||
|       } | ||||
|     ] | ||||
|   }, | ||||
|   "annotations": { | ||||
|     "list": [] | ||||
|   }, | ||||
|   "panels": [ | ||||
|     { | ||||
|       "id": 1, | ||||
|       "title": "Max Duration (ms)", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "ms" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         }, | ||||
|         "tooltip": { | ||||
|           "mode": "single", | ||||
|           "sort": "none" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "scanner_analyzer_bench_max_ms", | ||||
|           "legendFormat": "{{scenario}}", | ||||
|           "refId": "A" | ||||
|         }, | ||||
|         { | ||||
|           "expr": "scanner_analyzer_bench_baseline_max_ms", | ||||
|           "legendFormat": "{{scenario}} baseline", | ||||
|           "refId": "B" | ||||
|         } | ||||
|       ] | ||||
|     }, | ||||
|     { | ||||
|       "id": 2, | ||||
|       "title": "Regression Ratio vs Limit", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "percent", | ||||
|           "min": 0, | ||||
|           "thresholds": { | ||||
|             "mode": "absolute", | ||||
|             "steps": [ | ||||
|               { | ||||
|                 "color": "green", | ||||
|                 "value": null | ||||
|               }, | ||||
|               { | ||||
|                 "color": "red", | ||||
|                 "value": 20 | ||||
|               } | ||||
|             ] | ||||
|           } | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         }, | ||||
|         "tooltip": { | ||||
|           "mode": "multi", | ||||
|           "sort": "none" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "(scanner_analyzer_bench_regression_ratio - 1) * 100", | ||||
|           "legendFormat": "{{scenario}} regression %", | ||||
|           "refId": "A" | ||||
|         }, | ||||
|         { | ||||
|           "expr": "(scanner_analyzer_bench_regression_limit - 1) * 100", | ||||
|           "legendFormat": "{{scenario}} limit %", | ||||
|           "refId": "B" | ||||
|         } | ||||
|       ] | ||||
|     }, | ||||
|     { | ||||
|       "id": 3, | ||||
|       "title": "Breached Scenarios", | ||||
|       "type": "stat", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "displayName": "${__field.labels.scenario}", | ||||
|           "unit": "short" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "colorMode": "value", | ||||
|         "graphMode": "area", | ||||
|         "justifyMode": "center", | ||||
|         "reduceOptions": { | ||||
|           "calcs": [ | ||||
|             "last" | ||||
|           ], | ||||
|           "fields": "", | ||||
|           "values": false | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "scanner_analyzer_bench_regression_breached", | ||||
|           "legendFormat": "{{scenario}}", | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ] | ||||
|     } | ||||
|   ] | ||||
| } | ||||
							
								
								
									
										48
									
								
								docs/modules/scanner/operations/analyzers.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								docs/modules/scanner/operations/analyzers.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # Scanner Analyzer Benchmarks – Operations Guide | ||||
|  | ||||
| ## Purpose | ||||
| Keep the language analyzer microbench under the < 5 s SBOM pledge. CI emits Prometheus metrics and JSON fixtures so trend dashboards and alerts stay in lockstep with the repository baseline. | ||||
|  | ||||
| > **Grafana note:** Import `docs/modules/scanner/operations/analyzers-grafana-dashboard.json` into your Prometheus-backed Grafana stack to monitor `scanner_analyzer_bench_*` metrics and alert on regressions. | ||||
|  | ||||
| ## Publishing workflow | ||||
| 1. CI (or an engineer running locally) executes: | ||||
|    ```bash | ||||
|    dotnet run \ | ||||
|      --project src/Bench/StellaOps.Bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj \ | ||||
|      -- \ | ||||
|      --repo-root . \ | ||||
|      --out src/Bench/StellaOps.Bench/Scanner.Analyzers/baseline.csv \ | ||||
|      --json out/bench/scanner-analyzers/latest.json \ | ||||
|      --prom out/bench/scanner-analyzers/latest.prom \ | ||||
|      --commit "$(git rev-parse HEAD)" \ | ||||
|      --environment "${CI_ENVIRONMENT_NAME:-local}" | ||||
|    ``` | ||||
| 2. Publish the artefacts (`baseline.csv`, `latest.json`, `latest.prom`) to `bench-artifacts/<date>/`. | ||||
| 3. The CI job pushes `latest.prom` into Prometheus (e.g., via the Pushgateway or a node_exporter textfile collector); JSON lands in long-term storage for workbook snapshots. | ||||
| 4. The harness exits non-zero if: | ||||
|    - `max_ms` for any scenario breaches its configured threshold; or | ||||
|    - `max_ms` regresses ≥ 20 % versus `baseline.csv`. | ||||
|  | ||||
| ## Grafana dashboard | ||||
| - Import `docs/modules/scanner/operations/analyzers-grafana-dashboard.json`. | ||||
| - Point the template variable `datasource` to the Prometheus instance ingesting `scanner_analyzer_bench_*` metrics. | ||||
| - Panels: | ||||
|   - **Max Duration (ms)** – compares live runs vs baseline. | ||||
|   - **Regression Ratio vs Limit** – plots `(max / baseline_max - 1) * 100`. | ||||
|   - **Breached Scenarios** – stat panel sourced from `scanner_analyzer_bench_regression_breached`. | ||||
|  | ||||
| ## Alerting & on-call response | ||||
| - **Primary alert**: fire when `scanner_analyzer_bench_regression_ratio{scenario=~".+"} >= 1.20` for 2 consecutive samples (10 min default). Suggested PromQL: | ||||
|   ``` | ||||
|   min_over_time(scanner_analyzer_bench_regression_ratio[10m]) >= 1.20 | ||||
|   ``` | ||||
| - Suppress duplicates using the `scenario` label. | ||||
| - Pager payload should include `scenario`, `max_ms`, `baseline_max_ms`, and `commit`. | ||||
| - Immediate triage steps: | ||||
|   1. Check `latest.json` artefact for the failing scenario – confirm commit and environment. | ||||
|   2. Re-run the harness with `--captured-at` and `--baseline` pointing at the last known good CSV to verify determinism. | ||||
|   3. If regression persists, open an incident ticket tagged `scanner-analyzer-perf` and page the owning language guild. | ||||
|   4. Roll back the offending change or update the baseline after sign-off from the guild lead and Perf captain. | ||||
|  | ||||
| Document the outcome in `docs/12_PERFORMANCE_WORKBOOK.md` (section 8) so trendlines reflect any accepted regressions. | ||||
| @@ -0,0 +1,72 @@ | ||||
| # Entry-Point Dynamic Analysis | ||||
|  | ||||
| When we have access to a running container (e.g., during runtime posture checks), Stella Ops augments the static inference with live signals. This document describes the Observational Exec Graph (OEG) that powers the dynamic mode. | ||||
|  | ||||
| ## 1) Goals | ||||
|  | ||||
| - Capture the *actual* process tree and exec lineage after the container starts. | ||||
| - Identify steady-state processes (long-lived, listening, non-wrapper) even when supervision stacks are present. | ||||
| - Feed the same reduction and runtime-classification pipeline as the static analyser. | ||||
|  | ||||
| ## 2) Observational Exec Graph (OEG) | ||||
|  | ||||
| ### 2.1 Data sources | ||||
| - **Tracepoints / eBPF**: `sched_process_exec`, `sched_process_fork/clone`, and corresponding exit events give us pid, ppid, namespace, binary path, and argv snapshots with minimal overhead. | ||||
| - **/proc sampling**: for each tracked PID, capture `/proc/<pid>/{exe,cmdline,cwd}` and file descriptors (especially listening sockets). | ||||
| - **Namespace mapping**: normalise host PIDs to container PIDs (`NStgid`) so the graph is stable across runtimes. | ||||
|  | ||||
| ### 2.2 Graph model | ||||
|  | ||||
| ```csharp | ||||
| public sealed record ExecNode(int HostPid, int NsPid, int Ppid, string Exe, string[] Argv, long StartTicks); | ||||
| public sealed record ExecEdge(int ParentHostPid, int ChildHostPid, string Kind); // "clone" | "exec" | ||||
| ``` | ||||
|  | ||||
| - Nodes represent `exec()` events (post-exec image) and contain the final argv. | ||||
| - Edges labelled `clone` capture forks; `exec` edges show program replacements. | ||||
|  | ||||
| ### 2.3 Steady-state candidate selection | ||||
|  | ||||
| For each node compute features: | ||||
|  | ||||
| | Feature | Rationale | | ||||
| | --- | --- | | ||||
| | Lifetime (until sampling end) | Long-lived processes are more likely to be the real workload. | | ||||
| | Additional execs downstream | Zero execs after start implies terminal. | | ||||
| | Listening sockets | Owning `LISTEN` sockets strongly suggests a server. | | ||||
| | Wrapper catalogue hit | Mark nodes that match known shims (`tini`, `gosu`, `supervisord`, etc.). | | ||||
| | Children fan-out | Supervisors spawn multiple children and remain parents. | | ||||
|  | ||||
| Feed these into a scoring function; retain Top‑K candidates (usually 1–3) along with evidence. | ||||
|  | ||||
| ## 3) Integration with static pipeline | ||||
|  | ||||
| 1. For each steady-state candidate, snapshot the command/argv and normalise via `ResolvedCommand` (as in static mode). | ||||
| 2. Run wrapper reduction and ShellFlow analysis if the candidate is a script. | ||||
| 3. Invoke runtime detectors to classify the binary. | ||||
| 4. Merge dynamic evidence with static evidence. Conflicts drop confidence or trigger the “supervisor” classification. | ||||
|  | ||||
| ## 4) Supervisors & multi-service containers | ||||
|  | ||||
| Some images (e.g., `supervisord`, `s6`, `runit`) intentionally start multiple long-lived processes. Handle them as follows: | ||||
|  | ||||
| - Detect supervisor binaries from the wrapper catalogue. | ||||
| - Analyse their configuration (`/etc/supervisord.conf`, `/etc/services.d/*`, etc.) to enumerate child services statically. | ||||
| - Emit multiple `TerminalProcess` entries with individual confidence scores but mark the parent as `type = supervisor`. | ||||
|  | ||||
| ## 5) Operational hints | ||||
|  | ||||
| - Sampling window: 1–3 seconds after start is usually sufficient; extend in debug mode. | ||||
| - Overhead: prefer eBPF/tracepoints; fall back to periodic `/proc` walks when instrumentation isn’t available. | ||||
| - Security: honour namespace boundaries; never inspect processes outside the target container’s cgroup/namespace. | ||||
| - Failure mode: if dynamic capture fails, fall back to static mode and flag evidence accordingly (`"Dynamic capture unavailable"`). | ||||
|  | ||||
| ## 6) Deliverables | ||||
|  | ||||
| The dynamic reducer returns an `EntryTraceResult` populated with: | ||||
|  | ||||
| - `ExecGraph` containing nodes and edges for audit/debug. | ||||
| - `Terminals` listing steady-state processes (possibly multiple). | ||||
| - `Evidence` strings referencing dynamic signals (`"pid 47 listening on 0.0.0.0:8080"`, `"wrapper tini collapsed into /usr/local/bin/python"`). | ||||
|  | ||||
| Downstream modules (Policy, Vuln Explorer, Export Center) treat the result identically to static scans, enabling easy comparison between build-time and runtime observations. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-ccpp.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-ccpp.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — C / C++ | ||||
|  | ||||
| ## Signals to gather | ||||
| - Dynamically linked ELF (`.dynamic`) with GLIBC references (`GLIBC`, `GLIBCXX`, `libstdc++`). | ||||
| - Presence of `/lib64/ld-linux-*.so.*` loaders. | ||||
| - Absence of Go/Rust-specific markers. | ||||
| - Native supervisor binaries (`nginx`, `envoy`, custom C services). | ||||
| - Config files adjacent to the binary (`/etc/app.conf`, YAML/INI). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Treat this detector as the "native fallback": confirm no higher-priority language matched. | ||||
| - Collect shared library list to attach as evidence; highlight unusual dependencies. | ||||
| - Inspect `EXPOSE` ports and config directories to aid classification. | ||||
| - Normalise busybox-style symlinks (actual binary often `/bin/busybox` with applet name). | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost for ELF dynamic dependencies and loader presence. | ||||
| - Add evidence for config files, service managers, or env variables. | ||||
| - Penalise extremely small binaries without metadata (may be wrappers). | ||||
|  | ||||
| ## Edge cases | ||||
| - Static C binaries may look like Go; rely on build ID absence and library fingerprints. | ||||
| - When binary is part of a supervisor stack (e.g., `s6-svscan`), delegate classification to `Supervisor`. | ||||
| - Windows native services should be handled by PE analysis (`entrypoint-runtime-overview.md`). | ||||
							
								
								
									
										22
									
								
								docs/modules/scanner/operations/entrypoint-lang-deno.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								docs/modules/scanner/operations/entrypoint-lang-deno.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| # Entry-Point Runtime — Deno | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `deno` or path ends with `/bin/deno`. | ||||
| - Arguments include `run`, `task`, or `serve`, or the entry point is itself a `deno compile` output binary. | ||||
| - Presence of `deno.json` / `deno.jsonc`, `import_map.json`, or cached modules (`/deno-dir`). | ||||
| - Environment (`DENO_DIR`, `DENO_AUTH_TOKENS`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Resolve script URLs or local files; for remote sources record the URL as evidence. | ||||
| - Distinguish between `deno compile` executables and the Deno runtime invoking a script. | ||||
| - Recognise `deno task <name>` by reading tasks from `deno.json`. | ||||
| - ShellFlow should already collapse the official Docker entrypoint (`/usr/bin/env deno task start`). | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost for confirmed script/URL and config file presence. | ||||
| - Add evidence for permissions flags (`--allow-net`, `--allow-env`) to aid policy decisions. | ||||
| - Penalise when only the binary is present without scripts. | ||||
|  | ||||
| ## Edge cases | ||||
| - Deno deploy shims or adapters may further wrap the runtime; rely on wrapper catalogue. | ||||
| - When `deno compile` emits a standalone binary, treat it as C/C++ unless metadata persists. | ||||
							
								
								
									
										25
									
								
								docs/modules/scanner/operations/entrypoint-lang-dotnet.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								docs/modules/scanner/operations/entrypoint-lang-dotnet.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| # Entry-Point Runtime — .NET / C# | ||||
|  | ||||
| ## Signals to gather | ||||
| - Framework-dependent: `dotnet <app.dll>` invocation. | ||||
| - Adjacent `*.runtimeconfig.json` (parse `tfm`, framework references, roll-forward). | ||||
| - Self-contained or single-file apps: ELF/PE with `DOTNET_BUNDLE`, `System.Private.CoreLib`, or `coreclr` markers. | ||||
| - ASP.NET hints: `ASPNETCORE_URLS`, `appsettings.json`, presence of `wwwroot`. | ||||
| - Windows builds: PE with CLI header (managed assembly) or native host embedding a bundle. | ||||
|  | ||||
| ## Implementation notes | ||||
| - Resolve DLL paths relative to the working directory after env expansion. | ||||
| - When `dotnet` is invoked without a DLL, treat as low-confidence and record evidence. | ||||
| - For single-file executables, read the first few MB looking for bundle markers rather than full PE/ELF parsing. | ||||
| - Capture runtimeconfig metadata when available; store TFM in `LanguageHit.MainModule`. | ||||
| - Treat `dotnet exec` wrappers the same as `dotnet <dll>`. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Large confidence boost when both host (`dotnet`) and DLL artefact are present. | ||||
| - Add evidence for runtimeconfig parsing (`"runtimeconfig TFM=net8.0"`), bundle markers, or ASP.NET env vars. | ||||
| - Penalise detections lacking artefact confirmation. | ||||
|  | ||||
| ## Edge cases | ||||
| - Native AOT (`dotnet publish -p:PublishAot=true`) emits native binaries without managed markers—should fall back to C/C++ detector. | ||||
| - PowerShell-launched apps: ShellFlow should rewrite before the detector runs. | ||||
| - Side-by-side deployment where multiple DLLs exist—prefer the one passed to `dotnet` or specified via `DOTNET_STARTUP_HOOKS`. | ||||
							
								
								
									
										22
									
								
								docs/modules/scanner/operations/entrypoint-lang-elixir.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								docs/modules/scanner/operations/entrypoint-lang-elixir.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| # Entry-Point Runtime — Elixir / Erlang (BEAM) | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `elixir`, `iex`, `mix`, `erl`, `beam.smp`, or release scripts (`bin/app start`). | ||||
| - Release layouts: `_build/prod/rel/<app>/bin/<app>`, `releases/<version>/vm.args`, `sys.config`. | ||||
| - Environment variables (`MIX_ENV`, `RELEASE_COOKIE`, `RELEASE_NODE`). | ||||
| - Config files (`config/config.exs`, `config/prod.exs`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Recognise Distillery / mix release scripts that `exec` the real BEAM VM. | ||||
| - When release script is invoked with `eval`, treat the wrapper as part of the chain but classify runtime as `Elixir`. | ||||
| - Inspect `vm.args` for node name, cookie, and distributed settings. | ||||
| - For pure Erlang services (no Elixir), the same detector should fire using `erl` hints. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost for release directories and BEAM VM binaries (`beam.smp`). | ||||
| - Add evidence for config files and env vars. | ||||
| - Penalise minimal images lacking release artefacts (could be generic shell wrappers). | ||||
|  | ||||
| ## Edge cases | ||||
| - Phoenix apps often rely on `bin/server` wrapper—ShellFlow must collapse to release script. | ||||
| - Multi-node clusters may start multiple BEAM instances; treat as `Supervisor` if several nodes stay active. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-go.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-go.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — Go | ||||
|  | ||||
| ## Signals to gather | ||||
| - Statically linked ELF with `.note.go.buildid`. | ||||
| - `.gopclntab` section (function name table) or `Go build ID` strings. | ||||
| - Minimal dynamic dependencies (often none) and musl/glibc loader differences. | ||||
| - `GODEBUG`, `GOMAXPROCS`, `GOENV` environment variables. | ||||
| - Go module artefacts: `go.mod`, `go.sum`. | ||||
|  | ||||
| ## Implementation notes | ||||
| - Use ELF parsing to locate `.note.go.buildid`; fallback to scanning the first few MB for `Go build ID`. | ||||
| - Distinguish from Rust/C by checking `.dynsym` count, presence of Go-specific section names, and the absence of `GLIBCXX`. | ||||
| - For distroless images, rely solely on ELF traits since no package metadata is present. | ||||
| - Record binary path and module files as evidence. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Strong boost for `.note.go.buildid` or `.gopclntab`. | ||||
| - Add evidence for module files or env variables. | ||||
| - Penalise binaries with high numbers of shared libraries (likely C/C++). | ||||
|  | ||||
| ## Edge cases | ||||
| - TinyGo or stripped binaries may lack build IDs—fall back to heuristics (symbol patterns, text section). | ||||
| - CGO-enabled binaries include glibc dependencies; still treat as Go but mention CGO in evidence if detected. | ||||
| - Supervisors wrapping Go services (e.g., `envoy`) should be handled upstream by wrapper detection. | ||||
							
								
								
									
										29
									
								
								docs/modules/scanner/operations/entrypoint-lang-java.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								docs/modules/scanner/operations/entrypoint-lang-java.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| # Entry-Point Runtime — Java | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `java` / `javaw` or resides under `*/bin/java`. | ||||
| - `-jar <app.jar>` argument with the jar present in the VFS. | ||||
| - Manifest metadata (`META-INF/MANIFEST.MF`) containing `Main-Class` or `Start-Class`. | ||||
| - Spring Boot layout (`BOOT-INF/**`). | ||||
| - Classpath form (`-cp/-classpath`) followed by a main class token. | ||||
| - Presence of an embedded JRE (`lib/modules`, `jre/bin/java`). | ||||
| - `JAVA_OPTS`, `JAVA_TOOL_OPTIONS`, or `JAVA_HOME` environment hints. | ||||
| - `EXPOSE` ports often associated with Java servers (`8080`, `8443`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Expand env variables before resolving jar/class paths (supports `${VAR}`, `${VAR:-default}`). | ||||
| - For classpath mode, open a subset of jars to corroborate `Main-Class`. | ||||
| - Track when the app is started through shell wrappers (`exec java -jar "$APP_JAR"`); ShellFlow should already collapse these. | ||||
| - Distinguish between diagnostic invocations (e.g., `java -version`) and actual app launches by checking for jar/class arguments. | ||||
| - When multiple jars/classes are possible, prefer manifest-backed artefacts but record alternates in evidence. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Reward concrete artefacts (jar exists, manifest resolved). | ||||
| - Add evidence entries for each heuristic (`"MANIFEST Main-Class=com.example.Main"`, `"Spring Boot BOOT-INF detected"`). | ||||
| - Penalise missing artefacts or ambiguous classpaths. | ||||
| - Surface runtime-specific env/ports as supplementary clues, but keep their weight low to avoid false positives. | ||||
|  | ||||
| ## Edge cases | ||||
| - Launcher scripts that eventually run `java` — ensure ShellFlow surfaces the final command. | ||||
| - Multi-module fat jars: only expose the main entry jar in evidence; keep supporting jars as context. | ||||
| - Native image (`native-image` / GraalVM) should fall through to Go/Rust/C++ detectors when `java` binary is absent. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-nginx.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-nginx.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — Nginx | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `nginx`. | ||||
| - Config files: `/etc/nginx/nginx.conf`, `conf.d/*.conf`, `/usr/share/nginx/html`. | ||||
| - Environment (`NGINX_ENTRYPOINT_QUIET_LOGS`, `NGINX_PORT`, `NGINX_ENVSUBST_TEMPLATE`). | ||||
| - Listening sockets on 80/443 (dynamic mode) or `EXPOSE 80` (static). | ||||
| - Modules or scripts shipped with the official Docker entrypoint (`docker-entrypoint.sh` collapsing to `nginx -g "daemon off;"`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Parse `nginx.conf` (basic directive traversal) to extract worker processes, include chains, upstream definitions. | ||||
| - Handle official entrypoint idioms (`envsubst` templating) via ShellFlow. | ||||
| - Distinguish pure reverse proxies from PHP-FPM combos; when both `nginx` and `php-fpm` run, classify container as `Supervisor`. | ||||
| - Record static web content presence (`/usr/share/nginx/html/index.html`). | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost for confirmed config and workers. | ||||
| - Add evidence for templating features, env substitution, or modules. | ||||
| - Penalise if binary exists without config (likely not the entry point). | ||||
|  | ||||
| ## Edge cases | ||||
| - Alpine images may place configs under `/etc/nginx/http.d` instead of `/etc/nginx/conf.d`; include both. | ||||
| - Custom builds might rename binary (`openresty`, `tengine`); consider aliases if common. | ||||
| - Windows Nginx not supported; fall back to `Other`. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-node.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-node.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — Node.js | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `node`, `nodejs`, or path ends with `/bin/node`. | ||||
| - Scripts launched via package runners (`npm`, `yarn`, `pnpm node …`, `npx`). | ||||
| - Presence of `package.json` with `"main"` or `"scripts":{"start":…}` entries. | ||||
| - `NODE_ENV`, `NODE_OPTIONS`, or `NPM_PACKAGE_NAME` environment hints. | ||||
| - Process-manager scenarios: `pm2-runtime`, `pm2-docker`, `forever`, `nodemon`. | ||||
|  | ||||
| ## Implementation notes | ||||
| - Resolve script arguments (e.g., `node server.js`) relative to the working dir. | ||||
| - If invoked through `npm start`/`yarn run`, parse `package.json` to expand the actual script. | ||||
| - Support TypeScript loaders (`ts-node`, `node --loader`, `.mjs`) by inspecting extensions and flags. | ||||
| - Normalise shebang-based Node scripts (ShellFlow ensures `#!/usr/bin/env node` collapses to Node). | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost confidence when a concrete JS/TS entry file exists. | ||||
| - Add evidence for `package.json` metadata, PM2 ecosystem files, or `NODE_ENV` values. | ||||
| - Penalise when the entry file is missing or only package runners are present without scripts. | ||||
|  | ||||
| ## Edge cases | ||||
| - Multi-service supervisors (e.g., `pm2` managing multiple apps): treat as `Supervisor` and list programmes as children. | ||||
| - Serverless shims (e.g., Google Functions) wrap Node; prefer the user-provided handler script if detectable. | ||||
| - Distroless snapshots may omit package managers; rely on Node binary + script presence. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-phpfpm.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-phpfpm.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — PHP-FPM | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `php-fpm` or `php-fpm8*` variants; master process often invoked with `-F` or `--nodaemonize`. | ||||
| - Configuration files: `/usr/local/etc/php-fpm.conf`, `www.conf`, pool definitions under `php-fpm.d`. | ||||
| - PHP runtime artefacts: `composer.json`, `public/index.php`, `artisan`, `wp-config.php`. | ||||
| - Environment variables such as `PHP_FPM_CONFIG`, `PHP_INI_DIR`, `APP_ENV`. | ||||
| - Socket or port exposure (`listen = 9000`, `/run/php-fpm.sock`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Verify master process vs worker processes (master stays PID 1, workers forked). | ||||
| - Inspect pool configuration to extract listening endpoint and process manager mode. | ||||
| - If `docker-php-entrypoint` is involved, ShellFlow must expand to `php-fpm`. | ||||
| - Distinguish FPM from CLI invocations (`php script.php`) to avoid misclassification. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Reward confirmed config files and listening sockets. | ||||
| - Add evidence for application artefacts (Composer lockfile, framework directories). | ||||
| - Penalise when only the binary is present without config (could be CLI usage). | ||||
|  | ||||
| ## Edge cases | ||||
| - Images bundling Apache/Nginx front-ends should end up as `Supervisor` with PHP-FPM as a child service. | ||||
| - Some Alpine packages install `php-fpm7` naming—include aliases in detector. | ||||
| - When `php-fpm` is launched via `s6` or supervisor, rely on child detection to avoid double counting. | ||||
							
								
								
									
										25
									
								
								docs/modules/scanner/operations/entrypoint-lang-python.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								docs/modules/scanner/operations/entrypoint-lang-python.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| # Entry-Point Runtime — Python | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `python`, `python3`, `pypy`, or an interpreter symlink. | ||||
| - WSGI/ASGI servers: `gunicorn`, `uvicorn`, `hypercorn`, `daphne`. | ||||
| - Task and test runners: `celery -A app worker`, `rq worker`, `pytest`. | ||||
| - Presence of `requirements.txt`, `pyproject.toml`, `setup.cfg`, or `Pipfile`. | ||||
| - `PYTHONPATH`, `PYTHONUNBUFFERED`, `DJANGO_SETTINGS_MODULE`, `FLASK_APP`, or application-specific env vars. | ||||
| - Virtualenv detection (`/venv/bin/python`, `pyvenv.cfg`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - When invoked as `python -m module`, resolve the module to a path if possible. | ||||
| - For WSGI/ASGI servers, inspect command arguments (`app:app`, `module:create_app`) and config files. | ||||
| - Recognise wrapper scripts such as `docker-entrypoint.py` that eventually `exec "$@"`. | ||||
| - Support zipped apps or single-file bundles by checking `zipapp` signatures. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Increase confidence when module or script exists and dependencies are present. | ||||
| - Capture evidence for env variables, config files, or known server arguments. | ||||
| - Penalise ambiguous invocations (e.g., `python -c "..."` without persistent service). | ||||
|  | ||||
| ## Edge cases | ||||
| - Supervisors launching multiple Python workers fall back to `Supervisor` classification with Python listed as child. | ||||
| - Conda environments use different directory structures; look for `conda-meta` directories. | ||||
| - Alpine or distroless images may ship `python` symlinks without standard libs—ensure script presence before final classification. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-ruby.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-ruby.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — Ruby | ||||
|  | ||||
| ## Signals to gather | ||||
| - `argv0` equals `ruby`, `bundle`, `bundler`, `rackup`, `puma`, `unicorn`, `sidekiq`, or `resque`. | ||||
| - Bundler scripts: `bundle exec <cmd>`; Gemfile and `Gemfile.lock`. | ||||
| - Rails and Rack hints: `config.ru`, `bin/rails`, `bin/rake`. | ||||
| - Background jobs: `sidekiq`, `delayed_job`, `resque`. | ||||
| - Environment variables (`RAILS_ENV`, `RACK_ENV`, `BUNDLE_GEMFILE`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Normalise `bundle exec` by skipping the bundler wrapper and targeting the actual command. | ||||
| - Resolve script paths relative to the working directory. | ||||
| - For `puma`/`unicorn`, parse config files (`config/puma.rb`, `config/unicorn.rb`) to gather ports/workers. | ||||
| - Recognise `foreman start` or `overmind` launching Procfile processes—may devolve to `Supervisor` classification. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Boost confidence when `Gemfile.lock` exists and the requested server script is found. | ||||
| - Add evidence for env variables and config files. | ||||
| - Penalise ambiguous CLI invocations or missing artefacts. | ||||
|  | ||||
| ## Edge cases | ||||
| - Alpine or distroless images may rely on `ruby` symlinks; confirm binary presence. | ||||
| - JRuby (running on Java) may trigger both Ruby and Java signals—prefer Ruby if `ruby`/`jruby` interpreter is explicit. | ||||
| - Supervisors launching multiple Ruby workers should produce a single `Supervisor` entry with Ruby children. | ||||
							
								
								
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-rust.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								docs/modules/scanner/operations/entrypoint-lang-rust.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # Entry-Point Runtime — Rust | ||||
|  | ||||
| ## Signals to gather | ||||
| - ELF binaries with DWARF producer strings containing `rustc`. | ||||
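| - Rust-mangled symbols: legacy names of the form `_ZN…17h<hash>E` (the bare `_ZN` prefix is shared with C++, so require the trailing hash) or v0 names prefixed with `_R`; or a `.rustc` section. | ||||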
| - Presence of `panic=abort` strings, `Rust` metadata, or Cargo artefacts (`Cargo.toml`, `Cargo.lock`). | ||||
| - Statically linked (no `.dynamic` entries) in many cases, or musl loader (`/lib/ld-musl-x86_64.so.1`). | ||||
| - Environment such as `RUST_LOG`, `RUST_BACKTRACE`. | ||||
|  | ||||
| ## Implementation notes | ||||
| - Parse DWARF `.debug_info` when available; short-circuit by scanning `.comment` sections for `rustc`. | ||||
| - Distinguish from Go by the absence of `.note.go.buildid`. | ||||
| - When Cargo artefacts exist, include target name and profile in evidence. | ||||
| - For binaries built with `--target x86_64-pc-windows-gnu`, treat them under the same detector (PE + Rust markers). | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Reward DWARF producer strings, Cargo files, and Rust-specific env vars. | ||||
| - Penalise when only generic static binary traits are present (may defer to C/C++). | ||||
| - Mention musl vs glibc loader differences for observability. | ||||
|  | ||||
| ## Edge cases | ||||
| - Rust compiled to WebAssembly or run inside Wasmtime falls outside this detector; leave as `Other`. | ||||
| - Stripped binaries without DWARF or comments may be indistinguishable from C—fall back to C/C++ and add note. | ||||
| - Supervisors launching multiple Rust binaries are handled upstream by the `Supervisor` classification. | ||||
| @@ -0,0 +1,25 @@ | ||||
| # Entry-Point Runtime — Supervisors | ||||
|  | ||||
| Some containers intentionally launch multiple long-lived services (sidecars, appliance images, `supervisord`, `s6`, `runit`, `pm2`). Instead of forcing a single runtime classification, the detector can emit a `Supervisor` entry with child services enumerated separately. | ||||
|  | ||||
| ## Signals to gather | ||||
| - Known supervisor binaries: `supervisord`, `s6-svscan`, `s6-supervise`, `runsvdir`, `pm2-runtime`, `forego`, `foreman`, `overmind`. | ||||
| - Configuration files: `/etc/supervisord.conf`, `/etc/s6/*.conf`, `Procfile`, `ecosystem.config.js`. | ||||
| - Multiple child processes that remain active after startup. | ||||
| - Environment variables controlling supervisor behaviour (`SUPERVISOR_*`, `PM2_HOME`, `S6_CMD_WAIT_FOR_SERVICES`). | ||||
|  | ||||
| ## Implementation notes | ||||
| - Keep the supervisor as the primary terminal but query configuration to list child commands. | ||||
| - For each child, run the usual reduction + runtime detection and attach results as derived evidence. | ||||
| - When configuration is templated (e.g., `envsubst`), evaluate ShellFlow output to resolve final commands. | ||||
| - Record scheduling details (autorestart, process limits) relevant for incident response. | ||||
|  | ||||
| ## Evidence & scoring | ||||
| - Supervisor detection yields `LanguageType.Supervisor` with mid-level confidence (0.6–0.7). | ||||
| - Confidence increases when configuration explicitly lists services and child processes are observed (dynamic mode). | ||||
| - Provide evidence for each child service (`"manages: php-fpm on /run/php-fpm.sock"`, `"manages: nginx listening on 0.0.0.0:80"`). | ||||
|  | ||||
| ## Edge cases | ||||
| - Docker Compose-style images using `bash` to run multiple services should also map here if ShellFlow detects multiple `&` background jobs. | ||||
| - Ensure we do not classify minimal init shims (`tini`, `dumb-init`) as supervisors—they should be collapsed. | ||||
| - When supervisor manages only one child, collapse to the child runtime and drop the supervisor evidence to avoid noise. | ||||
							
								
								
									
										94
									
								
								docs/modules/scanner/operations/entrypoint-problem.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								docs/modules/scanner/operations/entrypoint-problem.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| # Entry-Point Detection — Problem & Architecture | ||||
|  | ||||
| ## 1) Why this exists | ||||
|  | ||||
| Container images rarely expose their *real* workload directly. Shell wrappers, init shims, supervisors, or language launchers often sit between the Dockerfile `ENTRYPOINT`/`CMD` values and the program you actually care about. Stella Ops needs a deterministic, explainable way to map any container image (or running container) to a single logical entry point that downstream systems can reason about. | ||||
|  | ||||
| We define the target artefact as the tuple below: | ||||
|  | ||||
| ```jsonc | ||||
| { | ||||
|   "type": "java|dotnet|go|python|node|ruby|php-fpm|c/c++|rust|nginx|deno|elixir|supervisor|other", | ||||
|   "resolvedBinary": "/app/app.jar | /app/app.dll | /app/server | /usr/local/bin/node", | ||||
|   "args": ["..."], | ||||
|   "confidence": 0.00..1.00, | ||||
|   "evidence": [ | ||||
|     "why we believe this" | ||||
|   ], | ||||
|   "chain": [ | ||||
|     {"from": "/bin/sh -c", "to": "/entrypoint.sh", "why": "ENTRYPOINT shell-form"}, | ||||
|     {"from": "/entrypoint.sh", "to": "java -jar orders.jar", "why": "exec \"$@\" with java default"} | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Constraints: | ||||
|  | ||||
| - Static first: no `/proc`, no `ptrace`, no customer code execution when scanning images. | ||||
| - Honour Docker/OCI precedence (`ENTRYPOINT` vs `CMD`, shell- vs exec-form, Windows `Shell` overrides). | ||||
| - Work on distroless and multi-arch images as well as traditional distro bases. | ||||
| - Emit auditable evidence and reduction chains so policy decisions are explainable. | ||||
|  | ||||
| ## 2) Dual-mode architecture | ||||
|  | ||||
| The scanner exposes a single façade but routes to two reducers: | ||||
|  | ||||
| ``` | ||||
| Scanner.EntryTrace/ | ||||
|   Common/ | ||||
|     OciImageReader.cs | ||||
|     OverlayVfs.cs | ||||
|     Heuristics/ | ||||
|     Models/ | ||||
|   Dynamic/ProcReducer.cs   // running container | ||||
|   Static/ImageReducer.cs   // static image inference | ||||
| ``` | ||||
|  | ||||
| Selection logic: | ||||
|  | ||||
| ```csharp | ||||
| IEntryReducer reducer = container.IsRunning | ||||
|   ? new ProcReducer() | ||||
|   : new ImageReducer(); | ||||
| var result = reducer.TraceAndReduce(ct); | ||||
| ``` | ||||
|  | ||||
| Both reducers publish a harmonised `EntryTraceResult`, allowing downstream modules (Policy Engine, Vuln Explorer, Export Center) to consume the same shape regardless of data source. | ||||
|  | ||||
| ## 3) Pipeline overview | ||||
|  | ||||
| ### 3.1 Static images | ||||
|  | ||||
| 1. Pull or load OCI image. | ||||
| 2. Compose final argv (`ENTRYPOINT ++ CMD`), respecting shell overrides. | ||||
| 3. Overlay layers with whiteout support via a lazy virtual filesystem. | ||||
| 4. Resolve paths, shebangs, wrappers, and scripts until a terminal candidate emerges. | ||||
| 5. Classify runtime family, identify application artefact, score confidence, and emit evidence. | ||||
|  | ||||
| ### 3.2 Running containers | ||||
|  | ||||
| 1. Capture real exec / fork events and build an exec graph. | ||||
| 2. Locate steady-state processes (long-lived, owns listeners, not a shim). | ||||
| 3. Collapse wrappers using the same catalogue as static mode. | ||||
| 4. Cross-check with static heuristics to tighten confidence. | ||||
|  | ||||
| ### 3.3 Shared components | ||||
|  | ||||
| - **ShellFlow static analyser** handles script idioms (`set --`, `exec "$@"`, branch rewrites). | ||||
| - **Wrapper catalogue** recognises shells, init shims, supervisors, and package runners. | ||||
| - **Runtime detectors** plug in per language/framework (Java, .NET, Node, Python, PHP-FPM, Ruby, Go, Rust, Nginx, C/C++, Deno, Elixir/BEAM, Supervisor). | ||||
| - **Score calibrator** turns detector raw scores into a unified 0..1 confidence. | ||||
|  | ||||
| ## 4) Document map | ||||
|  | ||||
| The entry-point playbook is now split into focused guides: | ||||
|  | ||||
| | Document | Purpose | | ||||
| | --- | --- | | ||||
| | `entrypoint-static-analysis.md` | Overlay VFS, argv composition, wrapper reduction, scoring. | | ||||
| | `entrypoint-dynamic-analysis.md` | Observational Exec Graph for running containers. | | ||||
| | `entrypoint-shell-analysis.md` | ShellFlow static analyser and script idioms. | | ||||
| | `entrypoint-runtime-overview.md` | Detector contracts, helper utilities, calibration, integrations. | | ||||
| | `entrypoint-lang-*.md` | Runtime-specific heuristics (Java, .NET, Node, Python, PHP-FPM, Ruby, Go, Rust, C/C++, Nginx, Deno, Elixir/BEAM, Supervisor). | | ||||
|  | ||||
| Use this file as the landing page; each guide can be read independently when implementing or updating a specific component. | ||||
							
								
								
									
										152
									
								
								docs/modules/scanner/operations/entrypoint-runtime-overview.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								docs/modules/scanner/operations/entrypoint-runtime-overview.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,152 @@ | ||||
| # Runtime Detector Overview | ||||
|  | ||||
| Runtime classification converts a reduced command into a concrete language or framework identity with supporting evidence. This document describes the shared contracts, helper utilities, calibration strategy, and integration points; language-specific heuristics live in the `entrypoint-lang-*.md` files. | ||||
|  | ||||
| ## 1) Contracts | ||||
|  | ||||
| ```csharp | ||||
| public enum LanguageType { | ||||
|   Java, DotNet, Node, Python, PhpFpm, Ruby, Go, Rust, CCpp, | ||||
|   Nginx, Deno, Elixir, Supervisor, Other | ||||
| } | ||||
|  | ||||
| public sealed record ResolvedCommand( | ||||
|   string[] Argv, | ||||
|   string   Argv0, | ||||
|   string?  AbsolutePath, | ||||
|   bool     IsElf, | ||||
|   bool     IsPe, | ||||
|   bool     IsScript, | ||||
|   string?  Shebang, | ||||
|   string   WorkingDir | ||||
| ); | ||||
|  | ||||
| public sealed record LanguageHit( | ||||
|   LanguageType Type, | ||||
|   double       RawScore, | ||||
|   string       ResolvedBinary, | ||||
|   string[]     Args, | ||||
|   List<string> Evidence, | ||||
|   string?      AppArtifactPath = null, | ||||
|   string?      MainModule = null, | ||||
|   Dictionary<string,string>? Extra = null | ||||
| ); | ||||
| ``` | ||||
|  | ||||
| ### Interface | ||||
|  | ||||
| ```csharp | ||||
| public interface ILanguageSubDetector { | ||||
|   LanguageHit? TryDetect( | ||||
|     ResolvedCommand cmd, OverlayVfs vfs, EnvBag env, ImageContext img, CancellationToken ct = default); | ||||
| } | ||||
|  | ||||
| public sealed class LanguageDetector { | ||||
|   private readonly ILanguageSubDetector[] _detectors = { | ||||
|     new JavaDetector(), | ||||
|     new DotNetDetector(), | ||||
|     new NodeDetector(), | ||||
|     new PythonDetector(), | ||||
|     new PhpFpmDetector(), | ||||
|     new RubyDetector(), | ||||
|     new NginxDetector(), | ||||
|     new GoDetector(), | ||||
|     new RustDetector(), | ||||
|     new DenoDetector(), | ||||
|     new ElixirDetector(), | ||||
|     new CCppDetector(), | ||||
|     new SupervisorDetector() | ||||
|   }; | ||||
|   private readonly ScoreCalibrator _cal = ScoreCalibrator.Default; | ||||
|  | ||||
|   public LanguageHit Detect(ResolvedCommand cmd, OverlayVfs vfs, EnvBag env, ImageContext img, out double confidence) { | ||||
|     var hits = _detectors.Select(d => d.TryDetect(cmd, vfs, env, img)).Where(h => h is not null).ToList()!; | ||||
|     LanguageHit best = hits.Count == 0 | ||||
|       ? new LanguageHit(LanguageType.Other, 0.10, cmd.AbsolutePath ?? cmd.Argv0, cmd.Argv.Skip(1).ToArray(), | ||||
|                         new() { "No strong runtime family signals detected." }) | ||||
|       : hits.OrderByDescending(_cal.Calibrate).First(); | ||||
|  | ||||
|     confidence = _cal.Calibrate(best); | ||||
|     foreach (var alt in hits.Where(h => h != best).OrderByDescending(_cal.Calibrate)) | ||||
|       best.Evidence.Add($"Alternative: {alt!.Type} (score={_cal.Calibrate(alt):0.00}) — {string.Join("; ", alt.Evidence.Take(2))}…"); | ||||
|     return best; | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## 2) Helpers | ||||
|  | ||||
| ```csharp | ||||
| static class VfsHelpers { | ||||
|   public static bool FileExists(OverlayVfs vfs, string path) => vfs.Exists(path); | ||||
|   public static bool TryOpen(OverlayVfs vfs, string path, out Stream? stream) { | ||||
|     if (!vfs.Exists(path)) { stream = null; return false; } | ||||
|     stream = vfs.OpenRead(path); | ||||
|     return true; | ||||
|   } | ||||
|   public static string Join(string cwd, string maybeRel) => | ||||
|     Path.IsPathRooted(maybeRel) ? maybeRel : Path.GetFullPath(Path.Combine(cwd, maybeRel)); | ||||
| } | ||||
|  | ||||
| static class ArgvHelpers { | ||||
|   public static int IndexOf(this string[] argv, string flag) => | ||||
|     Array.FindIndex(argv, a => a == flag); | ||||
|   public static string? Next(this string[] argv, int idx) => | ||||
|     (idx >= 0 && idx + 1 < argv.Length) ? argv[idx + 1] : null; | ||||
|   public static bool AnyEndsWith(this IEnumerable<string> args, string suffix, bool ignoreCase = true) => | ||||
|     args.Any(a => a.EndsWith(suffix, ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal)); | ||||
|   public static bool Is(this string? candidate, params string[] names) => | ||||
|     candidate is not null && names.Any(n => string.Equals(Path.GetFileName(candidate), n, StringComparison.OrdinalIgnoreCase)); | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## 3) Scoring & calibration | ||||
|  | ||||
| - Each sub-detector returns a `RawScore` (0..1) based on family-specific heuristics. | ||||
| - Feed raw scores into a calibrator (Platt scaling or isotonic regression) trained on labelled corpora to get calibrated probabilities. | ||||
| - Persist calibration metadata per detector to avoid drift. | ||||
| - When no detector fires, return `LanguageType.Other` with low confidence and an evidence note. | ||||
|  | ||||
| ## 4) Cross-checks | ||||
|  | ||||
| Enhance precision by combining detector results with filesystem and configuration signals: | ||||
|  | ||||
| - Compare declared `EXPOSE` ports with runtime defaults (e.g., `80/443` for Nginx, `8080` for Java app servers). | ||||
| - Inspect service-specific configuration (`nginx.conf`, `php-fpm.conf`, `web.config`, `Gemfile`, `package.json`, `pyproject.toml`). | ||||
| - For Java and .NET, verify artefact presence and manifest metadata; for Go/Rust check static binary traits. | ||||
| - Re-run detectors after ShellFlow rewrites to ensure post-`exec` commands are analysed. | ||||
|  | ||||
| ## 5) Windows nuances | ||||
|  | ||||
| - Use `config.Shell` to detect PowerShell vs CMD; adjust interpreter lookup accordingly. | ||||
| - PE probing is mandatory—PowerShell scripts often front .NET or native binaries. | ||||
| - Consider case-insensitive paths and `\` separators. | ||||
|  | ||||
| ## 6) Integration points | ||||
|  | ||||
| - Static reducer passes `ResolvedCommand` → runtime detector. | ||||
| - Dynamic reducer pipes steady-state commands through the same interface. | ||||
| - Output `LanguageHit` populates the `TerminalProcess` along with `confidence`. | ||||
| - Downstream consumers (Policy Engine, Vuln Explorer) merge runtime type into their evidence trail. | ||||
|  | ||||
| ## 7) Next steps | ||||
|  | ||||
| Language-specific heuristics live in: | ||||
|  | ||||
| | Runtime | Document | | ||||
| | --- | --- | | ||||
| | Java | `entrypoint-lang-java.md` | | ||||
| | .NET / C# | `entrypoint-lang-dotnet.md` | | ||||
| | Node.js | `entrypoint-lang-node.md` | | ||||
| | Python | `entrypoint-lang-python.md` | | ||||
| | PHP-FPM | `entrypoint-lang-phpfpm.md` | | ||||
| | Ruby | `entrypoint-lang-ruby.md` | | ||||
| | Go | `entrypoint-lang-go.md` | | ||||
| | Rust | `entrypoint-lang-rust.md` | | ||||
| | C/C++ | `entrypoint-lang-ccpp.md` | | ||||
| | Nginx | `entrypoint-lang-nginx.md` | | ||||
| | Deno | `entrypoint-lang-deno.md` | | ||||
| | Elixir/Erlang (BEAM) | `entrypoint-lang-elixir.md` | | ||||
| | Supervisors | `entrypoint-lang-supervisor.md` | | ||||
|  | ||||
| Each runtime file documents the heuristics, artefacts, and edge cases specific to that family. | ||||
							
								
								
									
										83
									
								
								docs/modules/scanner/operations/entrypoint-shell-analysis.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								docs/modules/scanner/operations/entrypoint-shell-analysis.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| # ShellFlow — Script Reduction Playbook | ||||
|  | ||||
| Most container entry points eventually execute a shell script. The ShellFlow analyser resolves these scripts without executing user code, providing deterministic, explainable reductions. | ||||
|  | ||||
| ## 1) Scope | ||||
|  | ||||
| - POSIX `sh` subset with common Bash extensions (control flow, functions, parameter expansion). | ||||
| - Handles idioms from official Docker images (`if [ "$1" = "server" ]; then …`, `exec gosu "$USER" "$@"`, `set -- java -jar …`). | ||||
| - Tracks positional parameters (`$@`, `$1..$9`), environment variables, and `set --` mutations. | ||||
| - Produces one or more candidate commands with supporting evidence. | ||||
|  | ||||
| ## 2) Architecture | ||||
|  | ||||
| ``` | ||||
| ShellFlow/ | ||||
|   Parser/           // POSIX sh lexer + recursive descent parser | ||||
|   Ast/              // nodes for lists, pipelines, conditionals, functions | ||||
|   Evaluator/        // partial evaluation & taint tracking | ||||
|   Idioms/           // pattern library for common Docker entrypoints | ||||
|   Planner/          // emits CommandPlan[] | ||||
| ``` | ||||
|  | ||||
| ### 2.1 CommandPlan | ||||
|  | ||||
| ```csharp | ||||
| public sealed record CommandPlan( | ||||
|   string[] Argv, | ||||
|   double   HeuristicScore, | ||||
|   IReadOnlyList<string> Evidence, | ||||
|   IReadOnlyList<ReductionEdge> Chain, | ||||
|   bool     IsFallback = false | ||||
| ); | ||||
| ``` | ||||
|  | ||||
| Plans feed directly into the static reducer, which selects the highest-confidence plan but keeps alternates as evidence. | ||||
|  | ||||
| ## 3) Parsing & AST | ||||
|  | ||||
| - Tokenise words, assignments, pipelines (`|`), lists (`;`, `&&`, `||`), conditionals (`if`, `case`), loops (`for`, `while`, `until`), functions, and redirections. | ||||
| - Preserve heredocs and subshells as opaque nodes (evaluated conservatively). | ||||
| - Record source spans to surface meaningful evidence (`"line 12: exec java -jar $APP_JAR"`). | ||||
|  | ||||
| ## 4) Partial evaluation | ||||
|  | ||||
| - Initialise symbol table from image environment plus caller-supplied args. | ||||
| - Treat `$@`, `$*`, `$1..$9` as tainted; propagate taint through assignments. | ||||
| - Resolve `${VAR:-default}` and `${VAR:+alt}` when `VAR` known; otherwise branch. | ||||
| - Support `set -- …` (resets positional parameters) and `shift`. | ||||
| - `source`/`.` commands are parsed recursively when files are available; else fallback to low-confidence branch. | ||||
|  | ||||
| ## 5) Exec sink detection | ||||
|  | ||||
| - `exec <cmd>` dominates the remainder of the script. | ||||
| - Chains such as `exec gosu "$USER" "$@"` feed into wrapper collapse. | ||||
| - When no `exec` is present, pick the last reachable simple command in the main path. | ||||
| - Multi-branch scripts yield multiple plans with adjusted scores; unresolved branches are marked `IsFallback`. | ||||
|  | ||||
| ## 6) Idiom library | ||||
|  | ||||
| | Pattern | Action | | ||||
| | --- | --- | | ||||
| | `if [ "${1:0:1}" = '-' ]; then set -- server "$@"; fi` | Rewrite argv to prepend default command. | | ||||
| | `if [ "$1" = "bash" ]; then exec "$@"; fi` | Pass-through for manual shells. | | ||||
| | `exec "$@"` + non-empty CMD | Substitute CMD vector into plan. | | ||||
| | `exec java -jar "$APP_JAR" "$@"` | Resolve JAR via env or filesystem. | | ||||
| | `set -- gosu "$APP_USER" "$@"` | Collapse into wrapper plan. | | ||||
|  | ||||
| Idioms are implemented as AST visitors; each adds evidence strings when triggered. | ||||
|  | ||||
| ## 7) Confidence scoring | ||||
|  | ||||
| - Base score from plan heuristics (`HeuristicScore`). | ||||
| - Penalties for unresolved taint (`$@` unknown), missing files, nested subshells, or fallbacks. | ||||
| - Bonus when idioms match, artefacts exist, or env values resolve cleanly. | ||||
| - Final confidence is combined with the outer static scoring model. | ||||
|  | ||||
| ## 8) Failure modes | ||||
|  | ||||
| - Missing script (`ENTRYPOINT` points to deleted file): emit fallback plan with low confidence. | ||||
| - Self-modifying scripts or heavy dynamic features (`eval`, backticks): mark plan as low-confidence and surface warning evidence. | ||||
| - Commands that spawn supervisors without `exec`: return both the supervisor and inferred children (if configuration files are present). | ||||
|  | ||||
| ShellFlow keeps the static reducer explainable: every inferred command is accompanied by the script span and reasoning used to reach it. | ||||
							
								
								
									
										122
									
								
								docs/modules/scanner/operations/entrypoint-static-analysis.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								docs/modules/scanner/operations/entrypoint-static-analysis.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| # Entry-Point Static Analysis | ||||
|  | ||||
| This guide captures the static half of Stella Ops’ entry-point detection pipeline: how we turn image metadata and filesystem contents into a resolved binary, an execution chain, and a confidence score. | ||||
|  | ||||
| ## 1) Loading OCI images | ||||
|  | ||||
| ### 1.1 Supported inputs | ||||
| - Registry references (`repo:tag@sha256:digest`) using the existing content store. | ||||
| - Local OCI/Docker v2 archives (`docker save` tarball, OCI layout directory with `index.json` + `blobs/sha256/*`). | ||||
|  | ||||
| ### 1.2 Normalised model | ||||
|  | ||||
| ```csharp | ||||
| sealed class OciImage { | ||||
|   public required string Os; | ||||
|   public required string Arch; | ||||
|   public required string[] Entrypoint; | ||||
|   public required string[] Cmd; | ||||
|   public required string[] Shell;      // Windows / powershell overrides | ||||
|   public required string WorkingDir; | ||||
|   public required string[] Env; | ||||
|   public required string[] ExposedPorts; | ||||
|   public required LabelMap Labels; | ||||
|   public required LayerRef[] Layers;   // ordered, compressed blobs | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Compose the runtime argv as `Entrypoint ++ Cmd`, honouring shell-form vs exec-form (see §2.3). | ||||
|  | ||||
| ## 2) Overlay virtual filesystem | ||||
|  | ||||
| ### 2.1 Whiteouts | ||||
| - Regular whiteout: `path/.wh.<name>` removes `<name>` from lower layers. | ||||
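| - Opaque directory: `path/.wh..wh..opq` hides everything lower layers placed under `path`; the directory itself and entries from the same or upper layers remain visible. | ||||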
|  | ||||
| ### 2.2 Lazy extraction | ||||
| - First pass: build a tar index `(path → layer, offset, size, mode, isWhiteout, isDir)`. | ||||
| - Decompress only when reading a file; optionally support eStargz TOC to accelerate random access. | ||||
|  | ||||
| ### 2.3 Shell-form composition | ||||
| - Dockerfile shell form is serialised as `["/bin/sh","-c","…"]` (or `Shell[]` override on Windows). | ||||
| - Always trust `config.json`; no need to inspect the Dockerfile. | ||||
| - Working directory defaults to `/` if unspecified. | ||||
|  | ||||
| ## 3) Low-level primitives | ||||
|  | ||||
| ### 3.1 PATH resolution | ||||
| - Extract `PATH` from environment (fallback `/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin`). | ||||
| - If `argv[0]` contains no `/`, walk the PATH to resolve an absolute file; if it is relative but contains `/` (e.g., `./run.sh`), resolve it against the working directory instead. | ||||
| - Verify execute bit (or Windows ACL) before accepting. | ||||
|  | ||||
| ### 3.2 Shebang handling | ||||
| - For non-ELF/PE files: read first line; interpret `#!interpreter args`. | ||||
| - Rewrite argv per kernel semantics: `[interpreter, shebang-arg, script-path, original argv[1..]]`. Linux passes everything after the interpreter as a single argument, so do not split it on whitespace. | ||||
|  | ||||
| ### 3.3 Binary probes | ||||
| - Identify ELF via magic `\x7FELF`, parse `.interp`, `.dynamic`, linked libs, `.note.go.buildid`, DWARF producer. | ||||
| - Identify PE (Windows) and detect .NET single-file bundles via CLI header. | ||||
| - Record features for runtime scoring (Go vs Rust vs glibc vs musl). | ||||
|  | ||||
| ## 4) Wrapper catalogue | ||||
|  | ||||
| Collapse known wrappers before analysing the target command: | ||||
|  | ||||
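| - Init shims: `tini`, `dumb-init`. | ||||
| - Supervisors: `s6-svscan`, `runit`, `supervisord` (collapsed only when they manage a single child; otherwise classified as `Supervisor` with child services listed). | ||||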
| - Privilege droppers: `gosu`, `su-exec`, `chpst`. | ||||
| - Shells: `sh`, `bash`, `dash`, BusyBox variants. | ||||
| - Package runners: `npm`, `yarn`, `pnpm`, `pip`, `pipenv`, `poetry`, `bundle`, `rake`. | ||||
|  | ||||
| Rules: | ||||
| - If wrapper contains a `--` sentinel (`tini -- app …`) drop the wrapper and record a reduction edge. | ||||
| - `gosu user cmd …` → collapse to `cmd …`. | ||||
| - For shell wrappers, delegate to the ShellFlow analyser (see separate guide). | ||||
|  | ||||
| ## 5) ShellFlow integration | ||||
|  | ||||
| When the resolved command is a shell script, invoke the ShellFlow analyser to locate the eventual `exec` target. Key capabilities: | ||||
|  | ||||
| - Parses POSIX sh (and common Bash extensions). | ||||
| - Tracks environment mutations (`set`, `export`, `set --`). | ||||
| - Resolves `$@`, `$1..9`, `${VAR:-default}`. | ||||
| - Recognises idioms from official Docker images (`if [ "$1" = "server" ]; then …`). | ||||
| - Emits multiple branches when predicates depend on unknown data, but tags them with lower confidence. | ||||
|  | ||||
| The analyser returns one or more candidate commands along with reasons, which feed into the reduction engine. | ||||
|  | ||||
| ## 6) Reduction algorithm | ||||
|  | ||||
| 1. Compose argv `ENTRYPOINT ++ CMD`. | ||||
| 2. Collapse wrappers; append `ReductionEdge` entries documenting each step. | ||||
| 3. Resolve argv0 to an absolute file and classify (ELF/PE/script). | ||||
| 4. If script → run ShellFlow; replace current command with highest-confidence `exec` target while preserving alternates as evidence. | ||||
| 5. Attempt to resolve application artefacts for VM hosts (JARs, DLLs, JS entry, Python module, etc.). | ||||
| 6. Emit `EntryTraceResult` with candidate terminals ranked by confidence. | ||||
|  | ||||
| ## 7) Confidence scoring | ||||
|  | ||||
| Use a simple logistic model with feature contributions captured for the evidence trail. Example features: | ||||
|  | ||||
| | Id | Signal | Weight | | ||||
| | --- | --- | --- | | ||||
| | `f1` | Entrypoint already an executable (ELF/PE) | +0.18 | | ||||
| | `f2` | Observed chain ends in non-wrapper binary | +0.22 | | ||||
| | `f3` | VM host + resolvable artefact | +0.20 | | ||||
| | `f4` | Exposed ports align with runtime | +0.06 | | ||||
| | `f5` | Shebang interpreter matches runtime family | +0.05 | | ||||
| | `f6` | Language artefact validation succeeded | +0.15 | | ||||
| | `f8` | Multi-branch script unresolved (`$@` taint) | −0.20 | | ||||
| | `f9` | Target missing execute bit | −0.25 | | ||||
| | `f10` | Shell with no `exec` | −0.18 | | ||||
|  | ||||
| Persist per-feature evidence strings so UI/CLI users can see **why** the scanner picked a given entry point. | ||||
|  | ||||
| ## 8) Outputs | ||||
|  | ||||
| Return a populated `EntryTraceResult`: | ||||
|  | ||||
| - `Terminals` contains the best candidate(s) and their runtime classification. | ||||
| - `Evidence` aggregates feature messages, ShellFlow reasoning, wrapper reductions, and runtime detector hints. | ||||
| - `Chain` shows the explainable path from initial Docker argv to the final binary. | ||||
|  | ||||
| Static and dynamic reducers share this shape, enabling downstream modules to remain agnostic of the detection mode. | ||||
							
								
								
									
										26
									
								
								docs/modules/scanner/operations/entrypoint.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								docs/modules/scanner/operations/entrypoint.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| # Entry-Point Documentation Index | ||||
|  | ||||
| The entry-point detection system is now split into focused guides. Use this index to navigate the individual topics. | ||||
|  | ||||
| | Topic | Document | | ||||
| | --- | --- | | ||||
| | Problem statement & architecture overview | `entrypoint-problem.md` | | ||||
| | Static resolver (OCI layers, wrappers, scoring) | `entrypoint-static-analysis.md` | | ||||
| | Dynamic resolver / Observational Exec Graph | `entrypoint-dynamic-analysis.md` | | ||||
| | ShellFlow script analysis | `entrypoint-shell-analysis.md` | | ||||
| | Runtime detector contracts & calibration | `entrypoint-runtime-overview.md` | | ||||
| | Java heuristics | `entrypoint-lang-java.md` | | ||||
| | .NET heuristics | `entrypoint-lang-dotnet.md` | | ||||
| | Node.js heuristics | `entrypoint-lang-node.md` | | ||||
| | Python heuristics | `entrypoint-lang-python.md` | | ||||
| | PHP-FPM heuristics | `entrypoint-lang-phpfpm.md` | | ||||
| | Ruby heuristics | `entrypoint-lang-ruby.md` | | ||||
| | Go heuristics | `entrypoint-lang-go.md` | | ||||
| | Rust heuristics | `entrypoint-lang-rust.md` | | ||||
| | C/C++ heuristics | `entrypoint-lang-ccpp.md` | | ||||
| | Nginx heuristics | `entrypoint-lang-nginx.md` | | ||||
| | Deno heuristics | `entrypoint-lang-deno.md` | | ||||
| | Elixir / Erlang (BEAM) heuristics | `entrypoint-lang-elixir.md` | | ||||
| | Supervisor classification | `entrypoint-lang-supervisor.md` | | ||||
|  | ||||
| > Looking for historical context? The unified write-up previously in `entrypoint2.md` and `entrypoint-lang-detection.md` has been decomposed into the files above for easier maintenance. | ||||
							
								
								
									
										88
									
								
								docs/modules/scanner/operations/rustfs-migration.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								docs/modules/scanner/operations/rustfs-migration.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # Scanner Artifact Store Migration (MinIO → RustFS) | ||||
|  | ||||
| ## Overview | ||||
|  | ||||
| Sprint 11 introduces **RustFS** as the default artifact store for the Scanner plane. Existing | ||||
| deployments running MinIO (or any S3-compatible backend) must migrate stored SBOM artefacts to RustFS | ||||
| before switching the Scanner hosts to `scanner.artifactStore.driver = "rustfs"`. | ||||
|  | ||||
| This runbook covers the recommended migration workflow and validation steps. | ||||
|  | ||||
| ## Prerequisites | ||||
|  | ||||
| - RustFS service deployed and reachable from the Scanner control plane (`http(s)://rustfs:8080`). | ||||
| - Existing MinIO/S3 credentials with read access to the current bucket. | ||||
| - CLI environment with the StellaOps source tree (for the migration tool) and the .NET 10 SDK (`dotnet`). | ||||
| - Maintenance window sized to copy all artefacts (migration is read-only on the source bucket). | ||||
|  | ||||
| ## 1. Snapshot source bucket (optional but recommended) | ||||
|  | ||||
| If the MinIO deployment offers versioning or snapshots, take one before migrating. For non-versioned | ||||
| deployments, capture an external backup (e.g., `mc mirror` to offline storage). | ||||
|  | ||||
| ## 2. Dry-run the migrator | ||||
|  | ||||
| ``` | ||||
| dotnet run --project src/Tools/RustFsMigrator -- \ | ||||
|   --s3-bucket scanner-artifacts \ | ||||
|   --s3-endpoint http://stellaops-minio:9000 \ | ||||
|   --s3-access-key stellaops \ | ||||
|   --s3-secret-key dev-minio-secret \ | ||||
|   --rustfs-endpoint http://stellaops-rustfs:8080 \ | ||||
|   --rustfs-bucket scanner-artifacts \ | ||||
|   --prefix scanner/ \ | ||||
|   --dry-run | ||||
| ``` | ||||
|  | ||||
| The dry-run enumerates keys and reports the object count without writing to RustFS. Use this to | ||||
| estimate migration time. | ||||
|  | ||||
| ## 3. Execute migration | ||||
|  | ||||
| Remove the `--dry-run` flag to copy data. Optional flags: | ||||
|  | ||||
| - `--immutable` – mark all migrated objects as immutable (`X-RustFS-Immutable`). | ||||
| - `--retain-days 365` – request retention, specified in days and sent via the `X-RustFS-Retain-Seconds` header. | ||||
| - `--rustfs-api-key-header` / `--rustfs-api-key` – provide auth headers when RustFS is protected. | ||||
|  | ||||
| The tool streams each object from S3 and performs an idempotent `PUT` to RustFS preserving the key | ||||
| structure (e.g., `scanner/layers/<sha256>/sbom.cdx.json.zst`). | ||||
|  | ||||
| ## 4. Verify sample objects | ||||
|  | ||||
| Pick a handful of SBOM digests and confirm: | ||||
|  | ||||
| 1. `GET /api/v1/buckets/<bucket>/objects/<key>` returns the expected payload (size + SHA-256). | ||||
| 2. Scanner WebService configured with `scanner.artifactStore.driver = "rustfs"` can fetch the same | ||||
|    artefacts (smoke test: `GET /api/v1/scanner/sboms/<digest>?format=cdx-json`). | ||||
|  | ||||
| ## 5. Switch Scanner hosts | ||||
|  | ||||
| Update configuration (Helm/Compose/environment) to set: | ||||
|  | ||||
| ``` | ||||
| scanner: | ||||
|   artifactStore: | ||||
|     driver: rustfs | ||||
|     endpoint: http://stellaops-rustfs:8080 | ||||
|     bucket: scanner-artifacts | ||||
|     timeoutSeconds: 30 | ||||
| ``` | ||||
|  | ||||
| Redeploy the Scanner WebService and Worker. Monitor the logs for `RustFS` upload/download messages and | ||||
| confirm that Prometheus is scraping `rustfs_requests_total`. | ||||
|  | ||||
| ## 6. Cleanup legacy MinIO (optional) | ||||
|  | ||||
| After a complete migration and validation period, decommission the MinIO bucket or repurpose it for | ||||
| other components (Concelier still supports S3). Ensure backups reference RustFS snapshots going | ||||
| forward. | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| - **Uploads fail (HTTP 4xx/5xx):** Check RustFS logs and confirm API key headers. Re-run the migrator | ||||
|   for the affected keys. | ||||
| - **Missing objects post-cutover:** Re-run the migrator with the specific `--prefix`. The tool is | ||||
|   idempotent and safely overwrites existing objects. | ||||
| - **Performance tuning:** Run multiple instances of the migrator with disjoint prefixes if needed; the | ||||
|   RustFS API is stateless and supports parallel PUTs. | ||||
		Reference in New Issue
	
	Block a user