feat: Add RustFS artifact object store and migration tool
- Implemented RustFsArtifactObjectStore for managing artifacts in RustFS. - Added unit tests for RustFsArtifactObjectStore functionality. - Created a RustFS migrator tool to transfer objects from S3 to RustFS. - Introduced policy preview and report models for API integration. - Added fixtures and tests for policy preview and report functionality. - Included necessary metadata and scripts for cache_pkg package.
This commit is contained in:
		
							
								
								
									
										155
									
								
								docs/ops/scanner-analyzers-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								docs/ops/scanner-analyzers-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,155 @@
 | 
			
		||||
{
 | 
			
		||||
  "title": "StellaOps Scanner Analyzer Benchmarks",
 | 
			
		||||
  "uid": "scanner-analyzer-bench",
 | 
			
		||||
  "schemaVersion": 38,
 | 
			
		||||
  "version": 1,
 | 
			
		||||
  "editable": true,
 | 
			
		||||
  "timezone": "",
 | 
			
		||||
  "graphTooltip": 0,
 | 
			
		||||
  "time": {
 | 
			
		||||
    "from": "now-24h",
 | 
			
		||||
    "to": "now"
 | 
			
		||||
  },
 | 
			
		||||
  "templating": {
 | 
			
		||||
    "list": [
 | 
			
		||||
      {
 | 
			
		||||
        "name": "datasource",
 | 
			
		||||
        "type": "datasource",
 | 
			
		||||
        "query": "prometheus",
 | 
			
		||||
        "refresh": 1,
 | 
			
		||||
        "hide": 0,
 | 
			
		||||
        "current": {}
 | 
			
		||||
      }
 | 
			
		||||
    ]
 | 
			
		||||
  },
 | 
			
		||||
  "annotations": {
 | 
			
		||||
    "list": []
 | 
			
		||||
  },
 | 
			
		||||
  "panels": [
 | 
			
		||||
    {
 | 
			
		||||
      "id": 1,
 | 
			
		||||
      "title": "Max Duration (ms)",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "ms"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        },
 | 
			
		||||
        "tooltip": {
 | 
			
		||||
          "mode": "single",
 | 
			
		||||
          "sort": "none"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "scanner_analyzer_bench_max_ms",
 | 
			
		||||
          "legendFormat": "{{scenario}}",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "scanner_analyzer_bench_baseline_max_ms",
 | 
			
		||||
          "legendFormat": "{{scenario}} baseline",
 | 
			
		||||
          "refId": "B"
 | 
			
		||||
        }
 | 
			
		||||
      ]
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 2,
 | 
			
		||||
      "title": "Regression Ratio vs Limit",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "percent",
 | 
			
		||||
          "min": 0,
 | 
			
		||||
          "thresholds": {
 | 
			
		||||
            "mode": "absolute",
 | 
			
		||||
            "steps": [
 | 
			
		||||
              {
 | 
			
		||||
                "color": "green",
 | 
			
		||||
                "value": null
 | 
			
		||||
              },
 | 
			
		||||
              {
 | 
			
		||||
                "color": "red",
 | 
			
		||||
                "value": 20
 | 
			
		||||
              }
 | 
			
		||||
            ]
 | 
			
		||||
          }
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        },
 | 
			
		||||
        "tooltip": {
 | 
			
		||||
          "mode": "multi",
 | 
			
		||||
          "sort": "none"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "(scanner_analyzer_bench_regression_ratio - 1) * 100",
 | 
			
		||||
          "legendFormat": "{{scenario}} regression %",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "(scanner_analyzer_bench_regression_limit - 1) * 100",
 | 
			
		||||
          "legendFormat": "{{scenario}} limit %",
 | 
			
		||||
          "refId": "B"
 | 
			
		||||
        }
 | 
			
		||||
      ]
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 3,
 | 
			
		||||
      "title": "Breached Scenarios",
 | 
			
		||||
      "type": "stat",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "displayName": "${__field.labels.scenario}",
 | 
			
		||||
          "unit": "short"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "colorMode": "value",
 | 
			
		||||
        "graphMode": "area",
 | 
			
		||||
        "justifyMode": "center",
 | 
			
		||||
        "reduceOptions": {
 | 
			
		||||
          "calcs": [
 | 
			
		||||
            "last"
 | 
			
		||||
          ],
 | 
			
		||||
          "fields": "",
 | 
			
		||||
          "values": false
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "scanner_analyzer_bench_regression_breached",
 | 
			
		||||
          "legendFormat": "{{scenario}}",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ]
 | 
			
		||||
    }
 | 
			
		||||
  ]
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										48
									
								
								docs/ops/scanner-analyzers-operations.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								docs/ops/scanner-analyzers-operations.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
# Scanner Analyzer Benchmarks – Operations Guide
 | 
			
		||||
 | 
			
		||||
## Purpose
 | 
			
		||||
Keep the language analyzer microbenchmarks within the < 5 s SBOM pledge. CI emits Prometheus metrics and JSON fixtures so that trend dashboards and alerts stay in lockstep with the repository baseline.
 | 
			
		||||
 | 
			
		||||
> **Grafana note:** Import `docs/ops/scanner-analyzers-grafana-dashboard.json` into your Prometheus-backed Grafana stack to monitor `scanner_analyzer_bench_*` metrics and alert on regressions.
 | 
			
		||||
 | 
			
		||||
## Publishing workflow
 | 
			
		||||
1. CI (or an engineer running locally) executes:
 | 
			
		||||
   ```bash
 | 
			
		||||
   dotnet run \
 | 
			
		||||
     --project bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj \
 | 
			
		||||
     -- \
 | 
			
		||||
     --repo-root . \
 | 
			
		||||
     --out bench/Scanner.Analyzers/baseline.csv \
 | 
			
		||||
     --json out/bench/scanner-analyzers/latest.json \
 | 
			
		||||
     --prom out/bench/scanner-analyzers/latest.prom \
 | 
			
		||||
     --commit "$(git rev-parse HEAD)" \
 | 
			
		||||
     --environment "${CI_ENVIRONMENT_NAME:-local}"
 | 
			
		||||
   ```
 | 
			
		||||
2. Publish the artefacts (`baseline.csv`, `latest.json`, `latest.prom`) to `bench-artifacts/<date>/`.
 | 
			
		||||
3. Promtail (or the CI job) pushes `latest.prom` into Prometheus; JSON lands in long-term storage for workbook snapshots.
 | 
			
		||||
4. The harness exits non-zero if:
 | 
			
		||||
   - `max_ms` for any scenario breaches its configured threshold; or
 | 
			
		||||
   - `max_ms` regresses ≥ 20 % versus `baseline.csv`.
 | 
			
		||||
 | 
			
		||||
## Grafana dashboard
 | 
			
		||||
- Import `docs/ops/scanner-analyzers-grafana-dashboard.json`.
 | 
			
		||||
- Point the template variable `datasource` to the Prometheus instance ingesting `scanner_analyzer_bench_*` metrics.
 | 
			
		||||
- Panels:
 | 
			
		||||
  - **Max Duration (ms)** – compares live runs vs baseline.
 | 
			
		||||
  - **Regression Ratio vs Limit** – plots `(max / baseline_max - 1) * 100`.
 | 
			
		||||
  - **Breached Scenarios** – stat panel sourced from `scanner_analyzer_bench_regression_breached`.
 | 
			
		||||
 | 
			
		||||
## Alerting & on-call response
 | 
			
		||||
- **Primary alert**: fire when `scanner_analyzer_bench_regression_ratio{scenario=~".+"} >= 1.20` for 2 consecutive samples (10 min default). Suggested PromQL:
 | 
			
		||||
  ```
 | 
			
		||||
  min_over_time(scanner_analyzer_bench_regression_ratio[10m]) >= 1.20
 | 
			
		||||
  ```
 | 
			
		||||
- Suppress duplicates using the `scenario` label.
 | 
			
		||||
- Pager payload should include `scenario`, `max_ms`, `baseline_max_ms`, and `commit`.
 | 
			
		||||
- Immediate triage steps:
 | 
			
		||||
  1. Check `latest.json` artefact for the failing scenario – confirm commit and environment.
 | 
			
		||||
  2. Re-run the harness with `--captured-at` and `--baseline` pointing at the last known good CSV to verify determinism.
 | 
			
		||||
  3. If regression persists, open an incident ticket tagged `scanner-analyzer-perf` and page the owning language guild.
 | 
			
		||||
  4. Roll back the offending change or update the baseline after sign-off from the guild lead and Perf captain.
 | 
			
		||||
 | 
			
		||||
Document the outcome in `docs/12_PERFORMANCE_WORKBOOK.md` (section 8) so trendlines reflect any accepted regressions.
 | 
			
		||||
							
								
								
									
										88
									
								
								docs/ops/scanner-rustfs-migration.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								docs/ops/scanner-rustfs-migration.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,88 @@
 | 
			
		||||
# Scanner Artifact Store Migration (MinIO → RustFS)
 | 
			
		||||
 | 
			
		||||
## Overview
 | 
			
		||||
 | 
			
		||||
Sprint 11 introduces **RustFS** as the default artifact store for the Scanner plane. Existing
 | 
			
		||||
deployments running MinIO (or any S3-compatible backend) must migrate stored SBOM artefacts to RustFS
 | 
			
		||||
before switching the Scanner hosts to `scanner.artifactStore.driver = "rustfs"`.
 | 
			
		||||
 | 
			
		||||
This runbook covers the recommended migration workflow and validation steps.
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
- RustFS service deployed and reachable from the Scanner control plane (`http(s)://rustfs:8080`).
 | 
			
		||||
- Existing MinIO/S3 credentials with read access to the current bucket.
 | 
			
		||||
- CLI environment with the StellaOps source tree (for the migration tool) and the .NET 10 SDK (`dotnet` CLI).
 | 
			
		||||
- Maintenance window sized to copy all artefacts (migration is read-only on the source bucket).
 | 
			
		||||
 | 
			
		||||
## 1. Snapshot source bucket (optional but recommended)
 | 
			
		||||
 | 
			
		||||
If the MinIO deployment offers versioning or snapshots, take one before migrating. For non-versioned
 | 
			
		||||
deployments, capture an external backup (e.g., `mc mirror` to offline storage).
 | 
			
		||||
 | 
			
		||||
## 2. Dry-run the migrator
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
dotnet run --project tools/RustFsMigrator -- \
 | 
			
		||||
  --s3-bucket scanner-artifacts \
 | 
			
		||||
  --s3-endpoint http://stellaops-minio:9000 \
 | 
			
		||||
  --s3-access-key stellaops \
 | 
			
		||||
  --s3-secret-key dev-minio-secret \
 | 
			
		||||
  --rustfs-endpoint http://stellaops-rustfs:8080 \
 | 
			
		||||
  --rustfs-bucket scanner-artifacts \
 | 
			
		||||
  --prefix scanner/ \
 | 
			
		||||
  --dry-run
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The dry-run enumerates keys and reports the object count without writing to RustFS. Use this to
 | 
			
		||||
estimate migration time.
 | 
			
		||||
 | 
			
		||||
## 3. Execute migration
 | 
			
		||||
 | 
			
		||||
Remove the `--dry-run` flag to copy data. Optional flags:
 | 
			
		||||
 | 
			
		||||
- `--immutable` – mark all migrated objects as immutable (`X-RustFS-Immutable`).
 | 
			
		||||
- `--retain-days 365` – request retention; the value is given in days and sent to RustFS in seconds via the `X-RustFS-Retain-Seconds` header.
 | 
			
		||||
- `--rustfs-api-key-header` / `--rustfs-api-key` – provide auth headers when RustFS is protected.
 | 
			
		||||
 | 
			
		||||
The tool streams each object from S3 and performs an idempotent `PUT` to RustFS preserving the key
 | 
			
		||||
structure (e.g., `scanner/layers/<sha256>/sbom.cdx.json.zst`).
 | 
			
		||||
 | 
			
		||||
## 4. Verify sample objects
 | 
			
		||||
 | 
			
		||||
Pick a handful of SBOM digests and confirm:
 | 
			
		||||
 | 
			
		||||
1. `GET /api/v1/buckets/<bucket>/objects/<key>` returns the expected payload (size + SHA-256).
 | 
			
		||||
2. Scanner WebService configured with `scanner.artifactStore.driver = "rustfs"` can fetch the same
 | 
			
		||||
   artefacts (Smoke test: `GET /api/v1/scanner/sboms/<digest>?format=cdx-json`).
 | 
			
		||||
 | 
			
		||||
## 5. Switch Scanner hosts
 | 
			
		||||
 | 
			
		||||
Update configuration (Helm/Compose/environment) to set:
 | 
			
		||||
 | 
			
		||||
```yaml
 | 
			
		||||
scanner:
 | 
			
		||||
  artifactStore:
 | 
			
		||||
    driver: rustfs
 | 
			
		||||
    endpoint: http://stellaops-rustfs:8080
 | 
			
		||||
    bucket: scanner-artifacts
 | 
			
		||||
    timeoutSeconds: 30
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Redeploy Scanner WebService and Worker. Monitor logs for `RustFS` upload/download messages and
 | 
			
		||||
Prometheus scrape (`rustfs_requests_total`).
 | 
			
		||||
 | 
			
		||||
## 6. Cleanup legacy MinIO (optional)
 | 
			
		||||
 | 
			
		||||
After a complete migration and validation period, decommission the MinIO bucket or repurpose it for
 | 
			
		||||
other components (Concelier still supports S3). Ensure backups reference RustFS snapshots going
 | 
			
		||||
forward.
 | 
			
		||||
 | 
			
		||||
## Troubleshooting
 | 
			
		||||
 | 
			
		||||
- **Uploads fail (HTTP 4xx/5xx):** Check RustFS logs and confirm API key headers. Re-run the migrator
 | 
			
		||||
  for the affected keys.
 | 
			
		||||
- **Missing objects post-cutover:** Re-run the migrator with the specific `--prefix`. The tool is
 | 
			
		||||
  idempotent and safely overwrites existing objects.
 | 
			
		||||
- **Performance tuning:** Run multiple instances of the migrator with disjoint prefixes if needed; the
 | 
			
		||||
  RustFS API is stateless and supports parallel PUTs.
 | 
			
		||||
		Reference in New Issue
	
	Block a user