Compare commits

...

3 Commits

Author SHA1 Message Date
96d52884e8 Add Policy DSL Validator, Schema Exporter, and Simulation Smoke tools
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implemented PolicyDslValidator with command-line options for strict mode and JSON output.
- Created PolicySchemaExporter to generate JSON schemas for policy-related models.
- Developed PolicySimulationSmoke tool to validate policy simulations against expected outcomes.
- Added project files and necessary dependencies for each tool.
- Ensured proper error handling and usage instructions across tools.
2025-10-27 08:00:11 +02:00
651b8e0fa3 feat: Add new projects to solution and implement contract testing documentation
- Added "StellaOps.Policy.Engine", "StellaOps.Cartographer", and "StellaOps.SbomService" projects to the StellaOps solution.
- Created AGENTS.md to outline the Contract Testing Guild Charter, detailing mission, scope, and definition of done.
- Established TASKS.md for the Contract Testing Task Board, outlining tasks for Sprint 62 and Sprint 63 related to mock servers and replay testing.
2025-10-27 07:57:55 +02:00
1e41ba7ffa feat: Implement NotifyPanelComponent with unit tests and mock API service
- Added NotifyPanelComponent for managing notification channels and rules.
- Implemented reactive forms for channel and rule management.
- Created unit tests for NotifyPanelComponent to validate functionality.
- Developed MockNotifyApiService to simulate API interactions for testing.
- Added mock data for channels, rules, and deliveries to facilitate testing.
- Introduced RuntimeEventFactoryTests to ensure correct event creation with build ID.
2025-10-25 19:11:38 +03:00
1032 changed files with 67903 additions and 5715 deletions

View File

@@ -1,40 +1,40 @@
# .gitea/workflows/build-test-deploy.yml
# Unified CI/CD workflow for git.stella-ops.org (Feedser monorepo)
name: Build Test Deploy
on:
push:
branches: [ main ]
paths:
- 'src/**'
- 'docs/**'
- 'scripts/**'
- 'Directory.Build.props'
- 'Directory.Build.targets'
- 'global.json'
- '.gitea/workflows/**'
pull_request:
branches: [ main, develop ]
paths:
- 'src/**'
- 'docs/**'
- 'scripts/**'
- '.gitea/workflows/**'
workflow_dispatch:
inputs:
force_deploy:
description: 'Ignore branch checks and run the deploy stage'
required: false
default: 'false'
type: boolean
env:
DOTNET_VERSION: '10.0.100-rc.1.25451.107'
BUILD_CONFIGURATION: Release
CI_CACHE_ROOT: /data/.cache/stella-ops/feedser
RUNNER_TOOL_CACHE: /toolcache
# .gitea/workflows/build-test-deploy.yml
# Unified CI/CD workflow for git.stella-ops.org (Feedser monorepo)
name: Build Test Deploy
on:
push:
branches: [ main ]
paths:
- 'src/**'
- 'docs/**'
- 'scripts/**'
- 'Directory.Build.props'
- 'Directory.Build.targets'
- 'global.json'
- '.gitea/workflows/**'
pull_request:
branches: [ main, develop ]
paths:
- 'src/**'
- 'docs/**'
- 'scripts/**'
- '.gitea/workflows/**'
workflow_dispatch:
inputs:
force_deploy:
description: 'Ignore branch checks and run the deploy stage'
required: false
default: 'false'
type: boolean
env:
DOTNET_VERSION: '10.0.100-rc.1.25451.107'
BUILD_CONFIGURATION: Release
CI_CACHE_ROOT: /data/.cache/stella-ops/feedser
RUNNER_TOOL_CACHE: /toolcache
jobs:
profile-validation:
runs-on: ubuntu-22.04
@@ -58,33 +58,60 @@ jobs:
PUBLISH_DIR: ${{ github.workspace }}/artifacts/publish/webservice
AUTHORITY_PUBLISH_DIR: ${{ github.workspace }}/artifacts/publish/authority
TEST_RESULTS_DIR: ${{ github.workspace }}/artifacts/test-results
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup .NET ${{ env.DOTNET_VERSION }}
uses: actions/setup-dotnet@v4
with:
dotnet-version: ${{ env.DOTNET_VERSION }}
include-prerelease: true
- name: Restore dependencies
run: dotnet restore src/StellaOps.Feedser.sln
- name: Build solution (warnings as errors)
run: dotnet build src/StellaOps.Feedser.sln --configuration $BUILD_CONFIGURATION --no-restore -warnaserror
- name: Run unit and integration tests
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Validate NuGet restore source ordering
run: python3 ops/devops/validate_restore_sources.py
- name: Setup .NET ${{ env.DOTNET_VERSION }}
uses: actions/setup-dotnet@v4
with:
dotnet-version: ${{ env.DOTNET_VERSION }}
include-prerelease: true
- name: Restore Concelier solution
run: dotnet restore src/StellaOps.Concelier.sln
- name: Build Concelier solution (warnings as errors)
run: dotnet build src/StellaOps.Concelier.sln --configuration $BUILD_CONFIGURATION --no-restore -warnaserror
- name: Run Concelier unit and integration tests
run: |
mkdir -p "$TEST_RESULTS_DIR"
dotnet test src/StellaOps.Feedser.sln \
dotnet test src/StellaOps.Concelier.sln \
--configuration $BUILD_CONFIGURATION \
--no-build \
--logger "trx;LogFileName=stellaops-feedser-tests.trx" \
--logger "trx;LogFileName=stellaops-concelier-tests.trx" \
--results-directory "$TEST_RESULTS_DIR"
- name: Lint policy DSL samples
run: dotnet run --project tools/PolicyDslValidator/PolicyDslValidator.csproj -- --strict docs/examples/policies/*.yaml
- name: Run policy simulation smoke tests (first pass)
run: dotnet run --project tools/PolicySimulationSmoke/PolicySimulationSmoke.csproj -- --scenario-root samples/policy/simulations --output artifacts/policy-simulations/run1
- name: Verify policy simulation determinism
run: |
dotnet run --project tools/PolicySimulationSmoke/PolicySimulationSmoke.csproj -- --scenario-root samples/policy/simulations --output artifacts/policy-simulations/run2
diff -u \
artifacts/policy-simulations/run1/policy-simulation-summary.json \
artifacts/policy-simulations/run2/policy-simulation-summary.json
- name: Upload policy simulation artifacts
uses: actions/upload-artifact@v4
with:
name: policy-simulation-diffs
path: artifacts/policy-simulations
if-no-files-found: error
retention-days: 14
- name: Run release tooling tests
run: python ops/devops/release/test_verify_release.py
- name: Build scanner language analyzer projects
run: |
dotnet restore src/StellaOps.sln
@@ -118,11 +145,11 @@ jobs:
CAPTURED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
dotnet run \
--project bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj \
--project src/StellaOps.Bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj \
--configuration $BUILD_CONFIGURATION \
-- \
--repo-root . \
--baseline bench/Scanner.Analyzers/baseline.csv \
--baseline src/StellaOps.Bench/Scanner.Analyzers/baseline.csv \
--out "$PERF_OUTPUT_DIR/latest.csv" \
--json "$PERF_OUTPUT_DIR/report.json" \
--prom "$PERF_OUTPUT_DIR/metrics.prom" \
@@ -269,7 +296,7 @@ PY
path: ${{ env.AUTHORITY_PUBLISH_DIR }}
if-no-files-found: error
retention-days: 7
- name: Upload test results
if: always()
uses: actions/upload-artifact@v4
@@ -297,27 +324,27 @@ PY
env:
DOCS_OUTPUT_DIR: ${{ github.workspace }}/artifacts/docs-site
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install documentation dependencies
run: |
python -m pip install --upgrade pip
python -m pip install markdown pygments
- name: Render documentation bundle
run: |
python scripts/render_docs.py --source docs --output "$DOCS_OUTPUT_DIR" --clean
- name: Upload documentation artifact
uses: actions/upload-artifact@v4
with:
name: feedser-docs-site
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install documentation dependencies
run: |
python -m pip install --upgrade pip
python -m pip install markdown pygments
- name: Render documentation bundle
run: |
python scripts/render_docs.py --source docs --output "$DOCS_OUTPUT_DIR" --clean
- name: Upload documentation artifact
uses: actions/upload-artifact@v4
with:
name: feedser-docs-site
path: ${{ env.DOCS_OUTPUT_DIR }}
if-no-files-found: error
retention-days: 7
@@ -326,7 +353,7 @@ PY
runs-on: ubuntu-22.04
needs: build-test
env:
BENCH_DIR: bench/Scanner.Analyzers
BENCH_DIR: src/StellaOps.Bench/Scanner.Analyzers
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -412,163 +439,163 @@ PY
needs.scanner-perf.result == 'success' &&
(
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch'
)
environment: staging
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: |
scripts
.gitea/workflows
sparse-checkout-cone-mode: true
- name: Check if deployment should proceed
id: check-deploy
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
if [ "${{ github.event.inputs.force_deploy }}" = "true" ]; then
echo "should-deploy=true" >> $GITHUB_OUTPUT
echo "✅ Manual deployment requested"
else
echo "should-deploy=false" >> $GITHUB_OUTPUT
echo " Manual dispatch without force_deploy=true — skipping"
fi
elif [ "${{ github.ref }}" = "refs/heads/main" ]; then
echo "should-deploy=true" >> $GITHUB_OUTPUT
echo "✅ Deploying latest main branch build"
else
echo "should-deploy=false" >> $GITHUB_OUTPUT
echo " Deployment restricted to main branch"
fi
- name: Resolve deployment credentials
id: params
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
missing=()
host="${{ secrets.STAGING_DEPLOYMENT_HOST }}"
if [ -z "$host" ]; then host="${{ vars.STAGING_DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then host="${{ secrets.DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then host="${{ vars.DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then missing+=("STAGING_DEPLOYMENT_HOST"); fi
user="${{ secrets.STAGING_DEPLOYMENT_USERNAME }}"
if [ -z "$user" ]; then user="${{ vars.STAGING_DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then user="${{ secrets.DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then user="${{ vars.DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then missing+=("STAGING_DEPLOYMENT_USERNAME"); fi
path="${{ secrets.STAGING_DEPLOYMENT_PATH }}"
if [ -z "$path" ]; then path="${{ vars.STAGING_DEPLOYMENT_PATH }}"; fi
docs_path="${{ secrets.STAGING_DOCS_PATH }}"
if [ -z "$docs_path" ]; then docs_path="${{ vars.STAGING_DOCS_PATH }}"; fi
key="${{ secrets.STAGING_DEPLOYMENT_KEY }}"
if [ -z "$key" ]; then key="${{ secrets.DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then key="${{ vars.STAGING_DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then key="${{ vars.DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then missing+=("STAGING_DEPLOYMENT_KEY"); fi
if [ ${#missing[@]} -gt 0 ]; then
echo "❌ Missing deployment configuration: ${missing[*]}"
exit 1
fi
key_file="$RUNNER_TEMP/staging_deploy_key"
printf '%s\n' "$key" > "$key_file"
chmod 600 "$key_file"
echo "host=$host" >> $GITHUB_OUTPUT
echo "user=$user" >> $GITHUB_OUTPUT
echo "path=$path" >> $GITHUB_OUTPUT
echo "docs-path=$docs_path" >> $GITHUB_OUTPUT
echo "key-file=$key_file" >> $GITHUB_OUTPUT
- name: Download service artifact
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs.path != ''
uses: actions/download-artifact@v4
with:
name: feedser-publish
path: artifacts/service
- name: Download documentation artifact
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs['docs-path'] != ''
uses: actions/download-artifact@v4
with:
name: feedser-docs-site
path: artifacts/docs
- name: Install rsync
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
if command -v rsync >/dev/null 2>&1; then
exit 0
fi
CACHE_DIR="${CI_CACHE_ROOT:-/tmp}/apt"
mkdir -p "$CACHE_DIR"
KEY="rsync-$(lsb_release -rs 2>/dev/null || echo unknown)"
DEB_DIR="$CACHE_DIR/$KEY"
mkdir -p "$DEB_DIR"
if ls "$DEB_DIR"/rsync*.deb >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends "$DEB_DIR"/libpopt0*.deb "$DEB_DIR"/rsync*.deb
else
apt-get update
apt-get download rsync libpopt0
mv rsync*.deb libpopt0*.deb "$DEB_DIR"/
dpkg -i "$DEB_DIR"/libpopt0*.deb "$DEB_DIR"/rsync*.deb || apt-get install -f -y
fi
- name: Deploy service bundle
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs.path != ''
env:
HOST: ${{ steps.params.outputs.host }}
USER: ${{ steps.params.outputs.user }}
TARGET: ${{ steps.params.outputs.path }}
KEY_FILE: ${{ steps.params.outputs['key-file'] }}
run: |
SERVICE_DIR="artifacts/service/feedser-publish"
if [ ! -d "$SERVICE_DIR" ]; then
echo "❌ Service artifact directory missing ($SERVICE_DIR)"
exit 1
fi
echo "🚀 Deploying Feedser web service to $HOST:$TARGET"
rsync -az --delete \
-e "ssh -i $KEY_FILE -o StrictHostKeyChecking=no" \
"$SERVICE_DIR"/ \
"$USER@$HOST:$TARGET/"
- name: Deploy documentation bundle
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs['docs-path'] != ''
env:
HOST: ${{ steps.params.outputs.host }}
USER: ${{ steps.params.outputs.user }}
DOCS_TARGET: ${{ steps.params.outputs['docs-path'] }}
KEY_FILE: ${{ steps.params.outputs['key-file'] }}
run: |
DOCS_DIR="artifacts/docs/feedser-docs-site"
if [ ! -d "$DOCS_DIR" ]; then
echo "❌ Documentation artifact directory missing ($DOCS_DIR)"
exit 1
fi
echo "📚 Deploying documentation bundle to $HOST:$DOCS_TARGET"
rsync -az --delete \
-e "ssh -i $KEY_FILE -o StrictHostKeyChecking=no" \
"$DOCS_DIR"/ \
"$USER@$HOST:$DOCS_TARGET/"
- name: Deployment summary
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
echo "✅ Deployment completed"
echo " Host: ${{ steps.params.outputs.host }}"
echo " Service path: ${{ steps.params.outputs.path || '(skipped)' }}"
echo " Docs path: ${{ steps.params.outputs['docs-path'] || '(skipped)' }}"
github.event_name == 'workflow_dispatch'
)
environment: staging
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: |
scripts
.gitea/workflows
sparse-checkout-cone-mode: true
- name: Check if deployment should proceed
id: check-deploy
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
if [ "${{ github.event.inputs.force_deploy }}" = "true" ]; then
echo "should-deploy=true" >> $GITHUB_OUTPUT
echo "✅ Manual deployment requested"
else
echo "should-deploy=false" >> $GITHUB_OUTPUT
echo " Manual dispatch without force_deploy=true — skipping"
fi
elif [ "${{ github.ref }}" = "refs/heads/main" ]; then
echo "should-deploy=true" >> $GITHUB_OUTPUT
echo "✅ Deploying latest main branch build"
else
echo "should-deploy=false" >> $GITHUB_OUTPUT
echo " Deployment restricted to main branch"
fi
- name: Resolve deployment credentials
id: params
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
missing=()
host="${{ secrets.STAGING_DEPLOYMENT_HOST }}"
if [ -z "$host" ]; then host="${{ vars.STAGING_DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then host="${{ secrets.DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then host="${{ vars.DEPLOYMENT_HOST }}"; fi
if [ -z "$host" ]; then missing+=("STAGING_DEPLOYMENT_HOST"); fi
user="${{ secrets.STAGING_DEPLOYMENT_USERNAME }}"
if [ -z "$user" ]; then user="${{ vars.STAGING_DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then user="${{ secrets.DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then user="${{ vars.DEPLOYMENT_USERNAME }}"; fi
if [ -z "$user" ]; then missing+=("STAGING_DEPLOYMENT_USERNAME"); fi
path="${{ secrets.STAGING_DEPLOYMENT_PATH }}"
if [ -z "$path" ]; then path="${{ vars.STAGING_DEPLOYMENT_PATH }}"; fi
docs_path="${{ secrets.STAGING_DOCS_PATH }}"
if [ -z "$docs_path" ]; then docs_path="${{ vars.STAGING_DOCS_PATH }}"; fi
key="${{ secrets.STAGING_DEPLOYMENT_KEY }}"
if [ -z "$key" ]; then key="${{ secrets.DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then key="${{ vars.STAGING_DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then key="${{ vars.DEPLOYMENT_KEY }}"; fi
if [ -z "$key" ]; then missing+=("STAGING_DEPLOYMENT_KEY"); fi
if [ ${#missing[@]} -gt 0 ]; then
echo "❌ Missing deployment configuration: ${missing[*]}"
exit 1
fi
key_file="$RUNNER_TEMP/staging_deploy_key"
printf '%s\n' "$key" > "$key_file"
chmod 600 "$key_file"
echo "host=$host" >> $GITHUB_OUTPUT
echo "user=$user" >> $GITHUB_OUTPUT
echo "path=$path" >> $GITHUB_OUTPUT
echo "docs-path=$docs_path" >> $GITHUB_OUTPUT
echo "key-file=$key_file" >> $GITHUB_OUTPUT
- name: Download service artifact
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs.path != ''
uses: actions/download-artifact@v4
with:
name: feedser-publish
path: artifacts/service
- name: Download documentation artifact
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs['docs-path'] != ''
uses: actions/download-artifact@v4
with:
name: feedser-docs-site
path: artifacts/docs
- name: Install rsync
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
if command -v rsync >/dev/null 2>&1; then
exit 0
fi
CACHE_DIR="${CI_CACHE_ROOT:-/tmp}/apt"
mkdir -p "$CACHE_DIR"
KEY="rsync-$(lsb_release -rs 2>/dev/null || echo unknown)"
DEB_DIR="$CACHE_DIR/$KEY"
mkdir -p "$DEB_DIR"
if ls "$DEB_DIR"/rsync*.deb >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends "$DEB_DIR"/libpopt0*.deb "$DEB_DIR"/rsync*.deb
else
apt-get update
apt-get download rsync libpopt0
mv rsync*.deb libpopt0*.deb "$DEB_DIR"/
dpkg -i "$DEB_DIR"/libpopt0*.deb "$DEB_DIR"/rsync*.deb || apt-get install -f -y
fi
- name: Deploy service bundle
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs.path != ''
env:
HOST: ${{ steps.params.outputs.host }}
USER: ${{ steps.params.outputs.user }}
TARGET: ${{ steps.params.outputs.path }}
KEY_FILE: ${{ steps.params.outputs['key-file'] }}
run: |
SERVICE_DIR="artifacts/service/feedser-publish"
if [ ! -d "$SERVICE_DIR" ]; then
echo "❌ Service artifact directory missing ($SERVICE_DIR)"
exit 1
fi
echo "🚀 Deploying Feedser web service to $HOST:$TARGET"
rsync -az --delete \
-e "ssh -i $KEY_FILE -o StrictHostKeyChecking=no" \
"$SERVICE_DIR"/ \
"$USER@$HOST:$TARGET/"
- name: Deploy documentation bundle
if: steps.check-deploy.outputs.should-deploy == 'true' && steps.params.outputs['docs-path'] != ''
env:
HOST: ${{ steps.params.outputs.host }}
USER: ${{ steps.params.outputs.user }}
DOCS_TARGET: ${{ steps.params.outputs['docs-path'] }}
KEY_FILE: ${{ steps.params.outputs['key-file'] }}
run: |
DOCS_DIR="artifacts/docs/feedser-docs-site"
if [ ! -d "$DOCS_DIR" ]; then
echo "❌ Documentation artifact directory missing ($DOCS_DIR)"
exit 1
fi
echo "📚 Deploying documentation bundle to $HOST:$DOCS_TARGET"
rsync -az --delete \
-e "ssh -i $KEY_FILE -o StrictHostKeyChecking=no" \
"$DOCS_DIR"/ \
"$USER@$HOST:$DOCS_TARGET/"
- name: Deployment summary
if: steps.check-deploy.outputs.should-deploy == 'true'
run: |
echo "✅ Deployment completed"
echo " Host: ${{ steps.params.outputs.host }}"
echo " Service path: ${{ steps.params.outputs.path || '(skipped)' }}"
echo " Docs path: ${{ steps.params.outputs['docs-path'] || '(skipped)' }}"
- name: Deployment skipped summary
if: steps.check-deploy.outputs.should-deploy != 'true'
run: |

View File

@@ -44,6 +44,9 @@ jobs:
with:
fetch-depth: 0
- name: Validate NuGet restore source ordering
run: python3 ops/devops/validate_restore_sources.py
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -58,6 +61,75 @@ jobs:
dotnet-version: ${{ env.DOTNET_VERSION }}
include-prerelease: true
- name: Install cross-arch objcopy tooling
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends binutils-aarch64-linux-gnu
- name: Publish Python analyzer plug-in
run: |
set -euo pipefail
dotnet publish src/StellaOps.Scanner.Analyzers.Lang.Python/StellaOps.Scanner.Analyzers.Lang.Python.csproj \
--configuration Release \
--output out/analyzers/python \
--no-self-contained
mkdir -p plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python
cp out/analyzers/python/StellaOps.Scanner.Analyzers.Lang.Python.dll plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/
if [ -f out/analyzers/python/StellaOps.Scanner.Analyzers.Lang.Python.pdb ]; then
cp out/analyzers/python/StellaOps.Scanner.Analyzers.Lang.Python.pdb plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/
fi
- name: Run Python analyzer smoke checks
run: |
dotnet run \
--project tools/LanguageAnalyzerSmoke/LanguageAnalyzerSmoke.csproj \
--configuration Release \
-- \
--repo-root .
# Note: this step enforces DEVOPS-REL-14-004 by signing the restart-only Python plug-in.
# Ensure COSIGN_KEY_REF or COSIGN_IDENTITY_TOKEN is configured, otherwise the job will fail.
- name: Sign Python analyzer artefacts
env:
COSIGN_KEY_REF: ${{ secrets.COSIGN_KEY_REF }}
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
COSIGN_IDENTITY_TOKEN: ${{ secrets.COSIGN_IDENTITY_TOKEN }}
run: |
set -euo pipefail
if [[ -z "${COSIGN_KEY_REF:-}" && -z "${COSIGN_IDENTITY_TOKEN:-}" ]]; then
echo "::error::COSIGN_KEY_REF or COSIGN_IDENTITY_TOKEN must be provided to sign analyzer artefacts." >&2
exit 1
fi
export COSIGN_PASSWORD="${COSIGN_PASSWORD:-}"
export COSIGN_EXPERIMENTAL=1
PLUGIN_DIR="plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python"
ARTIFACTS=(
"StellaOps.Scanner.Analyzers.Lang.Python.dll"
"manifest.json"
)
for artifact in "${ARTIFACTS[@]}"; do
FILE="${PLUGIN_DIR}/${artifact}"
if [[ ! -f "${FILE}" ]]; then
echo "::error::Missing analyzer artefact ${FILE}" >&2
exit 1
fi
sha256sum "${FILE}" | awk '{print $1}' > "${FILE}.sha256"
SIGN_ARGS=(--yes "${FILE}")
if [[ -n "${COSIGN_KEY_REF:-}" ]]; then
SIGN_ARGS=(--key "${COSIGN_KEY_REF}" "${SIGN_ARGS[@]}")
fi
if [[ -n "${COSIGN_IDENTITY_TOKEN:-}" ]]; then
SIGN_ARGS=(--identity-token "${COSIGN_IDENTITY_TOKEN}" "${SIGN_ARGS[@]}")
fi
cosign sign-blob "${SIGN_ARGS[@]}" > "${FILE}.sig"
done
- name: Install Helm 3.16.0
run: |
curl -fsSL https://get.helm.sh/helm-v3.16.0-linux-amd64.tar.gz -o /tmp/helm.tgz
@@ -124,6 +196,7 @@ jobs:
mkdir -p out/release
- name: Build release bundle
# NOTE (DEVOPS-REL-17-004): build_release.py now fails if out/release/debug is missing
env:
COSIGN_KEY_REF: ${{ secrets.COSIGN_KEY_REF }}
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
@@ -141,6 +214,10 @@ jobs:
--git-sha "${{ github.sha }}" \
"${EXTRA_ARGS[@]}"
- name: Verify release artefacts
run: |
python ops/devops/release/verify_release.py --release-dir out/release
- name: Upload release artefacts
uses: actions/upload-artifact@v4
with:

6
.gitignore vendored
View File

@@ -28,4 +28,8 @@ seed-data/cert-bund/**/*.sha256
out/offline-kit/web/**/*
src/StellaOps.Web/node_modules/**/*
src/StellaOps.Web/.angular/**/*
src/StellaOps.Web/.angular/**/*
**/node_modules/**/*
node_modules
node_modules
node_modules

View File

@@ -96,16 +96,17 @@ You main characteristics:
Maintains this agent framework, templates, and perdirectory guides; assists parallelization and reviews.
## 5.2) Work-in-parallel rules (important)
## 5.2) Work rules (important)
- **Directory ownership**: Each agent works **only inside its module directory**. Crossmodule edits require a brief handshake in issues/PR description.
- **Scoping**: Use each modules `AGENTS.md` and `TASKS.md` to plan; autonomous agents must read `src/AGENTS.md` and the module docs before acting.
- **Determinism**: Sort keys, normalize timestamps to UTC ISO8601, avoid nondeterministic data in exports and tests.
- **Status tracking**: Update your modules `TASKS.md` as you progress (TODO → DOING → DONE/BLOCKED). Before starting of actual work - ensure you have set the task to DOING. When complete or stop update the status in corresponding TASKS.md and in ./SPRINTS.md and ./EXECPLAN.md file.
- **Status tracking**: Update your modules `TASKS.md` as you progress (TODO → DOING → DONE/BLOCKED). Before starting of actual work - ensure you have set the task to DOING. When complete or stop update the status in corresponding TASKS.md and in ./SPRINTS.md file.
- **Coordination**: In case task is discovered as blocked on other team or task, according TASKS.md files that dependency is on needs to be changed by adding new tasks describing the requirement. the current task must be updated as completed. In case task changes, scope or requirements or rules - other documentations needs be updated accordingly.
- **Sprint synchronization**: When given task seek for relevant directory to work on from SPRINTS.md. Confirm its state on both SPRINTS.md and EXECPLAN.md and the relevant TASKS.md file. Always check the AGENTS.md in the relevant TASKS.md directory.
- **Sprint synchronization**: When given task seek for relevant directory to work on from SPRINTS.md. Confirm its state on both SPRINTS.md and the relevant TASKS.md file. Always check the AGENTS.md in the relevant TASKS.md directory.
- **Tests**: Add/extend fixtures and unit tests per change; never regress determinism or precedence.
- **Test layout**: Use module-specific projects in `StellaOps.Concelier.<Component>.Tests`; shared fixtures/harnesses live in `StellaOps.Concelier.Testing`.
- **Execution autonomous**: In case you need to continue with more than one options just continue sequentially, unless the continue requires design decision.
- **EPIC reference**: In case task (on relevant TASKS.md file) references in direct or indirect way an EPIC - then seek for epic document like ./EPIC_*.md. There will be more information how to implement the task.
---

12
Directory.Build.props Normal file
View File

@@ -0,0 +1,12 @@
<Project>
<PropertyGroup>
<StellaOpsRepoRoot Condition="'$(StellaOpsRepoRoot)' == ''">$([System.IO.Path]::GetFullPath('$(MSBuildThisFileDirectory)'))</StellaOpsRepoRoot>
<StellaOpsLocalNuGetSource Condition="'$(StellaOpsLocalNuGetSource)' == ''">$([System.IO.Path]::GetFullPath('$(StellaOpsRepoRoot)local-nuget/'))</StellaOpsLocalNuGetSource>
<StellaOpsDotNetPublicSource Condition="'$(StellaOpsDotNetPublicSource)' == ''">https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-public/nuget/v3/index.json</StellaOpsDotNetPublicSource>
<StellaOpsNuGetOrgSource Condition="'$(StellaOpsNuGetOrgSource)' == ''">https://api.nuget.org/v3/index.json</StellaOpsNuGetOrgSource>
<_StellaOpsDefaultRestoreSources>$(StellaOpsLocalNuGetSource);$(StellaOpsDotNetPublicSource);$(StellaOpsNuGetOrgSource)</_StellaOpsDefaultRestoreSources>
<_StellaOpsOriginalRestoreSources Condition="'$(_StellaOpsOriginalRestoreSources)' == ''">$(RestoreSources)</_StellaOpsOriginalRestoreSources>
<RestoreSources Condition="'$(_StellaOpsOriginalRestoreSources)' == ''">$(_StellaOpsDefaultRestoreSources)</RestoreSources>
<RestoreSources Condition="'$(_StellaOpsOriginalRestoreSources)' != ''">$(_StellaOpsDefaultRestoreSources);$(_StellaOpsOriginalRestoreSources)</RestoreSources>
</PropertyGroup>
</Project>

524
EPIC_1.md Normal file
View File

@@ -0,0 +1,524 @@
Heres the full writeup you can drop into your repo as the canonical reference for Epic 1. Its written in clean productdoc style so its safe to check in as Markdown. No fluff, just everything you need to build, test, and police it.
---
# Epic 1: AggregationOnly Contract (AOC) Enforcement
> Short name: **AOC enforcement**
> Services touched: **Conseiller (advisory ingestion), Excitator (VEX ingestion), Web API, Workers, Policy Engine, CLI, Console, Authority**
> Data stores: **MongoDB primary, optional Redis/NATS for jobs**
---
## 1) What it is
**AggregationOnly Contract (AOC)** is the ingestion covenant for StellaOps. It defines a hard boundary between **collection** and **interpretation**:
* **Ingestion (Conseiller/Excitator)** only **collects** data and preserves it as immutable raw facts with provenance. It does not decide, merge, normalize, prioritize, or assign severity. It may compute **links** that help future joins (aliases, PURLs, CPEs), but never derived judgments.
* **Policy evaluation** is the only place where merges, deduplication, consensus, severity computation, and status folding are allowed. Its reproducible and traceable.
The AOC establishes:
* **Immutable raw stores**: `advisory_raw` and `vex_raw` documents with full provenance, signatures, checksums, and upstream identifiers.
* **Linksets**: machinegenerated join hints (aliases, PURLs, CPEs, CVE/GHSA IDs) that never change the underlying source content.
* **Invariants**: a strict set of “never do this in ingestion” rules enforced by schema validation, runtime guards, and CI checks.
* **AOC Verifier**: a buildtime and runtime watchdog that blocks noncompliant code and data writes.
This epic delivers: schemas, guards, error codes, APIs, tests, migration, docs, and ops dashboards to make AOC nonnegotiable across the platform.
---
## 2) Why
AOC makes results **auditable, deterministic, and organizationspecific**. Source vendors disagree; your policies decide. By removing hidden heuristics from ingestion, we avoid unexplainable risk changes, race conditions between collectors, and vendor bias. Policytime evaluation yields reproducible deltas with complete “why” traces.
---
## 3) How it should work (deep details)
### 3.1 Core invariants
The following must be true for every write to `advisory_raw` and `vex_raw` and for every ingestion pipeline:
1. **No severity in ingestion**
* Forbidden fields: `severity`, `cvss`, `cvss_vector`, `effective_status`, `effective_range`, `merged_from`, `consensus_provider`, `reachability`, `asset_criticality`, `risk_score`.
2. **No merges or dedups in ingestion**
* No combining two upstream advisories into one. No picking a single truth when multiple VEX statements exist.
3. **Provenance is mandatory**
* Every raw doc includes `provenance` and `signature/checksum`.
4. **Idempotent upserts**
* Same upstream document (by `upstream_id` + `source` + `content_hash`) must not create duplicates.
5. **Appendonly versioning**
* Revisions from the source create new immutable documents with `supersedes` pointers; no inplace edits.
6. **Linkset only**
* Ingestion can compute and store a `linkset` for join performance. Linkset does not alter or infer severity/status.
7. **Policytime only for effective findings**
* Only the Policy Engine can write `effective_finding_*` materializations.
8. **Schema safety**
* Strict JSON schema validation at DB level; unknown fields reject writes.
9. **Clock discipline**
* Timestamps are UTC, monotonic within a batch; collectors record `fetched_at` and `received_at`.
### 3.2 Data model
#### 3.2.1 `advisory_raw` (Mongo collection)
```json
{
"_id": "advisory_raw:osv:GHSA-xxxx-....:v3",
"source": {
"vendor": "OSV",
"stream": "github",
"api": "https://api.osv.dev/v1/.../GHSA-...",
"collector_version": "conseiller/1.7.3"
},
"upstream": {
"upstream_id": "GHSA-xxxx-....",
"document_version": "2024-09-01T12:13:14Z",
"fetched_at": "2025-01-02T03:04:05Z",
"received_at": "2025-01-02T03:04:06Z",
"content_hash": "sha256:...",
"signature": {
"present": true,
"format": "dsse",
"key_id": "rekor:.../key/abc",
"sig": "base64..."
}
},
"content": {
"format": "OSV",
"spec_version": "1.6",
"raw": { /* full upstream JSON, unmodified */ }
},
"identifiers": {
"cve": ["CVE-2023-1234"],
"ghsa": ["GHSA-xxxx-...."],
"aliases": ["CVE-2023-1234", "GHSA-xxxx-...."]
},
"linkset": {
"purls": ["pkg:npm/lodash@4.17.21", "pkg:maven/..."],
"cpes": ["cpe:2.3:a:..."],
"references": [
{"type":"advisory","url":"https://..."},
{"type":"fix","url":"https://..."}
],
"reconciled_from": ["content.raw.affected.ranges", "content.raw.pkg"]
},
"supersedes": "advisory_raw:osv:GHSA-xxxx-....:v2",
"tenant": "default"
}
```
> Note: No `severity`, no `cvss`, no `effective_*`. If the upstream payload includes CVSS, it stays inside `content.raw` and is not promoted or normalized at ingestion.
#### 3.2.2 `vex_raw` (Mongo collection)
```json
{
"_id": "vex_raw:vendorX:doc-123:v4",
"source": {
"vendor": "VendorX",
"stream": "vex",
"api": "https://.../vex/doc-123",
"collector_version": "excitator/0.9.2"
},
"upstream": {
"upstream_id": "doc-123",
"document_version": "2025-01-15T08:09:10Z",
"fetched_at": "2025-01-16T01:02:03Z",
"received_at": "2025-01-16T01:02:03Z",
"content_hash": "sha256:...",
"signature": { "present": true, "format": "cms", "key_id": "kid:...", "sig": "..." }
},
"content": {
"format": "CycloneDX-VEX", // or "CSAF-VEX"
"spec_version": "1.5",
"raw": { /* full upstream VEX */ }
},
"identifiers": {
"statements": [
{
"advisory_ids": ["CVE-2023-1234","GHSA-..."],
"component_purls": ["pkg:deb/openssl@1.1.1"],
"status": "not_affected",
"justification": "component_not_present"
}
]
},
"linkset": {
"purls": ["pkg:deb/openssl@1.1.1"],
"cves": ["CVE-2023-1234"],
"ghsas": ["GHSA-..."]
},
"supersedes": "vex_raw:vendorX:doc-123:v3",
"tenant": "default"
}
```
> VEX statuses remain as raw facts. No crossprovider consensus is computed here.
### 3.3 Database validation
* MongoDB JSON Schema validators on both collections:
* Reject forbidden fields at the top level.
* Enforce presence of `source`, `upstream`, `content`, `linkset`, `tenant`.
* Enforce string formats for timestamps and hashes.
### 3.4 Write paths
1. **Collector fetches upstream**
* Normalize transport (gzip/json), compute `content_hash`, verify signature if available.
2. **Build raw doc**
* Populate `source`, `upstream`, `content.raw`, `identifiers`, `linkset`.
3. **Idempotent upsert**
* Lookup by `(source.vendor, upstream.upstream_id, upstream.content_hash)`. If exists, skip; if new content hash, insert new revision with `supersedes`.
4. **AOC guard**
* Runtime interceptor inspects write payload; if any forbidden field detected, reject with `ERR_AOC_001`.
5. **Metrics**
* Emit `ingestion_write_ok` or `ingestion_write_reject` with reason code.
### 3.5 Read paths (ingestion scope)
* Allow only listing, getting raw docs, and searching by linkset. No endpoints return “effective findings” from ingestion services.
### 3.6 Error codes
| Code | Meaning | HTTP |
| ------------- | ------------------------------------------------------------ | ---- |
| `ERR_AOC_001` | Forbidden field present (severity/consensus/normalized data) | 400 |
| `ERR_AOC_002` | Merge attempt detected (multiple upstreams fused) | 400 |
| `ERR_AOC_003` | Idempotency violation (duplicate without supersedes) | 409 |
| `ERR_AOC_004` | Missing provenance fields | 422 |
| `ERR_AOC_005` | Signature/checksum mismatch | 422 |
| `ERR_AOC_006` | Attempt to write effective findings from ingestion context | 403 |
| `ERR_AOC_007` | Unknown toplevel fields (schema violation) | 400 |
### 3.7 AOC Verifier
A buildtime and runtime safeguard:
* **Static checks (CI)**
* Block imports of `*.Policy*` or `*.Merge*` from ingestion modules.
* AST lint rule: any write to `advisory_raw` or `vex_raw` setting a forbidden key fails the build.
* **Runtime checks**
* Repository layer interceptor inspects documents before insert/update; rejects forbidden fields and multisource merges.
* **Drift detection job**
* Nightly job scans newest N docs; if violation found, pages ops and blocks new pipeline runs.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.8 Indexing strategy
* `advisory_raw`:
* `{ "identifiers.cve": 1 }`, `{ "identifiers.ghsa": 1 }`, `{ "linkset.purls": 1 }`, `{ "source.vendor": 1, "upstream.upstream_id": 1, "upstream.content_hash": 1 }` (unique), `{ "tenant": 1 }`.
* `vex_raw`:
* `{ "identifiers.statements.advisory_ids": 1 }`, `{ "linkset.purls": 1 }`, `{ "source.vendor": 1, "upstream.upstream_id": 1, "upstream.content_hash": 1 }` (unique), `{ "tenant": 1 }`.
### 3.9 Interaction with Policy Engine
* Policy Engine pulls raw docs by identifiers/linksets and computes:
* Dedup/merge per policy
* Consensus for VEX statements
* Severity normalization and risk scoring
* Writes **only** to `effective_finding_{policyId}` collections.
A dedicated write guard refuses `effective_finding_*` writes from any caller that isnt the Policy Engine service identity.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.10 Migration plan
1. **Freeze ingestion writes** except raw passthrough.
2. **Backfill**: copy existing ingestion collections to `_backup_*`.
3. **Strip forbidden fields** from raw copies, move them into a temporary `advisory_view_legacy` used only by Policy Engine for parity.
4. **Enable DB schema validators**.
5. **Run collectors** in dryrun; ensure only allowed keys land.
6. **Switch Policy Engine** to pull exclusively from `*_raw` and to compute everything else.
7. **Delete legacy normalized fields** in ingestion codepaths.
8. **Enable runtime guards** and CI lint.
### 3.11 Observability
* Metrics:
* `aoc_violation_total{code=...}`, `ingestion_write_total{result=ok|reject}`, `ingestion_signature_verified_total{result=ok|fail}`, `ingestion_latency_seconds`, `advisory_revision_count`.
* Tracing: span `ingest.fetch`, `ingest.transform`, `ingest.write`, `aoc.guard`.
* Logs: include `tenant`, `source.vendor`, `upstream.upstream_id`, `content_hash`, `correlation_id`.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.12 Security and tenancy
* Every raw doc carries a `tenant` field.
* Authority enforces `advisory:write` and `vex:write` scopes for ingestion endpoints.
* Crosstenant reads/writes are blocked by default.
* Secrets never logged; signatures verified with pinned trust stores.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.13 CLI and Console behavior
* **CLI**
* `stella sources ingest --dry-run` prints wouldwrite payload and explicitly shows that no severity/status fields are present.
* `stella aoc verify` scans last K documents and reports violations with exit codes.
* **Console**
* Sources dashboard shows AOC pass/fail per job, most recent violation codes, and a drilldown to the offending document.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 4) API surface (ingestion scope)
### 4.1 Conseiller (Advisories)
* `POST /ingest/advisory`
* Body: raw upstream advisory with metadata; server constructs document, not the client.
* Rejections: `ERR_AOC_00x` per table above.
* `GET /advisories/raw/{id}`
* `GET /advisories/raw?cve=CVE-...&purl=pkg:...&tenant=...`
* `GET /advisories/raw/{id}/provenance`
* `POST /aoc/verify?since=ISO8601` returns summary stats and first N violations.
### 4.2 Excitator (VEX)
* `POST /ingest/vex`
* `GET /vex/raw/{id}`
* `GET /vex/raw?advisory_id=CVE-...&purl=pkg:...`
* `POST /aoc/verify?since=ISO8601`
All endpoints require `tenant` scope and appropriate `:write` or `:read`.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 5) Example: endtoend flow
1. Collector fetches `GHSA-1234` from OSV.
2. Build `advisory_raw` with linkset PURLs.
3. Insert; AOC guard approves.
4. Policy Engine later evaluates SBOM `S-42` under `policy P-7`, reads raw advisory and any VEX raw docs, computes effective findings, and writes to `effective_finding_P-7`.
5. CLI `stella aoc verify --since 24h` returns `0` violations.
---
## 6) Implementation tasks
Breakdown by component with exact work items. Each section ends with the imposed sentence you requested.
### 6.1 Conseiller (advisory ingestion, WS + Worker)
* [ ] Add Mongo JSON schema validation for `advisory_raw`.
* [ ] Implement repository layer with **write interceptors** that reject forbidden fields.
* [ ] Compute `linkset` from upstream using deterministic mappers.
* [ ] Enforce idempotency by unique index on `(source.vendor, upstream.upstream_id, upstream.content_hash, tenant)`.
* [ ] Remove any normalization pipelines; relocate to Policy Engine.
* [ ] Add `POST /ingest/advisory` and `GET /advisories/raw*` endpoints with Authority scope checks.
* [ ] Emit observability metrics and traces.
* [ ] Unit tests: schema violations, idempotency, supersedes chain, forbidden fields.
* [ ] Integration tests: large batch ingest, linkset correctness against golden fixtures.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.2 Excitator (VEX ingestion, WS + Worker)
* [ ] Add Mongo JSON schema validation for `vex_raw`.
* [ ] Implement repository layer guard identical to Conseiller.
* [ ] Deterministic `linkset` extraction for advisory IDs and PURLs.
* [ ] Endpoints `POST /ingest/vex`, `GET /vex/raw*` with scopes.
* [ ] Remove any consensus or folding logic; leave VEX statements as raw.
* [ ] Tests as per Conseiller, with rich fixtures for CycloneDXVEX and CSAF.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.3 Web API shared library
* [ ] Define `AOCForbiddenKeys` and export for both services.
* [ ] Provide `AOCWriteGuard` middleware and `AOCError` types.
* [ ] Provide `ProvenanceBuilder` utility.
* [ ] Provide `SignatureVerifier` and `Checksum` helpers.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.4 Policy Engine
* [ ] Block any import/use from ingestion modules by lint rule.
* [ ] Add hard gate on `effective_finding_*` writes that verifies caller identity is Policy Engine.
* [ ] Update readers to pull fields only from `content.raw`, `identifiers`, `linkset`, not any legacy normalized fields.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.5 Authority
* [ ] Introduce scopes: `advisory:write`, `advisory:read`, `vex:write`, `vex:read`, `aoc:verify`.
* [ ] Add `tenant` claim propagation to ingestion services.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.6 CLI
* [ ] `stella sources ingest --dry-run` and `stella aoc verify` commands.
* [ ] Exit codes mapping to `ERR_AOC_00x`.
* [ ] JSON output schema including violation list.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.7 Console
* [ ] Sources dashboard tiles: last run, AOC violations, top error codes.
* [ ] Drilldown page rendering offending doc with highlight on forbidden keys.
* [ ] “Verify last 24h” action calling the AOC Verifier endpoint.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.8 CI/CD
* [ ] AST linter to forbid writes of banned keys in ingestion modules.
* [ ] Unit test coverage gates for AOC guard code.
* [ ] Pipeline stage that runs `stella aoc verify` against seeded DB snapshots.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Documentation changes (create/update these files)
1. **`/docs/ingestion/aggregation-only-contract.md`**
* Add: philosophy, invariants, schemas for `advisory_raw`/`vex_raw`, error codes, linkset definition, examples, idempotency rules, supersedes, API references, migration steps, observability, security.
2. **`/docs/architecture/overview.md`**
* Update system diagram to show AOC boundary and raw stores; add sequence diagram: fetch → guard → raw insert → policy evaluation.
3. **`/docs/architecture/policy-engine.md`**
* Clarify ingestion boundary; list inputs consumed from raw; note that any severity/consensus is policytime only.
4. **`/docs/ui/console.md`**
* Add Sources dashboard section: AOC tiles and violation drilldown.
5. **`/docs/cli/cli-reference.md`**
* Add `stella aoc verify` and `stella sources ingest --dry-run` usage and exit codes.
6. **`/docs/observability/observability.md`**
* Document new metrics, traces, logs keys for AOC.
7. **`/docs/security/authority-scopes.md`**
* Add new scopes and tenancy enforcement for ingestion endpoints.
8. **`/docs/deploy/containers.md`**
* Note DB validators must be enabled; environment flags for AOC guards; readonly user for verify endpoint.
Each file should include a “Compliance checklist” subsection for AOC.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 8) Acceptance criteria
* DB validators are active and reject writes with forbidden fields.
* AOC runtime guards log and reject violations with correct error codes.
* CI linter prevents shipping code that writes forbidden keys to raw stores.
* Ingestion of known fixture sets results in zero normalized fields outside `content.raw`.
* Policy Engine is the only writer of `effective_finding_*` materializations.
* CLI `stella aoc verify` returns success on clean datasets and nonzero on seeded violations.
* Console shows AOC status and violation drilldowns.
---
## 9) Risks and mitigations
* **Collector drift**: new upstream fields tempt developers to normalize.
* Mitigation: CI linter + guard + schema validators; require RFC to extend linkset.
* **Performance impact**: extra validation on write.
* Mitigation: guard is O(number of keys) and schema check is bounded; indexes sized appropriately.
* **Migration complexity**: moving legacy normalized fields out.
* Mitigation: temporary `advisory_view_legacy` for parity; stepwise cutover.
* **Tenant leakage**: missing tenant on write.
* Mitigation: schema requires `tenant`; middleware injects and asserts.
---
## 10) Test plan
* **Unit tests**
* Guard rejects forbidden keys; idempotency; supersedes chain; provenance required.
* Signature verification paths: good, bad, absent.
* **Property tests**
* Randomized upstream docs never produce forbidden keys at top level.
* **Integration tests**
* Batch ingest of 50k advisories: throughput, zero violations.
* Mixed VEX sources with contradictory statements remain separate in raw.
* **Contract tests**
* Policy Engine refuses to run without raw inputs; writes only to `effective_finding_*`.
* **Endtoend**
* Seed SBOM + advisories + VEX; ensure findings are identical pre/post migration.
---
## 11) Developer checklists
**Definition of Ready**
* Upstream spec reference attached.
* Linkset mappers defined.
* Example fixtures added.
**Definition of Done**
* DB validators deployed and tested.
* Runtime guards enabled.
* CI linter merged and enforced.
* Docs updated (files in section 7).
* Metrics visible on dashboard.
* CLI verify passes.
---
## 12) Glossary
* **Raw document**: exact upstream content plus provenance, with join hints.
* **Linkset**: PURLs/CPEs/IDs extracted to accelerate joins later.
* **Supersedes**: pointer from a newer raw doc to the previous revision of the same upstream doc.
* **Policytime**: evaluation phase where merges, consensus, and severity are computed.
* **AOC**: AggregationOnly Contract.
---
### Final imposed reminder
**Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

566
EPIC_10.md Normal file
View File

@@ -0,0 +1,566 @@
Fine. Heres your next brick of “maximum documentation.” Try not to drop it on your foot.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 10: Export Center (JSON, Trivy DB, Mirror bundles)
**Short name:** `Export Center`
**Primary service:** `exporter`
**Surfaces:** Console (Web UI), CLI, Web API
**Touches:** Conseiller (Feedser), Excitator (Vexer), SBOM Service, Policy Engine, VEX Consensus Lens, Findings Ledger, Authority (authN/Z), Object Storage, Orchestrator, Signing/Attestation, Telemetry
**AOC ground rule:** Conseiller and Excitator aggregate but never merge. The Export Center serializes evidence and policy results; it does not rewrite or “improve” your data in-flight.
---
## 1) What it is
The Export Center is the unified system for packaging StellaOps data into portable, verifiable bundles:
* **Canonical JSON exports** for advisories, VEX, SBOMs, findings, and policy-evaluation snapshots.
* **Trivy DB compatible bundles** so downstream scanners can use Stellas curated vulnerability knowledge without custom integrations.
* **Mirror bundles** for airgapped or disconnected environments containing raw evidence, normalized records, indexes, and policy snapshots, with provenance, signatures, and optional encryption.
It centralizes format adapters, compliance and provenance, scheduling, versioning, and distribution (download, OCI push, file share). Every export is reproducible by `run_id`, cryptographically signed, and audittraceable back to source artifacts.
---
## 2) Why (brief)
Teams need to move results into other scanners, CI systems, and isolated networks without babysitting ten different scripts. The Export Center gives a single, policyaware, verifiable exit point that doesnt surprise compliance or set your ops team on fire.
---
## 3) How it should work (maximum detail)
### 3.1 Capabilities
* **Profiles**
* `json:raw` canonical JSON lines for advisories, VEX, SBOMs.
* `json:policy` adds policy evaluation results (allow/deny/risk, rationales).
* `trivy:db` Trivy DBcompatible export (core vulnerability DB).
* `trivy:java-db` optional Java ecosystem DB export if enabled.
* `mirror:full` airgap bundle with raw + normalized + indexes + policy + VEX consensus + SBOMs.
* `mirror:delta` incremental bundle based on a previous export manifest.
* **Scope selectors**
* By time window, product, ecosystem, package, image digest, repository, tenant, tag.
* Include/exclude SBOMs, advisories, VEX, findings, policy snapshots.
* **Policy awareness**
* Optionally bake a **policy snapshot** into the bundle, including the policy version, inputs and decision traces.
* Can export **raw** evidence only (AOC) or **raw + evaluated**.
* **Provenance & signing**
* Generate attestation metadata with source URIs, artifact hashes, schema versions, export profile and filters.
* Sign manifests and bundle using configured KMS; support detached and inbundle signatures.
* **Distribution**
* Download via Console or API (streaming).
* Push to OCI registry as an artifact/image with annotations.
* Write to object storage prefix for batch pickup.
* **Scheduling & automation**
* Oneoff, cron, and eventtriggered exports (e.g., after a “VEX consensus recompute” run).
* Retention policies and automatic expiry for old bundles.
* **Observability**
* Export metrics, throughput, size, downstream pull counts (when pushed to registry with report backs).
### 3.2 Architecture
* **exporter (service)**
* Orchestrates export runs, gathers records from the ledger and indexes, calls format adapters, writes bundles, signs, and publishes distribution tasks.
* Stateless workers pull “export.jobs” from the orchestrator, stream data, and write manifests into object storage.
* **format adapters**
* Pluggable adapters:
* `adapter-json`: canonicalized JSONL writers per record type.
* `adapter-trivy`: translates Stellas normalized advisory model into Trivy DB format (and Java DB if enabled).
* `adapter-mirror`: constructs a portable filesystem/OCI layout with manifests, indexes, and data subtrees.
* **manifesting & provenance**
* `export.json` (export manifest): profile, filters, counts, schema versions, content checksums, start/finish times, inputs list (artifact ids + hashes).
* `provenance.json`: full chain back to source runs and artifacts; linked signatures.
* **distribution engines**
* `dist-http` streaming for downloads.
* `dist-oci` layer writer with descriptors and annotations.
* `dist-objstore` for staging to buckets.
* **security**
* Tenant scoping, RBAC on export creation and retrieval.
* Optional inbundle encryption (age/AESGCM) with key wrapping using KMS.
### 3.3 Data model (selected tables)
* `export_profiles`
* `id`, `name`, `kind` (`json|trivy|mirror`), `variant` (`raw|policy|db|java-db|full|delta`), `config_json`, `created_by`, `created_at`.
* `export_runs`
* `id`, `profile_id`, `trigger` (`manual|schedule|event|api`), `state`, `filters_json`, `policy_version`, `started_at`, `finished_at`, `artifact_uri`, `size_bytes`, `sig_uri`, `provenance_uri`, `tenant_id`, `error_class`, `error_message`.
* `export_inputs`
* Link table between `export_runs` and source artifacts. `export_run_id`, `artifact_id`, `hash`.
* `export_distributions`
* `export_run_id`, `type` (`download|oci|objstore`), `target`, `state`, `meta_json`, `created_at`, `updated_at`.
### 3.4 Canonical file layouts
**JSON profile output**
Directory layout under export root:
```
/export/
export.json # export manifest
provenance.json # provenance and source artifact chain
signatures/
export.sig # detached signature for export.json
advisories/
normalized.jsonl # normalized advisory records
vex/
normalized.jsonl # normalized VEX records
sboms/
<subject>/sbom.spdx.json # one per subject; SPDX JSON or CycloneDX JSON
findings/
policy_evaluated.jsonl # if profile=json:policy
```
**Trivy DB profile output**
Produced as a compressed artifact:
```
/export/
export.json
provenance.json
trivy/
db.bundle # Trivy DB compatible archive
java-db.bundle # optional Java DB bundle (if enabled)
signatures/
trivy-db.sig
```
Notes:
* The adapter keeps an internal mapping of Stella normalized fields to Trivys expected fields and namespaces. The mapping is versioned to track upstream schema evolution.
**Mirror bundle (filesystem layout)**
```
/mirror/
manifest.yaml # high-level bundle manifest (profile, filters, counts)
export.json # same as JSON profile
provenance.json
indexes/
advisories.index.json # quick lookups (pkg -> advisory ids)
vex.index.json
sbom.index.json
advisories/raw/...
advisories/normalized/...
vex/raw/...
vex/normalized/...
sboms/raw/...
sboms/graph/...
policy/
snapshot.yaml # full policy set used for evaluation
evaluations.jsonl # decision outputs if requested
consensus/
vex_consensus.jsonl
signatures/
manifest.sig
export.sig
README.md
```
**Mirror bundle (OCI layout)**
Following standard OCI image artifact layout with annotations (`org.opencontainers.artifact.description`, `com.stella.export.profile`, `com.stella.export.filters`), and manifest lists for large bundles.
### 3.5 Export workflow
1. **Plan**
Exporter computes candidates based on filters. For `mirror:delta`, compares with previous manifest to compute changes.
2. **Stream & write**
Records are streamed from the Findings Ledger and stores. Writers are forwardonly, emitting JSONL or adapterspecific structures, chunked for memory safety.
3. **Sign & attest**
Once all content hashes are stable, Export Center writes `export.json`, `provenance.json`, and signs using KMS. Optional encryption wraps data layers.
4. **Distribute**
Depending on profile settings, it exposes a download URL, pushes an OCI artifact, or writes to object storage. Distribution metadata is recorded.
5. **Audit & retention**
Run, manifest, and signatures are immutable. Retention policy prunes large data folders after N days with manifests retained longer.
### 3.6 APIs
```
POST /export/profiles
GET /export/profiles?kind=&variant=
GET /export/profiles/{id}
PATCH /export/profiles/{id}
DELETE /export/profiles/{id}
POST /export/runs
GET /export/runs?state=&profile_id=&from=&to=&tenant_id=
GET /export/runs/{run_id}
POST /export/runs/{run_id}/cancel
GET /export/runs/{run_id}/download # presigned URL or streaming
POST /export/runs/{run_id}/distribute # { "type":"oci|objstore", "target":"..." }
GET /export/runs/{run_id}/manifest # export.json
GET /export/runs/{run_id}/provenance
GET /export/runs/{run_id}/signatures
GET /export/metrics/overview
WS /export/streams/updates
```
**Request example (create run):**
```json
{
"profile_id": "prof_json_policy",
"filters": {
"time_from": "2025-01-01T00:00:00Z",
"time_to": "2025-01-31T23:59:59Z",
"ecosystems": ["pypi", "npm"],
"include": ["advisories", "vex", "sboms", "findings"]
},
"distribution": { "type": "download" },
"policy_version": "pol-v1.8.2"
}
```
### 3.7 CLI
```
stella export profiles list --kind mirror
stella export profiles create --file profile.yaml
stella export run create --profile prof_json_policy --from 2025-01-01 --to 2025-01-31 --include advisories,vex,sboms --download
stella export run status <run-id>
stella export run cancel <run-id>
stella export run get <run-id> --manifest
stella export run download <run-id> --out export-jan.tar.zst
stella export distribute <run-id> --oci ghcr.io/org/stella-export:jan2025
stella export verify <bundle> --manifest export.json --sig signatures/export.sig
```
Exit codes: `0` ok, `2` bad args, `4` not found, `5` denied, `6` integrity failed, `8` export error.
### 3.8 RBAC & security
* Roles:
* `Export.Viewer`: list runs, download completed bundles.
* `Export.Operator`: create runs, cancel, schedule, distribute.
* `Export.Admin`: manage profiles, set retention, manage signing keys.
* Tenancy:
* Every run and artifact scoped by tenant; crosstenant export is disallowed.
* Secrets:
* KMS references for signing and encryption; never store private keys.
* PII & redaction:
* Exporters must not include secrets or credentials. Redaction rules enforced at writer level with schemabased allowlists.
### 3.9 Observability
* Metrics:
* `export_bytes_total{profile,tenant}`
* `export_records_total{type}`
* `export_duration_ms{profile}`
* `export_failures_total{error_class}`
* `export_distributions_total{type}`
* `export_verify_fail_total`
* Traces:
* Spans per export phase: plan, stream, write, sign, distribute; baggage includes `export_run_id`.
* Logs:
* Structured JSON with counts, sizes, hashes, and redaction hints.
### 3.10 Performance targets
* Stream throughput ≥ 25k records/sec per worker for JSONL writing with compression.
* Trivy bundle generation for 1M advisories ≤ 8 minutes on a standard worker.
* Mirror delta export for 5% change set ≤ 2 minutes.
### 3.11 Edge cases & behavior
* **Schema drift**: adapter refuses to emit unknown fields without explicit mapping; run fails with `error_class=schema_mismatch`.
* **Oversized bundles**: automatic sharding by time or content type; mirror OCI uses multimanifest indices.
* **Missing policy snapshot**: profile `json:policy` will autopull latest version unless pinned; pinning is recommended for reproducibility.
* **Duplicate evidence**: writers dedupe by artifact hash and advisory id; AOC forbids merging.
* **Airgap encryption**: if `encrypt=true`, mirror bundles require recipient public key material; decryption tooling documented.
---
## 4) Implementation plan
### 4.1 Modules
* **New service:** `src/StellaOps.ExportCenter`
* `api/` REST + WS
* `planner/` scope planning, delta computation, sampling
* `adapters/`
* `json/` canonical writers
* `trivy/` db builders and schema mapping
* `mirror/` fs/OCI builders, sharding, delta logic
* `signing/` KMS clients, attestations
* `dist/` download streaming, OCI push, object storage writer
* `state/` repositories, migrations
* `metrics/`, `audit/`, `security/`
* **SDK/CLI**
* `src/StellaOps.Cli` subcommands with streaming download and verification.
* **Console**
* `console/apps/export-center/` pages:
* Overview, Profiles, Runs, Run Detail, Distributions, Settings.
* Components: `ExportPlanPreview`, `ProfileEditor`, `RunDiff`, `VerifyPanel`.
* **Existing services updates**
* Findings Ledger: new paginated streaming endpoints for advisories/VEX/SBOM/findings by filters and snapshots.
* Policy Engine: “policy snapshot” exportable endpoint.
* VEX Lens: consensus snapshot endpoint.
### 4.2 Packaging & deployment
* Containers:
* `stella/exporter:<ver>`
* `stella/exporter-worker:<ver>` (optional separated worker pool)
* Helm:
* WS replicas, concurrent export limits, default compression (`zstd`), default retention, KMS settings, OCI creds secrets.
* DB migrations:
* Create `export_*` tables with proper indices (tenant, time, state).
### 4.3 Rollout
* Phase 1: JSON (raw, policy) and Mirror (full) as download only.
* Phase 2: Trivy DB adapters, OCI distribution.
* Phase 3: Mirror deltas, encryption, verification tooling, scheduling.
---
## 5) Documentation changes
Create/update the following docs; each page must end with the imposed rule statement.
1. `/docs/export-center/overview.md`
Purpose, profiles, supported targets, AOC alignment, security model.
2. `/docs/export-center/architecture.md`
Service components, adapters, manifests, signing, distribution flows.
3. `/docs/export-center/profiles.md`
Profile schemas, examples, versioning, compatibility notes.
4. `/docs/export-center/api.md`
All endpoints with request/response examples and error codes.
5. `/docs/export-center/cli.md`
Commands with examples, scripts for CI/CD, verification.
6. `/docs/export-center/mirror-bundles.md`
Filesystem and OCI layouts, delta exports, encryption, airgap import guide.
7. `/docs/export-center/trivy-adapter.md`
Field mapping, supported ecosystems, compatibility and test matrix.
8. `/docs/export-center/provenance-and-signing.md`
Manifest format, attestation details, verification process.
9. `/docs/operations/export-runbook.md`
Common failures, recovery, tuning, capacity planning.
10. `/docs/security/export-hardening.md`
RBAC, tenant isolation, secret redaction, encryption keys.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 6) Engineering tasks
### Backend: exporter
* [ ] Migrations for `export_profiles`, `export_runs`, `export_inputs`, `export_distributions`.
* [ ] Planner to resolve filters to iterators over advisory/VEX/SBOM/findings datasets with pagination.
* [ ] JSON adapter: canonical JSONL writers with schema normalization and redaction enforcement.
* [ ] Policy snapshot embedder: pull policy version and evaluation outputs when requested.
* [ ] Trivy adapter: implement schema mapping, writer, integrity validation, and compatibility version flag.
* [ ] Mirror adapter: filesystem and OCI writer, sharding, manifest creation, delta computation.
* [ ] Signing/attestation using KMS; detached and embedded options.
* [ ] Distribution engines: download streaming, OCI push, object storage staging.
* [ ] API layer with async export run handling and WebSocket updates.
* [ ] Rate limit and concurrency controls per tenant/profile.
* [ ] Audit logging for all create/cancel/distribute/verify actions.
### Integrations
* [ ] Findings Ledger streaming APIs for each content type.
* [ ] Policy Engine endpoint to return deterministic policy snapshot and decision set by run.
* [ ] VEX Lens endpoint to expose consensus snapshot.
### Console
* [ ] Profiles CRUD with validation and test preview.
* [ ] Create Run wizard with live count estimates and storage footprint prediction.
* [ ] Runs list + detail page with manifest, provenance, and quick verify.
* [ ] Download and distribution actions with progress and logs.
* [ ] Verification panel to check signatures and hashes clientside.
### CLI
* [ ] `stella export` commands as defined; include `verify` that checks signatures and hashes.
* [ ] Autoresume of interrupted downloads with range requests.
* [ ] Friendly error messages for schema mismatch and verification failure.
### Observability
* [ ] Metrics and traces per §3.9; dashboards for throughput, durations, failures, sizes.
* [ ] Alerts for export failure rate and verify failures.
### Security & RBAC
* [ ] Enforce tenant scoping at query level; fuzz tests for leakage.
* [ ] Role matrix checks on each API; Console hides forbidden actions.
* [ ] Encryption test vectors and key rotation procedure.
### Docs
* [ ] Author all files in §5 with concrete examples and diagrams.
* [ ] Crosslink from Orchestrator, Policy Studio, VEX Lens, and SBOM docs to the Export Center pages.
* [ ] Append imposed rule line at the end of each page.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Implementation notes per profile
### 7.1 JSON: raw
* **Content:** advisories.normalized, vex.normalized, sboms (SPDX/CycloneDX), optional findings.raw.
* **Normalization:** enforce Stella field casing, timestamps in RFC3339, unicode NFC.
* **Compression:** `.jsonl.zst` per file to allow split/merge.
### 7.2 JSON: policy
* **Adds:** `policy_snapshot` and `findings.policy_evaluated.jsonl` with decision, rule_id, rationale, inputs fingerprint.
* **Determinism:** include `policy_version` and `inputs_hash`; replays should match exactly.
### 7.3 Trivy DB
* **Mapping:**
* Package name, ecosystem, version ranges, CVE/CWE/aliases, severity mapping, vendor statements, fixed versions.
* Ensure namespace mapping avoids collisions (e.g., distro vs ecosystem).
* **Compatibility:** version flag in manifest; adapter throws if upstream schema version not supported.
* **Validation:** run postbuild sanity checks (counts, required indexes).
### 7.4 Mirror: full/delta
* **Full:** everything needed to spin up an isolated readonly Stella mirror with local search.
* **Delta:** compute changed/added/removed advisory ids, VEX statements, SBOM subjects; update indexes and manifest with `base_export_id`.
* **Encryption:** if enabled, encrypt data subtrees; leave `manifest.yaml` unencrypted for discoverability unless `strict=true`.
---
## 8) Acceptance criteria
* Operators can create an export with filters, download it, verify signatures, and trace back to source artifacts via provenance.
* Trivy adapter produces a bundle consumable by Trivy without custom flags (basic validation in CI).
* Mirror bundle imports successfully in an airgapped “mirrorreader” sample app and serves queries from indexes.
* Policyaware exports include deterministic decisions matching the specified `policy_version`.
* RBAC prevents a Viewer from creating or canceling exports; tenancy prevents crosstenant leakage.
* Metrics and dashboards show perprofile throughput and error classes; alerts trigger on failure spikes.
* Export retries are idempotent and do not duplicate content; hashes stable across reruns with identical inputs.
---
## 9) Risks & mitigations
* **Upstream schema changes break Trivy export.**
Mitigation: versioned adapter with compatibility gate; integration tests against known fixtures; fail early with clear remediation.
* **Bundle size explosion.**
Mitigation: zstd compression, sharding, delta exports, contentaddressed storage reuse for mirror OCI.
* **Data leakage via exports.**
Mitigation: strict allowlist schemas, redaction filters, RBAC, tenant scoping tests, encryption for mirror.
* **Nondeterministic policy outputs.**
Mitigation: pin policy version and inputs hash; snapshot embedded rules; deterministic evaluation mode only.
* **Slow downloads/timeouts.**
Mitigation: streaming with range support, resumable downloads, CDN integration if needed.
---
## 10) Test plan
* **Unit**
Schema normalization; Trivy mapping; mirror delta computation; manifest hashing; signing.
* **Integration**
Endtoend export with each profile; verify bundles; replay determinism of policy exports; OCI push/pull.
* **Compatibility**
Validate Trivy bundle against a matrix of versions; import mirror bundle into a reference reader and run queries.
* **Security**
Tenant isolation fuzzing; redaction checks; encryption roundtrip; signature verification with rotated keys.
* **Performance**
Large dataset generation; parallel writer stress; OCI multimanifest publishing; download resume under packet loss.
* **Chaos**
Kill exporter midwrite; ensure resume or clean failure without partial corrupt bundles.
---
## 11) Philosophy
* **Ports, not prisons.** Exports should free your data to move with integrity and context, not trap it in a proprietary maze.
* **Reproducible or it didnt happen.** Every bit derived from known inputs, signed, traceable.
* **Airgap is a firstclass citizen.** Mirror bundles are not an afterthought; theyre how serious orgs actually run.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

BIN
EPIC_11.md Normal file

Binary file not shown.

BIN
EPIC_12.md Normal file

Binary file not shown.

BIN
EPIC_13.md Normal file

Binary file not shown.

BIN
EPIC_14.md Normal file

Binary file not shown.

BIN
EPIC_15.md Normal file

Binary file not shown.

1
EPIC_16.md Normal file
View File

@@ -0,0 +1 @@
See `docs/airgap/EPIC_16_AIRGAP_MODE.md` for the full Epic 16 specification.

1
EPIC_17.md Normal file
View File

@@ -0,0 +1 @@
See `docs/api/EPIC_17_SDKS_OPENAPI.md` for the complete Epic 17 specification.

1
EPIC_18.md Normal file
View File

@@ -0,0 +1 @@
See `docs/risk/EPIC_18_RISK_PROFILES.md` for the complete Epic 18 specification.

1
EPIC_19.md Normal file
View File

@@ -0,0 +1 @@
See `docs/attestor/EPIC_19_ATTESTOR_CONSOLE.md` for the complete Epic 19 specification.

567
EPIC_2.md Normal file
View File

@@ -0,0 +1,567 @@
Fine. Heres the next epic, written so you can paste it straight into the repo without having to babysit me. Same structure as before, maximum detail, zero handwaving.
---
# Epic 2: Policy Engine & Policy Editor (VEX + Advisory Application Rules)
> Short name: **Policy Engine v2**
> Services touched: **Policy Engine, Web API, Console (Policy Editor), CLI, Conseiller, Excitator, SBOM Service, Authority, Workers/Scheduler**
> Data stores: **MongoDB (policies, runs, effective findings), optional Redis/NATS for jobs**
---
## 1) What it is
This epic delivers the **organizationspecific decision layer** for Stella. Ingestion is now AOCcompliant (Epic 1). That means advisories and VEX arrive as immutable raw facts. This epic builds the place where those facts become **effective findings** under policies you control.
Core deliverables:
* **Policy Engine**: deterministic evaluator that applies rule sets to inputs:
* Inputs: `advisory_raw`, `vex_raw`, SBOMs, optional telemetry hooks (reachability stubs), org metadata.
* Outputs: `effective_finding_{policyId}` materializations, with full explanation traces.
* **Policy Editor (Console + CLI)**: versioned policy authoring, simulation, review/approval workflow, and change diffs.
* **Rules DSL v1**: safe, declarative language for VEX application, advisory normalization, and risk scoring. No arbitrary code execution, no network calls.
* **Run Orchestrator**: incremental reevaluation when new raw facts or SBOM changes arrive; efficient partial updates.
The philosophy is boring on purpose: policy is a **pure function of inputs**. Same inputs and same policy yield the same outputs, every time, on every machine. If you want drama, watch reality TV, not your risk pipeline.
---
## 2) Why
* Vendors disagree, contexts differ, and your tolerance for risk is not universal.
* VEX means nothing until you decide **how** to apply it to **your** assets.
* Auditors care about the “why.” Youll need consistent, replayable answers, with traces.
* Security teams need **simulation** before rollouts, and **diffs** after.
---
## 3) How it should work (deep details)
### 3.1 Data model
#### 3.1.1 Policy documents (Mongo: `policies`)
```json
{
"_id": "policy:P-7:v3",
"policy_id": "P-7",
"version": 3,
"name": "Default Org Policy",
"status": "approved", // draft | submitted | approved | archived
"owned_by": "team:sec-plat",
"valid_from": "2025-01-15T00:00:00Z",
"valid_to": null,
"dsl": {
"syntax": "stella-dsl@1",
"source": "rule-set text or compiled IR ref"
},
"metadata": {
"description": "Baseline scoring + VEX precedence",
"tags": ["baseline","vex","cvss"]
},
"provenance": {
"created_by": "user:ali",
"created_at": "2025-01-15T08:00:00Z",
"submitted_by": "user:kay",
"approved_by": "user:root",
"approval_at": "2025-01-16T10:00:00Z",
"checksum": "sha256:..."
},
"tenant": "default"
}
```
Constraints:
* `status=approved` is required to run in production.
* Version increments are appendonly. Old versions remain runnable for replay.
#### 3.1.2 Policy runs (Mongo: `policy_runs`)
```json
{
"_id": "run:P-7:2025-02-20T12:34:56Z:abcd",
"policy_id": "P-7",
"policy_version": 3,
"inputs": {
"sbom_set": ["sbom:S-42"],
"advisory_cursor": "2025-02-20T00:00:00Z",
"vex_cursor": "2025-02-20T00:00:00Z"
},
"mode": "incremental", // full | incremental | simulate
"stats": {
"components": 1742,
"advisories_considered": 9210,
"vex_considered": 1187,
"rules_fired": 68023,
"findings_out": 4321
},
"trace": {
"location": "blob://traces/run-.../index.json",
"sampling": "smart-10pct"
},
"status": "succeeded", // queued | running | failed | succeeded | canceled
"started_at": "2025-02-20T12:34:56Z",
"finished_at": "2025-02-20T12:35:41Z",
"tenant": "default"
}
```
#### 3.1.3 Effective findings (Mongo: `effective_finding_P-7`)
```json
{
"_id": "P-7:S-42:pkg:npm/lodash@4.17.21:CVE-2021-23337",
"policy_id": "P-7",
"policy_version": 3,
"sbom_id": "S-42",
"component_purl": "pkg:npm/lodash@4.17.21",
"advisory_ids": ["CVE-2021-23337", "GHSA-..."],
"status": "affected", // affected | not_affected | fixed | under_investigation | suppressed
"severity": {
"normalized": "High",
"score": 7.5,
"vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N",
"rationale": "cvss_base(OSV) + vendor_weighting + env_modifiers"
},
"rationale": [
{"rule":"vex.precedence","detail":"VendorX not_affected justified=component_not_present wins"},
{"rule":"advisory.cvss.normalization","detail":"mapped GHSA severity to CVSS 3.1 = 7.5"}
],
"references": {
"advisory_raw_ids": ["advisory_raw:osv:GHSA-...:v3"],
"vex_raw_ids": ["vex_raw:VendorX:doc-123:v4"]
},
"run_id": "run:P-7:2025-02-20T12:34:56Z:abcd",
"tenant": "default"
}
```
Write protection: only the **Policy Engine** service identity may write any `effective_finding_*` collection.
---
### 3.2 Rules DSL v1 (stelladsl@1)
**Design goals**
* Declarative, composable, deterministic.
* No loops, no network IO, no nondeterministic time.
* Policy authors see readable text; the engine compiles to a safe IR.
**Concepts**
* **WHEN** condition matches a tuple `(sbom_component, advisory, optional vex_statements)`
* **THEN** actions set `status`, compute `severity`, attach `rationale`, or `suppress` with reason.
* **Profiles** for severity and scoring; **Maps** for vendor weighting; **Guards** for VEX justification.
**Minigrammar (subset)**
```
policy "Default Org Policy" syntax "stella-dsl@1" {
profile severity {
map vendor_weight {
source "GHSA" => +0.5
source "OSV" => +0.0
source "VendorX" => -0.2
}
env base_cvss {
if env.runtime == "serverless" then -0.5
if env.exposure == "internal-only" then -1.0
}
}
rule vex_precedence {
when vex.any(status in ["not_affected","fixed"])
and vex.justification in ["component_not_present","vulnerable_code_not_present"]
then status := vex.status
because "VEX strong justification prevails";
}
rule advisory_to_cvss {
when advisory.source in ["GHSA","OSV"]
then severity := normalize_cvss(advisory)
because "Map vendor severity or CVSS vector";
}
rule reachability_soft_suppress {
when severity.normalized <= "Medium"
and telemetry.reachability == "none"
then status := "suppressed"
because "not reachable and low severity";
}
}
```
**Builtins** (nonexhaustive)
* `normalize_cvss(advisory)` maps GHSA/OSV/CSAF severity fields to CVSS v3.1 numbers when possible; otherwise vendortonumeric mapping table in policy.
* `vex.any(...)` tests across matching VEX statements for the same `(component, advisory)`.
* `telemetry.*` is an optional input namespace reserved for future reachability data; if absent, expressions evaluate to `unknown` (no effect).
**Determinism**
* Rules are evaluated in **stable order**: explicit `priority` attribute or lexical order.
* **Firstmatch** semantics for conflicting status unless `combine` is used.
* Severity computations are pure; numeric maps are part of policy document.
---
### 3.3 Evaluation model
1. **Selection**
* For each SBOM component PURL, find candidate advisories from `advisory_raw` via linkset PURLs or identifiers.
* For each pair `(component, advisory)`, load all matching VEX facts from `vex_raw`.
2. **Context assembly**
* Build an evaluation context from:
* `sbom_component`: PURL, licenses, relationships.
* `advisory`: source, identifiers, references, embedded vendor severity (kept in `content.raw`).
* `vex`: list of statements with status and justification.
* `env`: orgspecific env vars configured per policy run (e.g., exposure).
* Optional `telemetry` if available.
3. **Rule execution**
* Compile DSL to IR once per policy version; cache.
* Execute rules per tuple; record which rules fired and the order.
* If no rule sets status, default is `affected`.
* If no rule sets severity, default severity uses `normalize_cvss(advisory)` with vendor defaults.
4. **Materialization**
* Write to `effective_finding_{policyId}` with `rationale` chain and references to raw docs.
* Emit pertuple trace events; sample and store full traces per run.
5. **Incremental updates**
* A watch job observes new `advisory_raw` and `vex_raw` inserts and SBOM deltas.
* The orchestrator computes the affected tuples and reevaluates only those.
6. **Replay**
* Any `policy_run` is fully reproducible by `(policy_id, version, input set, cursors)`.
---
### 3.4 VEX application semantics
* **Precedence**: a `not_affected` with strong justification (`component_not_present`, `vulnerable_code_not_present`, `fix_not_required`) wins unless another rule explicitly overrides by environment context.
* **Scoping**: VEX statements often specify product/component scope. Matching uses PURL equivalence and version ranges extracted during ingestion linkset generation.
* **Conflicts**: If multiple VEX statements conflict, the default is **mostspecific scope wins** (component > product > vendor), then newest `document_version`. Policies can override with explicit rules.
* **Explainability**: Every VEXdriven decision records which statement IDs were considered and which one won.
---
### 3.5 Advisory normalization rules
* **Vendor severity mapping**: Map GHSA levels or CSAF producttree severities to CVSSlike numeric bands via policy maps.
* **CVSS vector use**: If a valid vector exists in `content.raw`, parse and compute; apply policy modifiers from `profile severity`.
* **Temporal/environment modifiers**: Optional reductions for network exposure, isolation, or compensating controls, all encoded in policy.
---
### 3.6 Performance and scale
* Partition evaluation by SBOM ID and hash ranges of PURLs.
* Preindex `advisory_raw.linkset.purls` and `vex_raw.linkset.purls` (already in Epic 1).
* Use streaming iterators; avoid loading entire SBOM or advisory sets into memory.
* Materialize only changed findings (diffaware writes).
* Target: 100k components, 1M advisories considered, 5 minutes incremental SLA on commodity hardware.
---
### 3.7 Error codes
| Code | Meaning | HTTP |
| ------------- | ----------------------------------------------------- | ---- |
| `ERR_POL_001` | Policy syntax error | 400 |
| `ERR_POL_002` | Policy not approved for run | 403 |
| `ERR_POL_003` | Missing inputs (SBOM/advisory/vex fetch failed) | 424 |
| `ERR_POL_004` | Determinism guard triggered (nonpure function usage) | 500 |
| `ERR_POL_005` | Write denied to effective findings (caller invalid) | 403 |
| `ERR_POL_006` | Run canceled or timed out | 408 |
---
### 3.8 Observability
* Metrics:
* `policy_compile_seconds`, `policy_run_seconds{mode=...}`, `rules_fired_total`, `findings_written_total`, `vex_overrides_total`, `simulate_diff_total{delta=up|down|unchanged}`.
* Tracing:
* Spans: `policy.compile`, `policy.select`, `policy.eval`, `policy.materialize`.
* Logs:
* Include `policy_id`, `version`, `run_id`, `sbom_id`, `component_purl`, `advisory_id`, `vex_count`, `rule_hits`.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
### 3.9 Security and tenancy
* Only users with `policy:write` can create/modify policies.
* `policy:approve` is a separate privileged role.
* Only Policy Engine service identity has `effective:write`.
* Tenancy is explicit on all documents and queries.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 4) API surface
### 4.1 Policy CRUD and lifecycle
* `POST /policies` create draft
* `GET /policies?status=...` list
* `GET /policies/{policyId}/versions/{v}` fetch
* `POST /policies/{policyId}/submit` move draft to submitted
* `POST /policies/{policyId}/approve` approve version
* `POST /policies/{policyId}/archive` archive version
### 4.2 Compilation and validation
* `POST /policies/{policyId}/versions/{v}/compile`
* Returns IR checksum, syntax diagnostics, rule stats.
### 4.3 Runs
* `POST /policies/{policyId}/runs` body: `{mode, sbom_set, advisory_cursor?, vex_cursor?, env?}`
* `GET /policies/{policyId}/runs/{runId}` status + stats
* `POST /policies/{policyId}/simulate` returns **diff** vs current approved version on a sample SBOM set.
### 4.4 Findings and explanations
* `GET /findings/{policyId}?sbom_id=S-42&status=affected&severity=High+Critical`
* `GET /findings/{policyId}/{findingId}/explain` returns ordered rule hits and linked raw IDs.
All endpoints require tenant scoping and appropriate `policy:*` or `findings:*` roles.
---
## 5) Console (Policy Editor) and CLI behavior
**Console**
* Monacostyle editor with DSL syntax highlighting, lint, quick docs.
* Sidebyside **Simulation** panel: show count of affected findings before/after.
* Approval workflow: submit, review comments, approve with rationale.
* Diffs: show rulewise changes and estimated impact.
* Readonly run viewer: heatmap of rules fired, top suppressions, VEX wins.
**CLI**
* `stella policy new --name "Default Org Policy"`
* `stella policy edit P-7` opens local editor -> `submit`
* `stella policy approve P-7 --version 3`
* `stella policy simulate P-7 --sbom S-42 --env exposure=internal-only`
* `stella findings ls --policy P-7 --sbom S-42 --status affected`
Exit codes map to `ERR_POL_*`.
---
## 6) Implementation tasks
### 6.1 Policy Engine service
* [ ] Implement DSL parser and IR compiler (`stella-dsl@1`).
* [ ] Build evaluator with stable ordering and firstmatch semantics.
* [ ] Implement selection joiners for SBOM↔advisory↔vex using linksets.
* [ ] Materialization writer with upsertonly semantics to `effective_finding_{policyId}`.
* [ ] Determinism guard (ban wallclock, network, and RNG during eval).
* [ ] Incremental orchestrator listening to advisory/vex/SBOM change streams.
* [ ] Trace emitter with rulehit sampling.
* [ ] Unit tests, property tests, golden fixtures; perf tests to target SLA.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.2 Web API
* [ ] Policy CRUD, compile, run, simulate, findings, explain endpoints.
* [ ] Pagination, filters, and tenant enforcement on all list endpoints.
* [ ] Error mapping to `ERR_POL_*`.
* [ ] Rate limits on simulate endpoints.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.3 Console (Policy Editor)
* [ ] Editor with DSL syntax highlighting and inline diagnostics.
* [ ] Simulation UI with pre/post counts and top deltas.
* [ ] Approval workflow UI with audit trail.
* [ ] Run viewer dashboards (rule heatmap, VEX wins, suppressions).
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.4 CLI
* [ ] New commands: `policy new|edit|submit|approve|simulate`, `findings ls|get`.
* [ ] Json/YAML output formats for CI consumption.
* [ ] Nonzero exits on syntax errors or simulation failures; map to `ERR_POL_*`.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.5 Conseiller & Excitator integration
* [ ] Provide search endpoints optimized for policy selection (batch by PURLs and IDs).
* [ ] Harden linkset extraction to maximize join recall.
* [ ] Add cursors for incremental selection windows per run.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.6 SBOM Service
* [ ] Ensure fast PURL index and component metadata projection for policy queries.
* [ ] Provide relationship graph API for future transitive logic.
* [ ] Emit change events on SBOM updates.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.7 Authority
* [ ] Define scopes: `policy:write`, `policy:approve`, `policy:run`, `findings:read`, `effective:write`.
* [ ] Issue service identity for Policy Engine with `effective:write` only.
* [ ] Enforce tenant claims at gateway.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 6.8 CI/CD
* [ ] Lint policy DSL in PRs; block invalid syntax.
* [ ] Run `simulate` against golden SBOMs to detect explosive deltas.
* [ ] Determinism CI: two runs with identical seeds produce identical outputs.
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Documentation changes (create/update these files)
1. **`/docs/policy/overview.md`**
* What the Policy Engine is, highlevel concepts, inputs, outputs, determinism.
2. **`/docs/policy/dsl.md`**
* Full grammar, builtins, examples, best practices, antipatterns.
3. **`/docs/policy/lifecycle.md`**
* Draft → submitted → approved → archived, roles, and audit trail.
4. **`/docs/policy/runs.md`**
* Run modes, incremental mechanics, cursors, replay.
5. **`/docs/api/policy.md`**
* Endpoints, request/response schemas, error codes.
6. **`/docs/cli/policy.md`**
* Command usage, examples, exit codes, JSON output contracts.
7. **`/docs/ui/policy-editor.md`**
* Screens, workflows, simulation, diffs, approvals.
8. **`/docs/architecture/policy-engine.md`**
* Detailed sequence diagrams, selection/join strategy, materialization schema.
9. **`/docs/observability/policy.md`**
* Metrics, tracing, logs, sample dashboards.
10. **`/docs/security/policy-governance.md`**
* Scopes, approvals, tenancy, least privilege.
11. **`/docs/examples/policies/`**
* `baseline.pol`, `serverless.pol`, `internal-only.pol`, each with commentary.
12. **`/docs/faq/policy-faq.md`**
* Common pitfalls, VEX conflict handling, determinism gotchas.
Each file includes a **Compliance checklist** for authors and reviewers.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 8) Acceptance criteria
* Policies are versioned, approvable, and compilable; invalid DSL blocks merges.
* Engine produces deterministic outputs with full rationale chains.
* VEX precedence rules work per spec and are overridable by policy.
* Simulation yields accurate pre/post deltas and diffs.
* Only Policy Engine can write to `effective_finding_*`.
* Incremental runs pick up new advisories/VEX/SBOM changes without full reruns.
* Console and CLI cover authoring, simulation, approval, and retrieval.
* Observability dashboards show rule hits, VEX wins, and run timings.
---
## 9) Risks and mitigations
* **Policy sprawl**: too many similar policies.
* Mitigation: templates, policy inheritance in v1.1, tagging, ownership metadata.
* **Nondeterminism creep**: someone sneaks wallclock or network into evaluation.
* Mitigation: determinism guard, static analyzer, and CI replay check.
* **Join missrate**: weak linksets cause undermatching.
* Mitigation: linkset strengthening in ingestion, PURL equivalence tables, monitoring for “zerohit” rates.
* **Approval bottlenecks**: blocked rollouts.
* Mitigation: RBAC with delegated approvers and timeboxed SLAs.
---
## 10) Test plan
* **Unit**: parser, compiler, evaluator; conflict resolution; precedence.
* **Property**: random policies over synthetic inputs; ensure no panics and stable outputs.
* **Golden**: fixed SBOM + curated advisories/VEX → expected findings; compare every run.
* **Performance**: large SBOMs with heavy rule sets; assert run times and memory ceilings.
* **Integration**: endtoend simulate → approve → run → diff; verify write protections.
* **Chaos**: inject malformed VEX, missing advisories; ensure graceful degradation and clear errors.
---
## 11) Developer checklists
**Definition of Ready**
* Policy grammar finalized; examples prepared.
* Linkset join queries benchmarked.
* Owner and approvers assigned.
**Definition of Done**
* All APIs live with RBAC.
* CLI and Console features shipped.
* Determinism and golden tests green.
* Observability dashboards deployed.
* Docs in section 7 merged.
* Two real org policies migrated and in production.
---
## 12) Glossary
* **Policy**: versioned rule set controlling status and severity.
* **DSL**: domainspecific language used to express rules.
* **Run**: a single evaluation execution with defined inputs and outputs.
* **Simulation**: a run that doesnt write findings; returns diffs.
* **Materialization**: persisted effective findings for fast queries.
* **Determinism**: same inputs + same policy = same outputs. Always.
---
### Final imposed reminder
**Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

409
EPIC_4.md Normal file
View File

@@ -0,0 +1,409 @@
Heres Epic 4 in the same pasteintorepo, implementationready style as the prior epics. Its exhaustive, formal, and slots directly into the existing AOC model, Policy Engine, and Console.
---
# Epic 4: Policy Studio (author, version, simulate)
> Short name: **Policy Studio**
> Services touched: **Policy Engine**, **Policy Registry** (new), **Web API Gateway**, **Authority** (authN/Z), **Scheduler/Workers**, **SBOM Service**, **Conseiller (Feedser)**, **Excitator (Vexer)**, **Telemetry**
> Surfaces: **Console (Web UI)** feature module, **CLI**, **CI hooks**
> Deliverables: Authoring workspace, policy versioning, static checks, simulation at scale, reviews/approvals, signing/publishing, promotion
---
## 1) What it is
**Policy Studio** is the endtoend system for creating, evolving, and safely rolling out the rules that turn AOC facts (SBOM, advisories, VEX) into **effective findings**. It provides:
* A **workspace** where authors write policies in the DSL (Epic 2), with linting, autocompletion, snippets, and templates.
* A **Policy Registry** that stores immutable versions, compiled artifacts, metadata, provenance, and signatures.
* **Simulation** at two levels: quick local samples and large batch simulations across real SBOM inventories with full deltas.
* A **review/approval** workflow with comments, diffs, required approvers, and promotion to environments (dev/test/prod).
* **Publishing** semantics: signed, immutable versions bound to tenants; rollback and deprecation.
* Tight integration with **Explain** traces so any change can show exactly which rules fired and why outcomes shifted.
The Studio respects **AOC enforcement**: policies never edit or merge facts. They only interpret facts and produce determinations consistent with precedence rules defined in the DSL.
---
## 2) Why
* Policy errors are expensive. Authors need safe sandboxes, deterministic builds, and evidence before rollout.
* Auditors require immutability, provenance, and reproducibility from “source policy” to “effective finding.”
* Teams want gradual rollout: simulate, canary, promote, observe, rollback.
* Policy knowledge should be modular, reusable, and testable, not tribal.
---
## 3) How it should work (maximum detail)
### 3.1 Domain model
* **PolicyPackage**: `{name, tenant, description, owners[], tags[], created_at}`
* **PolicyVersion** (immutable): `{package, semver, source_sha, compiled_sha, status: draft|review|approved|published|deprecated|archived, created_by, created_at, signatures[], changelog, metadata{}}`
* **Workspace**: mutable working area for authors; holds unversioned edits until compiled.
* **CompilationArtifact**: `{policy_version, compiler_version, diagnostics[], rule_index[], symbol_table}`
* **SimulationSpec**: `{policy_version|workspace, sbom_selector, time_window?, environment?, sample_size?, severity_floor?, includes{advisories?, vex?}}`
* **SimulationRun**: `{run_id, spec, started_at, finished_at, result{counts_before, counts_after, top_deltas[], by_rule_hit[], sample_explains[]}}`
* **Review**: `{policy_version, required_approvers[], votes[], comments[], files_changed[], diffs[]}`
* **Promotion**: `{policy_version, environment: dev|test|prod, promoted_by, promoted_at, rollout_strategy: All|Percent|TenantSubset}`
* **Attestation**: OIDCbacked signature metadata binding `source_sha` and `compiled_sha` to an actor and time.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.2 Authoring workflow
1. **Create** a workspace from a template (e.g., “Default Risk Model,” “License Tilted,” “CloudNative SBOM”).
2. **Edit** in the Studio: Monaco editor with DSL grammar, intelligent completion for predicates, policies, attributes.
3. **Lint & compile** locally: semantic checks, forbidden rules detection, policy size limits, constantfolding.
4. **Unit tests**: run policy test cases on bundled fixtures and golden expectations.
5. **Quick simulate** on selected SBOMs (1050 items) to preview counts, examples, and rule heatmap.
6. **Propose version**: bump semver, enter changelog; create a **PolicyVersion** in `review` with compiled artifacts.
7. **Review & approval**: sidebyside diff, comments, required approvers enforced by RBAC.
8. **Batch simulation**: run at scale across tenant inventory; produce deltas, sample explainer evidence.
9. **Publish**: sign and move to `published`; optional **Promotion** to target environment(s).
10. **Run** evaluation with the selected policy version; verify outcomes; optionally promote to default.
11. **Rollback**: select an older version; promotion updates references without mutating older versions.
### 3.3 Editing experience (Console)
* **Threepane layout**: file tree, editor, diagnostics/simulation.
* **Features**: autocomplete from symbol table, ineditor docs on hover, goto definition, rule references, rename symbols across files, snippet library, policy templates.
* **Validations**:
* AOC guardrails: no edit/merge actions on source facts, only interpretation.
* Precedence correctness: if rules conflict, studio shows explicit order and effective winner.
* Severity floor and normalization mapping validated against registry configuration.
* **Diagnostics panel**: errors, warnings, performance hints (e.g., “predicate X loads N advisories per component; consider indexing”).
* **Rule heatmap**: during simulation, bar chart of rule firings and the objects they impact.
* **Explain sampler**: click any delta bucket to open a sampled finding with full trace.
### 3.4 Simulation
* **Quick Sim**: synchronous; runs in browserorchestrated job against API, constrained by `sample_size`.
* **Batch Sim**: asynchronous run in workers:
* Input selection: all SBOMs, labels, artifact regex, last N ingests, or a curated set.
* Outputs: counts by severity before/after, by status, top deltas by component and advisory, rule heatmap, top K affected artifacts.
* Evidence: NDJSON of sampled findings with traces; CSV summary; signed result manifest.
* Guardrails: cannot publish if batch sim drift > configurable threshold without an override justification.
### 3.5 Versioning & promotion
* Semver enforced: `major` implies compatibility break (e.g., precedence changes), `minor` adds rules, `patch` fixes.
* **Immutable**: after `published`, the version cannot change; deprecate instead.
* **Environment bindings**: dev/test/prod mapping per tenant; default policy per environment.
* **Canary**: promote to a subset of tenants or artifacts; the Runs page displays A/B comparisons.
### 3.6 Review & approval
* Require N approvers by role; selfapproval optionally prohibited.
* Line and file comments; overall decision with justification.
* Review snapshot captures: diffs, diagnostics, simulation summary.
* Webhooks to notify external systems of review events.
### 3.7 RBAC (Authority)
Roles per tenant:
* **Policy Author**: create/edit workspace, quick sim, propose versions.
* **Policy Reviewer**: comment, request changes, approve/reject.
* **Policy Approver**: final approve, publish.
* **Policy Operator**: promote, rollback, schedule runs.
* **Readonly Auditor**: view everything, download evidence.
All actions serverchecked; UI only hides affordances.
### 3.8 CLI + CI integration
CLI verbs (examples):
```
stella policy init --template default
stella policy lint
stella policy compile
stella policy test --golden ./tests
stella policy simulate --sboms label:prod --sample 1000
stella policy version bump --level minor --changelog "Normalize GHSA CVSS"
stella policy submit --reviewers alice@example.com,bob@example.com
stella policy approve --version 1.3.0
stella policy publish --version 1.3.0 --sign
stella policy promote --version 1.3.0 --env test --percent 20
stella policy rollback --env prod --to 1.2.1
```
CI usage:
* Lint, compile, and run unit tests on PRs that modify `/policies/**`.
* Optionally trigger **Batch Sim** against a staging inventory and post a Markdown report to the PR.
* Block merge if diagnostics include errors or drift exceeds thresholds.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.9 APIs (representative)
* `POST /policies/workspaces` create from template
* `PUT /policies/workspaces/{id}/files` edit source files
* `POST /policies/workspaces/{id}/compile` get diagnostics + compiled artifact
* `POST /policies/workspaces/{id}/simulate` quick sim
* `POST /policies/versions` create version from workspace with semver + changelog
* `GET /policies/versions/{id}` fetch version + diagnostics + sim summary
* `POST /policies/versions/{id}/reviews` open review
* `POST /policies/versions/{id}/approve` record approval
* `POST /policies/versions/{id}/publish` sign + publish
* `POST /policies/versions/{id}/promote` bind to env/canary
* `POST /policies/versions/{id}/simulate-batch` start batch sim (async)
* `GET /policies/simulations/{run_id}` get sim results and artifacts
* `GET /policies/registry` list packages/versions, status and bindings
All calls require tenant scoping and RBAC.
### 3.10 Storage & data
* **Policy Registry DB** (MongoDB): packages, versions, workspaces, metadata.
* **Object storage**: source bundles, compiled artifacts, simulation result bundles, evidence.
* **Indexing**: compound indexes by `{tenant, package}`, `{tenant, status}`, `{tenant, environment}`.
* **Retention**: configurable retention for workspaces and simulation artifacts; versions never deleted, only archived.
### 3.11 Evidence & provenance
* Every published version has:
* `source_sha` (content digest of the policy source bundle)
* `compiled_sha` (digest of compiled artifact)
* Attestation: signed envelope binding digests to an identity, time, and tenant.
* Links to the exact compiler version, inputs, and environment.
### 3.12 Observability
* Metrics: compile time, diagnostics rate, simulation queue depth, delta magnitude distribution, approval latencies.
* Logs: structured events for lifecycle transitions.
* Traces: long simulations emit span per shard.
### 3.13 Performance & scale
* Compilation should complete under 3 seconds for typical policies; warn at 10s.
* Batch sim uses workers with partitioning by SBOM id; results reduced by the API.
* Memory guardrails on rule execution; deny policies that exceed configured complexity limits.
### 3.14 Security
* OIDCbacked signing and attestation.
* Policy sources are scanned on upload for secrets; blocked if found.
* Strict CSP in Studio pages; tokens stored in memory, not localStorage.
* Tenant isolation in buckets and DB collections.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 4) Implementation plan
### 4.1 Services
* **Policy Registry (new microservice)**
* REST API and background workers for batch simulation orchestration.
* Stores workspaces, versions, metadata, bindings, reviews.
* Generates signed attestations at publish time.
* Coordinates with **Policy Engine** for compile/simulate invocations.
* **Policy Engine (existing)**
* Expose compile and simulate endpoints with deterministic outputs.
* Provide rule coverage, symbol table, and explain traces for samples.
* **Web API Gateway**
* Routes requests; injects tenant context; enforces RBAC.
### 4.2 Console (Web UI) feature module
* `packages/features/policies` (shared with Epic 3):
* **Studio** routes: `/policies/studio`, `/policies/:id/versions/:v/edit`, `/simulate`, `/review`.
* Monaco editor wrapper for DSL with hover docs, autocomplete.
* Diff viewer, diagnostics, heatmap, explain sampler, review UI.
### 4.3 CLI
* New commands under `stella policy *`; typed client generated from OpenAPI.
* Outputs machinereadable JSON and pretty tables.
### 4.4 Workers
* **Simulation workers**: pull shards of SBOMs, run policy, emit partials, reduce into result bundle.
* **Notification worker**: sends webhooks on review, approval, publish, promote.
---
## 5) Documentation changes (create/update)
1. **`/docs/policy/studio-overview.md`**
* Concepts, roles, lifecycle, glossary.
2. **`/docs/policy/authoring.md`**
* Workspace, templates, snippets, lint rules, best practices.
3. **`/docs/policy/versioning-and-publishing.md`**
* Semver, immutability, deprecation, rollback, attestations.
4. **`/docs/policy/simulation.md`**
* Quick vs batch sim, selection strategies, thresholds, evidence artifacts.
5. **`/docs/policy/review-and-approval.md`**
* Required approvers, comments, webhooks, audit trail.
6. **`/docs/policy/promotion.md`**
* Environments, canary, default policy binding, rollback.
7. **`/docs/policy/cli.md`**
* Command reference with examples and JSON outputs.
8. **`/docs/policy/api.md`**
* REST endpoints, request/response schemas, error codes.
9. **`/docs/security/policy-attestations.md`**
* Signatures, digests, verifier steps.
10. **`/docs/architecture/policy-registry.md`**
* Service design, schemas, queues, failure modes.
11. **`/docs/observability/policy-telemetry.md`**
* Metrics, logs, tracing, dashboards.
12. **`/docs/runbooks/policy-incident.md`**
* Rolling back a bad policy, freezing publishes, forensic steps.
13. **`/docs/examples/policy-templates.md`**
* Readymade templates and snippet catalog.
14. **`/docs/aoc/aoc-guardrails.md`**
* How Studio enforces AOC in authoring and review.
Each doc ends with a “Compliance checklist.”
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 6) Tasks
### 6.1 Backend: Policy Registry
* [ ] Define OpenAPI spec for Registry (workspaces, versions, reviews, sim).
* [ ] Implement workspace storage and file CRUD.
* [ ] Integrate with Policy Engine compile endpoint; return diagnostics, symbol table.
* [ ] Implement quick simulation with request limits.
* [ ] Implement batch simulation orchestration: enqueue shards, collect results, reduce deltas, store artifacts.
* [ ] Implement review model: comments, required approvers, decisions.
* [ ] Implement publish: sign, persist attestation, set status=published.
* [ ] Implement promotion bindings per tenant/environment; canary subsets.
* [ ] RBAC checks for all endpoints.
* [ ] Unit/integration tests; load tests for batch sim.
### 6.2 Policy Engine enhancements
* [ ] Return rule coverage and firing counts with compile/simulate.
* [ ] Return symbol table and inline docs for editor autocomplete.
* [ ] Expose deterministic Explain traces for sampled findings.
* [ ] Enforce complexity/time limits and report breaches.
### 6.3 Console (Web UI)
* [ ] Build Studio editor wrapper with Monaco + DSL language server hooks.
* [ ] Implement file tree, snippets, templates, hotkeys, search/replace.
* [ ] Diagnostics panel with jumptoline, quick fixes.
* [ ] Simulation panel: quick sim UI, charts, heatmap, sample explains.
* [ ] Review UI: diff, comments, approvals, status badges.
* [ ] Publish & Promote flows with confirmation and postactions.
* [ ] Batch sim results pages with export buttons.
* [ ] Accessibility audits and keyboardonly authoring flow.
### 6.4 CLI
* [ ] Implement commands listed in 3.8 with rich help and examples.
* [ ] Add `--json` flag for machine consumption; emit stable schemas.
* [ ] Exit codes aligned with CI usage (lint errors → nonzero).
### 6.5 CI/CD & Security
* [ ] Add CI job that runs `stella policy lint/compile/test` on PRs.
* [ ] Optional job that triggers batch sim against staging inventory; post summary to PR.
* [ ] Policy source secret scanning; block on findings.
* [ ] Signing keys configuration; verify pipeline for attestation on publish.
### 6.6 Docs
* [ ] Write all docs in section 5 with screenshots and CLI transcripts.
* [ ] Add cookbook examples and templates in `/docs/examples/policy-templates.md`.
* [ ] Wire contextual Help links from Studio to relevant docs.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* Authors can create, edit, lint, compile policies with inline diagnostics and autocomplete.
* Quick simulation produces counts, rule heatmap, and sample explains within UI.
* Batch simulation scales across large SBOM sets, producing deltas and downloadable evidence.
* Review requires configured approvers; comments and diffs are preserved.
* Publish generates immutable, signed versions with attestations.
* Promotion binds versions to environments and supports canary and rollback.
* CLI supports full lifecycle and is usable in CI.
* All actions are tenantscoped, RBACenforced, and logged.
* AOC guardrails prevent any mutation of raw facts.
* Documentation shipped and linked contextually from the Studio.
---
## 8) Risks & mitigations
* **Policy complexity causes timeouts** → compiletime complexity scoring, execution limits, early diagnostics.
* **Simulation cost at scale** → sharding and streaming reducers; sampling; configurable quotas.
* **RBAC misconfiguration** → serverenforced checks, defenseindepth tests, denybydefault.
* **Attestation key management** → OIDCbacked signatures; auditable verifier tool; timeboxed credentials.
* **Editor usability** → language server with accurate completions; docs on hover; snippet library.
---
## 9) Test plan
* **Unit**: compiler adapters, registry models, reviewers workflow, CLI options.
* **Integration**: compile→simulate→publish→promote on seeded data.
* **E2E**: Playwright flows for author→review→batch sim→publish→promote→rollback.
* **Performance**: load test batch simulation with 100k components spread across SBOMs.
* **Security**: RBAC matrix tests; secret scanning; signing and verification.
* **Determinism**: same inputs produce identical `compiled_sha` and simulation summaries.
---
## 10) Feature flags
* `policy.studio` (enables editor and quick sim)
* `policy.batch-sim`
* `policy.canary-promotion`
* `policy.signature-required` (enforce signing on publish)
Flags documented in `/docs/observability/policy-telemetry.md`.
---
## 11) Nongoals (this epic)
* Building a general IDE for arbitrary languages; the editor is purposebuilt for the DSL.
* Autogenerated policies from AI without human approval.
* Crosstenant policies; all policies are tenantscoped.
---
## 12) Philosophy
* **Safety first**: its cheaper to prevent a bad policy than to fix its fallout.
* **Determinism**: same inputs, same outputs, verifiably.
* **Immutability**: versions and evidence are forever; we deprecate, not mutate.
* **Transparency**: every change is explainable with traces and proofs.
* **Reusability**: templates, snippets, and tests turn policy from art into engineering.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

431
EPIC_5.md Normal file
View File

@@ -0,0 +1,431 @@
Heres Epic 5 in the same pasteintorepo, implementationready format as the prior epics. Its exhaustive, formal, and designed to slot into AOC, Policy Engine, Conseiller/Excitator, and the Console.
---
# Epic 5: SBOM Graph Explorer
> Short name: **Graph Explorer**
> Services touched: **SBOM Service**, **Graph Indexer** (new), **Graph API** (new), **Policy Engine**, **Conseiller (Feedser)**, **Excitator (Vexer)**, **Web API Gateway**, **Authority** (authN/Z), **Workers/Scheduler**, **Telemetry**
> Surfaces: **Console (Web UI)** graph module, **CLI**, **Exports**
> Deliverables: Interactive graph UI with semantic zoom, saved queries, policy/VEX/advisory overlays, diff views, impact analysis, exports
---
## 1) What it is
**SBOM Graph Explorer** is the interactive, tenantscoped view of all supplychain relationships the platform knows about, rendered as a navigable graph. It connects:
* **Artifacts** (applications, images, libs), **Packages/Versions**, **Files/Paths**, **Licenses**, **Advisories** (from Conseiller), **VEX statements** (from Excitator), **Provenance** (builds, sources), and **Policies** (overlays of determinations)
* **Edges** like `depends_on`, `contains`, `built_from`, `declared_in`, `affected_by`, `vex_exempts`, `governs_with`
* **Time/version** dimension: multiple SBOM snapshots with diffs
Its built for investigation and review: find where a vulnerable package enters; see which apps are impacted; understand why a finding exists; simulate a policy version and see the delta. The explorer observes **AOC enforcement**: it never mutates facts; it aggregates and visualizes them. Only the Policy Engine may classify, and classification is displayed as overlays.
---
## 2) Why
* SBOMs are graphs. Tables flatten what matters and hide transitive risk.
* Engineers, security, and auditors need impact answers quickly: “What pulls in `log4j:2.17` and where is it at runtime?”
* Policy/VEX/advisory interactions are nuanced. A visual overlay makes precedence and outcomes obvious.
* Review is collaborative; you need saved queries, deep links, exports, and consistent evidence.
---
## 3) How it should work (maximum detail)
### 3.1 Domain model
**Nodes** (typed, versioned, tenantscoped):
* `Artifact`: application, service, container image, library, module
* `Package`: name + ecosystem (purl), `PackageVersion` with resolved version
* `File`: path within artifact or image layer
* `License`: SPDX id
* `Advisory`: normalized advisory id (GHSA, CVE, vendor), source = Conseiller
* `VEX`: statement with product context, status, justification, source = Excitator
* `SBOM`: ingestion unit; includes metadata (tool, sha, build info)
* `PolicyDetermination`: materialized view of Policy Engine results (readonly overlay)
* `Build`: provenance, commit, workflow run
* `Source`: repo, tag, commit
**Edges** (directed):
* `declared_in` (PackageVersion → SBOM)
* `contains` (Artifact → PackageVersion | File)
* `depends_on` (PackageVersion → PackageVersion) with scope attr (prod|dev|test|optional)
* `built_from` (Artifact → Build), `provenance_of` (Build → Source)
* `affected_by` (PackageVersion → Advisory) with range semantics
* `vex_exempts` (Advisory ↔ VEX) scoped by product/component
* `licensed_under` (Artifact|PackageVersion → License)
* `governs_with` (Artifact|PackageVersion → PolicyDetermination)
* `derived_from` (SBOM → SBOM) for superseding snapshots
**Identity & versioning**
* Every node has a stable key: `{tenant}:{type}:{natural_id}` (e.g., purl for packages, digest for images).
* SBOM snapshots are immutable; edges carry `valid_from`/`valid_to` for time travel and diffing.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.2 User capabilities (endtoend)
* **Search & Navigate**: global search (purls, CVEs, repos, licenses), keyboard nav, breadcrumbs, semantic zoom.
* **Lenses**: toggle views (Security, License, Provenance, Runtime vs Dev, Policy effect).
* **Overlays**:
* **Advisory overlay**: show affected nodes/edges with source, severity, ranges.
* **VEX overlay**: show suppressions/justifications; collapse exempted paths.
* **Policy overlay**: choose a policy version; nodes/edges reflect determinations (severity, status) with explain sampling.
* **Impact analysis**: pick a vulnerable node; highlight upstream/downstream dependents, scope filters, shortest/all paths with constraints.
* **Diff view**: compare SBOM A vs B; show added/removed nodes/edges, changed versions, changed determinations.
* **Saved queries**: visual builder + JSON query; shareable permalinks scoped by tenant and environment.
* **Exports**: GraphML, CSV edge list, NDJSON of findings, PNG/SVG snapshot.
* **Evidence details**: side panel with raw facts, advisory links, VEX statements, policy explain trace, provenance.
* **Accessibility**: tabnavigable, highcontrast, screenreader labels for nodes and sidebars.
### 3.3 Query model
* **Visual builder** for common queries:
* “Show all paths from Artifact X to Advisory Y up to depth 6.”
* “All runtime dependencies with license = GPL3.0.”
* “All artifacts affected by GHSA… with no applicable VEX.”
* “Which SBOMs introduced/removed `openssl` between build 120 and 130?”
* **JSON query** (internal, POST body) with:
* `start`: list of node selectors (type + id or attributes)
* `expand`: edge types and depth, direction, scope filters
* `where`: predicates on node/edge attributes
* `overlay`: policy version id, advisory sources, VEX filters
* `limit`: nodes, edges, timebox, cost budget
* **Cost control**: server estimates cost, denies or pages results; UI streams partial graph tiles.
### 3.4 UI architecture (Console)
* **Canvas**: WebGL renderer with levelofdetail, edge bundling, and label culling; deterministic layout when possible (seeded).
* **Semantic zoom**:
* Far: clusters by artifact/repo/ecosystem, color by lens
* Mid: package groups, advisory badges, license swatches
* Near: concrete versions, direct edges, inline badges for policy determinations
* **Panels**:
* Left: search, filters, lens selector, saved queries
* Right: details, explain trace, evidence tabs (Advisory/VEX/Policy/Provenance)
* Bottom: query expression, diagnostics, performance/stream status
* **Diff mode**: split or overlay, color legend (add/remove/changed), filter by node type.
* **Deep links**: URL encodes query + viewport; shareable respecting RBAC.
* **Keyboard**: space drag, +/- zoom, F to focus, G to expand neighbors, P to show paths.
### 3.5 Backend architecture
**Graph Indexer (new)**
* Consumes SBOM ingests, Conseiller advisories, Excitator VEX statements, Policy Engine determinations (readonly).
* Projects facts into a **property graph** persisted in:
* Primary: document store + adjacency sets (e.g., Mongo collections + compressed adjacency lists)
* Optional driver for graph DB backends if needed (pluggable)
* Maintains materialized aggregates: degree, critical paths cache, affected artifact counts, license distribution.
* Emits **graph snapshots** per SBOM with lineage to original ingestion.
**Graph API (new)**
* Endpoints for search, neighbor expansion, path queries, diffs, overlays, exports.
* Streaming responses for large graphs (chunked NDJSON tiles).
* Cost accounting + quotas per tenant.
**Workers**
* **Centrality & clustering** precompute on idle: betweenness approximations, connected components, Louvain clusters.
* **Diff compute** on new SBOM ingestion pairs (previous vs current).
* **Overlay materialization** cache for popular policy versions.
**Policy Engine integration**
* Graph API requests can specify a policy version.
* For sampled nodes, the API fetches explain traces; for counts, uses precomputed overlay materializations where available.
**AOC enforcement**
* Graph Indexer never merges or edits advisories/VEX; it links them and exposes overlays that the Policy Engine evaluates.
* Conseiller and Excitator remain authoritative sources; severities come from Policygoverned normalization.
### 3.6 APIs (representative)
* `GET /graph/search?q=...&type=package|artifact|advisory|license`
* `POST /graph/query` ⇒ stream tiles `{nodes[], edges[], stats, cursor}`
* `POST /graph/paths` body: `{from, to, depth<=6, constraints{scope, runtime_only}}`
* `POST /graph/diff` body: `{sbom_a, sbom_b, filters}`
* `GET /graph/snapshots/{sbom_id}` ⇒ graph metadata, counts, top advisories
* `POST /graph/export` body: `{format: graphml|csv|ndjson|png|svg, query|snapshot}`
* `GET /graph/saved` / `POST /graph/saved` save and list tenant queries
* `GET /graph/overlays/policy/{version_id}` ⇒ summary stats for caching
All endpoints tenantscoped, RBACchecked. Timeouts and pagination by server. Errors return structured diagnostics.
### 3.7 CLI
```
stella sbom graph search "purl:pkg:maven/org.apache.logging.log4j/log4j-core"
stella sbom graph query --file ./query.json --export graphml > graph.graphml
stella sbom graph impacted --advisory GHSA-xxxx --runtime-only --limit 100
stella sbom graph paths --from artifact:service-a --to advisory:GHSA-xxxx --depth 5 --policy 1.3.0
stella sbom graph diff --sbom-a 2025-03-15T10:00Z --sbom-b 2025-03-22T10:00Z --export csv > diff.csv
stella sbom graph save --name "openssl-runtime" --file ./query.json
```
Exit codes: 0 ok, 2 query validation error, 3 overbudget, 4 not found, 5 RBAC denied.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
### 3.8 Performance & scale
* **Progressive loading**: server pages tiles by BFS frontier; client renders incrementally.
* **Viewport culling**: only visible nodes/edges in canvas; offscreen demoted to aggregates.
* **Levelofdetail**: simplified glyphs and collapsed clusters at distance.
* **Query budgets**: pertenant rate + node/edge caps; interactive paths limited to depth ≤ 6.
* **Caching**: hot queries memoized per tenant + overlay version; diffs precomputed for consecutive SBOMs.
### 3.9 Security
* Multitenant isolation at storage and API layers.
* RBAC roles:
* **Viewer**: browse graphs, saved queries
* **Investigator**: run queries, export data
* **Operator**: configure budgets, purge caches
* **Auditor**: download evidence bundles
* Input validation for query JSON; deny disallowed edge traversals; strict CSP in web app.
### 3.10 Observability
* Metrics: tile latency, nodes/edges per tile, cache hit rate, query denials, memory pressure.
* Logs: structured, include query hash, cost, truncation flags.
* Traces: server spans per stage (parse, plan, fetch, overlay, stream).
### 3.11 Accessibility & UX guarantees
* Keyboard complete, ARIA roles for graph and panels, highcontrast theme.
* Deterministic layout on reload for shareable investigations.
### 3.12 Data retention
* Graph nodes derived from SBOMs share retention with SBOM artifacts; overlays are ephemeral caches.
* Saved queries retained until deleted; references to missing objects show warnings.
---
## 4) Implementation plan
### 4.1 Services
* **Graph Indexer (new microservice)**
* Subscribes to SBOM ingest events, Conseiller advisory updates, Excitator VEX updates, Policy overlay materializations.
* Builds adjacency lists and node documents; computes aggregates and clusters.
* **Graph API (new microservice)**
* Validates and executes queries; streams tiles; composes overlays; serves diffs and exports.
* Integrates with Policy Engine for explain sample retrieval.
* **SBOM Service (existing)**
* Emits ingestion events with SBOM ids and lineage; exposes SBOM metadata to Graph API.
* **Web API Gateway**
* Routes `/graph/*`, injects tenant context, enforces RBAC.
### 4.2 Console (Web UI) feature module
* `packages/features/graph-explorer`
* Canvas renderer (WebGL), panels, query builder, diff mode, overlays, exports.
* Deeplink router and viewport state serializer.
### 4.3 Workers
* Centrality/clustering worker, diff worker, overlay materialization worker.
* Schedules on lowtraffic windows; backpressure aware.
### 4.4 Data model (storage)
* Collections:
* `graph_nodes`: `{_id, tenant, type, natural_id, attrs, degree, created_at, updated_at}`
* `graph_edges`: `{_id, tenant, from_id, to_id, type, attrs, valid_from, valid_to}`
* `graph_snapshots`: perSBOM node/edge references
* `graph_saved_queries`: `{_id, tenant, name, query_json, created_by}`
* `graph_overlays_cache`: keyed by `{tenant, policy_version, hash(query)}`
* Indexes: compound on `{tenant, type, natural_id}`, `{tenant, from_id}`, `{tenant, to_id}`, time bounds.
---
## 5) Documentation changes (create/update)
1. **`/docs/sbom/graph-explorer-overview.md`**
* Concepts, node/edge taxonomy, lenses, overlays, roles, limitations.
2. **`/docs/sbom/graph-using-the-console.md`**
* Walkthroughs: search, navigate, impact, diff, export; screenshots and keyboard cheatsheet.
3. **`/docs/sbom/graph-query-language.md`**
* JSON schema, examples, constraints, cost/budget rules.
4. **`/docs/sbom/graph-api.md`**
* REST endpoints, request/response examples, streaming and pagination.
5. **`/docs/sbom/graph-cli.md`**
* CLI command reference and example pipelines.
6. **`/docs/policy/graph-overlays.md`**
* How policy versions render in Graph; explain sampling; AOC guardrails.
7. **`/docs/vex/graph-integration.md`**
* How VEX suppressions appear and how to validate product scoping.
8. **`/docs/advisories/graph-integration.md`**
* Advisory linkage and severity normalization by policy.
9. **`/docs/architecture/graph-services.md`**
* Graph Indexer, Graph API, storage choices, failure modes.
10. **`/docs/observability/graph-telemetry.md`**
* Metrics, logs, tracing, dashboards.
11. **`/docs/runbooks/graph-incidents.md`**
* Handling runaway queries, cache poisoning, degraded render.
12. **`/docs/security/graph-rbac.md`**
* Permissions matrix, multitenant boundaries.
Every doc should end with a “Compliance checklist.”
**Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 6) Tasks
### 6.1 Backend: Graph Indexer
* [ ] Define node/edge schemas and attribute dictionaries for each type.
* [ ] Implement event consumers for SBOM ingests, Conseiller updates, Excitator updates.
* [ ] Build ingestion pipeline that populates nodes/edges and maintains `valid_from/valid_to`.
* [ ] Implement aggregate counters and degree metrics.
* [ ] Implement clustering job and persist cluster ids per node.
* [ ] Implement snapshot materialization per SBOM and lineage tracking.
* [ ] Unit tests for each node/edge builder; propertybased tests for identity stability.
### 6.2 Backend: Graph API
* [ ] Implement `/graph/search` with prefix and exact match across node types.
* [ ] Implement `/graph/query` with validation, planning, cost estimation, and streaming tile results.
* [ ] Implement `/graph/paths` with constraints and depth limits; shortest path heuristic.
* [ ] Implement `/graph/diff` computing adds/removes/changed versions; stream results.
* [ ] Implement overlays: advisory join, VEX join, policy materialization and explain sampling.
* [ ] Implement exports: GraphML, CSV edge list, NDJSON findings, PNG/SVG snapshots.
* [ ] RBAC middleware integration; multitenant scoping.
* [ ] Load tests with synthetic large SBOMs; define default budgets.
### 6.3 Policy Engine integration
* [ ] Add endpoint to fetch explain traces for specific node ids in batch.
* [ ] Add materialization export that Graph API can cache per policy version.
### 6.4 Console (Web UI)
* [ ] Create `graph-explorer` module with routes `/graph`, `/graph/diff`, `/graph/q/:id`.
* [ ] Implement WebGL canvas with LOD, culling, edge bundling, deterministic layout seed.
* [ ] Build search, filter, lens, and overlay toolbars.
* [ ] Side panels: details, evidence tabs, explain viewer.
* [ ] Diff mode: split/overlay toggles and color legend.
* [ ] Saved queries: create, update, run; deep links.
* [ ] Export UI: formats, server roundtrip, progress indicators.
* [ ] a11y audit and keyboardonly flow.
### 6.5 CLI
* [ ] Implement `stella sbom graph *` subcommands with JSON IO and piping support.
* [ ] Document examples and stable output schemas for CI consumption.
### 6.6 Observability & Ops
* [ ] Dashboards for tile latency, query denials, cache hit rate, memory.
* [ ] Alerting on query error spikes, OOM risk, cache churn.
* [ ] Runbooks in `/docs/runbooks/graph-incidents.md`.
### 6.7 Docs
* [ ] Author all docs in section 5, link from Console contextual help.
* [ ] Add endtoend tutorial: “Investigate GHSAXXXX across prod artifacts.”
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* Console renders large SBOM graphs with semantic zoom, overlays, and responsive interactions.
* Users can run impact and path queries with bounded depth and get results within budget.
* VEX suppressions and advisory severities appear correctly and are consistent with policy.
* Diff view clearly shows added/removed/changed nodes/edges between two SBOMs.
* Saved queries and deep links reproduce the same view deterministically (given same data).
* Exports produce valid GraphML/CSV/NDJSON and image snapshots.
* CLI supports search, query, paths, impacted, diff, and export with stable schemas.
* AOC guardrails: explorer never mutates facts; overlays reflect Policy Engine decisions.
* RBAC enforced; all actions logged and observable.
---
## 8) Risks & mitigations
* **Graph explosion on large monorepos** → tiling, clustering, budgets, and strict depth limits.
* **Inconsistent identities across tools** → canonicalize purls/digests; propertybased tests for identity stability.
* **Policy overlay latency** → precompute materializations for hot policy versions; sample explains only on focus.
* **User confusion** → strong lens defaults, deterministic layouts, legends, incontext help.
---
## 9) Test plan
* **Unit**: node/edge builders, identity normalization, cost estimator.
* **Integration**: ingest SBOM + advisories + VEX, verify overlays and counts.
* **E2E**: Playwright flows for search→impact→diff→export; deep link determinism.
* **Performance**: simulate 500k nodes/2M edges; measure tile latency and memory.
* **Security**: RBAC matrix; tenant isolation tests; query validation fuzzing.
* **Determinism**: snapshot roundtrip: same query and seed produce identical layout and stats.
---
## 10) Feature flags
* `graph.explorer` (UI feature module)
* `graph.paths` (advanced path queries)
* `graph.diff` (SBOM diff mode)
* `graph.overlays.policy` (policy overlay + explain sampling)
* `graph.export` (exports enabled)
Documented in `/docs/observability/graph-telemetry.md`.
---
## 11) Nongoals (this epic)
* Realtime process/runtime call graphs.
* Full substitution for text reports; Explorer complements Reports.
* Crosstenant graphs; all queries are tenantscoped.
---
## 12) Philosophy
* **See the system**: security and license risk are structural. If you cannot see structure, you will miss risk.
* **Evidence over assertion**: every colored node corresponds to raw facts and explainable determinations.
* **Bounded interactivity**: fast, partial answers beat slow “complete” ones.
* **Immutability**: graphs mirror SBOM snapshots and are never rewritten; we add context, not edits.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

650
EPIC_6.md Normal file
View File

@@ -0,0 +1,650 @@
Below is the expanded, “maximum documentation” package for Epic 6. It is pasteready for your repo and deliberately formal so engineering, docs, and audit folks can work from the same source of truth.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 6: Vulnerability Explorer (policyaware)
**Short name:** `Vuln Explorer`
**Services touched:** Conseiller (Feedser), Excitator (Vexer), SBOM Service, Policy Engine, Findings Ledger (new), Web API Gateway, Authority (authN/Z), Workers/Scheduler, Telemetry/Analytics, Console (Web UI), CLI
**AOC ground rule:** Conseiller/Excitator aggregate but never merge or rewrite source documents. The Explorer only renders **effective** results as decided by Policy Engine and records human workflow as immutable ledger events.
---
## 1) What it is
The Vulnerability Explorer is the API and UI for operational triage, investigation, and reporting of vulnerabilities across all artifacts tracked by StellaOps. It correlates SBOM inventory with advisory and VEX evidence, then displays the **effective** status and obligations per the currently selected **Policy version**. It never edits source evidence. It provides:
* Policyaware lists, pivots, and detail views
* Triage workflow with immutable audit ledger
* Risk acceptance with expiry and evidence
* SLA tracking by severity and business tier
* Simulation against other policy versions
* Exports and cryptographically signed “evidence bundles”
**Identity principle:** one **Finding** per tuple `(artifact_id, purl, version, advisory_key)`, with links to every contributing AdvisoryEvidence and VexEvidence.
---
## 2) Why (concise)
* Prioritization must reflect **policy and VEX**, not raw feeds.
* Audit requires complete, reproducible lineage: what changed, why, and who decided.
* Operators need consistent APIs, CLI, and a UI that explain determinations, not just list CVEs.
---
## 3) How it should work (maximum detail)
### 3.1 Domain model and contracts
**Evidence (immutable)**
* `AdvisoryEvidence` (from Conseiller)
```json
{
"id": "adv_evd:...uuid...",
"tenant": "acme",
"source": "ghsa|nvd|vendor|ossindex|... ",
"source_id": "GHSA-xxxx",
"schema": "GHSA|CVE|CSAF-ADV",
"advisory_key": "CVE-2024-12345",
"affected": [{"ecosystem":"npm","purl":"pkg:npm/lodash","ranges":[{"type":"semver","events":[{"introduced":"0.0.0"},{"fixed":"4.17.21"}]}]}],
"cvss": {"version":"3.1","baseScore":7.5,"vectorString":"AV:N/AC:L/..."},
"severity": "HIGH",
"urls": ["https://..."],
"published": "2024-06-10T12:00:00Z",
"withdrawn": null,
"ingested_at": "2024-06-11T08:43:21Z"
}
```
* `VexEvidence` (from Excitator)
```json
{
"id": "vex_evd:...uuid...",
"tenant": "acme",
"source": "vendor-csaf",
"schema": "CSAF-VEX",
"advisory_key": "CVE-2024-12345",
"product_scope": [{"purl":"pkg:npm/lodash@4.17.20"}],
"status": "not_affected|affected|fixed|under_investigation",
"justification": "component_not_present|vulnerable_code_not_in_execute_path|... ",
"impact_statement": "Not reachable in Acme Payment Service.",
"timestamp": "2024-06-12T10:00:00Z",
"ingested_at": "2024-06-12T10:10:02Z"
}
```
* `InventoryEvidence` (from SBOM Service)
```json
{
"id": "inv_evd:...uuid...",
"tenant": "acme",
"artifact_id": "svc:payments@1.14.0",
"sbom_id": "sbom:sha256:...",
"purl": "pkg:npm/lodash@4.17.20",
"scope": "runtime|dev|test",
"runtime_flag": true,
"paths": [["root", "pkg:npm/a", "pkg:npm/lodash"]],
"discovered_at": "2024-06-12T11:00:00Z"
}
```
**PolicyDetermination** (readonly from Policy Engine)
```json
{
"key": {
"artifact_id": "svc:payments@1.14.0",
"purl": "pkg:npm/lodash",
"version": "4.17.20",
"advisory_key": "CVE-2024-12345",
"policy_version": "1.3.0"
},
"applicable": true,
"effective_severity": "HIGH",
"exploitability": "ACTIVE|LIKELY|UNKNOWN|UNLIKELY",
"signals": {"epss": 0.86, "kev": true, "maturity": "weaponized"},
"suppression_state": "none|policy|vex",
"obligations": [{"type":"fix_by","due":"2024-07-15"},{"type":"document_risk"}],
"sla": {"due":"2024-07-15","tier":"gold"},
"rationale": [
{"rule":"sev.base=nvd","detail":"NVD base:7.5"},
{"rule":"exploit.upsell.kev","detail":"KEV flag → HIGH"},
{"rule":"env.weight.prod","detail":"Env=prod no downgrade"}
]
}
```
**Finding identity**
```
finding_id = hash(tenant, artifact_id, purl, version, advisory_key)
```
**Ledger event** (appendonly, tamperevident)
```json
{
"event_id": "led:...uuid...",
"finding_id": "f:...hash...",
"tenant": "acme",
"type": "assign|comment|attach|change_state|accept_risk|set_target_fix|verify_fix|reopen",
"payload": {"to":"team:platform","reason":"oncall triage"},
"actor": {"user_id":"u:42","display":"Dana S."},
"ts": "2024-06-12T14:01:02Z",
"prev_event_hash": "sha256:...",
"event_hash": "sha256:sha256(canonical_json(event) + prev_event_hash)"
}
```
**Projection** (materialized current state for fast lists)
```json
{
"finding_id":"f:...hash...",
"tenant":"acme",
"artifact_id":"svc:payments@1.14.0",
"purl":"pkg:npm/lodash",
"version":"4.17.20",
"advisory_key":"CVE-2024-12345",
"effective_severity":"HIGH",
"exploitability":"ACTIVE",
"suppression_state":"none",
"status":"UNDER_INVESTIGATION|REMEDIATING|ACCEPTED|RESOLVED|NEW|SUPPRESSED",
"sla_due":"2024-07-15",
"owner":"team:platform",
"kev":true,
"epss":0.86,
"new_since":"2024-06-12",
"has_fix":true,
"envs":["prod"],
"runtime_flag":true,
"updated_at":"2024-06-12T14:01:02Z"
}
```
**Advisory key normalization**
* Input identifiers: `CVE-*`, `GHSA-*`, vendor IDs.
* Preference order: CVE, then GHSA, else vendor id prefixed with namespace.
* Canonicalization: uppercase, trim, map withdrawn to same key but mark `withdrawn=true` in evidence.
* Conseiller must publish `links: [all source ids]` for provenance.
### 3.2 Resolver algorithm (candidate findings)
**Goal:** produce tuples `(artifact_id, purl, version, advisory_key)` where inventory intersects affected ranges and policy deems the path relevant.
**Pseudocode**
```
for each artifact sbom S:
inv = inventory(S)
for each advisory evidence A:
for each affected package spec in A.affected:
for each inv_item in inv where inv_item.purl package == spec.purl package:
if version_in_ranges(inv_item.version, spec.ranges, ecosystem):
if policy.path_scope_allows(inv_item.scope, inv_item.runtime_flag, inv_item.paths):
yield candidate (artifact_id, inv_item.purl, inv_item.version, A.advisory_key)
```
**Version semantics per ecosystem**
* npm: semver, pre-release excluded unless explicitly in range.
* Maven: Maven version rules, handle `-SNAPSHOT`, use mavenresolver semantics.
* PyPI: PEP 440 versioning.
* Go: semver with `+incompatible` handling.
* OS packages: RPM/DEB epoch:versionrelease ordering.
**Edge cases**
* Multiple paths: store shortest path and count.
* Dev/test scope: policy may exclude or downgrade.
* Withdrawn advisories: keep as evidence; Policy can set severity to `NONE`.
### 3.3 VEX precedence and scoping
* If any matching **VEX** says `not_affected` scoped to the artifact product/component per CSAF product tree, set `suppression_state="vex"` and `applicable=false`.
* If VEX says `fixed` and inventory version is >= fixed version, mark as **Resolved (verified)** after SBOM recrawl confirms.
* If VEX `under_investigation`, no suppression; may add a policy grace period obligation.
### 3.4 Policy evaluation
* Inputs: candidate tuple + context (artifact env, business tier, ownership, signals, fix availability).
* Determinations: applicability, effective severity, exploitability, obligations, SLA, suppression.
* Suppression by policy examples: testscope only; path through optional deps; package vendored but not linked at runtime.
* Simulation: identical input, alternate `policy_version`; returns determinations without side effects.
### 3.5 API surface (authoritative)
**List**
```
GET /vuln/findings?policy=1.3.0&sev=high,critical&group_by=artifact&exploit=kev&env=prod&page=1&page_size=100
```
Response
```json
{
"page": 1,
"page_size": 100,
"total": 740,
"group_by": "artifact",
"results": [
{"group":"svc:payments","counts":{"CRITICAL":3,"HIGH":12,"MEDIUM":8},"sla_breaches":2},
...
]
}
```
**Query (complex filters)**
```
POST /vuln/findings/query
```
```json
{
"policy": "1.3.0",
"filter": {
"severity": [">=MEDIUM"],
"exploit": ["kev", "epss>=0.8"],
"artifact": ["svc:payments", "svc:checkouts"],
"status": ["NEW","UNDER_INVESTIGATION"],
"env": ["prod"]
},
"sort": [{"field":"effective_severity","dir":"desc"},{"field":"epss","dir":"desc"}],
"page": {"number":1,"size":200}
}
```
**Detail**
```
GET /vuln/findings/{finding_id}?policy=1.3.0
```
Returns projection, evidence links, policy rationale, paths, history summary.
**Workflow**
```
POST /vuln/findings/{id}/assign { "to": "team:platform" }
POST /vuln/findings/{id}/comment { "text": "triage notes..." }
POST /vuln/findings/{id}/accept-risk { "until":"2025-06-30","reason":"vendor patch pending","evidence":["url|upload_id"] }
POST /vuln/findings/{id}/verify-fix { "sbom_id": "sbom:sha256:..." }
POST /vuln/findings/{id}/target-fix { "version": "4.17.21" }
```
**Simulation**
```
POST /vuln/simulate
{
"policy_from": "1.3.0",
"policy_to": "1.4.0",
"query": { "severity":[">=MEDIUM"], "env":["prod"] }
}
```
Response includes perfinding delta `{before, after, diff}`.
**Export**
```
POST /vuln/export { "format":"ndjson","scope":{"query":{...}} }
```
Returns a signed bundle (see §3.10).
**Errors**
* `400` validation, `403` RBAC, `404` not found, `409` state conflict (idempotency), `429` rate limited, `5xx` server.
### 3.6 Console (Web UI)
**Routes**
* `/vuln` list with saved views
* `/vuln/:id` detail drawer state
* `/vuln/simulate/:policyVersion` diff mode
**State shape (client)**
```ts
interface VulnListState {
policyVersion: string;
filters: {...};
sort: [...];
columns: string[];
viewId?: string;
page: {number: number; size: number};
}
```
**UX**
* Virtualized grid with server paging; column chooser; density toggle.
* Quick filters: severity, exploit signals, status, env, owner, fix availability.
* Detail tabs: Summary, Evidence (raw docs with provenance), Policy (rationale chain), Paths (deep link to Graph Explorer), Fixes, History.
* Simulation bar shows delta chips: `+21 HIGH`, `-9 Suppressed by VEX` etc.
* Evidence bundle dialog previews scope and size.
* a11y: ARIA roles on grid, keyboard shortcuts: `A` assign, `C` comment, `R` accept risk, `V` verify fix.
### 3.7 CLI
Commands
```
stella vuln list --policy 1.3.0 --sev high,critical --group-by artifact --env prod --json
stella vuln show --id <finding-id> --policy 1.3.0
stella vuln simulate --from 1.3.0 --to 1.4.0 --sev '>=medium' --delta --json
stella vuln assign --filter 'advisory:CVE-2024-12345 artifact:payments' --to team:platform
stella vuln accept-risk --id <finding-id> --until 2025-06-30 --reason "vendor patch pending" --evidence url:https://ticket/123
stella vuln verify-fix --id <finding-id> --sbom <sbom-id>
```
Return codes: `0 ok`, `2 invalid args`, `3 budget exceeded`, `4 not found`, `5 denied`.
### 3.8 Storage schema (illustrative)
**Tables**
```sql
-- Evidence
CREATE TABLE evidence_advisory (...);
CREATE INDEX ea_tenant_key ON evidence_advisory(tenant, advisory_key);
CREATE TABLE evidence_vex (...);
CREATE INDEX ev_tenant_key ON evidence_vex(tenant, advisory_key);
CREATE TABLE evidence_inventory (...);
CREATE INDEX ei_artifact_purl ON evidence_inventory(tenant, artifact_id, purl);
-- Ledger
CREATE TABLE findings_ledger_events (
event_id uuid PRIMARY KEY,
finding_id bytea NOT NULL,
tenant text NOT NULL,
type text NOT NULL,
payload jsonb NOT NULL,
actor jsonb NOT NULL,
ts timestamptz NOT NULL,
prev_event_hash bytea,
event_hash bytea NOT NULL
);
CREATE INDEX fle_find_ts ON findings_ledger_events(tenant, finding_id, ts);
-- Projection
CREATE TABLE findings_projection (
finding_id bytea PRIMARY KEY,
tenant text NOT NULL,
artifact_id text NOT NULL,
purl text NOT NULL,
version text NOT NULL,
advisory_key text NOT NULL,
policy_version text NOT NULL,
effective_severity text NOT NULL,
exploitability text,
suppression_state text,
status text NOT NULL,
sla_due date,
owner text,
kev boolean,
epss double precision,
envs text[],
runtime_flag boolean,
updated_at timestamptz NOT NULL
);
CREATE INDEX fp_query ON findings_projection(tenant, policy_version, effective_severity, status);
```
**Tamperevidence**
* Ledger events use chained SHA256 hashes over canonical JSON + previous hash.
* Daily Merkle root of all event hashes is anchored to the audit store (and optionally external timestamping service).
### 3.9 Performance and scaling
* P95 list endpoint under 600 ms for 100row pages at 5M findings/tenant.
* Projections denormalize heavy joins; background projector uses idempotent jobs keyed by `(tenant,finding_id,policy_version)`.
* Rate limits per tenant and per API key; backpressure on export jobs; exponential retry for projector.
### 3.10 Evidence bundle format
* **Container:** ZIP with `manifest.json`, `findings.ndjson`, `advisory_evidence.ndjson`, `vex_evidence.ndjson`, `inventory_evidence.ndjson`, `policy_version.json`, `ledger_events.ndjson`, `CHECKSUMS`.
* **Signing:** Detached signature `bundle.sig` using tenants org key (Ed25519).
* **Manifest**
```json
{"generated_at":"2024-06-12T15:00:00Z","tenant":"acme","policy_version":"1.3.0","scope":{"query":{...}},"counts":{"findings":421}}
```
### 3.11 Observability
* Metrics (OpenTelemetry):
* `vuln_findings_list_latency_ms` (histogram)
* `vuln_projection_lag_seconds` (gauge)
* `vuln_new_findings_total` (counter)
* `vuln_sla_breaches_total` (counter by sev, owner)
* `vuln_simulation_latency_ms` (histogram)
* Logs: structured JSON with `tenant`, `policy_version`, `query_hash`, `result_count`.
* Traces: spans for resolver, policy calls, projection builds, export assembly.
* PII: redact comments in logs; store attachments encrypted at rest (KMS).
### 3.12 Security & RBAC
**Roles**
* Viewer: GET list/detail/export read scope.
* Investigator: Viewer + workflow actions except risk acceptance.
* Operator: Investigator + risk acceptance, verify fix, bulk actions.
* Auditor: Viewer + evidence bundles and ledger integrity checks.
**ABAC**
* Attribute constraints: by `artifact.owner`, `env`, and `business_tier`.
* CSRF protection for Console; all POST require antiforgery tokens.
* Attachments stored with envelope encryption; signed URLs for limited time access.
### 3.13 Rollout and migrations
* Feature flags: `vuln.explorer.ui`, `vuln.explorer.simulation`, `vuln.explorer.bulk_actions`, `vuln.explorer.evidence_bundle`.
* Phase 1: dark launch API and projections.
* Phase 2: UI readonly list and detail.
* Phase 3: workflow actions and exports.
* Data backfill: replay advisory/VEX/SBOM events to seed projections.
* Compatibility: maintain projection v1 schema for two releases; migration scripts in `/migrations/vuln/`.
---
## 4) Implementation plan
### 4.1 Services
* **Findings Ledger (new)**
* Appendonly event store with projector to `findings_projection`.
* Event validation and canonicalization; hashing and Merkle root anchoring.
* **Vuln Explorer API (new)**
* Query/filter engine with policy parameterization and grouping.
* Simulation endpoint.
* Export job orchestrator.
* **Conseiller / Excitator (updates)**
* Guarantee canonical `advisory_key` and publish `links[]`.
* No merges; maintain raw payload snapshots.
* **Policy Engine (updates)**
* Batch evaluation endpoint `POST /policy/eval/batch` with `simulate` support.
* Return rationale chain with rule IDs.
* **SBOM Service (updates)**
* Publish inventory deltas; include `scope`, `runtime_flag`, `paths`.
* Nearest safe version hints.
* **Workers/Scheduler**
* Resolver job keyed by `(tenant, artifact_id, sbom_id)`; emits candidate tuples.
* Recompute on policy activation and evidence changes.
### 4.2 Code structure
```
/src/StellaOps.Findings.Ledger
/api
/projector
/storage
/src/StellaOps.VulnExplorer.Api
/routes
/query
/simulation
/export
/packages/console/features/vuln-explorer
/components
/pages
/state
/src/StellaOps.Cli
```
### 4.3 Performance tasks
* Projection indexes and covering queries; explain plans in `/docs/vuln/perf-notes.md`.
* Cache hot groupings per tenant with TTL and invalidation on ledger projector tick.
---
## 5) Documentation changes (create/update)
1. `/docs/vuln/explorer-overview.md`
Conceptual model, identities, evidence vs determinations, AOC guarantees.
2. `/docs/vuln/explorer-using-console.md`
Workflows with screenshots, keyboard shortcuts, saved views, deep links.
3. `/docs/vuln/explorer-api.md`
Endpoint specs, query language, grouping, pagination, errors, rate limits.
4. `/docs/vuln/explorer-cli.md`
Commands, flags, examples, exit codes.
5. `/docs/vuln/findings-ledger.md`
Event schema, state machine, hashing, Merkle roots, integrity checks.
6. `/docs/policy/vuln-determinations.md`
Inputs, outputs, precedence rules, simulation semantics.
7. `/docs/vex/explorer-integration.md`
CSAF mapping, scoping to product tree, precedence.
8. `/docs/advisories/explorer-integration.md`
Advisory key normalization, provenance, withdrawn handling.
9. `/docs/sbom/vuln-resolution.md`
Ecosystem version semantics, path sensitivity, scope rules.
10. `/docs/observability/vuln-telemetry.md`
Metrics, logs, traces, dashboards, SLOs.
11. `/docs/security/vuln-rbac.md`
Role mapping, ABAC, attachment encryption, CSRF.
12. `/docs/runbooks/vuln-ops.md`
Recompute storms, projector lag, policy activation drains, export failures.
13. `/docs/install/containers.md`
Add `findings-ledger`, `vuln-explorer-api` images, compose/k8s manifests, resource sizing, health checks.
> Each doc ends with: **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 6) Engineering tasks
### Backend: Findings & API
* [ ] Define evidence and ledger schemas; migrations.
* [ ] Implement resolver for npm, Maven, PyPI, Go, OS packages with propertybased tests for version comparisons.
* [ ] Implement ledger API and projector with idempotency and hashing.
* [ ] Implement list/detail/grouping endpoints with serverside paging.
* [ ] Implement simulation and export (bundle assembly, signing).
* [ ] Integrate Policy Engine batch eval with rationale traces.
* [ ] RBAC via Authority with ABAC filters.
* [ ] Load tests at 5M findings/tenant; tune indexes.
### Conseiller/Excitator
* [ ] Normalize `advisory_key` and persist `links[]`.
* [ ] Ensure raw payload snapshots are retrievable by Explorer for Evidence tab.
### SBOM Service
* [ ] Emit `scope`, `runtime_flag`, `paths`; safe version hints.
* [ ] Inventory delta events to trigger resolver.
### Console
* [ ] Build grid with virtualization, saved views, deep link serializer.
* [ ] Implement detail tabs and path deeplinks to Graph Explorer.
* [ ] Add simulation bar and delta chips.
* [ ] Evidence bundle dialog.
* [ ] a11y keyboard flow and ARIA labeling; unit and E2E tests.
### CLI
* [ ] `stella vuln list|show|simulate|assign|accept-risk|verify-fix` with `--json` and CSV export.
* [ ] Stable output schemas; pipefriendly defaults.
### Observability/Ops
* [ ] Dashboards for list latency, projection lag, new/reopened, SLA breaches.
* [ ] Alerts on projector backlog, API 5xx spikes, export failures.
* [ ] Runbooks in `/docs/runbooks/vuln-ops.md`.
### Docs
* [ ] Author files listed in §5 with crosslinks to Policy Studio and SBOM Graph Explorer.
* [ ] Update `/docs/install/containers.md` with new images and compose/k8s snippets.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* List and detail views reflect **effective** policy outcomes and update instantly when switching policy versions.
* Evidence tab shows all raw advisory/VEX documents with provenance; no source merging.
* Resolver respects ecosystem semantics, scope, and paths; path tab roundtrips to Graph Explorer.
* Ledger events are immutable and reconstruct historical list states accurately.
* Simulation returns diffs without side effects and matches Policy Engine outputs.
* CLI/API support paging, grouping, export, simulation; contracts stable and documented.
* RBAC and tenant isolation validated by tests; attachments encrypted.
* P95 performance budgets met; dashboards green for SLOs.
---
## 8) Risks and mitigations
* **Advisory identity collisions** → strict canonicalization; preserve `links[]`; never merge raw docs.
* **Projection lag** → backpressure, worker autoscaling, health checks; alerting on lag.
* **Resolver false positives** → path evidence required; dev/test scope rules explicit; ecosystemspecific tests.
* **User confusion over suppression** → explicit badges; Policy tab with rationale and “why changed.”
* **Export size** → NDJSON streaming, size estimator in UI, scope previews.
---
## 9) Test plan
* **Unit:** version comparators, resolver per ecosystem, policy mapping, ledger state machine.
* **Integration:** SBOM + advisories + VEX ingestion, candidate generation, policy application, suppression precedence.
* **E2E Console:** triage, bulk assign, simulation, evidence bundle download; keyboardonly flow.
* **Performance:** list/grouping at target scale; projector rebuild; export assembly.
* **Security:** RBAC matrix, ABAC filters, CSRF, signed URL lifetimes, tamperevidence verification.
* **Determinism:** timetravel snapshots reproduce prior states byteforbyte.
---
## 10) Philosophy
* **Facts first, decisions second.** Evidence is immutable; decisions and workflow sit on top in a ledger.
* **Policy is the lens.** The same facts can imply different obligations; the system must make that explicit and reproducible.
* **Audit > convenience.** Every state change is justified, signed, and verifiable.
* **No hidden magic.** If anything is suppressed, the UI shows the rule or VEX that did it, with documents attached.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

545
EPIC_7.md Normal file
View File

@@ -0,0 +1,545 @@
Below is the expanded, “maximum documentation” package for Epic 7. It is pasteready for your repo and deliberately formal so engineering, docs, governance, and audit can work from the same source of truth.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 7: VEX Consensus Lens
**Short name:** `VEX Lens`
**Services touched:** Excitator (Vexer), Conseiller (Feedser), SBOM Service, Policy Engine, Findings Ledger, Web API Gateway, Authority (authN/Z), Console (Web UI), CLI, Telemetry/Analytics
**AOC ground rule:** Excitator and Conseiller aggregate but never merge or rewrite source documents. The Lens only computes a **derived, reproducible** consensus view while preserving all raw evidence and provenance.
---
## 1) What it is
The VEX Consensus Lens is a deterministic computation and presentation layer that ingests all available VEX statements about a given advisory key (e.g., CVE) and product/context, then produces an **explicit consensus state** for each `(artifact_id, purl, version, advisory_key)` tuple. It answers: “Given multiple, possibly conflicting VEX statements from different issuers, what is the most trustworthy, policyconsistent interpretation for this artifact and version?”
It never edits or merges VEX documents. It normalizes them, aligns their **product scope** to SBOM inventory, scores issuers via a tenantconfigurable **trust model**, applies time and version scoping, and outputs:
* `consensus_state`: `NOT_AFFECTED | AFFECTED | FIXED | UNDER_INVESTIGATION | INCONCLUSIVE | DISPUTED`
* `confidence`: 0.01.0 numeric score
* `rationale`: structured explanation of evidence, weights, and rules applied
* `quorum`: weighted vote breakdown by issuer, timestamp, justification, and version applicability
The Lens plugs into the Policy Engine. Policy remains the source of truth for applicability, severity, and obligations; the Lens supplies structured signals and provenance that policy can use to suppress findings, downgrade risk, or set SLAs.
Key properties:
* **Reproducible:** Same inputs and policy yield same outputs.
* **Explainable:** Every output carries a machinereadable rationale chain.
* **Scoped:** Product trees and version ranges are resolved against actual SBOM inventory.
* **Immutable evidence:** All raw VEX docs remain intact, retrievable, and linkable.
---
## 2) Why (concise)
Real ecosystems have overlapping VEX statements from upstream maintainers, vendors, distros, and third parties. Operators need a single, defensible view without losing provenance. The Lens reduces noise, flags conflicts, and turns fragmented VEX evidence into auditable signals that policy and humans can act on.
---
## 3) How it should work (maximum detail)
### 3.1 Input model
**From Excitator (Vexer):**
* Raw `VexEvidence` documents (CSAF VEX, OpenVEX, CycloneDX VEX) with:
* `advisory_key` (canonicalized)
* product tree or component coordinates (purl, CPE, vendor IDs)
* status (`not_affected`, `affected`, `fixed`, `under_investigation`)
* justifications (CSAF, OpenVEX enumerations)
* version ranges or fixed versions if available
* timestamps (issued, last_updated), source URLs
* cryptographic metadata (signature, issuer, certificate chain) if present
**From SBOM Service:**
* Inventory for each artifact: purls, versions, dependency paths, scopes, env flags.
**From Issuer Directory (new)**
* Directory of known VEX issuers:
* identity, organization, domain, CSAF publisher metadata
* public keys and trust anchors (Ed25519/X.509/PKIX/PKCS7/DSSE)
* default trust weight
* tenancyspecific overrides
**From Policy Engine:**
* Policy version and options relevant to VEX evaluation:
* trust model parameters
* confidence thresholds per environment/tier
* justification whitelist/blacklist
* recency requirements and expiry windows
* precedence rules for status conflicts
### 3.2 Normalization
1. **Canonical advisory key:** CVE preferred; GHSA mapped; vendor IDs namespaced.
2. **Status mapping:** Normalize all encodings to `NOT_AFFECTED | AFFECTED | FIXED | UNDER_INVESTIGATION`.
3. **Product scope alignment:** Convert CSAF product tree/CPE to purl variants using deterministic mappings; store mapping evidence.
4. **Version scoping:** For each evidence, compute `applies_to(version)` using ecosystem comparators (npm semver, Maven, PEP 440, Go semver, RPM/DEB EVR).
5. **Signature verification:** If signed, verify against Issuer Directory trust anchors; attach `sig_verified=true/false` and chain metadata.
6. **Temporal fields:** Compute `effective_at` (issued), `observed_at` (ingested), and staleness.
### 3.3 Trust model
Each `VexEvidence` gets a base weight `w_base` from issuer type and verification:
* Maintainer/Upstream signed: 0.9
* Maintainer/Upstream unsigned: 0.7
* Distro/SIG signed: 0.8
* Vendor of downstream product signed: 0.8
* Thirdparty scanner VEX: 0.4
* Unknown/unsigned with weak provenance: 0.2
Weights are then adjusted:
```
w = w_base
* f_signature(sig_verified) // e.g., 1.1 if verified, 0.8 if unverifiable
* f_recency(age_days) // decay after T days (policy)
* f_justification(type) // e.g., "component_not_present" lower if SBOM shows present
* f_scope_match(score) // quality of product match: exact purl > family > CPE wildcard
* f_env(app_env) // optional env-specific multipliers
```
All `f_*` are bounded to [0.5, 1.2] by default to prevent runaway effects. Tenants can override per Policy Studio.
### 3.4 Consensus algorithm
For a tuple `(artifact_id, purl, version, advisory_key)`:
1. **Collect** all normalized `VexEvidence` whose product scope maps to `purl` and `applies_to(version)==true`.
2. **Bucket** by normalized status; compute `W(status) = Σ w(evidence)` per status.
3. **Apply precedence** rules from policy:
* If `W(NOT_AFFECTED)` exceeds threshold `T_na` and there is no `FIXED` evidence contradictory for the same version, propose `NOT_AFFECTED`.
* If any `FIXED` applies and inventory version ≥ fixed version, propose `FIXED`.
* If `W(AFFECTED)``T_aff` and no dominating `NOT_AFFECTED`, propose `AFFECTED`.
* If `W(UNDER_INVESTIGATION)` dominates and others below thresholds, propose `UNDER_INVESTIGATION`.
* If both `AFFECTED` and `NOT_AFFECTED` exceed thresholds within a small margin `δ`, mark `DISPUTED`.
* Otherwise `INCONCLUSIVE`.
4. **Confidence score:**
```
confidence = W(winning_status) / (W(AFFECTED)+W(NOT_AFFECTED)+W(FIXED)+W(UNDER_INVESTIGATION) + ε)
```
Clip to [0.0, 1.0].
5. **Rationale chain:**
* Winning status, thresholds used, top 5 contributing evidences with weights and reasons, product mapping quality, version scoping evidence, and policy knobs that influenced the result.
6. **Quorum summary:**
* List issuers and their votes, signature state, timestamps, and justifications.
### 3.5 Policy interaction
* The Lens returns `consensus_state`, `confidence`, and structured `signals`.
* Policy Engine consumes these and may:
* Suppress findings automatically on `NOT_AFFECTED` confidence ≥ `P_na`.
* Downgrade severity or extend SLA when `UNDER_INVESTIGATION` from hightrust issuers.
* Require human approval when `DISPUTED`.
* Treat `FIXED` as resolved only when SBOM crawl verifies the fixed version or a `verify_fix` ledger event exists.
**Simulation:** Policy Studio can simulate different trust weights and thresholds and see consensus deltas without side effects.
### 3.6 Data contracts
**ConsensusRecord (materialized)**
```json
{
"id": "cons:sha256(tenant|artifact|purl|version|advisory|policy)",
"tenant": "acme",
"artifact_id": "svc:payments@1.14.0",
"purl": "pkg:npm/lodash@4.17.20",
"advisory_key": "CVE-2024-12345",
"policy_version": "1.3.0",
"consensus_state": "NOT_AFFECTED",
"confidence": 0.87,
"weights": {"AFFECTED":0.31,"NOT_AFFECTED":1.25,"FIXED":0.00,"UNDER_INVESTIGATION":0.12},
"top_evidence": ["vex_evd:...","vex_evd:..."],
"quorum": [
{"issuer":"lodash-maintainers","status":"NOT_AFFECTED","w":0.9,"sig_verified":true,"just":"vulnerable_code_not_present","issued":"2024-06-08"},
{"issuer":"vendorX-distro","status":"AFFECTED","w":0.25,"sig_verified":false,"just":"generic_advisory","issued":"2024-06-07"}
],
"rationale": [
{"rule":"trust.weight.issuer","detail":"maintainer signed evidence 0.9"},
{"rule":"scope.match.exact","detail":"exact purl match"},
{"rule":"justification.vcnp","detail":"supported by SBOM callgraph hint"}
],
"updated_at": "2024-06-12T10:00:00Z"
}
```
**Issuer Directory entry**
```json
{
"issuer_id": "iss:lodash",
"org": "Lodash Maintainers",
"domains": ["lodash.com"],
"keys": [{"type":"ed25519","pub":"...","expires":"2026-12-31"}],
"default_weight": 0.9,
"metadata": {"csaf_publisher": true}
}
```
### 3.7 APIs
**Compute/Query**
```
GET /vex/consensus?artifact=svc:payments@1.14.0&purl=pkg:npm/lodash@4.17.20&advisory=CVE-2024-12345&policy=1.3.0
POST /vex/consensus/query { "policy":"1.3.0", "filter": { "state":["DISPUTED","INCONCLUSIVE"], "confidence":"<0.6", "env":["prod"] }, "page":{...} }
GET /vex/consensus/{id} // full record
POST /vex/consensus/simulate // override trust knobs and thresholds for what-if
```
**Issuer Directory**
```
GET /vex/issuers
POST /vex/issuers (admin)
POST /vex/issuers/{id}/keys (admin)
```
**Exports**
```
POST /vex/consensus/export { "format":"ndjson","scope":{"filter":{...}} }
```
**Errors**
* `400` invalid mapping, `403` RBAC, `404` not found, `409` conflict, `429` rate limit.
### 3.8 Console (Web UI)
Routes:
* `/vex/consensus` overview with filters: state, confidence, issuer, advisory, artifact, env.
* `/vex/consensus/:id` detail: Evidence pane, Quorum graph, Policy impact, SBOM path links.
UX elements:
* **Quorum bar:** stacked bar showing weights per status; hover reveals issuer contributions.
* **Confidence chip:** numeric and qualitative band (Low/Med/High).
* **Evidence table:** paged list of VEX docs with signature icon, scope match quality tag, justification tag, issued/updated timestamps.
* **Conflict view:** for `DISPUTED`, show side-by-side issuer rationales and suggested next steps.
* **Deep link** into Vulnerability Explorer detail, preselecting the Policy version used for the consensus.
A11y:
* ARIA roles on grid and bars; keyboard shortcuts `S` switch policy, `T` trust presets, `E` export.
### 3.9 CLI
Commands:
```
stella vex consensus list --policy 1.3.0 --state disputed,inconclusive --confidence '<0.6' --artifact payments --json
stella vex consensus show --id <cons-id> --policy 1.3.0
stella vex simulate --policy 1.3.0 --trust 'issuer:lodash=1.0,issuer:vendorX=0.5' --thresholds 'na=1.0,aff=0.6' --json
stella vex issuers list
stella vex export --filter 'artifact:payments advisory:CVE-2024-12345' --out vex-consensus.ndjson
```
Exit codes: `0` ok, `2` invalid args, `4` not found, `5` denied.
### 3.10 Storage schema (illustrative)
```sql
-- Normalized VEX (reference Excitator id; do not alter raw)
CREATE TABLE vex_normalized (
id uuid PRIMARY KEY,
tenant text NOT NULL,
evidence_id text NOT NULL, -- link to Excitator
advisory_key text NOT NULL,
issuer_id text,
status text NOT NULL, -- NOT_AFFECTED|AFFECTED|FIXED|UNDER_INVESTIGATION
justification text,
purl text, -- normalized mapping target
version_range jsonb, -- ecosystem-specific encoding
sig_verified boolean,
scope_score real, -- 0..1 quality of mapping
issued timestamptz,
updated timestamptz,
w_base real,
UNIQUE (tenant, evidence_id)
);
-- Issuer Directory
CREATE TABLE vex_issuers (
issuer_id text PRIMARY KEY,
tenant text NOT NULL,
org text NOT NULL,
default_weight real NOT NULL,
metadata jsonb,
UNIQUE(tenant, org)
);
CREATE TABLE vex_issuer_keys (
id uuid PRIMARY KEY,
issuer_id text NOT NULL REFERENCES vex_issuers(issuer_id),
key_type text NOT NULL,
pubkey text NOT NULL,
expires timestamptz
);
-- Consensus projection
CREATE TABLE vex_consensus (
id bytea PRIMARY KEY,
tenant text NOT NULL,
artifact_id text NOT NULL,
purl text NOT NULL,
version text NOT NULL,
advisory_key text NOT NULL,
policy_version text NOT NULL,
consensus_state text NOT NULL,
confidence real NOT NULL,
weights jsonb NOT NULL,
top_evidence text[] NOT NULL,
updated_at timestamptz NOT NULL
);
CREATE INDEX vc_query ON vex_consensus(tenant, policy_version, consensus_state, confidence);
```
### 3.11 Integration with Findings Ledger and Vuln Explorer
* The Vuln Explorer reads `vex_consensus` for each finding and renders:
* Consensus chip, confidence, and a link to full quorum.
* For `NOT_AFFECTED` with confidence ≥ policy threshold, show “Suppressed by VEX (Consensus)” badge.
* For `DISPUTED`, open a triage banner prompting manual review and optional ledger comment/assignment.
* Ledger receives no new event type from lens computation itself. Human actions triggered by lens views produce standard events (`comment`, `assign`, `change_state`).
### 3.12 Security & RBAC
Roles:
* Viewer: query consensus readonly.
* Investigator: Viewer + export.
* Operator: Investigator + trust simulation.
* Admin: manage Issuer Directory entries and keys.
CSRF for Console; ABAC scoping by artifact ownership and environment.
### 3.13 Observability
Metrics:
* `vex_consensus_compute_latency_ms` (histogram)
* `vex_consensus_records_total` (counter)
* `vex_consensus_disputed_total` (counter by issuer combinations)
* `vex_consensus_staleness_seconds` (gauge)
* `vex_signature_verification_rate` (gauge)
Logs: structured events with `tenant`, `policy_version`, `advisory_key`, `quorum_summary`.
Traces: spans for normalization, mapping, trust weighting, consensus decision, DB writes.
### 3.14 Performance & scaling
Targets:
* P95 query under 500 ms for 100row pages at 10M consensus records/tenant.
* Projection jobs are idempotent and keyed by `(tenant, artifact, purl, version, advisory, policy)`; backpressure with work queues.
* Cache popular queries with tenantscoped TTL; invalidate on Excitator or policy changes.
### 3.15 Edge cases
* **Ambiguous product mapping:** mark low `scope_score`, cap weight, surface warning in UI.
* **VEX “not present” vs SBOM shows present:** downweight with `f_justification`, require manual check.
* **Withdrawn or superseded VEX:** decay to near zero; keep provenance.
* **Partial fixes:** if fixed version applies to subset of platforms, map to env or arch dimension when available.
* **Time travel:** consensus recalculated as of a timestamp using only evidence ≤ `as_of` and the corresponding policy version.
---
## 4) Implementation plan
### 4.1 Services
* **VEX Lens Service (new)**
* Normalization pipeline, trust weighting, consensus computation, and projections.
* Batch recompute on policy activation and Excitator deltas.
* **Excitator (updates)**
* Ensure all VEX evidence carries issuer hints and raw signature blobs when present.
* Publish product trees and original coordinates intact.
* **Policy Engine (updates)**
* Add VEX trust knobs, thresholds, recency decay, and status precedence.
* Batch eval endpoint accepts `consensus inputs` where needed.
* **Issuer Directory (new)**
* Manage issuer metadata and keys; tenant overrides; audit logs.
### 4.2 Code structure
```
/src/StellaOps.VexLens
/normalizer
/mapping # CPE/purl translators
/trust # weighting functions
/consensus # algorithm and projections
/api
/src/StellaOps.Excititor # updates
/src/StellaOps.Policy # updates
/src/StellaOps.IssuerDirectory
/packages/console/features/vex-consensus
/src/StellaOps.Cli
```
### 4.3 Rollout
* Phase 1: API readonly with basic trust model, Console list/detail, no simulation.
* Phase 2: Policy Studio integrations and simulation.
* Phase 3: Issuer Directory admin flows, exports, and advanced mapping diagnostics.
---
## 5) Documentation changes (create/update)
1. `/docs/vex/consensus-overview.md`
Purpose, scope, terminology, evidence vs derived view, AOC guarantees.
2. `/docs/vex/consensus-algorithm.md`
Normalization, mapping, weighting, thresholds, precedence, formulas, examples.
3. `/docs/vex/issuer-directory.md`
Managing issuers, keys, trust overrides, security model.
4. `/docs/vex/consensus-api.md`
Endpoints, request/response schemas, errors, pagination, rate limits.
5. `/docs/vex/consensus-console.md`
Screens, filters, conflict workflows, a11y, deep links.
6. `/docs/policy/vex-trust-model.md`
Policy knobs, thresholds, decay, simulation.
7. `/docs/sbom/vex-mapping.md`
Product tree mapping to purl/version, ecosystem comparators, edge cases.
8. `/docs/security/vex-signatures.md`
Signature verification flows, key management, auditing.
9. `/docs/runbooks/vex-ops.md`
Recompute storms, mapping failures, signature errors, lag, quotas.
All docs end with the imposed rule statement.
---
## 6) Engineering tasks
### Backend core
* [ ] Implement normalization for CSAF VEX, OpenVEX, CycloneDX VEX.
* [ ] Build product mapping library (CPE→purl, vendor tokens→purl families).
* [ ] Implement signature verification (Ed25519/PKIX/DSSE) using Issuer Directory keys.
* [ ] Implement trust weighting functions and configurable parameters.
* [ ] Implement consensus algorithm with unit tests and property tests.
* [ ] Materialize `vex_consensus` projection with indexes and idempotent workers.
* [ ] Batch recompute on policy activation and Excitator deltas.
### APIs & Integrations
* [ ] `/vex/consensus` query, detail, simulate, export.
* [ ] Policy Engine: consume consensus signals; add thresholds and precedence.
* [ ] Vuln Explorer: show consensus chip and triage banners; deep link to Lens.
### Issuer Directory
* [ ] CRUD for issuers and keys, audit logs, RBAC.
* [ ] Import common CSAF publishers; seed with sane defaults.
### Console
* [ ] Build list grid with filters and saved views.
* [ ] Quorum bar and Evidence table with signature icons and scope quality tags.
* [ ] Conflict view for `DISPUTED`.
* [ ] Simulation drawer integrated with Policy Studio.
### CLI
* [ ] `stella vex consensus list|show|simulate|export` with JSON/CSV.
* [ ] Stable schemas; tests for piping and scripting.
### Observability/Perf
* [ ] Metrics, logs, traces as specified; dashboards.
* [ ] Load tests at 10M consensus records/tenant; optimize indexes and caches.
### Docs
* [ ] Author and crosslink all docs listed in §5.
* [ ] Add examples and screenshots to Console doc.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* Normalization supports CSAF VEX, OpenVEX, CycloneDX VEX with product tree mapping to purls across npm, Maven, PyPI, Go, RPM/DEB.
* Signature verification works and affects weights; unverifiable signatures do not crash flows.
* Consensus outputs are reproducible, explainable, and queryable at scale.
* Vuln Explorer displays consensus state and affects policy outcomes per thresholds.
* Simulation reflects policy trust changes without side effects and returns rationale deltas.
* CLI/API feature parity; evidence and quorum are exportable.
* P95 performance budgets met; dashboards reflect health.
---
## 8) Risks and mitigations
* **Mapping errors (CPE→purl):** use conservative scope scores, cap weights, surface warnings, manual override hooks in Policy Studio.
* **Malicious or mistaken issuer:** signature verification plus trust weighting and tenancy overrides.
* **Evidence storms:** debounce recompute; batch and shard workers; backpressure and prioritization.
* **User confusion with conflicting VEX:** clear conflict UI, rationale chains, suggested actions, and policy banners.
* **Stale statements:** recency decay and expiry windows in policy.
---
## 9) Test plan
* **Unit:** status mapping, comparators per ecosystem, trust weighting, threshold math.
* **Property tests:** invariants such as monotonicity with added supporting evidence and idempotent recompute.
* **Integration:** Excitator→Lens→Policy→Vuln Explorer pipeline with signed/unsigned, conflicting, and stale evidence.
* **E2E Console:** list filters, detail with quorum, conflict workflows, export, simulation.
* **Security:** RBAC on Issuer Directory, CSRF for Console, signature verification path traversal guards.
* **Performance:** cold/hot query latencies, recompute throughput, cache hit ratios.
* **Determinism:** timetravel snapshots reproduce prior consensus states.
---
## 10) Philosophy
* **Consensus, not replacement.** The Lens summarizes without erasing dissent.
* **Trust is contextual.** Tenants must tune weights and thresholds to their environments.
* **Proof over prose.** Every decision comes with math, provenance, and a rationale chain.
* **Safety by design.** When in doubt, surface conflicts instead of silently suppressing.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

440
EPIC_8.md Normal file
View File

@@ -0,0 +1,440 @@
Below is the “maximum documentation” bundle for Epic 8. Its engineered to be pasted into your repo without turning into yet another unread wiki tomb. Slight sarcasm included to keep blood flowing.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 8: Advisory AI Assistant (summaries, conflict explain, remediation hints)
**Short name:** `Advisory AI`
**Services touched:** Conseiller (Feedser), Excitator (Vexer), VEX Lens, SBOM Service, Policy Engine, Findings Ledger, Web API Gateway, Authority (authN/Z), Console (Web UI), CLI, Telemetry/Analytics
**AOC ground rule:** Conseiller and Excitator aggregate but never merge or mutate source docs. Advisory AI produces derived summaries and plans with strict provenance and citations. No silent rewriting of evidence. Ever.
---
## 1) What it is
Advisory AI is a tenantscoped, retrievalaugmented assistant that turns noisy security advisories and VEX statements into three consumable artifacts:
1. **Advisory Summary**
Condenses one or more advisories (CSAF, OSV, GHSA, vendor PDFs, distro notices) into a concise brief with key facts: affected ranges, exploit status, impact, known workarounds, fixed versions, and links. Always cites the exact sources and sections used.
2. **Conflict Explain**
Explains why VEX statements or advisories disagree for a specific artifact and version. Uses the VEX Consensus Lens outputs and issuer trust model to produce a humanreadable, stepbystep explanation: who said what, where the product scoping diverges, and what policy thresholds caused the final state.
3. **Remediation Hints**
Suggests practical next steps: upgrade paths compatible with your dependency graph, backports, config toggles, temporary policy suppressions, or compensating controls. Every hint is grounded in SBOM, environment, and policy. It ships as structured JSON plus a human summary, ready to paste into a ticket.
It lives in the Console as a side panel, in the CLI for batch runs, and via APIs for automation. It does not change scanner results or consensus on its own. Humans remain in charge. The machine does the skimming and the math so humans can keep the judgment and the coffee.
---
## 2) Why (brief)
Advisories are long, inconsistent, and sometimes contradictory. Teams waste cycles reconciling PDFs with package manifests. The assistant eliminates that sludge: fast summaries, explicit conflict explanations, and remediation hints that are actually applicable to your software, not to an imaginary ideal project from 2013.
---
## 3) How it should work (maximum detail)
### 3.1 Capabilities
* **Summaries**
* Input: one advisory or a bundle linked by the same advisory key (CVE, GHSA, vendorID), product scope, and environment.
* Output:
* 150 to 300 words summary
* `AdvisorySummary JSON` (schema below)
* Citations with paragraph anchors
* Confidence label and coverage score (how much of the advisory set is represented)
* **Conflict Explain**
* Input: `(artifact_id, purl, version, advisory_key)` tuple.
* Output: narrative plus a structured breakdown of consensus math, issuer votes, product mapping mismatches, and the exact policy knobs that tipped the result.
* **Remediation Hints**
* Input: same tuple plus SBOM context and environment.
* Output: ranked list of remediation options with feasibility score, blast radius estimate (derived from dependency paths), effort class, and links to fixed versions. Includes “do nothing” when the VEX consensus is not affected.
### 3.2 System design
**Architecture diagram in words (because ASCII art is a crime):**
1. **Retrievers**
* Structured retriever over Conseillers normalized advisory fields.
* Vector retriever over advisory text chunks with paragraph anchors.
* VEX retriever over Excitator evidence and VEX Lens consensus.
* SBOM retriever for purl, version, dependency paths, env flags.
2. **Deterministic resolvers**
* Version comparators per ecosystem.
* Range satisfaction checks.
* Dependency path scorers and blast radius estimator.
3. **Orchestrator**
* Taskspecific prompt templates for Summary, Conflict, Remediation.
* Tool calls to deterministics (version check, graph crawl) with results injected into the prompt as structured context.
* Strict token budgets and truncation rules to avoid model babble.
4. **Models**
* Default: onprem inference container with midsized model.
* Optional: tenantenabled remote inference. Disabled by default.
* Temperature locked low for summary and conflict. Slightly higher for remediation narrative phrasing. No creativity in facts.
5. **Guardrails**
* Prompt injection defense by stripping or quarantining advisory text that tries to instruct the model.
* Fact boundary tagger. The assistant must only state facts that appear in structured inputs or cited chunks.
* Redaction of secrets before prompts.
* Output validator checks: required JSON fields, numeric ranges, valid version strings.
### 3.3 Data contracts
**AdvisorySummary JSON**
```json
{
"advisory_key": "CVE-2025-12345",
"sources": [
{"id":"csaf:vendorA:2025-001","uri":"...","sections":["2.1","3.4"]},
{"id":"osv:pkg:npm/lodash","uri":"...","sections":["affected","references"]}
],
"affected_ranges": [
{"ecosystem":"npm","purl_family":"pkg:npm/lodash","introduced":"<4.17.15","fixed": "4.17.21"}
],
"exploit_status": "no_known_exploit | poc_public | exploited_in_the_wild | n/a",
"impact": {"cvss":[{"vector":"CVSS:3.1/AV:N/...","score":7.5}], "cwes":["CWE-79"]},
"workarounds": ["Disable feature X", "Set flag Y=false"],
"fixed_versions": ["4.17.21"],
"notes": "Vendor states not affected on platform Z due to build option W.",
"coverage_score": 0.86,
"generated_at": "2025-10-25T12:00:00Z"
}
```
**ConflictExplanation JSON**
```json
{
"tuple": {"artifact_id":"svc:checkout@1.9.0","purl":"pkg:npm/lodash@4.17.20","advisory_key":"CVE-2025-12345"},
"consensus": {"state":"NOT_AFFECTED","confidence":0.82},
"quorum": [
{"issuer":"lodash-maintainers","status":"NOT_AFFECTED","weight":0.9,"sig":true,"justification":"component_not_present"},
{"issuer":"vendorX-distro","status":"AFFECTED","weight":0.25,"sig":false,"justification":"generic"}
],
"policy_factors": {"na_threshold":1.0,"aff_threshold":0.6,"recency_decay_days":90},
"mapping_issues": [{"kind":"cpe_to_purl","score":0.6,"detail":"CPE wildcard matched multiple purls"}],
"explanation_steps": [
"Exact purl match found for maintainer VEX; weight 0.9",
"Distro advisory generic; scope score 0.5; effective weight 0.25",
"NA threshold met. Result set to NOT_AFFECTED"
]
}
```
**RemediationPlan JSON**
```json
{
"tuple": {"artifact_id":"svc:checkout@1.9.0","purl":"pkg:npm/lodash@4.17.20","advisory_key":"CVE-2025-12345"},
"options": [
{
"kind": "upgrade",
"target_version": "4.17.21",
"feasibility": 0.92,
"blast_radius": {"direct_callers":3,"transitive_depth":2},
"effort": "low | medium | high",
"rationale": "Semver patch, no breaking APIs in release notes",
"links": ["release_notes_uri"]
},
{
"kind": "workaround",
"action": "Set SAFE_MODE=true",
"feasibility": 0.6,
"blast_radius": {"feature_flags":["SAFE_MODE"]},
"effort": "low",
"rationale": "Vendor states mitigation reduces attack surface on feature X"
}
],
"preferred": 0,
"policy_effects": {"sla_days": 7, "severity_override": "medium_if_not_fixed"},
"generated_at": "2025-10-25T12:00:00Z"
}
```
### 3.4 APIs
```
POST /advisory/ai/summary
{
"advisory_key":"CVE-2025-12345",
"artifact_id":"svc:checkout@1.9.0",
"purl":"pkg:npm/lodash@4.17.20",
"sources":["csaf:*","osv:*"], // optional filters
"policy_version":"1.3.0",
"lang":"en"
}
-> 200 { "summary_text":"...", "summary": {AdvisorySummary}, "citations":[...] }
POST /advisory/ai/conflict
{
"artifact_id":"svc:checkout@1.9.0",
"purl":"pkg:npm/lodash@4.17.20",
"advisory_key":"CVE-2025-12345",
"policy_version":"1.3.0"
}
-> 200 { "explanation_text":"...", "explanation": {ConflictExplanation} }
POST /advisory/ai/remediation
{
"artifact_id":"svc:checkout@1.9.0",
"purl":"pkg:npm/lodash@4.17.20",
"advisory_key":"CVE-2025-12345",
"policy_version":"1.3.0",
"max_options":5,
"strategy_preference":["upgrade","backport","workaround"]
}
-> 200 { "plan_text":"...", "plan": {RemediationPlan} }
POST /advisory/ai/batch
{
"items":[ {tuple}, {tuple}, ... ],
"task":"summary | conflict | remediation",
"policy_version":"1.3.0"
}
-> 207 multi-status
```
Status codes: `400` invalid, `403` RBAC, `404` missing evidence, `409` conflict lock, `422` output validation failed, `429` rate limit.
### 3.5 Console (Web UI)
* Surfaces:
* Vuln Explorer detail: “Advisory AI” side panel with 3 tabs: Summary, Conflict, Remediation.
* Consensus Lens detail: prominent “Explain conflict” button.
* Policy Studio sim: “Show effect on assistant output” preview.
* UX details:
* Citations are footnotes with hover to show source paragraph.
* “Copy as ticket” produces Markdown and JSON.
* Plan options show feasibility bar, blast radius chips, and required approvals per policy.
* Injection warnings appear if advisory text included unsafe instructions.
* A11y: ARIA tags for tabs, keyboard shortcuts `G` to generate, `R` to refresh, `C` to copy JSON.
### 3.6 CLI
```
stella advise summarize --advisory CVE-2025-12345 --artifact svc:checkout@1.9.0 --purl pkg:npm/lodash@4.17.20 --policy 1.3.0 --json
stella advise explain --advisory CVE-2025-12345 --artifact svc:checkout@1.9.0 --purl pkg:npm/lodash@4.17.20 --policy 1.3.0
stella advise remediate --advisory CVE-2025-12345 --artifact svc:checkout@1.9.0 --purl pkg:npm/lodash@4.17.20 --policy 1.3.0 --strategy upgrade,workaround --out plan.json
stella advise batch --file tuples.json --task remediation --policy 1.3.0
```
Exit codes: `0` ok, `2` invalid args, `4` not found, `5` denied, `7` validation fail.
### 3.7 RBAC and security
* Roles:
* Viewer can run summaries and read explanations.
* Operator can run remediation and export plans.
* Admin can toggle model endpoints and guardrail settings.
* Defaults:
* Remote model calls disabled.
* Redaction on.
* Prompt logging anonymized.
* Outputs stored as derived artifacts with TTL (default 30 days) unless pinned to a ticket.
### 3.8 Observability
* Metrics:
* `advisory_ai_latency_ms` by task type.
* `advisory_ai_guardrail_blocks_total`.
* `advisory_ai_output_validation_fail_total`.
* `advisory_ai_citation_coverage` gauge.
* Traces: retriever spans, tool calls, model inference, validator.
* Logs: include tuple key, token usage, truncation events, and guardrail outcomes.
### 3.9 Performance
* Targets:
* P95 under 1.5 s for Summary and Conflict with warm caches.
* P95 under 2.5 s for Remediation on medium SBOMs (1000 packages).
* Batch throughput 10 tuples per second per worker.
### 3.10 Edge cases
* Advisory missing fixed versions: produce workaroundonly plan and mark feasibility low.
* Conflicts with neartie weights: declare “DISPUTED” and require human approval, no auto plan preferred.
* Exotic version schemes: fallback to string compare with warning and feasibility cap.
* Private packages: no public release notes. Prefer internal changelog links if attached to artifact metadata.
* Multienv differences: render perenv deltas when policy knobs differ (dev vs prod).
---
## 4) Implementation plan
### 4.1 Services and components
* **New:** `src/StellaOps.AdvisoryAI`
* `retriever/` wrappers for Conseiller, Excitator, VEX Lens, SBOM.
* `deterministic/` version and path analyzers.
* `orchestrator/` task routers and prompt builders.
* `guardrails/` injection, redaction, output validator.
* `api/` REST endpoints and schema enforcement.
* **Updates:**
* Conseiller: expose paragraphlevel anchors for advisories.
* Excitator: expose justifications and product trees in normalized form.
* VEX Lens: stable API for quorum and rationale.
* SBOM Service: efficient path queries and versions timeline per purl.
### 4.2 Packaging
* Container images:
* `stella/advisory-ai:<<version>>`
* `stella/inference:<<version>>` (if using onprem model)
* Helm values to toggle remote inference and GPU.
### 4.3 Rollout
* Phase 1: Summary and Conflict readonly.
* Phase 2: Remediation with “Copy as ticket”.
* Phase 3: Batch APIs, CLI, and Policy Studio simulation hooks.
---
## 5) Documentation changes
Create or update the following files. Each doc ends with the imposed rule statement.
1. `/docs/advisory-ai/overview.md`
What it is, capabilities, guardrails, AOC alignment, RBAC.
2. `/docs/advisory-ai/architecture.md`
RAG design, retrievers, orchestrator, deterministics, models, caching.
3. `/docs/advisory-ai/api.md`
Endpoint specs, payload schemas, error codes, examples.
4. `/docs/advisory-ai/console.md`
Screens, actions, a11y, how citations work, copyasticket.
5. `/docs/advisory-ai/cli.md`
Command usage, exit codes, piping examples.
6. `/docs/policy/assistant-parameters.md`
Temperature, max tokens, plan ranking weights, TTLs.
7. `/docs/security/assistant-guardrails.md`
Redaction rules, injection defense, output validation, logging.
8. `/docs/sbom/remediation-heuristics.md`
Feasibility scoring, blast radius, effort classes.
9. `/docs/runbooks/assistant-ops.md`
Warmup, cache priming, model outages, scaling, oncall steps.
---
## 6) Engineering tasks
### Backend core
* [ ] Implement structured and vector retrievers with paragraph anchors from Conseiller.
* [ ] Implement VEX retriever using Lens APIs with caching.
* [ ] Build deterministics: ecosystem comparators, range checks, dependency path scorer.
* [ ] Implement orchestrator with taskspecific templates and tool call pipeline.
* [ ] Implement guardrails and validators with hard failure on invalid JSON.
* [ ] Add RBAC to endpoints and anonymized prompt logging.
* [ ] Add caching layer with tuplekeyed entries and policy version scoping.
### Integrations
* [ ] Conseiller: expose advisory chunk API and metadata needed for citations.
* [ ] Excitator: ensure justifications and product trees are queryable.
* [ ] VEX Lens: add “policy factors” endpoint for explanation rendering.
* [ ] SBOM Service: implement `GET /sbom/paths?purl=...` and version timeline.
### Console
* [ ] Build Advisory AI panel with 3 tabs and citation tooltips.
* [ ] Implement “Copy as ticket” (Markdown + JSON) and download.
* [ ] Add injection warning banner when triggered.
* [ ] Respect a11y requirements and shortcuts.
### CLI
* [ ] `stella advise summarize|explain|remediate|batch` with JSON output.
* [ ] Add `--out` option to save plans and summaries.
* [ ] Tests for piping and jq workflows.
### Observability
* [ ] Emit metrics and traces listed in §3.8.
* [ ] Dashboards: latency, guardrail blocks, validation fails, coverage.
### Docs
* [ ] Write all files in §5 with examples and screenshots.
* [ ] Crosslink to VEX Lens and Vulnerability Explorer docs.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* Summaries cite specific source sections and reflect affected ranges and fixed versions correctly for at least 95% of a validation set.
* Conflict explanations enumerate issuers, weights, justifications, mapping issues, and policy thresholds that caused the consensus state.
* Remediation plans output at least one feasible option when a fixed version exists and correctly flag “no public fix” cases.
* JSON schemas validate for all outputs.
* Console shows the panel with citations, copyasticket, and a11y passes.
* CLI produces identical JSON to API responses.
* Guardrails block injection attempts and redact secrets in prompts.
* P95 latency targets are met with warm caches.
* No mutation of raw advisory or VEX evidence occurs anywhere in the pipeline.
---
## 8) Risks and mitigations
* **Prompt injection in advisory text.** Strip instructions, sandbox chunks, and highlight to user when removed.
* **Hallucinated facts.** Hard validation requires facts to appear in structured inputs or cited text. Fail closed if not provable.
* **Mapping errors produce bad hints.** Depend on SBOM Graph and VEX Lens scope scores; cap feasibility when scope is weak.
* **Model outage.** Degrade to deterministic summaries (shorter, but accurate).
* **Privacy concerns.** Default onprem inference, remote endpoints optin with clear flags and audit logs.
---
## 9) Test plan
* **Unit:** version comparators, range checks, feasibility scoring, output validators.
* **Golden files:** advisory sets mapped to expected summaries and plans; diff on each build.
* **Injection tests:** adversarial advisories with “ignore prior instructions” payloads must be neutralized.
* **Integration:** Conseiller→Advisory AI→Console loop, with VEX Lens conflicts and SBOM graph lookups.
* **E2E:** generate summary, explanation, and plan for representative ecosystems (npm, Maven, PyPI, Go, RPM/DEB).
* **Perf:** soak tests with 5k tuples batch; observe cache hit ratios and P95.
* **A11y:** keyboard navigation and screen reader labels.
---
## 10) Philosophy
* **Facts first.** If it is not in structured inputs or citations, it does not exist.
* **Explain everything.** Humans should see exactly why the tool said what it said.
* **Helpful by default.** Plans must consider the real dependency graph and environment, not fantasy.
* **No silent merges.** Evidence is sacred. Summaries and plans are separate, auditable derivatives.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

523
EPIC_9.md Normal file
View File

@@ -0,0 +1,523 @@
Below is the “maximum documentation” bundle for Epic 9. Paste it into your repo and pretend the ingestion chaos was always under control.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 9: Source & Job Orchestrator Dashboard
**Short name:** `Orchestrator Dashboard`
**Primary service:** `orchestrator` (scheduler, queues, ratelimits, job state)
**Surfaces:** Console (Web UI), CLI, Web API
**Touches:** Conseiller (Feedser), Excitator (Vexer), VEX Consensus Lens, SBOM Service, Policy Engine, Findings Ledger, Authority (authN/Z), Telemetry/Analytics, Object Storage, Relational DB, Message Bus
**AOC ground rule:** Conseiller and Excitator aggregate but never merge. The orchestrator schedules, tracks and recovers jobs; it does not transform evidence beyond transport and storage. No “smart” merging in flight.
---
## 1) What it is
The Source & Job Orchestrator Dashboard is the control surface for every data source and pipeline run across StellaOps. It gives operators:
* Live health of all advisory/VEX/SBOM sources and derived jobs.
* Endtoend pipeline visibility as DAGs and timelines.
* Controls for pausing, backfilling, replaying, throttling and retrying.
* Error pattern analysis, ratelimit observability and backpressure insights.
* Provenance and audit trails from initial fetch through parse, normalize, index and policy evaluation.
The dashboard sits over the `orchestrator` service, which maintains job state, schedules runs, enforces quotas and rate limits, and collects metrics from worker pools embedded in Conseiller, Excitator, SBOM and related services.
---
## 2) Why (brief)
Ingestion breaks quietly and then loudly. Without a unified control plane, you learn about it from angry users or empty indexes. This dashboard shortens incident MTTR, enables safe backfills, and makes compliance reviewers stop sending emails with twelve attachments and one emoji.
---
## 3) How it should work (maximum detail)
### 3.1 Capabilities
* **Source registry**
* Register, tag and version connectors (OSV, GHSA, CSAF endpoints, vendor PDF scrapers, distro feeds, RSS, S3 drops, internal registries).
* Store connection details, secrets (via KMS), ratelimit policy, schedules, and ownership metadata.
* Validate and “test connection” safely.
* **Job orchestration**
* Create DAGs composed of job types: `fetch`, `parse`, `normalize`, `dedupe`, `index`, `consensus_compute`, `policy_eval`, `crosslink`, `sbom_ingest`, `sbom_index`.
* Priorities, queues, concurrency caps, exponential backoff, circuit breakers.
* Idempotency keys and output artifact hashing to avoid duplicate work.
* Eventtime watermarks for backfills without double counting.
* **Observability & control**
* Gantt timeline and realtime DAG view with critical path highlighting.
* Backpressure and queue depth heatmaps.
* Error clustering by class (HTTP 429, TLS, schema mismatch, parse failure, upstream 5xx).
* Persource SLOs and SLA budgets with burnrate alerts.
* Oneclick actions: retry, replay range, pause/resume, throttle/unthrottle, reroute to canary workers.
* **Provenance & audit**
* Immutable run ledger linking input artifact → every job → output artifact.
* Schema version tracking and drift detection.
* Operator actions recorded with reason and ticket reference.
* **Safety**
* Secret redaction everywhere.
* Tenant isolation at API, queue and storage layers.
* AOC: no inflight merges of advisory or VEX content.
### 3.2 Core architecture
* **orchestrator (service)**
* Maintains job state in Postgres (`sources`, `runs`, `jobs`, `artifacts`, `dag_edges`, `quotas`, `schedules`).
* Publishes work to a message bus (e.g., `topic.jobs.ready.<queue>`).
* Distributed tokenbucket rate limiter per source/tenant/host.
* Watchdog for stuck jobs and circuit breakers for flapping sources.
* Watermark manager for backfills (eventtime windows).
* **worker SDK**
* Lightweight library embedded in Conseiller/Excitator/SBOM workers to:
* Claim work, heartbeat, update progress, report metrics.
* Emit artifact metadata and checksums.
* Enforce idempotency via orchestratorsupplied key.
* **object store**
* Raw payloads and intermediate artifacts organized by schema and hash:
* `advisory/raw/<source_id>/<event_time>/<sha256>.json|pdf`
* `advisory/normalized/<schema_ver>/<hash>.json`
* `vex/raw|normalized/...`
* `sbom/raw|graph/...`
* **web API**
* CRUD for sources, runs, jobs, schedules, quotas.
* Control actions (retry, cancel, pause, backfill).
* Streaming updates via WebSocket/SSE for the Console.
* **console**
* React app consuming Orchestrator APIs, rendering DAGs, timelines, health charts and action panels with RBAC.
### 3.3 Data model (selected tables)
* `sources`
* `id`, `kind` (`advisory|vex|sbom|internal`), `subtype` (e.g., `osv`, `ghsa`, `csaf`, `vendor_pdf`), `display_name`, `owner_team`, `schedule_cron`, `rate_policy`, `enabled`, `secrets_ref`, `tags`, `schema_hint`, `created_at`, `updated_at`.
* `runs`
* `id`, `source_id`, `trigger` (`schedule|manual|event|backfill`), `window_start`, `window_end`, `state`, `started_at`, `finished_at`, `stats_json`.
* `jobs`
* `id`, `run_id`, `type`, `queue`, `priority`, `state` (`pending|running|succeeded|failed|canceled|deadletter`), `attempt`, `max_attempt`, `idempotency_key`, `input_artifact_id`, `output_artifact_id`, `worker_id`, `created_at`, `started_at`, `finished_at`, `error_class`, `error_message`, `metrics_json`.
* `dag_edges`
* `from_job_id`, `to_job_id`, `edge_kind` (`success_only|always`).
* `artifacts`
* `id`, `kind` (`raw|normalized|index|consensus`), `schema_ver`, `hash`, `uri`, `bytes`, `meta_json`, `created_at`.
* `quotas`
* `tenant_id`, `resource` (`requests_per_min`, `concurrent_jobs`), `limit`, `window_sec`.
* `schedules`
* Persource cron plus jitter, timezone, blackout windows.
### 3.4 Job lifecycle
1. **Plan**
Scheduler creates a `run` for a source and plans a DAG: e.g., `fetch → parse → normalize → dedupe → index → policy_eval` (advisory) or `fetch → parse → normalize → consensus_compute` (VEX).
2. **Enqueue**
Ready nodes become `jobs` with queue, priority, idempotency key and optional ratelimit tokens reserved.
3. **Execute**
Worker claims job, heartbeats every N seconds. Output artifacts are stored and linked. Failures are classified and retried with exponential backoff and jitter, up to `max_attempt`.
4. **Complete**
Downstream nodes unblock. On run completion, orchestrator computes SLO deltas and emits run summary.
5. **Deadletter**
Jobs exceeding attempts move to a DLQ with structured context and suggested remediation.
### 3.5 Scheduling, backpressure, ratelimits
* **Token bucket** per `{tenant, source.host}` with adaptive refill if upstream 429/503 seen.
* **Concurrency caps** per source and per job type to avoid thundering herd.
* **Backpressure signals** from queue depth, worker CPU, and upstream error rates; scheduler reduces inflight issuance accordingly.
* **Backfills** use eventtime windows with immutable watermarks to avoid reprocessing.
* **Blackout windows** for vendor maintenance periods.
### 3.6 APIs
```
POST /orchestrator/sources
GET /orchestrator/sources?kind=&tag=&q=
GET /orchestrator/sources/{id}
PATCH /orchestrator/sources/{id}
POST /orchestrator/sources/{id}/actions:test|pause|resume|sync-now
POST /orchestrator/sources/{id}/backfill { "from":"2024-01-01", "to":"2024-03-01" }
GET /orchestrator/runs?source_id=&state=&from=&to=
GET /orchestrator/runs/{run_id}
GET /orchestrator/runs/{run_id}/dag
POST /orchestrator/runs/{run_id}/cancel
GET /orchestrator/jobs?state=&type=&queue=&source_id=
GET /orchestrator/jobs/{job_id}
POST /orchestrator/jobs/{job_id}/actions:retry|cancel|prioritize
GET /orchestrator/metrics/overview
GET /orchestrator/errors/top?window=1h
GET /orchestrator/quotas
PATCH /orchestrator/quotas/{tenant_id}
WS /orchestrator/streams/updates
```
### 3.7 Console (Web UI)
* **Overview**
* KPI tiles: sources healthy, runs in progress, queue depth, error rate, burnrate to SLO.
* Heatmap of source health by last 24h success ratio.
* **Sources**
* Grid with filters, inline status (active, paused, throttled), next run eta, last error class.
* Detail panel: config, secrets status (redacted), schedule, rate limits, ownership, run history, action buttons.
* **Runs**
* Timeline (Gantt) with critical path, duration distribution, and perstage breakdown.
* Run detail: DAG view with node metrics, artifacts, logs, action menu (cancel).
* **Jobs**
* Live table with state filters and “tail” view.
* Job detail: payload preview (redacted), worker, attempts, stack traces, linked artifacts.
* **Errors**
* Clusters by class and signature, suggested remediations (pause source, lower concurrency, patch parser).
* **Queues & Backpressure**
* Perqueue depth, service rate, inflight, age percentiles.
* Ratelimit tokens graphs per source host.
* **Controls**
* Backfill wizard with eventtime preview and safety checks.
* Canary routing: route 5% of next 100 runs to a new worker pool.
* **A11y**
* Keyboard nav, ARIA roles for DAG nodes, live regions for updates, colorblind friendly graphs.
### 3.8 CLI
```
stella orch sources list --kind advisory --tag prod
stella orch sources add --file source.yaml
stella orch sources test <source-id>
stella orch sources pause <source-id> # or resume
stella orch sources sync-now <source-id>
stella orch sources backfill <source-id> --from 2024-01-01 --to 2024-03-01
stella orch runs list --source <id> --state running
stella orch runs show <run-id> --dag
stella orch runs cancel <run-id>
stella orch jobs list --state failed --type parse --limit 100
stella orch jobs retry <job-id>
stella orch jobs cancel <job-id>
stella orch jobs tail --queue normalize --follow
stella orch quotas get --tenant default
stella orch quotas set --tenant default --concurrent-jobs 50 --rpm 1200
```
Exit codes: `0` success, `2` invalid args, `4` not found, `5` denied, `7` precondition failed, `8` ratelimited.
### 3.9 RBAC & security
* **Roles**
* `Orch.Viewer`: readonly sources/runs/jobs/metrics.
* `Orch.Operator`: perform actions on sources and jobs, launch backfills.
* `Orch.Admin`: manage quotas, schedules, connector versions, and delete sources.
* **Secrets**
* Stored only as references to your KMS; never persisted in cleartext.
* Console shows redact badges and last rotated timestamp.
* **Tenancy**
* Source, run, job rows scoped by tenant id.
* Queue names and token buckets namespaced per tenant.
* **Compliance**
* Full audit log for every operator action with “reason” and optional ticket link.
* Exportable run ledger for audits.
### 3.10 Observability
* **Metrics (examples)**
* `orch_jobs_inflight{type,queue}`
* `orch_jobs_latency_ms{type,percentile}`
* `orch_rate_tokens_available{source}`
* `orch_error_rate{source,error_class}`
* `orch_slo_burn_rate{source,slo}`
* `orch_deadletter_total{source,type}`
* **Traces**
* Span per job with baggage: `run_id`, `source_id`, `artifact_id`.
* Links across services to Conseiller/Excitator/SBOM workers.
* **Logs**
* Structured JSON with correlation ids, attempt numbers and redacted payload previews.
### 3.11 Performance targets
* Job dispatch P95 < 150 ms after dependency satisfied.
* Scheduler loop P95 < 500 ms for 10k pending jobs.
* Console live updates subsecond at 1k events/sec per tenant.
* Backfill throughput 200 jobs/sec per worker pool with zero dupes.
### 3.12 Edge cases & behaviors
* **Upstream 429 storms:** autothrottle, pause optional, recommend extended jitter.
* **Schema drift:** parser moves job to DLQ with `error_class=schema_mismatch` and opens a change ticket via webhook.
* **Flapping source:** circuit breaker opens after N consecutive failures; requires human resume”.
* **Clock skew:** watermark logic uses upstream event time; large skews flagged.
* **Idempotency collisions:** new attempt yields noop if artifact hash already exists.
---
## 4) Implementation plan
### 4.1 Modules (new and updated)
* New service: `src/StellaOps.Orchestrator`
* `api/` REST + WS handlers
* `scheduler/` run planner, DAG builder, watermark/backfill logic
* `queues/` publisher and consumer abstractions
* `ratelimit/` token bucket and adaptive controller
* `state/` Postgres repositories and migrations
* `audit/` action logging and export
* `metrics/` Prometheus exporters
* `security/` tenant scoping, KMS client, secret refs
* Worker SDKs:
* `src/StellaOps.Orchestrator.WorkerSdk.Go` and `src/StellaOps.Orchestrator.WorkerSdk.Python` with job claim, heartbeat, progress, artifact publish, and structured error reporting.
* Console:
* `console/apps/orch/` pages: Overview, Sources, Runs, Jobs, Errors, Queues.
* `components/dag-view/`, `components/gantt/`, `components/health-heatmap/`.
* Updates to existing services:
* Conseiller/Excitator/SBOM workers adopt SDK and emit artifacts with schema/version/fingerprint.
* VEX Lens exposes `consensus_compute` as a jobable operation.
* Policy Engine exposes `policy_eval` as a job type for scheduled recalcs.
### 4.2 Packaging & deployment
* Containers:
* `stella/orchestrator:<ver>`
* `stella/worker-sdk-examples:<ver>` for canary pools
* Helm values:
* Queues/topics, pertenant concurrency, ratelimit defaults, WS replica count.
* KMS integration secrets.
* Migrations:
* Flyway/Goose migrations for new tables and indexes.
### 4.3 Rollout strategy
* Phase 1: Readonly dashboard fed by existing job tables; no controls.
* Phase 2: Control actions enabled for nonprod tenants.
* Phase 3: Backfills and quota management, then GA.
---
## 5) Documentation changes
Create/update the following, each ending with the imposed rule statement.
1. `/docs/orchestrator/overview.md`
Concepts, roles, responsibilities, AOC alignment.
2. `/docs/orchestrator/architecture.md`
Scheduler, DAGs, watermarks, queues, ratelimits, data model.
3. `/docs/orchestrator/api.md`
Endpoints, WebSocket events, error codes, examples.
4. `/docs/orchestrator/console.md`
Screens, actions, a11y, live updates.
5. `/docs/orchestrator/cli.md`
Commands, examples, exit codes, scripting patterns.
6. `/docs/orchestrator/runledger.md`
Provenance and audit export format.
7. `/docs/security/secretshandling.md`
KMS references, redaction rules, operator hygiene.
8. `/docs/operations/orchestratorrunbook.md`
Common failures, backfill guide, circuit breakers, tuning.
9. `/docs/schemas/artifacts.md`
Artifact kinds, schema versions, hashing, storage layout.
10. `/docs/slo/orchestratorslo.md`
SLO definitions, measurement, alerting.
---
## 6) Engineering tasks
### Backend (orchestrator)
* [ ] Stand up Postgres schemas and indices for sources, runs, jobs, dag_edges, artifacts, quotas, schedules.
* [ ] Implement scheduler: DAG planner, dependency resolver, critical path computation.
* [ ] Implement rate limiter with adaptive behavior on 429/503 and pertenant tokens.
* [ ] Implement watermark/backfill manager with eventtime windows and idempotency keys.
* [ ] Implement API endpoints + OpenAPI spec + request validation.
* [ ] Implement WebSocket/SSE event stream for live updates.
* [ ] Implement audit logging and export.
* [ ] Implement deadletter store and replay.
### Worker SDKs and integrations
* [ ] Build Go/Python SDKs with claim/heartbeat/progress API.
* [ ] Integrate SDK into Conseiller, Excitator, SBOM workers; ensure artifact emission with schema ver.
* [ ] Add `consensus_compute` and `policy_eval` as job types with deterministic inputs/outputs.
### Console
* [ ] Overview tiles and health heatmap.
* [ ] Source list/detail with actions and config view.
* [ ] Runs timeline (Gantt) and DAG visualization with node inspector.
* [ ] Jobs tail with live updates and filters.
* [ ] Errors clustering and suggested remediations.
* [ ] Queues/backpressure dashboard.
* [ ] Backfill wizard with safety checks.
### Observability
* [ ] Emit metrics listed in §3.10 and wire traces across services.
* [ ] Dashboards: health, queue depth, error classes, burnrate, dispatch latency.
* [ ] Alerts for SLO burn and circuit breaker opens.
### Security & RBAC
* [ ] Enforce tenant scoping on all endpoints; test leakage.
* [ ] Wire KMS for secret refs and redact everywhere.
* [ ] Implement `Orch.Viewer|Operator|Admin` roles and check in Console and API.
### Docs
* [ ] Author all files in §5 with examples and screenshots.
* [ ] Crosslink from Conseiller/Excitator/SBOM pages to the dashboard docs.
* [ ] Append imposed rule to each page.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Acceptance criteria
* Operators can: pause/resume a source, run syncnow,” initiate a backfill for a date range, and retry/cancel individual jobs from Console and CLI.
* DAG and timeline reflect reality within 1 second of job state changes at P95.
* Backfills do not create duplicate artifacts; idempotency proven by hash equality.
* Rate limiter reduces 429s by 80% under simulated throttle tests.
* Audit log includes who/when/why for every operator action.
* Provenance ledger exports a complete chain for any artifact.
* RBAC prevents nonadmins from quota changes; tenancy isolation proven via automated tests.
* SLO dashboard shows burnrate and triggers alerts under injected failure.
---
## 8) Risks & mitigations
* **Orchestrator becomes a single bottleneck.**
Horizontal scale stateless workers; DB indexes tuned; job state updates batched; cache hot paths.
* **Secret spillage.**
Only KMS references stored; aggressive redaction; log scrubbing in SDK.
* **Overeager backfills overwhelm upstream.**
Enforce persource quotas and sandbox previews; dryrun backfills first.
* **Schema drift silently corrupts normalization.**
Hardfail on mismatch; DLQ with clear signatures; schema registry gating.
* **Flapping sources cause alert fatigue.**
Circuit breaker with cooldown and deduped alerts; error budget policy.
---
## 9) Test plan
* **Unit**
Scheduler DAG building, topological sort, backoff math, token bucket, watermark math.
* **Integration**
Orchestrator worker SDK, artifact store wiring, DLQ replay, audit pipeline.
* **Chaos**
Inject 429 storms, packet loss, worker crashes; verify throttling and recovery.
* **Backfill**
Simulate overlapping windows and verify idempotency and watermark correctness.
* **Perf**
10k concurrent jobs: dispatch latency, DB contention, WebSocket fanout.
* **Security**
Multitenant isolation tests; KMS mock tests for secret access; RBAC matrix.
* **UX/A11y**
Screen reader labels on DAG, keyboard navigation, live region updates.
---
## 10) Philosophy
* **Make the invisible visible.** Pipelines should be legible at a glance.
* **Prefer reproducibility to heroics.** Idempotency and provenance over we think it ran.”
* **Safeguards before speed.** Throttle first, retry thoughtfully, never melt upstreams.
* **No silent merges.** Evidence remains immutable; transformations are explicit, logged and reversible.
> Final reminder: **Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.**

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,93 +1,93 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net472;netstandard2.1</TargetFrameworks>
<Authors>Johannes Hoppe and many contributors</Authors>
<Description>Mongo2Go is a managed wrapper around MongoDB binaries. It targets .NET Framework 4.7.2 and .NET Standard 2.1.
This Nuget package contains the executables of mongod, mongoimport and mongoexport v4.4.4 for Windows, Linux and macOS.
Mongo2Go has two use cases:
1. Providing multiple, temporary and isolated MongoDB databases for integration tests
2. Providing a quick to set up MongoDB database for a local developer environment</Description>
<Company>HAUS HOPPE - ITS</Company>
<Copyright>Copyright © 2012-2025 Johannes Hoppe and many ❤️ contributors</Copyright>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<PackageIcon>icon.png</PackageIcon>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/Mongo2Go/Mongo2Go</PackageProjectUrl>
<PackageReleaseNotes>https://github.com/Mongo2Go/Mongo2Go/releases</PackageReleaseNotes>
<PackageTags>MongoDB Mongo unit test integration runner</PackageTags>
<RepositoryUrl>https://github.com/Mongo2Go/Mongo2Go</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<AssemblyTitle>Mongo2Go</AssemblyTitle>
<AssemblyDescription>Mongo2Go is a managed wrapper around MongoDB binaries.</AssemblyDescription>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.1|AnyCPU'">
<WarningLevel>4</WarningLevel>
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netstandard2.1|AnyCPU'">
<WarningLevel>4</WarningLevel>
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|net48|AnyCPU'">
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|net48|AnyCPU'">
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Label="Restoring">
<DisableImplicitNuGetFallbackFolder>true</DisableImplicitNuGetFallbackFolder>
<RestorePackagesWithLockFile>true</RestorePackagesWithLockFile>
<RestoreLockedMode Condition="$(ContinuousIntegrationBuild) == 'true'">true</RestoreLockedMode>
</PropertyGroup>
<PropertyGroup Label="SourceLink">
<DebugType>embedded</DebugType>
<EmbedUntrackedSources>true</EmbedUntrackedSources>
<PublishRepositoryUrl>true</PublishRepositoryUrl>
</PropertyGroup>
<PropertyGroup Label="MinVer">
<MinVerTagPrefix>v</MinVerTagPrefix>
</PropertyGroup>
<ItemGroup>
<None Update="packages.lock.json" Visible="false" />
<None Include="../mongo2go_200_200.png" Visible="false">
<Pack>true</Pack>
<PackagePath>icon.png</PackagePath>
</None>
<None Include="../../tools/mongodb*/**" Visible="false">
<Pack>true</Pack>
<PackagePath>tools</PackagePath>
</None>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="6.0.0" />
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="all" />
<PackageReference Include="MinVer" Version="2.5.0" PrivateAssets="all" />
<PackageReference Include="MongoDB.Driver" Version="3.5.0" />
<PackageReference Include="SharpCompress" Version="0.41.0" />
<PackageReference Include="System.Text.Json" Version="6.0.10" />
<PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" Version="1.0.3" PrivateAssets="all" />
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="Mongo2GoTests" />
</ItemGroup>
<Target Name="PrintPackageVersionForGitHubActions" AfterTargets="Pack">
<Message Importance="high" Text="version=$(PackageVersion)" />
<Message Importance="high" Text="nupkg-filename=$(PackageId).$(PackageVersion).nupkg" />
</Target>
</Project>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net472;netstandard2.1</TargetFrameworks>
<Authors>Johannes Hoppe and many contributors</Authors>
<Description>Mongo2Go is a managed wrapper around MongoDB binaries. It targets .NET Framework 4.7.2 and .NET Standard 2.1.
This Nuget package contains the executables of mongod, mongoimport and mongoexport v4.4.4 for Windows, Linux and macOS.
Mongo2Go has two use cases:
1. Providing multiple, temporary and isolated MongoDB databases for integration tests
2. Providing a quick to set up MongoDB database for a local developer environment</Description>
<Company>HAUS HOPPE - ITS</Company>
<Copyright>Copyright © 2012-2025 Johannes Hoppe and many ❤️ contributors</Copyright>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<PackageIcon>icon.png</PackageIcon>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/Mongo2Go/Mongo2Go</PackageProjectUrl>
<PackageReleaseNotes>https://github.com/Mongo2Go/Mongo2Go/releases</PackageReleaseNotes>
<PackageTags>MongoDB Mongo unit test integration runner</PackageTags>
<RepositoryUrl>https://github.com/Mongo2Go/Mongo2Go</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<AssemblyTitle>Mongo2Go</AssemblyTitle>
<AssemblyDescription>Mongo2Go is a managed wrapper around MongoDB binaries.</AssemblyDescription>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.1|AnyCPU'">
<WarningLevel>4</WarningLevel>
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netstandard2.1|AnyCPU'">
<WarningLevel>4</WarningLevel>
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|net48|AnyCPU'">
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|net48|AnyCPU'">
<NoWarn>1701;1702;1591;1573</NoWarn>
</PropertyGroup>
<PropertyGroup Label="Restoring">
<DisableImplicitNuGetFallbackFolder>true</DisableImplicitNuGetFallbackFolder>
<RestorePackagesWithLockFile>true</RestorePackagesWithLockFile>
<RestoreLockedMode Condition="$(ContinuousIntegrationBuild) == 'true'">true</RestoreLockedMode>
</PropertyGroup>
<PropertyGroup Label="SourceLink">
<DebugType>embedded</DebugType>
<EmbedUntrackedSources>true</EmbedUntrackedSources>
<PublishRepositoryUrl>true</PublishRepositoryUrl>
</PropertyGroup>
<PropertyGroup Label="MinVer">
<MinVerTagPrefix>v</MinVerTagPrefix>
</PropertyGroup>
<ItemGroup>
<None Update="packages.lock.json" Visible="false" />
<None Include="../mongo2go_200_200.png" Visible="false">
<Pack>true</Pack>
<PackagePath>icon.png</PackagePath>
</None>
<None Include="../../tools/mongodb*/**" Visible="false">
<Pack>true</Pack>
<PackagePath>tools</PackagePath>
</None>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="all" />
<PackageReference Include="MinVer" Version="2.5.0" PrivateAssets="all" />
<PackageReference Include="MongoDB.Driver" Version="3.5.0" />
<PackageReference Include="SharpCompress" Version="0.41.0" />
<PackageReference Include="System.Text.Json" Version="6.0.10" />
<PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" Version="1.0.3" PrivateAssets="all" />
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="Mongo2GoTests" />
</ItemGroup>
<Target Name="PrintPackageVersionForGitHubActions" AfterTargets="Pack">
<Message Importance="high" Text="version=$(PackageVersion)" />
<Message Importance="high" Text="nupkg-filename=$(PackageId).$(PackageVersion).nupkg" />
</Target>
</Project>

View File

@@ -1,21 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Mongo2Go\Mongo2Go.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="FluentAssertions" Version="5.10.3" />
<PackageReference Include="Machine.Specifications" Version="1.0.0" />
<PackageReference Include="Machine.Specifications.Runner.VisualStudio" Version="2.10.1" />
<PackageReference Include="MELT" Version="0.7.0" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="6.0.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.1" />
<PackageReference Include="Moq" Version="4.16.1" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="System.Net.Http" Version="4.3.4" />
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
</ItemGroup>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Mongo2Go\Mongo2Go.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="FluentAssertions" Version="5.10.3" />
<PackageReference Include="Machine.Specifications" Version="1.0.0" />
<PackageReference Include="Machine.Specifications.Runner.VisualStudio" Version="2.10.1" />
<PackageReference Include="MELT" Version="0.7.0" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="6.0.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.1" />
<PackageReference Include="Moq" Version="4.16.1" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="System.Net.Http" Version="4.3.4" />
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
</ItemGroup>
</Project>

Binary file not shown.

View File

@@ -6,11 +6,13 @@
<packageSources>
<clear />
<add key="local" value="local-nuget" />
<add key="dotnet-public" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-public/nuget/v3/index.json" />
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
</packageSources>
<packageSourceMapping>
<packageSource key="local">
<package pattern="Mongo2Go" />
<package pattern="Microsoft.IdentityModel.Tokens" />
<package pattern="Microsoft.Extensions.Http.Polly" />
<package pattern="Microsoft.Extensions.Caching.Memory" />
<package pattern="Microsoft.Extensions.Configuration" />
@@ -30,6 +32,11 @@
<package pattern="System.Memory" />
<package pattern="System.Runtime.CompilerServices.Unsafe" />
</packageSource>
<packageSource key="dotnet-public">
<package pattern="Microsoft.Extensions.*" />
<package pattern="Microsoft.AspNetCore.*" />
<package pattern="Microsoft.Data.Sqlite" />
</packageSource>
<packageSource key="nuget.org">
<package pattern="*" />
</packageSource>

1254
SPRINTS.md

File diff suppressed because it is too large Load Diff

View File

@@ -32,7 +32,7 @@ This file describe implementation of Stella Ops (docs/README.md). Implementation
| Sprint 9 | Policy Foundations | src/StellaOps.Policy/TASKS.md | DONE (2025-10-19) | Policy Guild | POLICY-CORE-09-005 | Scoring/quiet engine compute score, enforce VEX-only quiet rules, emit inputs and provenance. |
| Sprint 9 | Policy Foundations | src/StellaOps.Policy/TASKS.md | DONE (2025-10-19) | Policy Guild | POLICY-CORE-09-006 | Unknown state & confidence decay deterministic bands surfaced in policy outputs. |
| Sprint 9 | Docs & Governance | docs/TASKS.md | DONE (2025-10-21) | Platform Events Guild | PLATFORM-EVENTS-09-401 | Embed canonical event samples into contract/integration tests and ensure CI validates payloads against published schemas. |
| Sprint 10 | Benchmarks | bench/TASKS.md | DONE (2025-10-21) | Bench Guild, Language Analyzer Guild | BENCH-SCANNER-10-002 | Wire real language analyzers into bench harness & refresh baselines post-implementation. |
| Sprint 10 | Benchmarks | src/StellaOps.Bench/TASKS.md | DONE (2025-10-21) | Bench Guild, Language Analyzer Guild | BENCH-SCANNER-10-002 | Wire real language analyzers into bench harness & refresh baselines post-implementation. |
| Sprint 10 | Scanner Analyzers & SBOM | src/StellaOps.Scanner.Analyzers.Lang/TASKS.md | DONE (2025-10-21) | Language Analyzer Guild | SCANNER-ANALYZERS-LANG-10-302 | Node analyzer handling workspaces/symlinks emitting `pkg:npm`. |
| Sprint 10 | Scanner Analyzers & SBOM | src/StellaOps.Scanner.Analyzers.Lang/TASKS.md | DONE (2025-10-21) | Language Analyzer Guild | SCANNER-ANALYZERS-LANG-10-303 | Python analyzer reading `*.dist-info`, RECORD hashes, entry points. |
| Sprint 10 | Scanner Analyzers & SBOM | src/StellaOps.Scanner.Analyzers.Lang/TASKS.md | DONE (2025-10-22) | Language Analyzer Guild | SCANNER-ANALYZERS-LANG-10-304 | Go analyzer leveraging buildinfo for `pkg:golang` components. |

34
SPRINTS_PRIOR_20251025.md Normal file
View File

@@ -0,0 +1,34 @@
This file describe implementation of Stella Ops (docs/README.md). Implementation must respect rules from AGENTS.md (read if you have not).
| Sprint | Theme | Tasks File Path | Status | Type of Specialist | Task ID | Task Description |
| --- | --- | --- | --- | --- | --- | --- |
| Sprint 11 | Signing Chain Bring-up | src/StellaOps.Attestor/TASKS.md | DONE (2025-10-19) | Attestor Guild | ATTESTOR-API-11-201 | `/rekor/entries` submission pipeline with dedupe, proof acquisition, and persistence. |
| Sprint 11 | Signing Chain Bring-up | src/StellaOps.Attestor/TASKS.md | DONE (2025-10-19) | Attestor Guild | ATTESTOR-VERIFY-11-202 | `/rekor/verify` + retrieval endpoints validating signatures and Merkle proofs. |
| Sprint 11 | Signing Chain Bring-up | src/StellaOps.Attestor/TASKS.md | DONE (2025-10-19) | Attestor Guild | ATTESTOR-OBS-11-203 | Telemetry, alerting, mTLS hardening, and archive workflow for Attestor. |
| Sprint 11 | Storage Platform Hardening | src/StellaOps.Scanner.Storage/TASKS.md | DONE (2025-10-23) | Scanner Storage Guild | SCANNER-STORAGE-11-401 | Migrate scanner object storage integration from MinIO to RustFS with data migration plan. |
| Sprint 11 | UI Integration | src/StellaOps.UI/TASKS.md | DONE (2025-10-23) | UI Guild | UI-ATTEST-11-005 | Attestation visibility (Rekor id, status) on Scan Detail. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Core/TASKS.md | DONE (2025-10-23) | Zastava Core Guild | ZASTAVA-CORE-12-201 | Define runtime event/admission DTOs, hashing helpers, and versioning strategy. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Core/TASKS.md | DONE (2025-10-23) | Zastava Core Guild | ZASTAVA-CORE-12-202 | Provide configuration/logging/metrics utilities shared by Observer/Webhook. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Core/TASKS.md | DONE (2025-10-23) | Zastava Core Guild | ZASTAVA-CORE-12-203 | Authority client helpers, OpTok caching, and security guardrails for runtime services. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Core/TASKS.md | DONE (2025-10-23) | Zastava Core Guild | ZASTAVA-OPS-12-204 | Operational runbooks, alert rules, and dashboard exports for runtime plane. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Observer/TASKS.md | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-001 | Container lifecycle watcher emitting deterministic runtime events with buffering. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Observer/TASKS.md | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Capture entrypoint traces + loaded libraries, hashing binaries and linking to baseline SBOM. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Observer/TASKS.md | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-003 | Posture checks for signatures/SBOM/attestation with offline caching. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Observer/TASKS.md | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-004 | Batch `/runtime/events` submissions with disk-backed buffer and rate limits. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Webhook/TASKS.md | DONE (2025-10-24) | Zastava Webhook Guild | ZASTAVA-WEBHOOK-12-101 | Admission controller host with TLS bootstrap and Authority auth. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Webhook/TASKS.md | DONE (2025-10-24) | Zastava Webhook Guild | ZASTAVA-WEBHOOK-12-102 | Query Scanner `/policy/runtime`, resolve digests, enforce verdicts. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Webhook/TASKS.md | DONE (2025-10-24) | Zastava Webhook Guild | ZASTAVA-WEBHOOK-12-103 | Caching, fail-open/closed toggles, metrics/logging for admission decisions. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Zastava.Webhook/TASKS.md | DONE (2025-10-24) | Zastava Webhook Guild | ZASTAVA-WEBHOOK-12-104 | Wire `/admission` endpoint to runtime policy client and emit allow/deny envelopes. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Scanner.WebService/TASKS.md | DONE (2025-10-24) | Scanner WebService Guild | SCANNER-RUNTIME-12-302 | `/policy/runtime` endpoint joining SBOM baseline + policy verdict, returning admission guidance. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Scanner.WebService/TASKS.md | DONE (2025-10-24) | Scanner WebService Guild | SCANNER-RUNTIME-12-303 | Align `/policy/runtime` verdicts with canonical policy evaluation (Feedser/Vexer). |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Scanner.WebService/TASKS.md | DONE (2025-10-24) | Scanner WebService Guild | SCANNER-RUNTIME-12-304 | Integrate attestation verification into runtime policy metadata. |
| Sprint 12 | Runtime Guardrails | src/StellaOps.Scanner.WebService/TASKS.md | DONE (2025-10-24) | Scanner WebService Guild | SCANNER-RUNTIME-12-305 | Deliver shared fixtures + e2e validation with Zastava/CLI teams. |
| Sprint 13 | UX & CLI Experience | src/StellaOps.UI/TASKS.md | DONE (2025-10-23) | UI Guild | UI-AUTH-13-001 | Integrate Authority OIDC + DPoP flows with session management. |
| Sprint 13 | UX & CLI Experience | src/StellaOps.UI/TASKS.md | DONE (2025-10-25) | UI Guild | UI-NOTIFY-13-006 | Notify panel: channels/rules CRUD, deliveries view, test send. |
| Sprint 13 | Platform Reliability | ops/devops/TASKS.md | DONE (2025-10-25) | DevOps Guild, Platform Leads | DEVOPS-NUGET-13-001 | Wire up .NET 10 preview feeds/local mirrors so `dotnet restore` succeeds offline; document updated NuGet bootstrap. |
| Sprint 15 | Notify Foundations | src/StellaOps.Notify.Queue/TASKS.md | DONE (2025-10-23) | Notify Queue Guild | NOTIFY-QUEUE-15-401 | Bus abstraction + Redis Streams adapter with ordering/idempotency. |
| Sprint 15 | Notify Foundations | src/StellaOps.Notify.Queue/TASKS.md | DONE (2025-10-23) | Notify Queue Guild | NOTIFY-QUEUE-15-402 | NATS JetStream adapter with health probes and failover. |
| Sprint 15 | Notify Foundations | src/StellaOps.Notify.Queue/TASKS.md | DONE (2025-10-23) | Notify Queue Guild | NOTIFY-QUEUE-15-403 | Delivery queue with retry/dead-letter + metrics. |
| Sprint 15 | Notify Foundations | src/StellaOps.Notify.Worker/TASKS.md | DONE (2025-10-23) | Notify Worker Guild | NOTIFY-WORKER-15-201 | Bus subscription + leasing loop with backoff. |
| Sprint 17 | Symbol Intelligence & Forensics | src/StellaOps.Zastava.Observer/TASKS.md | DONE (2025-10-25) | Zastava Observer Guild | ZASTAVA-OBS-17-005 | Collect GNU build-id during runtime observation and attach it to emitted events. |
| Sprint 17 | Symbol Intelligence & Forensics | src/StellaOps.Scanner.WebService/TASKS.md | DONE (2025-10-25) | Scanner WebService Guild | SCANNER-RUNTIME-17-401 | Persist runtime build-id observations and expose them for debug-symbol correlation. |

View File

@@ -1,12 +0,0 @@
# Current Focus FEEDCONN-CERTCC
| Task | Status | Notes |
|---|---|---|
|FEEDCONN-CERTCC-02-005 Deterministic fixtures/tests|DONE (2025-10-11)|Snapshot regression for summary/detail fetch landed; fixtures regenerate via `UPDATE_CERTCC_FIXTURES`.|
|FEEDCONN-CERTCC-02-008 Snapshot coverage handoff|DONE (2025-10-11)|Fixtures + README guidance shipped; QA can rerun with `UPDATE_CERTCC_FIXTURES=1` and share recorded-request diff with Merge.|
|FEEDCONN-CERTCC-02-007 Connector test harness remediation|DONE (2025-10-11)|Harness now resets time provider, wires Source.Common, and verifies VINCE canned responses across fetch→parse→map.|
|FEEDCONN-CERTCC-02-009 Detail/map reintegration plan|DONE (2025-10-11)|Plan published in `src/StellaOps.Concelier.Connector.CertCc/FEEDCONN-CERTCC-02-009_PLAN.md`; outlines staged enablement + rollback.|
# Connector Apple Status
| Task | Status | Notes |
|---|---|---|
|FEEDCONN-APPLE-02-003 Telemetry & documentation|DONE (2025-10-11)|Apple connector meter registered with WebService OpenTelemetry metrics; README and fixtures highlight normalizedVersions coverage for conflict sprint handoff.|

View File

@@ -1,8 +0,0 @@
# Benchmarks Task Board
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| BENCH-SCANNER-10-001 | DONE | Bench Guild, Scanner Team | SCANNER-ANALYZERS-LANG-10-303 | Analyzer microbench harness (node_modules, site-packages) + baseline CSV. | Harness committed under `bench/Scanner.Analyzers`; baseline CSV recorded; CI job publishes results. |
| BENCH-SCANNER-10-002 | DONE (2025-10-21) | Bench Guild, Language Analyzer Guild | SCANNER-ANALYZERS-LANG-10-301..309 | Wire real language analyzers into bench harness & refresh baselines post-implementation. | Harness executes analyzer assemblies end-to-end; updated baseline committed; CI trend doc linked. |
| BENCH-IMPACT-16-001 | TODO | Bench Guild, Scheduler Team | SCHED-IMPACT-16-301 | ImpactIndex throughput bench (resolve 10k productKeys) + RAM profile. | Benchmark script ready; baseline metrics recorded; alert thresholds defined. |
| BENCH-NOTIFY-15-001 | TODO | Bench Guild, Notify Team | NOTIFY-ENGINE-15-301 | Notify dispatch throughput bench (vary rule density) with results CSV. | Bench executed; results stored; regression alert configured. |

View File

@@ -5,20 +5,34 @@ This directory contains deterministic deployment bundles for the core Stella Ops
## Structure
- `releases/` canonical release manifests (edge, stable, airgap) used to source image digests.
- `compose/` Docker Compose bundles for dev/stage/airgap targets plus `.env` seed files.
- `compose/docker-compose.mirror.yaml` managed mirror bundle for `*.stella-ops.org` with gateway cache and multi-tenant auth.
- `helm/stellaops/` multi-profile Helm chart with values files for dev/stage/airgap.
- `tools/validate-profiles.sh` helper that runs `docker compose config` and `helm lint/template` for every profile.
- `compose/` Docker Compose bundles for dev/stage/airgap targets plus `.env` seed files.
- `compose/docker-compose.mirror.yaml` managed mirror bundle for `*.stella-ops.org` with gateway cache and multi-tenant auth.
- `compose/docker-compose.telemetry.yaml` optional OpenTelemetry collector overlay (mutual TLS, OTLP pipelines).
- `compose/docker-compose.telemetry-storage.yaml` optional Prometheus/Tempo/Loki stack for observability backends.
- `helm/stellaops/` multi-profile Helm chart with values files for dev/stage/airgap.
- `telemetry/` shared OpenTelemetry collector configuration and certificate artefacts (generated via tooling).
- `tools/validate-profiles.sh` helper that runs `docker compose config` and `helm lint/template` for every profile.
## Workflow
1. Update or add a release manifest under `releases/` with the new digests.
2. Mirror the digests into the Compose and Helm profiles that correspond to that channel.
3. Run `deploy/tools/validate-profiles.sh` (requires Docker CLI and Helm) to ensure the bundles lint and template cleanly.
4. Commit the change alongside any documentation updates (e.g. install guide cross-links).
3. Run `deploy/tools/validate-profiles.sh` (requires Docker CLI and Helm) to ensure the bundles lint and template cleanly.
4. If telemetry ingest is required for the release, generate development certificates using
`./ops/devops/telemetry/generate_dev_tls.sh` and run the collector smoke test with
`python ./ops/devops/telemetry/smoke_otel_collector.py` to verify the OTLP endpoints.
5. Commit the change alongside any documentation updates (e.g. install guide cross-links).
Maintaining the digest linkage keeps offline/air-gapped installs reproducible and avoids tag drift between environments.
### Additional tooling
- `deploy/tools/check-channel-alignment.py` verifies that Helm/Compose profiles reference the exact images listed in a release manifest. Run it for each channel before promoting a release.
- `ops/devops/telemetry/generate_dev_tls.sh` produces local CA/server/client certificates for Compose-based collector testing.
- `ops/devops/telemetry/smoke_otel_collector.py` sends OTLP traffic and asserts the collector accepted traces, metrics, and logs.
- `ops/devops/telemetry/package_offline_bundle.py` packages telemetry assets (config/Helm/Compose) into a signed tarball for air-gapped installs.
- `docs/ops/deployment-upgrade-runbook.md` end-to-end instructions for upgrade, rollback, and channel promotion workflows (Helm + Compose).
## CI smoke checks
The `.gitea/workflows/build-test-deploy.yml` pipeline includes a `notify-smoke` stage that validates scanner event propagation after staging deployments. Configure the following repository secrets (or environment-level secrets) so the job can connect to Redis and the Notify API:

View File

@@ -7,21 +7,52 @@ These Compose bundles ship the minimum services required to exercise the scanner
| Path | Purpose |
| ---- | ------- |
| `docker-compose.dev.yaml` | Edge/nightly stack tuned for laptops and iterative work. |
| `docker-compose.stage.yaml` | Stable channel stack mirroring pre-production clusters. |
| `docker-compose.airgap.yaml` | Stable stack with air-gapped defaults (no outbound hostnames). |
| `docker-compose.mirror.yaml` | Managed mirror topology for `*.stella-ops.org` distribution (Concelier + Excititor + CDN gateway). |
| `env/*.env.example` | Seed `.env` files that document required secrets and ports per profile. |
| `docker-compose.stage.yaml` | Stable channel stack mirroring pre-production clusters. |
| `docker-compose.prod.yaml` | Production cutover stack with front-door network hand-off and Notify events enabled. |
| `docker-compose.airgap.yaml` | Stable stack with air-gapped defaults (no outbound hostnames). |
| `docker-compose.mirror.yaml` | Managed mirror topology for `*.stella-ops.org` distribution (Concelier + Excititor + CDN gateway). |
| `docker-compose.telemetry.yaml` | Optional OpenTelemetry collector overlay (mutual TLS, OTLP ingest endpoints). |
| `docker-compose.telemetry-storage.yaml` | Prometheus/Tempo/Loki storage overlay with multi-tenant defaults. |
| `env/*.env.example` | Seed `.env` files that document required secrets and ports per profile. |
## Usage
```bash
cp env/dev.env.example dev.env
docker compose --env-file dev.env -f docker-compose.dev.yaml config
docker compose --env-file dev.env -f docker-compose.dev.yaml up -d
```
```bash
cp env/dev.env.example dev.env
docker compose --env-file dev.env -f docker-compose.dev.yaml config
docker compose --env-file dev.env -f docker-compose.dev.yaml up -d
```
The stage and airgap variants behave the same way—swap the file names accordingly. All profiles expose 443/8443 for the UI and REST APIs, and they share a `stellaops` Docker network scoped to the compose project.
> **Graph Explorer reminder:** If you enable Cartographer or Graph API containers alongside these profiles, update `etc/authority.yaml` so the `cartographer-service` client is marked with `properties.serviceIdentity: "cartographer"` and carries a tenant hint. The Authority host now refuses `graph:write` tokens without that marker, so apply the configuration change before rolling out the updated images.
### Telemetry collector overlay
The OpenTelemetry collector overlay is optional and can be layered on top of any profile:
```bash
./ops/devops/telemetry/generate_dev_tls.sh
docker compose -f docker-compose.telemetry.yaml up -d
python ../../ops/devops/telemetry/smoke_otel_collector.py --host localhost
docker compose -f docker-compose.telemetry-storage.yaml up -d
```
The generator script creates a development CA plus server/client certificates under
`deploy/telemetry/certs/`. The smoke test sends OTLP/HTTP payloads using the generated
client certificate and asserts the collector reports accepted traces, metrics, and logs.
The storage overlay starts Prometheus, Tempo, and Loki with multitenancy enabled so you
can validate the end-to-end pipeline before promoting changes to staging. Adjust the
configs in `deploy/telemetry/storage/` before running in production.
Mount the same certificates when running workloads so the collector can enforce mutual TLS.
For production cutovers copy `env/prod.env.example` to `prod.env`, update the secret placeholders, and create the external network expected by the profile:
```bash
docker network create stellaops_frontdoor
docker compose --env-file prod.env -f docker-compose.prod.yaml config
```
### Scanner event stream settings
Scanner WebService can emit signed `scanner.report.*` events to Redis Streams when `SCANNER__EVENTS__ENABLED=true`. Each profile ships environment placeholders you can override in the `.env` file:
@@ -35,6 +66,10 @@ Scanner WebService can emit signed `scanner.report.*` events to Redis Streams wh
Helm values mirror the same knobs under each services `env` map (see `deploy/helm/stellaops/values-*.yaml`).
### Front-door network hand-off
`docker-compose.prod.yaml` adds a `frontdoor` network so operators can attach Traefik, Envoy, or an on-prem load balancer that terminates TLS. Override `FRONTDOOR_NETWORK` in `prod.env` if your reverse proxy uses a different bridge name. Attach only the externally reachable services (Authority, Signer, Attestor, Concelier, Scanner Web, Notify Web, UI) to that network—internal infrastructure (Mongo, MinIO, RustFS, NATS) stays on the private `stellaops` network.
### Updating to a new release
1. Import the new manifest into `deploy/releases/` (see `deploy/README.md`).

View File

@@ -0,0 +1,237 @@
x-release-labels: &release-labels
com.stellaops.release.version: "2025.09.2"
com.stellaops.release.channel: "stable"
com.stellaops.profile: "prod"
networks:
stellaops:
driver: bridge
frontdoor:
external: true
name: ${FRONTDOOR_NETWORK:-stellaops_frontdoor}
volumes:
mongo-data:
minio-data:
rustfs-data:
concelier-jobs:
nats-data:
services:
mongo:
image: docker.io/library/mongo@sha256:c258b26dbb7774f97f52aff52231ca5f228273a84329c5f5e451c3739457db49
command: ["mongod", "--bind_ip_all"]
restart: unless-stopped
environment:
MONGO_INITDB_ROOT_USERNAME: "${MONGO_INITDB_ROOT_USERNAME}"
MONGO_INITDB_ROOT_PASSWORD: "${MONGO_INITDB_ROOT_PASSWORD}"
volumes:
- mongo-data:/data/db
networks:
- stellaops
labels: *release-labels
minio:
image: docker.io/minio/minio@sha256:14cea493d9a34af32f524e538b8346cf79f3321eff8e708c1e2960462bd8936e
command: ["server", "/data", "--console-address", ":9001"]
restart: unless-stopped
environment:
MINIO_ROOT_USER: "${MINIO_ROOT_USER}"
MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD}"
volumes:
- minio-data:/data
ports:
- "${MINIO_CONSOLE_PORT:-9001}:9001"
networks:
- stellaops
labels: *release-labels
rustfs:
image: registry.stella-ops.org/stellaops/rustfs:2025.10.0-edge
command: ["serve", "--listen", "0.0.0.0:8080", "--root", "/data"]
restart: unless-stopped
environment:
RUSTFS__LOG__LEVEL: info
RUSTFS__STORAGE__PATH: /data
volumes:
- rustfs-data:/data
ports:
- "${RUSTFS_HTTP_PORT:-8080}:8080"
networks:
- stellaops
labels: *release-labels
nats:
image: docker.io/library/nats@sha256:c82559e4476289481a8a5196e675ebfe67eea81d95e5161e3e78eccfe766608e
command:
- "-js"
- "-sd"
- /data
restart: unless-stopped
ports:
- "${NATS_CLIENT_PORT:-4222}:4222"
volumes:
- nats-data:/data
networks:
- stellaops
labels: *release-labels
authority:
image: registry.stella-ops.org/stellaops/authority@sha256:b0348bad1d0b401cc3c71cb40ba034c8043b6c8874546f90d4783c9dbfcc0bf5
restart: unless-stopped
depends_on:
- mongo
environment:
STELLAOPS_AUTHORITY__ISSUER: "${AUTHORITY_ISSUER}"
STELLAOPS_AUTHORITY__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
STELLAOPS_AUTHORITY__PLUGINDIRECTORIES__0: "/app/plugins"
STELLAOPS_AUTHORITY__PLUGINS__CONFIGURATIONDIRECTORY: "/app/etc/authority.plugins"
volumes:
- ../../etc/authority.yaml:/etc/authority.yaml:ro
- ../../etc/authority.plugins:/app/etc/authority.plugins:ro
ports:
- "${AUTHORITY_PORT:-8440}:8440"
networks:
- stellaops
- frontdoor
labels: *release-labels
signer:
image: registry.stella-ops.org/stellaops/signer@sha256:8ad574e61f3a9e9bda8a58eb2700ae46813284e35a150b1137bc7c2b92ac0f2e
restart: unless-stopped
depends_on:
- authority
environment:
SIGNER__AUTHORITY__BASEURL: "https://authority:8440"
SIGNER__POE__INTROSPECTURL: "${SIGNER_POE_INTROSPECT_URL}"
SIGNER__STORAGE__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
ports:
- "${SIGNER_PORT:-8441}:8441"
networks:
- stellaops
- frontdoor
labels: *release-labels
attestor:
image: registry.stella-ops.org/stellaops/attestor@sha256:0534985f978b0b5d220d73c96fddd962cd9135f616811cbe3bff4666c5af568f
restart: unless-stopped
depends_on:
- signer
environment:
ATTESTOR__SIGNER__BASEURL: "https://signer:8441"
ATTESTOR__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
ports:
- "${ATTESTOR_PORT:-8442}:8442"
networks:
- stellaops
- frontdoor
labels: *release-labels
concelier:
image: registry.stella-ops.org/stellaops/concelier@sha256:c58cdcaee1d266d68d498e41110a589dd204b487d37381096bd61ab345a867c5
restart: unless-stopped
depends_on:
- mongo
- minio
environment:
CONCELIER__STORAGE__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
CONCELIER__STORAGE__S3__ENDPOINT: "http://minio:9000"
CONCELIER__STORAGE__S3__ACCESSKEYID: "${MINIO_ROOT_USER}"
CONCELIER__STORAGE__S3__SECRETACCESSKEY: "${MINIO_ROOT_PASSWORD}"
CONCELIER__AUTHORITY__BASEURL: "https://authority:8440"
volumes:
- concelier-jobs:/var/lib/concelier/jobs
ports:
- "${CONCELIER_PORT:-8445}:8445"
networks:
- stellaops
- frontdoor
labels: *release-labels
scanner-web:
image: registry.stella-ops.org/stellaops/scanner-web@sha256:14b23448c3f9586a9156370b3e8c1991b61907efa666ca37dd3aaed1e79fe3b7
restart: unless-stopped
depends_on:
- concelier
- rustfs
- nats
environment:
SCANNER__STORAGE__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
SCANNER__ARTIFACTSTORE__DRIVER: "rustfs"
SCANNER__ARTIFACTSTORE__ENDPOINT: "http://rustfs:8080/api/v1"
SCANNER__ARTIFACTSTORE__BUCKET: "scanner-artifacts"
SCANNER__ARTIFACTSTORE__TIMEOUTSECONDS: "30"
SCANNER__QUEUE__BROKER: "${SCANNER_QUEUE_BROKER}"
SCANNER__EVENTS__ENABLED: "${SCANNER_EVENTS_ENABLED:-true}"
SCANNER__EVENTS__DRIVER: "${SCANNER_EVENTS_DRIVER:-redis}"
SCANNER__EVENTS__DSN: "${SCANNER_EVENTS_DSN:-}"
SCANNER__EVENTS__STREAM: "${SCANNER_EVENTS_STREAM:-stella.events}"
SCANNER__EVENTS__PUBLISHTIMEOUTSECONDS: "${SCANNER_EVENTS_PUBLISH_TIMEOUT_SECONDS:-5}"
SCANNER__EVENTS__MAXSTREAMLENGTH: "${SCANNER_EVENTS_MAX_STREAM_LENGTH:-10000}"
ports:
- "${SCANNER_WEB_PORT:-8444}:8444"
networks:
- stellaops
- frontdoor
labels: *release-labels
scanner-worker:
image: registry.stella-ops.org/stellaops/scanner-worker@sha256:32e25e76386eb9ea8bee0a1ad546775db9a2df989fab61ac877e351881960dab
restart: unless-stopped
depends_on:
- scanner-web
- rustfs
- nats
environment:
SCANNER__STORAGE__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
SCANNER__ARTIFACTSTORE__DRIVER: "rustfs"
SCANNER__ARTIFACTSTORE__ENDPOINT: "http://rustfs:8080/api/v1"
SCANNER__ARTIFACTSTORE__BUCKET: "scanner-artifacts"
SCANNER__ARTIFACTSTORE__TIMEOUTSECONDS: "30"
SCANNER__QUEUE__BROKER: "${SCANNER_QUEUE_BROKER}"
networks:
- stellaops
labels: *release-labels
notify-web:
image: ${NOTIFY_WEB_IMAGE:-registry.stella-ops.org/stellaops/notify-web:2025.09.2}
restart: unless-stopped
depends_on:
- mongo
- authority
environment:
DOTNET_ENVIRONMENT: Production
volumes:
- ../../etc/notify.prod.yaml:/app/etc/notify.yaml:ro
ports:
- "${NOTIFY_WEB_PORT:-8446}:8446"
networks:
- stellaops
- frontdoor
labels: *release-labels
excititor:
image: registry.stella-ops.org/stellaops/excititor@sha256:59022e2016aebcef5c856d163ae705755d3f81949d41195256e935ef40a627fa
restart: unless-stopped
depends_on:
- concelier
environment:
EXCITITOR__CONCELIER__BASEURL: "https://concelier:8445"
EXCITITOR__STORAGE__MONGO__CONNECTIONSTRING: "mongodb://${MONGO_INITDB_ROOT_USERNAME}:${MONGO_INITDB_ROOT_PASSWORD}@mongo:27017"
networks:
- stellaops
labels: *release-labels
web-ui:
image: registry.stella-ops.org/stellaops/web-ui@sha256:10d924808c48e4353e3a241da62eb7aefe727a1d6dc830eb23a8e181013b3a23
restart: unless-stopped
depends_on:
- scanner-web
environment:
STELLAOPS_UI__BACKEND__BASEURL: "https://scanner-web:8444"
ports:
- "${UI_PORT:-8443}:8443"
networks:
- stellaops
- frontdoor
labels: *release-labels

View File

@@ -0,0 +1,57 @@
version: "3.9"
services:
prometheus:
image: prom/prometheus:v2.53.0
container_name: stellaops-prometheus
command:
- "--config.file=/etc/prometheus/prometheus.yaml"
volumes:
- ../telemetry/storage/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
- prometheus-data:/prometheus
- ../telemetry/certs:/etc/telemetry/tls:ro
- ../telemetry/storage/auth:/etc/telemetry/auth:ro
environment:
PROMETHEUS_COLLECTOR_TARGET: stellaops-otel-collector:9464
ports:
- "9090:9090"
depends_on:
- tempo
- loki
tempo:
image: grafana/tempo:2.5.0
container_name: stellaops-tempo
command:
- "-config.file=/etc/tempo/tempo.yaml"
volumes:
- ../telemetry/storage/tempo.yaml:/etc/tempo/tempo.yaml:ro
- ../telemetry/storage/tenants/tempo-overrides.yaml:/etc/telemetry/tenants/tempo-overrides.yaml:ro
- ../telemetry/certs:/etc/telemetry/tls:ro
- tempo-data:/var/tempo
ports:
- "3200:3200"
environment:
TEMPO_ZONE: docker
loki:
image: grafana/loki:3.1.0
container_name: stellaops-loki
command:
- "-config.file=/etc/loki/loki.yaml"
volumes:
- ../telemetry/storage/loki.yaml:/etc/loki/loki.yaml:ro
- ../telemetry/storage/tenants/loki-overrides.yaml:/etc/telemetry/tenants/loki-overrides.yaml:ro
- ../telemetry/certs:/etc/telemetry/tls:ro
- loki-data:/var/loki
ports:
- "3100:3100"
volumes:
prometheus-data:
tempo-data:
loki-data:
networks:
default:
name: stellaops-telemetry

View File

@@ -0,0 +1,34 @@
version: "3.9"
services:
otel-collector:
image: otel/opentelemetry-collector:0.105.0
container_name: stellaops-otel-collector
command:
- "--config=/etc/otel-collector/config.yaml"
environment:
STELLAOPS_OTEL_TLS_CERT: /etc/otel-collector/tls/collector.crt
STELLAOPS_OTEL_TLS_KEY: /etc/otel-collector/tls/collector.key
STELLAOPS_OTEL_TLS_CA: /etc/otel-collector/tls/ca.crt
STELLAOPS_OTEL_PROMETHEUS_ENDPOINT: 0.0.0.0:9464
STELLAOPS_OTEL_REQUIRE_CLIENT_CERT: "true"
STELLAOPS_TENANT_ID: dev
volumes:
- ../telemetry/otel-collector-config.yaml:/etc/otel-collector/config.yaml:ro
- ../telemetry/certs:/etc/otel-collector/tls:ro
ports:
- "4317:4317" # OTLP gRPC (mTLS)
- "4318:4318" # OTLP HTTP (mTLS)
- "9464:9464" # Prometheus exporter (mTLS)
- "13133:13133" # Health check
- "1777:1777" # pprof
healthcheck:
test: ["CMD", "curl", "-fsk", "--cert", "/etc/otel-collector/tls/client.crt", "--key", "/etc/otel-collector/tls/client.key", "--cacert", "/etc/otel-collector/tls/ca.crt", "https://localhost:13133/healthz"]
interval: 30s
start_period: 15s
timeout: 5s
retries: 3
networks:
default:
name: stellaops-telemetry

29
deploy/compose/env/prod.env.example vendored Normal file
View File

@@ -0,0 +1,29 @@
# Substitutions for docker-compose.prod.yaml
# ⚠️ Replace all placeholder secrets with values sourced from your secret manager.
MONGO_INITDB_ROOT_USERNAME=stellaops-prod
MONGO_INITDB_ROOT_PASSWORD=REPLACE_WITH_STRONG_PASSWORD
MINIO_ROOT_USER=stellaops-prod
MINIO_ROOT_PASSWORD=REPLACE_WITH_STRONG_PASSWORD
# Expose the MinIO console only to trusted operator networks.
MINIO_CONSOLE_PORT=39001
RUSTFS_HTTP_PORT=8080
AUTHORITY_ISSUER=https://authority.prod.stella-ops.org
AUTHORITY_PORT=8440
SIGNER_POE_INTROSPECT_URL=https://licensing.prod.stella-ops.org/introspect
SIGNER_PORT=8441
ATTESTOR_PORT=8442
CONCELIER_PORT=8445
SCANNER_WEB_PORT=8444
UI_PORT=8443
NATS_CLIENT_PORT=4222
SCANNER_QUEUE_BROKER=nats://nats:4222
# `true` enables signed scanner events for Notify ingestion.
SCANNER_EVENTS_ENABLED=true
SCANNER_EVENTS_DRIVER=redis
# Leave SCANNER_EVENTS_DSN empty to inherit the Redis queue DSN when SCANNER_QUEUE_BROKER uses redis://.
SCANNER_EVENTS_DSN=
SCANNER_EVENTS_STREAM=stella.events
SCANNER_EVENTS_PUBLISH_TIMEOUT_SECONDS=5
SCANNER_EVENTS_MAX_STREAM_LENGTH=10000
# External reverse proxy (Traefik, Envoy, etc.) that terminates TLS.
FRONTDOOR_NETWORK=stellaops_frontdoor

View File

@@ -0,0 +1,64 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
require_client_certificate: ${STELLAOPS_OTEL_REQUIRE_CLIENT_CERT:true}
http:
endpoint: 0.0.0.0:4318
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
require_client_certificate: ${STELLAOPS_OTEL_REQUIRE_CLIENT_CERT:true}
processors:
attributes/tenant-tag:
actions:
- key: tenant.id
action: insert
value: ${STELLAOPS_TENANT_ID:unknown}
batch:
send_batch_size: 1024
timeout: 5s
exporters:
logging:
verbosity: normal
prometheus:
endpoint: ${STELLAOPS_OTEL_PROMETHEUS_ENDPOINT:0.0.0.0:9464}
enable_open_metrics: true
metric_expiration: 5m
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
extensions:
health_check:
endpoint: ${STELLAOPS_OTEL_HEALTH_ENDPOINT:0.0.0.0:13133}
pprof:
endpoint: ${STELLAOPS_OTEL_PPROF_ENDPOINT:0.0.0.0:1777}
service:
telemetry:
logs:
level: ${STELLAOPS_OTEL_LOG_LEVEL:info}
extensions: [health_check, pprof]
pipelines:
traces:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging]
metrics:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging, prometheus]
logs:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging]

View File

@@ -1,6 +1,18 @@
{{- define "stellaops.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- define "stellaops.telemetryCollector.config" -}}
{{- if .Values.telemetry.collector.config }}
{{ tpl .Values.telemetry.collector.config . }}
{{- else }}
{{ tpl (.Files.Get "files/otel-collector-config.yaml") . }}
{{- end }}
{{- end -}}
{{- define "stellaops.telemetryCollector.fullname" -}}
{{- printf "%s-otel-collector" (include "stellaops.name" .) | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- define "stellaops.fullname" -}}
{{- $name := default .root.Chart.Name .root.Values.fullnameOverride -}}

View File

@@ -0,0 +1,121 @@
{{- if .Values.telemetry.collector.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "stellaops.telemetryCollector.fullname" . }}
labels:
{{- include "stellaops.labels" (dict "root" . "name" "otel-collector" "svc" (dict "class" "telemetry")) | nindent 4 }}
data:
config.yaml: |
{{ include "stellaops.telemetryCollector.config" . | indent 4 }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "stellaops.telemetryCollector.fullname" . }}
labels:
{{- include "stellaops.labels" (dict "root" . "name" "otel-collector" "svc" (dict "class" "telemetry")) | nindent 4 }}
spec:
replicas: {{ .Values.telemetry.collector.replicas | default 1 }}
selector:
matchLabels:
app.kubernetes.io/name: {{ include "stellaops.name" . | quote }}
app.kubernetes.io/component: "otel-collector"
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "stellaops.name" . | quote }}
app.kubernetes.io/component: "otel-collector"
stellaops.profile: {{ .Values.global.profile | quote }}
spec:
containers:
- name: otel-collector
image: {{ .Values.telemetry.collector.image | default "otel/opentelemetry-collector:0.105.0" | quote }}
args:
- "--config=/etc/otel/config.yaml"
ports:
- name: otlp-grpc
containerPort: 4317
- name: otlp-http
containerPort: 4318
- name: metrics
containerPort: 9464
- name: health
containerPort: 13133
- name: pprof
containerPort: 1777
env:
- name: STELLAOPS_OTEL_TLS_CERT
value: {{ .Values.telemetry.collector.tls.certPath | default "/etc/otel/tls/tls.crt" | quote }}
- name: STELLAOPS_OTEL_TLS_KEY
value: {{ .Values.telemetry.collector.tls.keyPath | default "/etc/otel/tls/tls.key" | quote }}
- name: STELLAOPS_OTEL_TLS_CA
value: {{ .Values.telemetry.collector.tls.caPath | default "/etc/otel/tls/ca.crt" | quote }}
- name: STELLAOPS_OTEL_PROMETHEUS_ENDPOINT
value: {{ .Values.telemetry.collector.prometheusEndpoint | default "0.0.0.0:9464" | quote }}
- name: STELLAOPS_OTEL_REQUIRE_CLIENT_CERT
value: {{ .Values.telemetry.collector.requireClientCert | default true | quote }}
- name: STELLAOPS_TENANT_ID
value: {{ .Values.telemetry.collector.defaultTenant | default "unknown" | quote }}
- name: STELLAOPS_OTEL_LOG_LEVEL
value: {{ .Values.telemetry.collector.logLevel | default "info" | quote }}
volumeMounts:
- name: config
mountPath: /etc/otel/config.yaml
subPath: config.yaml
readOnly: true
- name: tls
mountPath: /etc/otel/tls
readOnly: true
livenessProbe:
httpGet:
scheme: HTTPS
port: health
path: /healthz
initialDelaySeconds: 10
periodSeconds: 30
readinessProbe:
httpGet:
scheme: HTTPS
port: health
path: /healthz
initialDelaySeconds: 5
periodSeconds: 15
{{- with .Values.telemetry.collector.resources }}
resources:
{{ toYaml . | indent 12 }}
{{- end }}
volumes:
- name: config
configMap:
name: {{ include "stellaops.telemetryCollector.fullname" . }}
- name: tls
secret:
secretName: {{ .Values.telemetry.collector.tls.secretName | required "telemetry.collector.tls.secretName is required" }}
{{- if .Values.telemetry.collector.tls.items }}
items:
{{ toYaml .Values.telemetry.collector.tls.items | indent 14 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ include "stellaops.telemetryCollector.fullname" . }}
labels:
{{- include "stellaops.labels" (dict "root" . "name" "otel-collector" "svc" (dict "class" "telemetry")) | nindent 4 }}
spec:
type: ClusterIP
selector:
app.kubernetes.io/name: {{ include "stellaops.name" . | quote }}
app.kubernetes.io/component: "otel-collector"
ports:
- name: otlp-grpc
port: {{ .Values.telemetry.collector.service.grpcPort | default 4317 }}
targetPort: otlp-grpc
- name: otlp-http
port: {{ .Values.telemetry.collector.service.httpPort | default 4318 }}
targetPort: otlp-http
- name: metrics
port: {{ .Values.telemetry.collector.service.metricsPort | default 9464 }}
targetPort: metrics
{{- end }}

View File

@@ -1,15 +1,22 @@
global:
profile: dev
release:
version: "2025.10.0-edge"
channel: edge
manifestSha256: "822f82987529ea38d2321dbdd2ef6874a4062a117116a20861c26a8df1807beb"
image:
pullPolicy: IfNotPresent
labels:
stellaops.io/channel: edge
configMaps:
global:
profile: dev
release:
version: "2025.10.0-edge"
channel: edge
manifestSha256: "822f82987529ea38d2321dbdd2ef6874a4062a117116a20861c26a8df1807beb"
image:
pullPolicy: IfNotPresent
labels:
stellaops.io/channel: edge
telemetry:
collector:
enabled: true
defaultTenant: dev
tls:
secretName: stellaops-otel-tls
configMaps:
notify-config:
data:
notify.yaml: |

View File

@@ -0,0 +1,221 @@
global:
profile: prod
release:
version: "2025.09.2"
channel: stable
manifestSha256: "dc3c8fe1ab83941c838ccc5a8a5862f7ddfa38c2078e580b5649db26554565b7"
image:
pullPolicy: IfNotPresent
labels:
stellaops.io/channel: stable
stellaops.io/profile: prod
configMaps:
notify-config:
data:
notify.yaml: |
storage:
driver: mongo
connectionString: "mongodb://stellaops-mongo:27017"
database: "stellaops_notify_prod"
commandTimeoutSeconds: 45
authority:
enabled: true
issuer: "https://authority.prod.stella-ops.org"
metadataAddress: "https://authority.prod.stella-ops.org/.well-known/openid-configuration"
requireHttpsMetadata: true
allowAnonymousFallback: false
backchannelTimeoutSeconds: 30
tokenClockSkewSeconds: 60
audiences:
- notify
readScope: notify.read
adminScope: notify.admin
api:
basePath: "/api/v1/notify"
internalBasePath: "/internal/notify"
tenantHeader: "X-StellaOps-Tenant"
plugins:
baseDirectory: "/opt/stellaops"
directory: "plugins/notify"
searchPatterns:
- "StellaOps.Notify.Connectors.*.dll"
orderedPlugins:
- StellaOps.Notify.Connectors.Slack
- StellaOps.Notify.Connectors.Teams
- StellaOps.Notify.Connectors.Email
- StellaOps.Notify.Connectors.Webhook
telemetry:
enableRequestLogging: true
minimumLogLevel: Information
services:
authority:
image: registry.stella-ops.org/stellaops/authority@sha256:b0348bad1d0b401cc3c71cb40ba034c8043b6c8874546f90d4783c9dbfcc0bf5
service:
port: 8440
env:
STELLAOPS_AUTHORITY__ISSUER: "https://authority.prod.stella-ops.org"
STELLAOPS_AUTHORITY__PLUGINDIRECTORIES__0: "/app/plugins"
STELLAOPS_AUTHORITY__PLUGINS__CONFIGURATIONDIRECTORY: "/app/etc/authority.plugins"
envFrom:
- secretRef:
name: stellaops-prod-core
signer:
image: registry.stella-ops.org/stellaops/signer@sha256:8ad574e61f3a9e9bda8a58eb2700ae46813284e35a150b1137bc7c2b92ac0f2e
service:
port: 8441
env:
SIGNER__AUTHORITY__BASEURL: "https://stellaops-authority:8440"
SIGNER__POE__INTROSPECTURL: "https://licensing.prod.stella-ops.org/introspect"
envFrom:
- secretRef:
name: stellaops-prod-core
attestor:
image: registry.stella-ops.org/stellaops/attestor@sha256:0534985f978b0b5d220d73c96fddd962cd9135f616811cbe3bff4666c5af568f
service:
port: 8442
env:
ATTESTOR__SIGNER__BASEURL: "https://stellaops-signer:8441"
envFrom:
- secretRef:
name: stellaops-prod-core
concelier:
image: registry.stella-ops.org/stellaops/concelier@sha256:c58cdcaee1d266d68d498e41110a589dd204b487d37381096bd61ab345a867c5
service:
port: 8445
env:
CONCELIER__STORAGE__S3__ENDPOINT: "http://stellaops-minio:9000"
CONCELIER__AUTHORITY__BASEURL: "https://stellaops-authority:8440"
envFrom:
- secretRef:
name: stellaops-prod-core
volumeMounts:
- name: concelier-jobs
mountPath: /var/lib/concelier/jobs
volumeClaims:
- name: concelier-jobs
claimName: stellaops-concelier-jobs
scanner-web:
image: registry.stella-ops.org/stellaops/scanner-web@sha256:14b23448c3f9586a9156370b3e8c1991b61907efa666ca37dd3aaed1e79fe3b7
service:
port: 8444
env:
SCANNER__ARTIFACTSTORE__DRIVER: "rustfs"
SCANNER__ARTIFACTSTORE__ENDPOINT: "http://stellaops-rustfs:8080/api/v1"
SCANNER__ARTIFACTSTORE__BUCKET: "scanner-artifacts"
SCANNER__ARTIFACTSTORE__TIMEOUTSECONDS: "30"
SCANNER__QUEUE__BROKER: "nats://stellaops-nats:4222"
SCANNER__EVENTS__ENABLED: "true"
SCANNER__EVENTS__DRIVER: "redis"
SCANNER__EVENTS__DSN: ""
SCANNER__EVENTS__STREAM: "stella.events"
SCANNER__EVENTS__PUBLISHTIMEOUTSECONDS: "5"
SCANNER__EVENTS__MAXSTREAMLENGTH: "10000"
envFrom:
- secretRef:
name: stellaops-prod-core
scanner-worker:
image: registry.stella-ops.org/stellaops/scanner-worker@sha256:32e25e76386eb9ea8bee0a1ad546775db9a2df989fab61ac877e351881960dab
replicas: 3
env:
SCANNER__ARTIFACTSTORE__DRIVER: "rustfs"
SCANNER__ARTIFACTSTORE__ENDPOINT: "http://stellaops-rustfs:8080/api/v1"
SCANNER__ARTIFACTSTORE__BUCKET: "scanner-artifacts"
SCANNER__ARTIFACTSTORE__TIMEOUTSECONDS: "30"
SCANNER__QUEUE__BROKER: "nats://stellaops-nats:4222"
SCANNER__EVENTS__ENABLED: "true"
SCANNER__EVENTS__DRIVER: "redis"
SCANNER__EVENTS__DSN: ""
SCANNER__EVENTS__STREAM: "stella.events"
SCANNER__EVENTS__PUBLISHTIMEOUTSECONDS: "5"
SCANNER__EVENTS__MAXSTREAMLENGTH: "10000"
envFrom:
- secretRef:
name: stellaops-prod-core
notify-web:
image: registry.stella-ops.org/stellaops/notify-web:2025.09.2
service:
port: 8446
env:
DOTNET_ENVIRONMENT: Production
envFrom:
- secretRef:
name: stellaops-prod-notify
configMounts:
- name: notify-config
mountPath: /app/etc/notify.yaml
subPath: notify.yaml
configMap: notify-config
excititor:
image: registry.stella-ops.org/stellaops/excititor@sha256:59022e2016aebcef5c856d163ae705755d3f81949d41195256e935ef40a627fa
env:
EXCITITOR__CONCELIER__BASEURL: "https://stellaops-concelier:8445"
envFrom:
- secretRef:
name: stellaops-prod-core
web-ui:
image: registry.stella-ops.org/stellaops/web-ui@sha256:10d924808c48e4353e3a241da62eb7aefe727a1d6dc830eb23a8e181013b3a23
service:
port: 8443
env:
STELLAOPS_UI__BACKEND__BASEURL: "https://stellaops-scanner-web:8444"
mongo:
class: infrastructure
image: docker.io/library/mongo@sha256:c258b26dbb7774f97f52aff52231ca5f228273a84329c5f5e451c3739457db49
service:
port: 27017
command:
- mongod
- --bind_ip_all
envFrom:
- secretRef:
name: stellaops-prod-mongo
volumeMounts:
- name: mongo-data
mountPath: /data/db
volumeClaims:
- name: mongo-data
claimName: stellaops-mongo-data
minio:
class: infrastructure
image: docker.io/minio/minio@sha256:14cea493d9a34af32f524e538b8346cf79f3321eff8e708c1e2960462bd8936e
service:
port: 9000
command:
- server
- /data
- --console-address
- :9001
envFrom:
- secretRef:
name: stellaops-prod-minio
volumeMounts:
- name: minio-data
mountPath: /data
volumeClaims:
- name: minio-data
claimName: stellaops-minio-data
rustfs:
class: infrastructure
image: registry.stella-ops.org/stellaops/rustfs:2025.10.0-edge
service:
port: 8080
command:
- serve
- --listen
- 0.0.0.0:8080
- --root
- /data
env:
RUSTFS__LOG__LEVEL: info
RUSTFS__STORAGE__PATH: /data
volumeMounts:
- name: rustfs-data
mountPath: /data
volumeClaims:
- name: rustfs-data
claimName: stellaops-rustfs-data

View File

@@ -1,13 +1,20 @@
global:
profile: stage
release:
version: "2025.09.2"
global:
profile: stage
release:
version: "2025.09.2"
channel: stable
manifestSha256: "dc3c8fe1ab83941c838ccc5a8a5862f7ddfa38c2078e580b5649db26554565b7"
image:
pullPolicy: IfNotPresent
labels:
stellaops.io/channel: stable
labels:
stellaops.io/channel: stable
telemetry:
collector:
enabled: true
defaultTenant: stage
tls:
secretName: stellaops-otel-tls-stage
configMaps:
notify-config:

View File

@@ -1,10 +1,37 @@
global:
release:
version: ""
channel: ""
manifestSha256: ""
profile: ""
image:
pullPolicy: IfNotPresent
labels: {}
services: {}
global:
release:
version: ""
channel: ""
manifestSha256: ""
profile: ""
image:
pullPolicy: IfNotPresent
labels: {}
telemetry:
collector:
enabled: false
replicas: 1
image: otel/opentelemetry-collector:0.105.0
requireClientCert: true
defaultTenant: unknown
logLevel: info
tls:
secretName: ""
certPath: /etc/otel/tls/tls.crt
keyPath: /etc/otel/tls/tls.key
caPath: /etc/otel/tls/ca.crt
items:
- key: tls.crt
path: tls.crt
- key: tls.key
path: tls.key
- key: ca.crt
path: ca.crt
service:
grpcPort: 4317
httpPort: 4318
metricsPort: 9464
resources: {}
services: {}

1
deploy/telemetry/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
certs/

View File

@@ -0,0 +1,35 @@
# Telemetry Collector Assets
These assets provision the default OpenTelemetry Collector instance required by
`DEVOPS-OBS-50-001`. The collector acts as the secured ingest point for traces,
metrics, and logs emitted by StellaOps services.
## Contents
| File | Purpose |
| ---- | ------- |
| `otel-collector-config.yaml` | Baseline collector configuration (mutual TLS, OTLP receivers, Prometheus exporter). |
| `storage/prometheus.yaml` | Prometheus scrape configuration tuned for the collector and service tenants. |
| `storage/tempo.yaml` | Tempo configuration with multitenancy, WAL, and compaction settings. |
| `storage/loki.yaml` | Loki configuration enabling multitenant log ingestion with retention policies. |
| `storage/tenants/*.yaml` | Per-tenant overrides for Tempo and Loki rate/retention controls. |
## Development workflow
1. Generate development certificates (collector + client) using
`ops/devops/telemetry/generate_dev_tls.sh`.
2. Launch the collector via `docker compose -f docker-compose.telemetry.yaml up`.
3. Launch the storage backends (Prometheus, Tempo, Loki) via
`docker compose -f docker-compose.telemetry-storage.yaml up`.
4. Run the smoke test: `python ops/devops/telemetry/smoke_otel_collector.py`.
5. Explore the storage configuration (`storage/README.md`) to tune retention/limits.
The smoke test sends OTLP traffic over TLS and asserts the collector accepted
traces, metrics, and logs by scraping the Prometheus metrics endpoint.
## Kubernetes
The Helm chart consumes the same configuration (see `values.yaml`). Provide TLS
material via a secret referenced by `telemetry.collector.tls.secretName`,
containing `ca.crt`, `tls.crt`, and `tls.key`. Client certificates are required
for ingestion and should be issued by the same CA.

View File

@@ -0,0 +1,67 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
require_client_certificate: ${STELLAOPS_OTEL_REQUIRE_CLIENT_CERT:true}
http:
endpoint: 0.0.0.0:4318
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
require_client_certificate: ${STELLAOPS_OTEL_REQUIRE_CLIENT_CERT:true}
processors:
attributes/tenant-tag:
actions:
- key: tenant.id
action: insert
value: ${STELLAOPS_TENANT_ID:unknown}
batch:
send_batch_size: 1024
timeout: 5s
exporters:
logging:
verbosity: normal
prometheus:
endpoint: ${STELLAOPS_OTEL_PROMETHEUS_ENDPOINT:0.0.0.0:9464}
enable_open_metrics: true
metric_expiration: 5m
tls:
cert_file: ${STELLAOPS_OTEL_TLS_CERT:?STELLAOPS_OTEL_TLS_CERT not set}
key_file: ${STELLAOPS_OTEL_TLS_KEY:?STELLAOPS_OTEL_TLS_KEY not set}
client_ca_file: ${STELLAOPS_OTEL_TLS_CA:?STELLAOPS_OTEL_TLS_CA not set}
# Additional OTLP exporters can be configured by extending this section at runtime.
# For example, set STELLAOPS_OTEL_UPSTREAM_ENDPOINT and mount certificates, then
# add the exporter via a sidecar overlay.
extensions:
health_check:
endpoint: ${STELLAOPS_OTEL_HEALTH_ENDPOINT:0.0.0.0:13133}
pprof:
endpoint: ${STELLAOPS_OTEL_PPROF_ENDPOINT:0.0.0.0:1777}
service:
telemetry:
logs:
level: ${STELLAOPS_OTEL_LOG_LEVEL:info}
extensions: [health_check, pprof]
pipelines:
traces:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging]
metrics:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging, prometheus]
logs:
receivers: [otlp]
processors: [attributes/tenant-tag, batch]
exporters: [logging]

View File

@@ -0,0 +1,33 @@
# Telemetry Storage Stack
Configuration snippets for the default StellaOps observability backends used in
staging and production environments. The stack comprises:
- **Prometheus** for metrics (scraping the collector's Prometheus exporter)
- **Tempo** for traces (OTLP ingest via mTLS)
- **Loki** for logs (HTTP ingest with tenant isolation)
## Files
| Path | Description |
| ---- | ----------- |
| `prometheus.yaml` | Scrape configuration for the collector (mTLS + bearer token placeholder). |
| `tempo.yaml` | Tempo configuration with multitenancy enabled and local storage paths. |
| `loki.yaml` | Loki configuration enabling per-tenant overrides and boltdb-shipper storage. |
| `tenants/tempo-overrides.yaml` | Example tenant overrides for Tempo (retention, limits). |
| `tenants/loki-overrides.yaml` | Example tenant overrides for Loki (rate limits, retention). |
| `auth/` | Placeholder directory for Prometheus bearer token files (e.g., `token`). |
These configurations are referenced by the Docker Compose overlay
(`deploy/compose/docker-compose.telemetry-storage.yaml`) and the staging rollout documented in
`docs/ops/telemetry-storage.md`. Adjust paths, credentials, and overrides before running in
connected environments. Place the Prometheus bearer token in `auth/token` when using the
Compose overlay (the directory contains a `.gitkeep` placeholder and is gitignored by default).
## Security
- Both Tempo and Loki require mutual TLS.
- Prometheus uses mTLS plus a bearer token that should be minted by Authority.
- Update the overrides files to enforce per-tenant retention/ingestion limits.
For comprehensive deployment steps see `docs/ops/telemetry-storage.md`.

View File

@@ -0,0 +1,48 @@
auth_enabled: true
server:
http_listen_port: 3100
log_level: info
common:
ring:
instance_addr: 127.0.0.1
kvstore:
store: inmemory
replication_factor: 1
path_prefix: /var/loki
schema_config:
configs:
- from: 2024-01-01
store: boltdb-shipper
object_store: filesystem
schema: v13
index:
prefix: loki_index_
period: 24h
storage_config:
filesystem:
directory: /var/loki/chunks
boltdb_shipper:
active_index_directory: /var/loki/index
cache_location: /var/loki/index_cache
shared_store: filesystem
ruler:
storage:
type: local
local:
directory: /var/loki/rules
rule_path: /tmp/loki-rules
enable_api: true
limits_config:
enforce_metric_name: false
reject_old_samples: true
reject_old_samples_max_age: 168h
max_entries_limit_per_query: 5000
ingestion_rate_mb: 10
ingestion_burst_size_mb: 20
per_tenant_override_config: /etc/telemetry/tenants/loki-overrides.yaml

View File

@@ -0,0 +1,19 @@
global:
scrape_interval: 15s
evaluation_interval: 30s
scrape_configs:
- job_name: "stellaops-otel-collector"
scheme: https
metrics_path: /
tls_config:
ca_file: ${PROMETHEUS_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}
cert_file: ${PROMETHEUS_TLS_CERT_FILE:-/etc/telemetry/tls/client.crt}
key_file: ${PROMETHEUS_TLS_KEY_FILE:-/etc/telemetry/tls/client.key}
insecure_skip_verify: false
authorization:
type: Bearer
credentials_file: ${PROMETHEUS_BEARER_TOKEN_FILE:-/etc/telemetry/auth/token}
static_configs:
- targets:
- ${PROMETHEUS_COLLECTOR_TARGET:-stellaops-otel-collector:9464}

View File

@@ -0,0 +1,56 @@
multitenancy_enabled: true
usage_report:
reporting_enabled: false
server:
http_listen_port: 3200
log_level: info
distributor:
receivers:
otlp:
protocols:
grpc:
tls:
cert_file: ${TEMPO_TLS_CERT_FILE:-/etc/telemetry/tls/server.crt}
key_file: ${TEMPO_TLS_KEY_FILE:-/etc/telemetry/tls/server.key}
client_ca_file: ${TEMPO_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}
require_client_cert: true
http:
tls:
cert_file: ${TEMPO_TLS_CERT_FILE:-/etc/telemetry/tls/server.crt}
key_file: ${TEMPO_TLS_KEY_FILE:-/etc/telemetry/tls/server.key}
client_ca_file: ${TEMPO_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}
require_client_cert: true
ingester:
lifecycler:
ring:
instance_availability_zone: ${TEMPO_ZONE:-zone-a}
trace_idle_period: 10s
max_block_bytes: 1_048_576
compactor:
compaction:
block_retention: 168h
metrics_generator:
registry:
external_labels:
cluster: stellaops
storage:
trace:
backend: local
local:
path: /var/tempo/traces
wal:
path: /var/tempo/wal
metrics:
backend: prometheus
overrides:
defaults:
ingestion_rate_limit_bytes: 1048576
max_traces_per_user: 200000
per_tenant_override_config: /etc/telemetry/tenants/tempo-overrides.yaml

View File

@@ -0,0 +1,19 @@
# Example Loki per-tenant overrides
# Adjust according to https://grafana.com/docs/loki/latest/configuration/#limits_config
stellaops-dev:
ingestion_rate_mb: 10
ingestion_burst_size_mb: 20
max_global_streams_per_user: 5000
retention_period: 168h
stellaops-stage:
ingestion_rate_mb: 20
ingestion_burst_size_mb: 40
max_global_streams_per_user: 10000
retention_period: 336h
__default__:
ingestion_rate_mb: 5
ingestion_burst_size_mb: 10
retention_period: 72h

View File

@@ -0,0 +1,16 @@
# Example Tempo per-tenant overrides
# Consult https://grafana.com/docs/tempo/latest/configuration/#limits-configuration
# before applying in production.
stellaops-dev:
traces_per_second_limit: 100000
max_bytes_per_trace: 10485760
max_search_bytes_per_trace: 20971520
stellaops-stage:
traces_per_second_limit: 200000
max_bytes_per_trace: 20971520
__default__:
traces_per_second_limit: 50000
max_bytes_per_trace: 5242880

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""
Ensure deployment bundles reference the images defined in a release manifest.
Usage:
./deploy/tools/check-channel-alignment.py \
--release deploy/releases/2025.10-edge.yaml \
--target deploy/helm/stellaops/values-dev.yaml \
--target deploy/compose/docker-compose.dev.yaml
For every target file, the script scans `image:` declarations and verifies that
any image belonging to a repository listed in the release manifest matches the
exact digest or tag recorded there. Images outside of the manifest (for example,
supporting services such as `nats`) are ignored.
"""
from __future__ import annotations
import argparse
import pathlib
import re
import sys
from typing import Dict, Iterable, List, Optional, Set
IMAGE_LINE = re.compile(r"^\s*image:\s*['\"]?(?P<image>\S+)['\"]?\s*$")
def extract_images(path: pathlib.Path) -> List[str]:
images: List[str] = []
for line in path.read_text(encoding="utf-8").splitlines():
match = IMAGE_LINE.match(line)
if match:
images.append(match.group("image"))
return images
def image_repo(image: str) -> str:
if "@" in image:
return image.split("@", 1)[0]
# Split on the last colon to preserve registries with ports (e.g. localhost:5000)
if ":" in image:
prefix, tag = image.rsplit(":", 1)
if "/" in tag:
# handle digestive colon inside path (unlikely)
return image
return prefix
return image
def load_release_map(release_path: pathlib.Path) -> Dict[str, str]:
release_map: Dict[str, str] = {}
for image in extract_images(release_path):
repo = image_repo(image)
release_map[repo] = image
return release_map
def check_target(
target_path: pathlib.Path,
release_map: Dict[str, str],
ignore_repos: Set[str],
) -> List[str]:
errors: List[str] = []
for image in extract_images(target_path):
repo = image_repo(image)
if repo in ignore_repos:
continue
if repo not in release_map:
continue
expected = release_map[repo]
if image != expected:
errors.append(
f"{target_path}: {image} does not match release value {expected}"
)
return errors
def parse_args(argv: Optional[Iterable[str]] = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--release",
required=True,
type=pathlib.Path,
help="Path to the release manifest (YAML)",
)
parser.add_argument(
"--target",
action="append",
required=True,
type=pathlib.Path,
help="Deployment profile to validate against the release manifest",
)
parser.add_argument(
"--ignore-repo",
action="append",
default=[],
help="Repository prefix to ignore (may be repeated)",
)
return parser.parse_args(argv)
def main(argv: Optional[Iterable[str]] = None) -> int:
args = parse_args(argv)
release_map = load_release_map(args.release)
ignore_repos = {repo.rstrip("/") for repo in args.ignore_repo}
if not release_map:
print(f"error: no images found in release manifest {args.release}", file=sys.stderr)
return 2
total_errors: List[str] = []
for target in args.target:
if not target.exists():
total_errors.append(f"{target}: file not found")
continue
total_errors.extend(check_target(target, release_map, ignore_repos))
if total_errors:
print("✖ channel alignment check failed:", file=sys.stderr)
for err in total_errors:
print(f" - {err}", file=sys.stderr)
return 1
print("✓ deployment profiles reference release images for the inspected repositories.")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -1,53 +1,61 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
COMPOSE_DIR="$ROOT_DIR/compose"
HELM_DIR="$ROOT_DIR/helm/stellaops"
compose_profiles=(
"docker-compose.dev.yaml:env/dev.env.example"
"docker-compose.stage.yaml:env/stage.env.example"
"docker-compose.airgap.yaml:env/airgap.env.example"
"docker-compose.mirror.yaml:env/mirror.env.example"
)
docker_ready=false
if command -v docker >/dev/null 2>&1; then
if docker compose version >/dev/null 2>&1; then
docker_ready=true
else
echo "⚠️ docker CLI present but Compose plugin unavailable; skipping compose validation" >&2
fi
else
echo "⚠️ docker CLI not found; skipping compose validation" >&2
fi
if [[ "$docker_ready" == "true" ]]; then
for entry in "${compose_profiles[@]}"; do
IFS=":" read -r compose_file env_file <<<"$entry"
printf '→ validating %s with %s\n' "$compose_file" "$env_file"
docker compose \
--env-file "$COMPOSE_DIR/$env_file" \
-f "$COMPOSE_DIR/$compose_file" config >/dev/null
done
fi
helm_values=(
"$HELM_DIR/values-dev.yaml"
"$HELM_DIR/values-stage.yaml"
"$HELM_DIR/values-airgap.yaml"
"$HELM_DIR/values-mirror.yaml"
)
if command -v helm >/dev/null 2>&1; then
for values in "${helm_values[@]}"; do
printf '→ linting Helm chart with %s\n' "$(basename "$values")"
helm lint "$HELM_DIR" -f "$values"
helm template test-release "$HELM_DIR" -f "$values" >/dev/null
done
else
echo "⚠️ helm CLI not found; skipping Helm lint/template" >&2
fi
printf 'Profiles validated (where tooling was available).\n'
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
COMPOSE_DIR="$ROOT_DIR/compose"
HELM_DIR="$ROOT_DIR/helm/stellaops"
compose_profiles=(
"docker-compose.dev.yaml:env/dev.env.example"
"docker-compose.stage.yaml:env/stage.env.example"
"docker-compose.prod.yaml:env/prod.env.example"
"docker-compose.airgap.yaml:env/airgap.env.example"
"docker-compose.mirror.yaml:env/mirror.env.example"
"docker-compose.telemetry.yaml:"
"docker-compose.telemetry-storage.yaml:"
)
docker_ready=false
if command -v docker >/dev/null 2>&1; then
if docker compose version >/dev/null 2>&1; then
docker_ready=true
else
echo "⚠️ docker CLI present but Compose plugin unavailable; skipping compose validation" >&2
fi
else
echo "⚠️ docker CLI not found; skipping compose validation" >&2
fi
if [[ "$docker_ready" == "true" ]]; then
for entry in "${compose_profiles[@]}"; do
IFS=":" read -r compose_file env_file <<<"$entry"
printf '→ validating %s with %s\n' "$compose_file" "$env_file"
if [[ -n "$env_file" ]]; then
docker compose \
--env-file "$COMPOSE_DIR/$env_file" \
-f "$COMPOSE_DIR/$compose_file" config >/dev/null
else
docker compose -f "$COMPOSE_DIR/$compose_file" config >/dev/null
fi
done
fi
helm_values=(
"$HELM_DIR/values-dev.yaml"
"$HELM_DIR/values-stage.yaml"
"$HELM_DIR/values-prod.yaml"
"$HELM_DIR/values-airgap.yaml"
"$HELM_DIR/values-mirror.yaml"
)
if command -v helm >/dev/null 2>&1; then
for values in "${helm_values[@]}"; do
printf '→ linting Helm chart with %s\n' "$(basename "$values")"
helm lint "$HELM_DIR" -f "$values"
helm template test-release "$HELM_DIR" -f "$values" >/dev/null
done
else
echo "⚠️ helm CLI not found; skipping Helm lint/template" >&2
fi
printf 'Profiles validated (where tooling was available).\n'

View File

@@ -565,7 +565,7 @@ Content-Type: application/json
"containerId": "containerd://bead5...",
"imageRef": "ghcr.io/acme/api@sha256:deadbeef"
},
"process": { "pid": 12345, "entrypoint": ["/start.sh", "--serve"] },
"process": { "pid": 12345, "entrypoint": ["/start.sh", "--serve"], "buildId": "5f0c7c3c..." },
"loadedLibs": [
{ "path": "/lib/x86_64-linux-gnu/libssl.so.3", "inode": 123456, "sha256": "abc123..." }
],
@@ -627,7 +627,33 @@ See `docs/dev/32_AUTH_CLIENT_GUIDE.md` for recommended profiles (online vs. air-
| `stellaops-cli offline kit import` | Upload an offline kit bundle to the backend | `<bundle.tgz>` (argument)<br>`--manifest <path>`<br>`--bundle-signature <path>`<br>`--manifest-signature <path>` | Validates digests when metadata is present, then posts multipart payloads to `POST /api/offline-kit/import`; logs the submitted import ID/status for air-gapped rollout tracking. |
| `stellaops-cli offline kit status` | Display imported offline kit details | `--json` | Shows bundle id/kind, captured/imported timestamps, digests, and component versions; `--json` emits machine-readable output for scripting. |
| `stellaops-cli config show` | Display resolved configuration | — | Masks secret values; helpful for airgapped installs |
| `stellaops-cli runtime policy test` | Ask Scanner.WebService for runtime verdicts (Webhook parity) | `--image/-i <digest>` (repeatable, comma/space lists supported)<br>`--file/-f <path>`<br>`--namespace/--ns <name>`<br>`--label/-l key=value` (repeatable)<br>`--json` | Posts to `POST /api/v1/scanner/policy/runtime`, deduplicates image digests, and prints TTL/policy revision plus per-image columns for signed state, SBOM referrers, quieted-by metadata, confidence, and Rekor attestation (uuid + verified flag). Accepts newline/whitespace-delimited stdin when piped; `--json` emits the raw response without additional logging. |
| `stellaops-cli runtime policy test` | Ask Scanner.WebService for runtime verdicts (Webhook parity) | `--image/-i <digest>` (repeatable, comma/space lists supported)<br>`--file/-f <path>`<br>`--namespace/--ns <name>`<br>`--label/-l key=value` (repeatable)<br>`--json` | Posts to `POST /api/v1/scanner/policy/runtime`, deduplicates image digests, and prints TTL/policy revision plus per-image columns for signed state, SBOM referrers, quieted-by metadata, confidence, Rekor attestation (uuid + verified flag), and recently observed build IDs (shortened for readability). Accepts newline/whitespace-delimited stdin when piped; `--json` emits the raw response without additional logging. |
#### Example: Pivot from runtime verdicts to debug symbols
```bash
$ stellaops-cli runtime policy test \
--image ghcr.io/acme/payments@sha256:4f7d55f6... \
--namespace payments
Image Digest Signed SBOM Build IDs TTL
ghcr.io/acme/payments@sha256:4f7d55f6... yes present 5f0c7c3c..., 1122aabbccddeeff... 04:59:55
```
1. Copy one of the hashes (e.g. `5f0c7c3cb4d9f8a4f1c1d5c6b7e8f90123456789`) and locate the bundled debug artefact:
```bash
ls offline-kit/debug/.build-id/5f/0c7c3cb4d9f8a4f1c1d5c6b7e8f90123456789.debug
```
2. Confirm the running binary advertises the same GNU build-id:
```bash
readelf -n /proc/$(pgrep -f payments-api | head -n1)/exe | grep -i 'Build ID'
```
3. If you operate a debuginfod mirror backed by the Offline Kit tree, resolve symbols with:
```bash
debuginfod-find debuginfo 5f0c7c3cb4d9f8a4f1c1d5c6b7e8f90123456789 >/tmp/payments-api.debug
```
See [Offline Kit step 0](24_OFFLINE_KIT.md#0-prepare-the-debug-store) for instructions on mirroring the debug store before packaging.
`POST /api/v1/scanner/policy/runtime` responds with one entry per digest. Each result now includes:
@@ -635,6 +661,7 @@ See `docs/dev/32_AUTH_CLIENT_GUIDE.md` for recommended profiles (online vs. air-
- `confidence` (0-1 double) derived from canonical `PolicyPreviewService` evaluation and `quieted`/`quietedBy` flags for muted findings.
- `rekor` block carrying `uuid`, `url`, and the attestor-backed `verified` boolean when Rekor inclusion proofs have been confirmed.
- `metadata` (stringified JSON) capturing runtime heuristics, policy issues, evaluated findings, and timestamps for downstream audit.
- `buildIds` (array) lists up to three distinct GNU build-id hashes recently observed for that digest so debuggers can derive `/usr/lib/debug/.build-id/<aa>/<rest>.debug` paths for symbol stores.
When running on an interactive terminal without explicit override flags, the CLI uses Spectre.Console prompts to let you choose per-run ORAS/offline bundle behaviour.
@@ -738,7 +765,7 @@ For offline workflows, configure `StellaOps:Offline:KitsDirectory` (or `STELLAOP
"ClientSecret": "REDACTED",
"Username": "",
"Password": "",
"Scope": "concelier.jobs.trigger",
"Scope": "concelier.jobs.trigger advisory:ingest advisory:read",
"TokenCacheDirectory": ""
}
}

View File

@@ -1,61 +1,61 @@
# 10 · Concelier + CLI Quickstart
This guide walks through configuring the Concelier web service and the `stellaops-cli`
tool so an operator can ingest advisories, merge them, and publish exports from a
single workstation. It focuses on deployment-facing surfaces only (configuration,
runtime wiring, CLI usage) and leaves connector/internal customization for later.
---
## 0 · Prerequisites
- .NET SDK **10.0.100-preview** (matches `global.json`)
- MongoDB instance reachable from the host (local Docker or managed)
- `trivy-db` binary on `PATH` for Trivy exports (and `oras` if publishing to OCI)
- Plugin assemblies present in `StellaOps.Concelier.PluginBinaries/` (already included in the repo)
- Optional: Docker/Podman runtime if you plan to run scanners locally
> **Tip** air-gapped installs should preload `trivy-db` and `oras` binaries into the
> runner image since Concelier never fetches them dynamically.
---
## 1 · Configure Concelier
1. Copy the sample config to the expected location (CI/CD pipelines can stamp values
into this file during deployment—see the “Deployment automation” note below):
```bash
mkdir -p etc
cp etc/concelier.yaml.sample etc/concelier.yaml
```
2. Edit `etc/concelier.yaml` and update the MongoDB DSN (and optional database name).
The default template configures plug-in discovery to look in `StellaOps.Concelier.PluginBinaries/`
and disables remote telemetry exporters by default.
3. (Optional) Override settings via environment variables. All keys are prefixed with
`CONCELIER_`. Example:
```bash
export CONCELIER_STORAGE__DSN="mongodb://user:pass@mongo:27017/concelier"
export CONCELIER_TELEMETRY__ENABLETRACING=false
```
4. Start the web service from the repository root:
```bash
dotnet run --project src/StellaOps.Concelier.WebService
```
On startup Concelier validates the options, boots MongoDB indexes, loads plug-ins,
and exposes:
- `GET /health` returns service status and telemetry settings
- `GET /ready` performs a MongoDB `ping`
- `GET /jobs` + `POST /jobs/{kind}` inspect and trigger connector/export jobs
> **Security note** authentication now ships via StellaOps Authority. Keep
# 10 · Concelier + CLI Quickstart
This guide walks through configuring the Concelier web service and the `stellaops-cli`
tool so an operator can ingest advisories, merge them, and publish exports from a
single workstation. It focuses on deployment-facing surfaces only (configuration,
runtime wiring, CLI usage) and leaves connector/internal customization for later.
---
## 0 · Prerequisites
- .NET SDK **10.0.100-preview** (matches `global.json`)
- MongoDB instance reachable from the host (local Docker or managed)
- `trivy-db` binary on `PATH` for Trivy exports (and `oras` if publishing to OCI)
- Plugin assemblies present in `StellaOps.Concelier.PluginBinaries/` (already included in the repo)
- Optional: Docker/Podman runtime if you plan to run scanners locally
> **Tip** air-gapped installs should preload `trivy-db` and `oras` binaries into the
> runner image since Concelier never fetches them dynamically.
---
## 1 · Configure Concelier
1. Copy the sample config to the expected location (CI/CD pipelines can stamp values
into this file during deployment—see the “Deployment automation” note below):
```bash
mkdir -p etc
cp etc/concelier.yaml.sample etc/concelier.yaml
```
2. Edit `etc/concelier.yaml` and update the MongoDB DSN (and optional database name).
The default template configures plug-in discovery to look in `StellaOps.Concelier.PluginBinaries/`
and disables remote telemetry exporters by default.
3. (Optional) Override settings via environment variables. All keys are prefixed with
`CONCELIER_`. Example:
```bash
export CONCELIER_STORAGE__DSN="mongodb://user:pass@mongo:27017/concelier"
export CONCELIER_TELEMETRY__ENABLETRACING=false
```
4. Start the web service from the repository root:
```bash
dotnet run --project src/StellaOps.Concelier.WebService
```
On startup Concelier validates the options, boots MongoDB indexes, loads plug-ins,
and exposes:
- `GET /health` returns service status and telemetry settings
- `GET /ready` performs a MongoDB `ping`
- `GET /jobs` + `POST /jobs/{kind}` inspect and trigger connector/export jobs
> **Security note** authentication now ships via StellaOps Authority. Keep
> `authority.allowAnonymousFallback: true` only during the staged rollout and
> disable it before **2025-12-31 UTC** so tokens become mandatory.
@@ -66,230 +66,244 @@ Rollout checkpoints for the two Authority toggles:
| **Validation (staging)** | `true` | `true` | Verify token issuance, CLI scopes, and audit log noise without breaking cron jobs. | Watch `Concelier.Authorization.Audit` for `bypass=True` events and scope gaps; confirm CLI `auth status` succeeds. |
| **Cutover rehearsal** | `true` | `false` | Exercise production-style enforcement before the deadline; ensure only approved maintenance ranges remain in `bypassNetworks`. | Expect some HTTP 401s; verify `web.jobs.triggered` metrics flatten for unauthenticated calls and audit logs highlight missing tokens. |
| **Enforced (steady state)** | `true` | `false` | Production baseline after the 2025-12-31 UTC cutoff. | Alert on new `bypass=True` entries and on repeated 401 bursts; correlate with Authority availability dashboards. |
### Authority companion configuration (preview)
1. Copy the Authority sample configuration:
```bash
cp etc/authority.yaml.sample etc/authority.yaml
```
2. Update the issuer URL, token lifetimes, and plug-in descriptors to match your
environment. Authority expects per-plugin manifests in `etc/authority.plugins/`;
sample `standard.yaml` and `ldap.yaml` files are provided as starting points.
For air-gapped installs keep the default plug-in binary directory
(`../StellaOps.Authority.PluginBinaries`) so packaged plug-ins load without outbound access.
3. Environment variables prefixed with `STELLAOPS_AUTHORITY_` override individual
fields. Example:
```bash
export STELLAOPS_AUTHORITY__ISSUER="https://authority.stella-ops.local"
export STELLAOPS_AUTHORITY__PLUGINDIRECTORIES__0="/srv/authority/plugins"
```
---
## 2 · Configure the CLI
The CLI reads configuration from JSON/YAML files *and* environment variables. The
defaults live in `src/StellaOps.Cli/appsettings.json` and expect overrides at runtime.
| Setting | Environment variable | Default | Purpose |
| ------- | -------------------- | ------- | ------- |
| `BackendUrl` | `STELLAOPS_BACKEND_URL` | _empty_ | Base URL of the Concelier web service |
| `ApiKey` | `API_KEY` | _empty_ | Reserved for legacy key auth; leave empty when using Authority |
| `ScannerCacheDirectory` | `STELLAOPS_SCANNER_CACHE_DIRECTORY` | `scanners` | Local cache folder |
| `ResultsDirectory` | `STELLAOPS_RESULTS_DIRECTORY` | `results` | Where scan outputs are written |
| `Authority.Url` | `STELLAOPS_AUTHORITY_URL` | _empty_ | StellaOps Authority issuer/token endpoint |
| `Authority.ClientId` | `STELLAOPS_AUTHORITY_CLIENT_ID` | _empty_ | Client identifier for the CLI |
| `Authority.ClientSecret` | `STELLAOPS_AUTHORITY_CLIENT_SECRET` | _empty_ | Client secret (omit when using username/password grant) |
| `Authority.Username` | `STELLAOPS_AUTHORITY_USERNAME` | _empty_ | Username for password grant flows |
| `Authority.Password` | `STELLAOPS_AUTHORITY_PASSWORD` | _empty_ | Password for password grant flows |
| `Authority.Scope` | `STELLAOPS_AUTHORITY_SCOPE` | `concelier.jobs.trigger` | OAuth scope requested for backend operations |
| `Authority.TokenCacheDirectory` | `STELLAOPS_AUTHORITY_TOKEN_CACHE_DIR` | `~/.stellaops/tokens` | Directory that persists cached tokens |
| `Authority.Resilience.EnableRetries` | `STELLAOPS_AUTHORITY_ENABLE_RETRIES` | `true` | Toggle Polly retry handler for Authority HTTP calls |
| `Authority.Resilience.RetryDelays` | `STELLAOPS_AUTHORITY_RETRY_DELAYS` | `1s,2s,5s` | Comma- or space-separated backoff delays (hh:mm:ss) |
| `Authority.Resilience.AllowOfflineCacheFallback` | `STELLAOPS_AUTHORITY_ALLOW_OFFLINE_CACHE_FALLBACK` | `true` | Allow CLI to reuse cached discovery/JWKS metadata when Authority is offline |
| `Authority.Resilience.OfflineCacheTolerance` | `STELLAOPS_AUTHORITY_OFFLINE_CACHE_TOLERANCE` | `00:10:00` | Additional tolerance window applied to cached metadata |
Example bootstrap:
```bash
export STELLAOPS_BACKEND_URL="http://localhost:5000"
export STELLAOPS_RESULTS_DIRECTORY="$HOME/.stellaops/results"
export STELLAOPS_AUTHORITY_URL="https://authority.local"
export STELLAOPS_AUTHORITY_CLIENT_ID="concelier-cli"
export STELLAOPS_AUTHORITY_CLIENT_SECRET="s3cr3t"
dotnet run --project src/StellaOps.Cli -- db merge
# Acquire a bearer token and confirm cache state
dotnet run --project src/StellaOps.Cli -- auth login
dotnet run --project src/StellaOps.Cli -- auth status
dotnet run --project src/StellaOps.Cli -- auth whoami
```
Refer to `docs/dev/32_AUTH_CLIENT_GUIDE.md` for deeper guidance on tuning retry/offline settings and rollout checklists.
To persist configuration, you can create `stellaops-cli.yaml` next to the binary or
rely on environment variables for ephemeral runners.
---
## 3 · Operating Workflow
1. **Trigger connector fetch stages**
```bash
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage fetch
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage parse
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage map
```
Use `--mode resume` when continuing from a previous window:
```bash
dotnet run --project src/StellaOps.Cli -- db fetch --source redhat --stage fetch --mode resume
```
2. **Merge canonical advisories**
```bash
dotnet run --project src/StellaOps.Cli -- db merge
```
3. **Produce exports**
```bash
# JSON tree (vuln-list style)
dotnet run --project src/StellaOps.Cli -- db export --format json
# Trivy DB (delta example)
dotnet run --project src/StellaOps.Cli -- db export --format trivy-db --delta
```
Concelier always produces a deterministic OCI layout. The first run after a clean
bootstrap emits a **full** baseline; subsequent `--delta` runs reuse the previous
baselines blobs when only JSON manifests change. If the exporter detects that a
prior delta is still active (i.e., `LastDeltaDigest` is recorded) it automatically
upgrades the next run to a full export and resets the baseline so operators never
chain deltas indefinitely. The CLI exposes `--publish-full/--publish-delta` (for
ORAS pushes) and `--include-full/--include-delta` (for offline bundles) should you
need to override the defaults interactively.
**Smoke-check delta reuse:** after the first baseline completes, run the export a
second time with `--delta` and verify that the new directory reports `mode=delta`
while reusing the previous layer blob.
```bash
export_root=${CONCELIER_EXPORT_ROOT:-exports/trivy}
base=$(ls -1d "$export_root"/* | sort | tail -n2 | head -n1)
delta=$(ls -1d "$export_root"/* | sort | tail -n1)
jq -r '.mode,.baseExportId' "$delta/metadata.json"
base_manifest=$(jq -r '.manifests[0].digest' "$base/index.json")
delta_manifest=$(jq -r '.manifests[0].digest' "$delta/index.json")
printf 'baseline manifest: %s\ndelta manifest: %s\n' "$base_manifest" "$delta_manifest"
layer_digest=$(jq -r '.layers[0].digest' "$base/blobs/sha256/${base_manifest#sha256:}")
cmp "$base/blobs/sha256/${layer_digest#sha256:}" \
"$delta/blobs/sha256/${layer_digest#sha256:}"
```
`cmp` returning exit code `0` confirms the delta export reuses the baselines
`db.tar.gz` layer instead of rebuilding it.
4. **Manage scanners (optional)**
```bash
dotnet run --project src/StellaOps.Cli -- scanner download --channel stable
dotnet run --project src/StellaOps.Cli -- scan run --entry scanners/latest/Scanner.dll --target ./sboms
dotnet run --project src/StellaOps.Cli -- scan upload --file results/scan-001.json
```
Add `--verbose` to any command for structured console logs. All commands honour
`Ctrl+C` cancellation and exit with non-zero status codes when the backend returns
a problem document.
---
## 4 · Verification Checklist
- Concelier `/health` returns `"status":"healthy"` and Storage bootstrap is marked
complete after startup.
- CLI commands return HTTP 202 with a `Location` header (job tracking URL) when
triggering Concelier jobs.
- Export artefacts are materialised under the configured output directories and
their manifests record digests.
- MongoDB contains the expected `document`, `dto`, `advisory`, and `export_state`
collections after a run.
---
## 5 · Deployment Automation
- Treat `etc/concelier.yaml.sample` as the canonical template. CI/CD should copy it to
the deployment artifact and replace placeholders (DSN, telemetry endpoints, cron
overrides) with environment-specific secrets.
- Keep secret material (Mongo credentials, OTLP tokens) outside of the repository;
inject them via secret stores or pipeline variables at stamp time.
- When building container images, include `trivy-db` (and `oras` if used) so air-gapped
clusters do not need outbound downloads at runtime.
---
## 5 · Next Steps
- Enable authority-backed authentication in non-production first. Set
`authority.enabled: true` while keeping `authority.allowAnonymousFallback: true`
to observe logs, then flip it to `false` before 2025-12-31 UTC to enforce tokens.
- Automate the workflow above via CI/CD (compose stack or Kubernetes CronJobs).
- Pair with the Concelier connector teams when enabling additional sources so their
module-specific requirements are pulled in safely.
---
### Authority companion configuration (preview)
1. Copy the Authority sample configuration:
```bash
cp etc/authority.yaml.sample etc/authority.yaml
```
2. Update the issuer URL, token lifetimes, and plug-in descriptors to match your
environment. Authority expects per-plugin manifests in `etc/authority.plugins/`;
sample `standard.yaml` and `ldap.yaml` files are provided as starting points.
For air-gapped installs keep the default plug-in binary directory
(`../StellaOps.Authority.PluginBinaries`) so packaged plug-ins load without outbound access.
3. Environment variables prefixed with `STELLAOPS_AUTHORITY_` override individual
fields. Example:
```bash
export STELLAOPS_AUTHORITY__ISSUER="https://authority.stella-ops.local"
export STELLAOPS_AUTHORITY__PLUGINDIRECTORIES__0="/srv/authority/plugins"
```
---
## 2 · Configure the CLI
The CLI reads configuration from JSON/YAML files *and* environment variables. The
defaults live in `src/StellaOps.Cli/appsettings.json` and expect overrides at runtime.
| Setting | Environment variable | Default | Purpose |
| ------- | -------------------- | ------- | ------- |
| `BackendUrl` | `STELLAOPS_BACKEND_URL` | _empty_ | Base URL of the Concelier web service |
| `ApiKey` | `API_KEY` | _empty_ | Reserved for legacy key auth; leave empty when using Authority |
| `ScannerCacheDirectory` | `STELLAOPS_SCANNER_CACHE_DIRECTORY` | `scanners` | Local cache folder |
| `ResultsDirectory` | `STELLAOPS_RESULTS_DIRECTORY` | `results` | Where scan outputs are written |
| `Authority.Url` | `STELLAOPS_AUTHORITY_URL` | _empty_ | StellaOps Authority issuer/token endpoint |
| `Authority.ClientId` | `STELLAOPS_AUTHORITY_CLIENT_ID` | _empty_ | Client identifier for the CLI |
| `Authority.ClientSecret` | `STELLAOPS_AUTHORITY_CLIENT_SECRET` | _empty_ | Client secret (omit when using username/password grant) |
| `Authority.Username` | `STELLAOPS_AUTHORITY_USERNAME` | _empty_ | Username for password grant flows |
| `Authority.Password` | `STELLAOPS_AUTHORITY_PASSWORD` | _empty_ | Password for password grant flows |
| `Authority.Scope` | `STELLAOPS_AUTHORITY_SCOPE` | `concelier.jobs.trigger advisory:ingest` | Space-separated OAuth scopes requested for backend operations |
| `Authority.TokenCacheDirectory` | `STELLAOPS_AUTHORITY_TOKEN_CACHE_DIR` | `~/.stellaops/tokens` | Directory that persists cached tokens |
| `Authority.Resilience.EnableRetries` | `STELLAOPS_AUTHORITY_ENABLE_RETRIES` | `true` | Toggle Polly retry handler for Authority HTTP calls |
| `Authority.Resilience.RetryDelays` | `STELLAOPS_AUTHORITY_RETRY_DELAYS` | `1s,2s,5s` | Comma- or space-separated backoff delays (hh:mm:ss) |
| `Authority.Resilience.AllowOfflineCacheFallback` | `STELLAOPS_AUTHORITY_ALLOW_OFFLINE_CACHE_FALLBACK` | `true` | Allow CLI to reuse cached discovery/JWKS metadata when Authority is offline |
| `Authority.Resilience.OfflineCacheTolerance` | `STELLAOPS_AUTHORITY_OFFLINE_CACHE_TOLERANCE` | `00:10:00` | Additional tolerance window applied to cached metadata |
Example bootstrap:
```bash
export STELLAOPS_BACKEND_URL="http://localhost:5000"
export STELLAOPS_RESULTS_DIRECTORY="$HOME/.stellaops/results"
export STELLAOPS_AUTHORITY_URL="https://authority.local"
export STELLAOPS_AUTHORITY_CLIENT_ID="concelier-cli"
export STELLAOPS_AUTHORITY_CLIENT_SECRET="s3cr3t"
export STELLAOPS_AUTHORITY_SCOPE="concelier.jobs.trigger advisory:ingest advisory:read"
dotnet run --project src/StellaOps.Cli -- db merge
# Acquire a bearer token and confirm cache state
dotnet run --project src/StellaOps.Cli -- auth login
dotnet run --project src/StellaOps.Cli -- auth status
dotnet run --project src/StellaOps.Cli -- auth whoami
```
Refer to `docs/dev/32_AUTH_CLIENT_GUIDE.md` for deeper guidance on tuning retry/offline settings and rollout checklists.
To persist configuration, you can create `stellaops-cli.yaml` next to the binary or
rely on environment variables for ephemeral runners.
---
## 3 · Operating Workflow
1. **Trigger connector fetch stages**
```bash
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage fetch
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage parse
dotnet run --project src/StellaOps.Cli -- db fetch --source osv --stage map
```
Use `--mode resume` when continuing from a previous window:
```bash
dotnet run --project src/StellaOps.Cli -- db fetch --source redhat --stage fetch --mode resume
```
2. **Merge canonical advisories**
```bash
dotnet run --project src/StellaOps.Cli -- db merge
```
3. **Produce exports**
```bash
# JSON tree (vuln-list style)
dotnet run --project src/StellaOps.Cli -- db export --format json
# Trivy DB (delta example)
dotnet run --project src/StellaOps.Cli -- db export --format trivy-db --delta
```
Concelier always produces a deterministic OCI layout. The first run after a clean
bootstrap emits a **full** baseline; subsequent `--delta` runs reuse the previous
baselines blobs when only JSON manifests change. If the exporter detects that a
prior delta is still active (i.e., `LastDeltaDigest` is recorded) it automatically
upgrades the next run to a full export and resets the baseline so operators never
chain deltas indefinitely. The CLI exposes `--publish-full/--publish-delta` (for
ORAS pushes) and `--include-full/--include-delta` (for offline bundles) should you
need to override the defaults interactively.
**Smoke-check delta reuse:** after the first baseline completes, run the export a
second time with `--delta` and verify that the new directory reports `mode=delta`
while reusing the previous layer blob.
```bash
export_root=${CONCELIER_EXPORT_ROOT:-exports/trivy}
base=$(ls -1d "$export_root"/* | sort | tail -n2 | head -n1)
delta=$(ls -1d "$export_root"/* | sort | tail -n1)
jq -r '.mode,.baseExportId' "$delta/metadata.json"
base_manifest=$(jq -r '.manifests[0].digest' "$base/index.json")
delta_manifest=$(jq -r '.manifests[0].digest' "$delta/index.json")
printf 'baseline manifest: %s\ndelta manifest: %s\n' "$base_manifest" "$delta_manifest"
layer_digest=$(jq -r '.layers[0].digest' "$base/blobs/sha256/${base_manifest#sha256:}")
cmp "$base/blobs/sha256/${layer_digest#sha256:}" \
"$delta/blobs/sha256/${layer_digest#sha256:}"
```
`cmp` returning exit code `0` confirms the delta export reuses the baselines
`db.tar.gz` layer instead of rebuilding it.
4. **Manage scanners (optional)**
```bash
dotnet run --project src/StellaOps.Cli -- scanner download --channel stable
dotnet run --project src/StellaOps.Cli -- scan run --entry scanners/latest/Scanner.dll --target ./sboms
dotnet run --project src/StellaOps.Cli -- scan upload --file results/scan-001.json
```
Add `--verbose` to any command for structured console logs. All commands honour
`Ctrl+C` cancellation and exit with non-zero status codes when the backend returns
a problem document.
---
## 4 · Verification Checklist
- Concelier `/health` returns `"status":"healthy"` and Storage bootstrap is marked
complete after startup.
- CLI commands return HTTP 202 with a `Location` header (job tracking URL) when
triggering Concelier jobs.
- Export artefacts are materialised under the configured output directories and
their manifests record digests.
- MongoDB contains the expected `document`, `dto`, `advisory`, and `export_state`
collections after a run.
---
## 5 · Deployment Automation
- Treat `etc/concelier.yaml.sample` as the canonical template. CI/CD should copy it to
the deployment artifact and replace placeholders (DSN, telemetry endpoints, cron
overrides) with environment-specific secrets.
- Keep secret material (Mongo credentials, OTLP tokens) outside of the repository;
inject them via secret stores or pipeline variables at stamp time.
- When building container images, include `trivy-db` (and `oras` if used) so air-gapped
clusters do not need outbound downloads at runtime.
---
## 5 · Next Steps
- Enable authority-backed authentication in non-production first. Set
`authority.enabled: true` while keeping `authority.allowAnonymousFallback: true`
to observe logs, then flip it to `false` before 2025-12-31 UTC to enforce tokens.
- Automate the workflow above via CI/CD (compose stack or Kubernetes CronJobs).
- Pair with the Concelier connector teams when enabling additional sources so their
module-specific requirements are pulled in safely.
---
## 6 · Authority Integration
- Concelier now authenticates callers through StellaOps Authority using OAuth 2.0
resource server flows. Populate the `authority` block in `concelier.yaml`:
```yaml
authority:
enabled: true
allowAnonymousFallback: false # keep true only during the staged rollout window
issuer: "https://authority.example.org"
audiences:
- "api://concelier"
requiredScopes:
- "concelier.jobs.trigger"
clientId: "concelier-jobs"
clientSecretFile: "../secrets/concelier-jobs.secret"
clientScopes:
- "concelier.jobs.trigger"
bypassNetworks:
- "127.0.0.1/32"
- "::1/128"
```
- Store the client secret outside of source control. Either provide it via
`authority.clientSecret` (environment variable `CONCELIER_AUTHORITY__CLIENTSECRET`)
or point `authority.clientSecretFile` to a file mounted at runtime.
- Cron jobs running on the same host can keep using the API thanks to the loopback
bypass mask. Add additional CIDR ranges as needed; every bypass is logged.
- Export the same configuration to Kubernetes or systemd by setting environment
variables such as:
```bash
export CONCELIER_AUTHORITY__ENABLED=true
export CONCELIER_AUTHORITY__ALLOWANONYMOUSFALLBACK=false
export CONCELIER_AUTHORITY__ISSUER="https://authority.example.org"
export CONCELIER_AUTHORITY__CLIENTID="concelier-jobs"
export CONCELIER_AUTHORITY__CLIENTSECRETFILE="/var/run/secrets/concelier/authority-client"
```
```yaml
authority:
enabled: true
allowAnonymousFallback: false # keep true only during the staged rollout window
issuer: "https://authority.example.org"
audiences:
- "api://concelier"
requiredScopes:
- "concelier.jobs.trigger"
- "advisory:read"
- "advisory:ingest"
requiredTenants:
- "tenant-default"
clientId: "concelier-jobs"
clientSecretFile: "../secrets/concelier-jobs.secret"
clientScopes:
- "concelier.jobs.trigger"
- "advisory:read"
- "advisory:ingest"
bypassNetworks:
- "127.0.0.1/32"
- "::1/128"
```
- Store the client secret outside of source control. Either provide it via
`authority.clientSecret` (environment variable `CONCELIER_AUTHORITY__CLIENTSECRET`)
or point `authority.clientSecretFile` to a file mounted at runtime.
- Cron jobs running on the same host can keep using the API thanks to the loopback
bypass mask. Add additional CIDR ranges as needed; every bypass is logged.
- Export the same configuration to Kubernetes or systemd by setting environment
variables such as:
```bash
export CONCELIER_AUTHORITY__ENABLED=true
export CONCELIER_AUTHORITY__ALLOWANONYMOUSFALLBACK=false
export CONCELIER_AUTHORITY__ISSUER="https://authority.example.org"
export CONCELIER_AUTHORITY__CLIENTID="concelier-jobs"
export CONCELIER_AUTHORITY__CLIENTSECRETFILE="/var/run/secrets/concelier/authority-client"
export CONCELIER_AUTHORITY__REQUIREDSCOPES__0="concelier.jobs.trigger"
export CONCELIER_AUTHORITY__REQUIREDSCOPES__1="advisory:read"
export CONCELIER_AUTHORITY__REQUIREDSCOPES__2="advisory:ingest"
export CONCELIER_AUTHORITY__CLIENTSCOPES__0="concelier.jobs.trigger"
export CONCELIER_AUTHORITY__CLIENTSCOPES__1="advisory:read"
export CONCELIER_AUTHORITY__CLIENTSCOPES__2="advisory:ingest"
export CONCELIER_AUTHORITY__REQUIREDTENANTS__0="tenant-default"
```
- CLI commands already pass `Authorization` headers when credentials are supplied.
Configure the CLI with matching Authority settings (`docs/09_API_CLI_REFERENCE.md`)
so that automation can obtain tokens with the same client credentials. Concelier

View File

@@ -36,21 +36,59 @@ Authority persists every issued token in MongoDB so operators can audit or revok
- **Collection:** `authority_tokens`
- **Key fields:**
- `tokenId`, `type` (`access_token`, `refresh_token`, `device_code`, `authorization_code`)
- `subjectId`, `clientId`, ordered `scope` array
- `status` (`valid`, `revoked`, `expired`), `createdAt`, optional `expiresAt`
- `revokedAt`, machine-readable `revokedReason`, optional `revokedReasonDescription`
- `revokedMetadata` (string dictionary for plugin-specific context)
- **Persistence flow:** `PersistTokensHandler` stamps missing JWT IDs, normalises scopes, and stores every principal emitted by OpenIddict.
- **Revocation flow:** `AuthorityTokenStore.UpdateStatusAsync` flips status, records the reason metadata, and is invoked by token revocation handlers and plugin provisioning events (e.g., disabling a user).
- `tokenId`, `type` (`access_token`, `refresh_token`, `device_code`, `authorization_code`)
- `subjectId`, `clientId`, ordered `scope` array
- `tenant` (lower-cased tenant hint from the issuing client, omitted for global clients)
- `status` (`valid`, `revoked`, `expired`), `createdAt`, optional `expiresAt`
- `revokedAt`, machine-readable `revokedReason`, optional `revokedReasonDescription`
- `revokedMetadata` (string dictionary for plugin-specific context)
- **Persistence flow:** `PersistTokensHandler` stamps missing JWT IDs, normalises scopes, and stores every principal emitted by OpenIddict.
- **Revocation flow:** `AuthorityTokenStore.UpdateStatusAsync` flips status, records the reason metadata, and is invoked by token revocation handlers and plugin provisioning events (e.g., disabling a user).
- **Expiry maintenance:** `AuthorityTokenStore.DeleteExpiredAsync` prunes non-revoked tokens past their `expiresAt` timestamp. Operators should schedule this in maintenance windows if large volumes of tokens are issued.
### Expectations for resource servers
Resource servers (Concelier WebService, Backend, Agent) **must not** assume in-memory caches are authoritative. They should:
- cache `/jwks` and `/revocations/export` responses within configured lifetimes;
- honour `revokedReason` metadata when shaping audit trails;
- treat `status != "valid"` or missing tokens as immediate denial conditions.
- honour `revokedReason` metadata when shaping audit trails;
- treat `status != "valid"` or missing tokens as immediate denial conditions.
### Tenant propagation
- Client provisioning (bootstrap or plug-in) accepts a `tenant` hint. Authority normalises the value (`trim().ToLowerInvariant()`) and persists it alongside the registration. Clients without an explicit tenant remain global.
- Issued principals include the `stellaops:tenant` claim. `PersistTokensHandler` mirrors this claim into `authority_tokens.tenant`, enabling per-tenant revocation and reporting.
- Rate limiter metadata now tags requests with `authority.tenant`, unlocking per-tenant throughput metrics and diagnostic filters. Audit events (`authority.client_credentials.grant`, `authority.password.grant`, bootstrap flows) surface the tenant and login attempt documents index on `{tenant, occurredAt}` for quick queries.
- Password grant flows reuse the client registration's tenant and enforce the configured scope allow-list. Requested scopes outside that list (or mismatched tenants) trigger `invalid_scope`/`invalid_client` failures, ensuring cross-tenant access is denied before token issuance.
### Default service scopes
| Client ID | Purpose | Scopes granted | Sender constraint | Tenant |
|----------------------|---------------------------------------|--------------------------------------|-------------------|-----------------|
| `concelier-ingest` | Concelier raw advisory ingestion | `advisory:ingest`, `advisory:read` | `dpop` | `tenant-default` |
| `excitor-ingest` | Excititor raw VEX ingestion | `vex:ingest`, `vex:read` | `dpop` | `tenant-default` |
| `aoc-verifier` | Aggregation-only contract verification | `aoc:verify` | `dpop` | `tenant-default` |
| `cartographer-service` | Graph snapshot construction | `graph:write`, `graph:read` | `dpop` | `tenant-default` |
| `graph-api` | Graph Explorer gateway/API | `graph:read`, `graph:export`, `graph:simulate` | `dpop` | `tenant-default` |
| `vuln-explorer-ui` | Vuln Explorer UI/API | `vuln:read` | `dpop` | `tenant-default` |
> **Secret hygiene (20251027):** The repository includes a convenience `etc/authority.yaml` for compose/helm smoke tests. Every entrys `secretFile` points to `etc/secrets/*.secret`, which ship with `*-change-me` placeholders—replace them with strong values (and wire them through your vault/secret manager) before issuing tokens in CI, staging, or production.
These registrations are provided as examples in `etc/authority.yaml.sample`. Clone them per tenant (for example `concelier-tenant-a`, `concelier-tenant-b`) so tokens remain tenant-scoped by construction.
Graph Explorer introduces dedicated scopes: `graph:write` for Cartographer build jobs, `graph:read` for query/read operations, `graph:export` for long-running export downloads, and `graph:simulate` for what-if overlays. Assign only the scopes a client actually needs to preserve least privilege—UI-facing clients should typically request read/export access, while background services (Cartographer, Scheduler) require write privileges.
#### Least-privilege guidance for graph clients
- **Service identities** The Cartographer worker should request `graph:write` and `graph:read` only; grant `graph:simulate` exclusively to pipeline automation that invokes Policy Engine overlays on demand. Keep `graph:export` scoped to API gateway components responsible for streaming GraphML/JSONL artifacts. Authority enforces this by rejecting `graph:write` tokens that lack `properties.serviceIdentity: cartographer`.
- **Tenant propagation** Every client registration must pin a `tenant` hint. Authority normalises the value and stamps it into issued tokens (`stellaops:tenant`) so downstream services (Scheduler, Graph API, Console) can enforce tenant isolation without custom headers. Graph scopes (`graph:read`, `graph:write`, `graph:export`, `graph:simulate`) are denied if the tenant hint is missing.
- **SDK alignment** Use the generated `StellaOpsScopes` constants in service code to request graph scopes. Hard-coded strings risk falling out of sync as additional graph capabilities are added.
- **DPOP for automation** Maintain sender-constrained (`dpop`) flows for Cartographer and Scheduler to limit reuse of access tokens if a build host is compromised. For UI-facing tokens, pair `graph:read`/`graph:export` with short lifetimes and enforce refresh-token rotation at the gateway.
#### Vuln Explorer permalinks
- **Scope** `vuln:read` authorises Vuln Explorer to fetch advisory/linkset evidence and issue shareable links. Assign it only to front-end/API clients that must render vulnerability details.
- **Signed links** `POST /permalinks/vuln` (requires `vuln:read`) accepts `{ "tenant": "tenant-a", "resourceKind": "vulnerability", "state": { ... }, "expiresInSeconds": 86400 }` and returns a JWT (`token`) plus `issuedAt`/`expiresAt`. The token embeds the tenant, requested state, and `vuln:read` scope and is signed with the same Authority signing keys published via `/jwks`.
- **Validation** Resource servers verify the permalink using cached JWKS: check signature, ensure the tenant matches the current request context, honour the expiry, and enforce the contained `vuln:read` scope. The payloads `resource.state` block is opaque JSON so UIs can round-trip filters/search terms without new schema changes.
## 4. Revocation Pipeline
Authority centralises revocation in `authority_revocations` with deterministic categories:
@@ -119,18 +157,38 @@ Authority signs revocation bundles and publishes JWKS entries via the new signin
The rotation API leverages the same cryptography abstractions as revocation signing; no restart is required and the previous key is marked `retired` but kept available for verification.
## 6. Bootstrap & Administrative Endpoints
Administrative APIs live under `/internal/*` and require the bootstrap API key plus rate-limiter compliance.
| Endpoint | Method | Description |
| --- | --- | --- |
| `/internal/users` | `POST` | Provision initial administrative accounts through the registered password-capable plug-in. Emits structured audit events. |
| `/internal/clients` | `POST` | Provision OAuth clients (client credentials / device code). |
| `/internal/revocations/export` | `GET` | Export revocation bundle + detached JWS + digest. |
| `/internal/signing/rotate` | `POST` | Promote a new signing key (see SOP above). Request body accepts `keyId`, `location`, optional `source`, `algorithm`, `provider`, and metadata. |
All administrative calls emit `AuthEventRecord` entries enriched with correlation IDs, PII tags, and network metadata for offline SOC ingestion.
## 7. Configuration Reference
Administrative APIs live under `/internal/*` and require the bootstrap API key plus rate-limiter compliance.
| Endpoint | Method | Description |
| --- | --- | --- |
| `/internal/users` | `POST` | Provision initial administrative accounts through the registered password-capable plug-in. Emits structured audit events. |
| `/internal/clients` | `POST` | Provision OAuth clients (client credentials / device code). |
| `/internal/revocations/export` | `GET` | Export revocation bundle + detached JWS + digest. |
| `/internal/signing/rotate` | `POST` | Promote a new signing key (see SOP above). Request body accepts `keyId`, `location`, optional `source`, `algorithm`, `provider`, and metadata. |
All administrative calls emit `AuthEventRecord` entries enriched with correlation IDs, PII tags, and network metadata for offline SOC ingestion.
> **Tenant hint:** include a `tenant` entry inside `properties` when bootstrapping clients. Authority normalises the value, stores it on the registration, and stamps future tokens/audit events with the tenant.
### Bootstrap client example
```jsonc
POST /internal/clients
{
"clientId": "concelier",
"confidential": true,
"displayName": "Concelier Backend",
"allowedGrantTypes": ["client_credentials"],
"allowedScopes": ["concelier.jobs.trigger", "advisory:ingest", "advisory:read"],
"properties": {
"tenant": "tenant-default"
}
}
```
For environments with multiple tenants, repeat the call per tenant-specific client (e.g. `concelier-tenant-a`, `concelier-tenant-b`) or append suffixes to the client identifier.
## 7. Configuration Reference
| Section | Key | Description | Notes |
| --- | --- | --- | --- |
@@ -181,11 +239,45 @@ Authority now understands two flavours of sender-constrained OAuth clients:
- Certificate bindings now act as an allow-list: Authority verifies thumbprint, subject, issuer, serial number, and any declared SAN values against the presented certificate, with rotation grace windows applied to `notBefore/notAfter`. Operators can enforce subject regexes, SAN type allow-lists (`dns`, `uri`, `ip`), trusted certificate authorities, and rotation grace via `security.senderConstraints.mtls.*`.
Both modes persist additional metadata in `authority_tokens`: `senderConstraint` records the enforced policy, while `senderKeyThumbprint` stores the DPoP JWK thumbprint or mTLS certificate hash captured at issuance. Downstream services can rely on these fields (and the corresponding `cnf` claim) when auditing offline copies of the token store.
## 8. Offline & Sovereign Operation
- **No outbound dependencies:** Authority only contacts MongoDB and local plugins. Discovery and JWKS are cached by clients with offline tolerances (`AllowOfflineCacheFallback`, `OfflineCacheTolerance`). Operators should mirror these responses for air-gapped use.
- **Structured logging:** Every revocation export, signing rotation, bootstrap action, and token issuance emits structured logs with `traceId`, `client_id`, `subjectId`, and `network.remoteIp` where applicable. Mirror logs to your SIEM to retain audit trails without central connectivity.
- **Determinism:** Sorting rules in token and revocation exports guarantee byte-for-byte identical artefacts given the same datastore state. Hashes and signatures remain stable across machines.
### 7.2 Policy Engine clients & scopes
Policy Engine v2 introduces dedicated scopes and a service identity that materialises effective findings. Configure Authority as follows when provisioning policy clients:
| Client | Scopes | Notes |
| --- | --- | --- |
| `policy-engine` (service) | `policy:run`, `findings:read`, `effective:write` | Must include `properties.serviceIdentity: policy-engine` and a tenant. Authority rejects `effective:write` tokens without the marker or tenant. |
| `policy-cli` / automation | `policy:write`, `policy:submit`, `policy:run`, `findings:read` | Keep scopes minimal; only trusted automation should add `policy:approve`/`policy:activate`. |
| UI/editor sessions | `policy:read`, `policy:write`, `policy:simulate` (+ reviewer/approver scopes as appropriate) | Issue tenant-specific clients so audit and rate limits remain scoped. |
Sample YAML entry:
```yaml
- clientId: "policy-engine"
displayName: "Policy Engine Service"
grantTypes: [ "client_credentials" ]
audiences: [ "api://policy-engine" ]
scopes: [ "policy:run", "findings:read", "effective:write" ]
tenant: "tenant-default"
properties:
serviceIdentity: "policy-engine"
senderConstraint: "dpop"
auth:
type: "client_secret"
secretFile: "../secrets/policy-engine.secret"
```
Compliance checklist:
- [ ] `policy-engine` client includes `properties.serviceIdentity: policy-engine` and a tenant hint; logins missing either are rejected.
- [ ] Non-service clients omit `effective:write` and receive only the scopes required for their role (`policy:write`, `policy:submit`, `policy:approve`, `policy:activate`, etc.).
- [ ] Approval/activation workflows use identities distinct from authoring identities; tenants are provisioned per client to keep telemetry segregated.
- [ ] Operators document reviewer assignments and incident procedures alongside `/docs/security/policy-governance.md` and archive policy evidence bundles (`stella policy bundle export`) with each release.
## 8. Offline & Sovereign Operation
- **No outbound dependencies:** Authority only contacts MongoDB and local plugins. Discovery and JWKS are cached by clients with offline tolerances (`AllowOfflineCacheFallback`, `OfflineCacheTolerance`). Operators should mirror these responses for air-gapped use.
- **Structured logging:** Every revocation export, signing rotation, bootstrap action, and token issuance emits structured logs with `traceId`, `client_id`, `subjectId`, and `network.remoteIp` where applicable. Mirror logs to your SIEM to retain audit trails without central connectivity.
- **Determinism:** Sorting rules in token and revocation exports guarantee byte-for-byte identical artefacts given the same datastore state. Hashes and signatures remain stable across machines.
## 9. Operational Checklist
- [ ] Protect the bootstrap API key and disable bootstrap endpoints (`bootstrap.enabled: false`) once initial setup is complete.

View File

@@ -232,6 +232,101 @@ Images are deduplicated and sorted by digest. Label keys are normalised to lower
```
Metadata keys are lowercased, firstwriter wins (duplicates with different casing are ignored), and optional IDs (`scheduleId`, `runId`) are trimmed when empty. Use the canonical serializer when emitting events so audit digests remain reproducible.
####3.1.5Run Summary (`run_summaries`)
Materialized view powering the Scheduler UI dashboards. Stores the latest roll-up per schedule/tenant, enabling quick “last run” banners and sparkline counters without scanning the full `runs` collection.
```jsonc
{
"tenantId": "tenant-alpha",
"scheduleId": "sch_20251018a",
"updatedAt": "2025-10-18T22:10:10Z",
"lastRun": {
"runId": "run_20251018_0001",
"trigger": "feedser",
"state": "completed",
"createdAt": "2025-10-18T22:03:14Z",
"startedAt": "2025-10-18T22:03:20Z",
"finishedAt": "2025-10-18T22:08:45Z",
"stats": {
"candidates": 1280,
"deduped": 910,
"queued": 0,
"completed": 910,
"deltas": 42,
"newCriticals": 7,
"newHigh": 11,
"newMedium": 18,
"newLow": 6
},
"error": null
},
"recent": [
{
"runId": "run_20251018_0001",
"trigger": "feedser",
"state": "completed",
"createdAt": "2025-10-18T22:03:14Z",
"startedAt": "2025-10-18T22:03:20Z",
"finishedAt": "2025-10-18T22:08:45Z",
"stats": {
"candidates": 1280,
"deduped": 910,
"queued": 0,
"completed": 910,
"deltas": 42,
"newCriticals": 7,
"newHigh": 11,
"newMedium": 18,
"newLow": 6
},
"error": null
},
{
"runId": "run_20251017_0003",
"trigger": "cron",
"state": "error",
"createdAt": "2025-10-17T22:01:02Z",
"startedAt": "2025-10-17T22:01:08Z",
"finishedAt": "2025-10-17T22:04:11Z",
"stats": {
"candidates": 1040,
"deduped": 812,
"queued": 0,
"completed": 640,
"deltas": 18,
"newCriticals": 2,
"newHigh": 4,
"newMedium": 7,
"newLow": 3
},
"error": "scanner timeout"
}
],
"counters": {
"total": 3,
"planning": 0,
"queued": 0,
"running": 0,
"completed": 1,
"error": 1,
"cancelled": 1,
"totalDeltas": 60,
"totalNewCriticals": 9,
"totalNewHigh": 15,
"totalNewMedium": 25,
"totalNewLow": 9
}
}
```
- `_id` combines `tenantId` and `scheduleId` (`tenant:schedule`).
- `recent` contains the 20 most recent runs ordered by `createdAt` (UTC). Updates replace the existing entry for a run to respect state transitions.
- `counters` aggregate over the retained window (20 runs) for quick trend indicators. Totals are recomputed after every update.
- Schedulers should call the projection service after every run state change so the cache mirrors planner/runner progress.
Sample file: `samples/api/scheduler/run-summary.json`.
---

View File

@@ -1,170 +1,170 @@
#12 - Performance Workbook
*Purpose* define **repeatable, datadriven** benchmarks that guard StellaOps core pledge:
> *“P95 vulnerability feedback in ≤5seconds.”*
---
##0Benchmark Scope
| Area | Included | Excluded |
|------------------|----------------------------------|---------------------------|
| SBOMfirst scan | Trivy engine w/ warmed DB | Full image unpack ≥300MB |
| Delta SBOM ⭑ | Missinglayer lookup & merge | Multiarch images |
| Policy eval ⭑ | YAML → JSON → rule match | Rego (until GA) |
| Feed merge | NVD JSON 20232025 | GHSA GraphQL (plugin) |
| Quota waitpath | 5s softwait, 60s hardwait behaviour | Paid tiers (unlimited) |
| API latency | REST `/scan`, `/layers/missing` | UI SPA calls |
⭑ = new in July2025.
---
##1Hardware Baseline (Reference Rig)
| Element | Spec |
|-------------|------------------------------------|
| CPU | 8vCPU (Intel IceLake equiv.) |
| Memory | 16GiB |
| Disk | NVMe SSD, 3GB/s R/W |
| Network | 1Gbit virt. switch |
| Container | Docker 25.0 + overlay2 |
| OS | Ubuntu 22.04LTS (kernel 6.8) |
*All P95 targets assume a **singlenode** deployment on this rig unless stated.*
---
##2Phase Targets & Gates
| Phase (ID) | Target P95 | Gate (CI) | Rationale |
|-----------------------|-----------:|-----------|----------------------------------------|
| **SBOM_FIRST** | ≤5s | `hard` | Core UX promise. |
| **IMAGE_UNPACK** | ≤10s | `soft` | Fallback path for legacy flows. |
| **DELTA_SBOM** ⭑ | ≤1s | `hard` | Needed to stay sub5s for big bases. |
| **POLICY_EVAL** ⭑ | ≤50ms | `hard` | Keeps gate latency invisible to users. |
| **QUOTA_WAIT** ⭑ | *soft*5s<br>*hard*60s | `hard` | Ensures graceful Freetier throttling. |
| **SCHED_RESCAN** | ≤30s | `soft` | Nightly batch not userfacing. |
| **FEED_MERGE** | ≤60s | `soft` | Offpeak cron @ 01:00. |
| **API_P95** | ≤200ms | `hard` | UI snappiness. |
*Gate* legend `hard`: break CI if regression>3×target,
`soft`: raise warning & issue ticket.
---
##3Test Harness
#12 - Performance Workbook
*Purpose* define **repeatable, datadriven** benchmarks that guard StellaOps core pledge:
> *“P95 vulnerability feedback in ≤5seconds.”*
---
##0Benchmark Scope
| Area | Included | Excluded |
|------------------|----------------------------------|---------------------------|
| SBOMfirst scan | Trivy engine w/ warmed DB | Full image unpack ≥300MB |
| Delta SBOM ⭑ | Missinglayer lookup & merge | Multiarch images |
| Policy eval ⭑ | YAML → JSON → rule match | Rego (until GA) |
| Feed merge | NVD JSON 20232025 | GHSA GraphQL (plugin) |
| Quota waitpath | 5s softwait, 60s hardwait behaviour | Paid tiers (unlimited) |
| API latency | REST `/scan`, `/layers/missing` | UI SPA calls |
⭑ = new in July2025.
---
##1Hardware Baseline (Reference Rig)
| Element | Spec |
|-------------|------------------------------------|
| CPU | 8vCPU (Intel IceLake equiv.) |
| Memory | 16GiB |
| Disk | NVMe SSD, 3GB/s R/W |
| Network | 1Gbit virt. switch |
| Container | Docker 25.0 + overlay2 |
| OS | Ubuntu 22.04LTS (kernel 6.8) |
*All P95 targets assume a **singlenode** deployment on this rig unless stated.*
---
##2Phase Targets & Gates
| Phase (ID) | Target P95 | Gate (CI) | Rationale |
|-----------------------|-----------:|-----------|----------------------------------------|
| **SBOM_FIRST** | ≤5s | `hard` | Core UX promise. |
| **IMAGE_UNPACK** | ≤10s | `soft` | Fallback path for legacy flows. |
| **DELTA_SBOM** ⭑ | ≤1s | `hard` | Needed to stay sub5s for big bases. |
| **POLICY_EVAL** ⭑ | ≤50ms | `hard` | Keeps gate latency invisible to users. |
| **QUOTA_WAIT** ⭑ | *soft*5s<br>*hard*60s | `hard` | Ensures graceful Freetier throttling. |
| **SCHED_RESCAN** | ≤30s | `soft` | Nightly batch not userfacing. |
| **FEED_MERGE** | ≤60s | `soft` | Offpeak cron @ 01:00. |
| **API_P95** | ≤200ms | `hard` | UI snappiness. |
*Gate* legend `hard`: break CI if regression>3×target,
`soft`: raise warning & issue ticket.
---
##3Test Harness
* **Runner** `perf/run.sh`, accepts `--phase` and `--samples`.
* **Language analyzers microbench** `dotnet run --project bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj -- --repo-root . --out bench/Scanner.Analyzers/baseline.csv --json out/bench/scanner-analyzers/latest.json --prom out/bench/scanner-analyzers/latest.prom --commit $(git rev-parse HEAD)` produces CSV + JSON + Prometheus gauges for analyzer scenarios. Runs fail if `max_ms` regresses ≥20% against `baseline.csv` or if thresholds are exceeded.
* **Language analyzers microbench** `dotnet run --project src/StellaOps.Bench/Scanner.Analyzers/StellaOps.Bench.ScannerAnalyzers/StellaOps.Bench.ScannerAnalyzers.csproj -- --repo-root . --out src/StellaOps.Bench/Scanner.Analyzers/baseline.csv --json out/bench/scanner-analyzers/latest.json --prom out/bench/scanner-analyzers/latest.prom --commit $(git rev-parse HEAD)` produces CSV + JSON + Prometheus gauges for analyzer scenarios. Runs fail if `max_ms` regresses ≥20% against `baseline.csv` or if thresholds are exceeded.
* **Metrics** Prometheus + `jq` extracts; aggregated via `scripts/aggregate.ts`.
* **CI** GitLab CI job *benchmark* publishes JSON to `benchartifacts/`.
* **Visualisation** Grafana dashboard *StellaPerf* (provisioned JSON).
> **Note** harness mounts `/var/cache/trivy` tmpfs to avoid disk noise.
---
##4Current Results (July2025)
| Phase | Samples | Mean (s) | P95 (s) | Target OK? |
|---------------|--------:|---------:|--------:|-----------:|
| SBOM_FIRST | 100 | 3.7 | 4.9 | ✅ |
| IMAGE_UNPACK | 50 | 6.4 | 9.2 | ✅ |
| **DELTA_SBOM**| 100 | 0.46 | 0.83 | ✅ |
| **POLICY_EVAL** | 1000 | 0.021 | 0.041 | ✅ |
| **QUOTA_WAIT** | 80 | 4.0* | 4.9* | ✅ |
| SCHED_RESCAN | 10 | 18.3 | 24.9 | ✅ |
| FEED_MERGE | 3 | 38.1 | 41.0 | ✅ |
| API_P95 | 20000 | 0.087 | 0.143 | ✅ |
*Data files:* `bench-artifacts/20250714/phasestats.json`.
---
##5ΔSBOM MicroBenchmark Detail
### 5.1 Scenario
1. Base image `python:3.12-slim` already scanned (all layers cached).
2. Application layer (`COPY . /app`) triggers new digest.
3. `Stella CLI` lists **7** layers, backend replies *6 hit*, *1 miss*.
4. Builder scans **only 1 layer** (~9MiB, 217files) & uploads delta.
### 5.2 Key Timings
| Step | Time (ms) |
|---------------------|----------:|
| `/layers/missing` | 13 |
| Trivy single layer | 655 |
| Upload delta blob | 88 |
| Backend merge + CVE | 74 |
| **Total walltime** | **830ms** |
---
##6Quota WaitPath Benchmark Detail
###6.1Scenario
1. Freetier token reaches **scan #200** dashboard shows yellow banner.
###6.2 Key Timings
| Step | Time (ms) |
|------------------------------------|----------:|
| `/quota/check` Redis LUA INCR | 0.8 |
| Soft wait sleep (server) | 5000 |
| Hard wait sleep (server) | 60000 |
| Endtoend walltime (softhit) | 5003 |
| Endtoend walltime (hardhit) | 60004 |
---
##7Policy Eval Bench
### 7.1 Setup
* Policy YAML: **28** rules, mix severity & package conditions.
* Input: scan result JSON with **1026** findings.
* Evaluator: custom rules engine (Go structs → map lookups).
### 7.2 Latency Histogram
```
010ms ▇▇▇▇▇▇▇▇▇▇ 38%
1020ms ▇▇▇▇▇▇▇▇▇▇ 42%
2040ms ▇▇▇▇▇▇ 17%
4050ms ▇ 3%
```
P99=48ms. Meets 50ms gate.
---
##8Trend Snapshot
> **Note** harness mounts `/var/cache/trivy` tmpfs to avoid disk noise.
---
##4Current Results (July2025)
| Phase | Samples | Mean (s) | P95 (s) | Target OK? |
|---------------|--------:|---------:|--------:|-----------:|
| SBOM_FIRST | 100 | 3.7 | 4.9 | ✅ |
| IMAGE_UNPACK | 50 | 6.4 | 9.2 | ✅ |
| **DELTA_SBOM**| 100 | 0.46 | 0.83 | ✅ |
| **POLICY_EVAL** | 1000 | 0.021 | 0.041 | ✅ |
| **QUOTA_WAIT** | 80 | 4.0* | 4.9* | ✅ |
| SCHED_RESCAN | 10 | 18.3 | 24.9 | ✅ |
| FEED_MERGE | 3 | 38.1 | 41.0 | ✅ |
| API_P95 | 20000 | 0.087 | 0.143 | ✅ |
*Data files:* `bench-artifacts/20250714/phasestats.json`.
---
##5ΔSBOM MicroBenchmark Detail
### 5.1 Scenario
1. Base image `python:3.12-slim` already scanned (all layers cached).
2. Application layer (`COPY . /app`) triggers new digest.
3. `Stella CLI` lists **7** layers, backend replies *6 hit*, *1 miss*.
4. Builder scans **only 1 layer** (~9MiB, 217files) & uploads delta.
### 5.2 Key Timings
| Step | Time (ms) |
|---------------------|----------:|
| `/layers/missing` | 13 |
| Trivy single layer | 655 |
| Upload delta blob | 88 |
| Backend merge + CVE | 74 |
| **Total walltime** | **830ms** |
---
##6Quota WaitPath Benchmark Detail
###6.1Scenario
1. Freetier token reaches **scan #200** dashboard shows yellow banner.
###6.2 Key Timings
| Step | Time (ms) |
|------------------------------------|----------:|
| `/quota/check` Redis LUA INCR | 0.8 |
| Soft wait sleep (server) | 5000 |
| Hard wait sleep (server) | 60000 |
| Endtoend walltime (softhit) | 5003 |
| Endtoend walltime (hardhit) | 60004 |
---
##7Policy Eval Bench
### 7.1 Setup
* Policy YAML: **28** rules, mix severity & package conditions.
* Input: scan result JSON with **1026** findings.
* Evaluator: custom rules engine (Go structs → map lookups).
### 7.2 Latency Histogram
```
010ms ▇▇▇▇▇▇▇▇▇▇ 38%
1020ms ▇▇▇▇▇▇▇▇▇▇ 42%
2040ms ▇▇▇▇▇▇ 17%
4050ms ▇ 3%
```
P99=48ms. Meets 50ms gate.
---
##8Trend Snapshot
![Perf trend sparkline placeholder](perftrend.svg)
> **Grafana/Alerting** Import `docs/ops/scanner-analyzers-grafana-dashboard.json` and point it at the Prometheus datasource storing `scanner_analyzer_bench_*` metrics. Configure an alert on `scanner_analyzer_bench_regression_ratio` ≥1.20 (default limit); the bundled Stat panel surfaces breached scenarios (non-zero values). On-call runbook: `docs/ops/scanner-analyzers-operations.md`.
_Plot generated weekly by `scripts/updatetrend.py`; shows last 12 weeks P95 per phase._
---
##9Action Items
1. **Image Unpack** Evaluate zstd for layer decompress; aim to shave 1s.
2. **Feed Merge** Parallelise regional XML feed parse (plugin) once stable.
3. **Rego Support** Prototype OPA sidecar; target ≤100ms eval.
4. **Concurrency** Stresstest 100rps on 4node Redis cluster (Q42025).
---
##10Change Log
| Date | Note |
|------------|-------------------------------------------------------------------------|
| 20250714 | Added ΔSBOM & Policy Eval phases; updated targets & current results. |
| 20250712 | First public workbook (SBOMfirst, imageunpack, feed merge). |
---
_Plot generated weekly by `scripts/updatetrend.py`; shows last 12 weeks P95 per phase._
---
##9Action Items
1. **Image Unpack** Evaluate zstd for layer decompress; aim to shave 1s.
2. **Feed Merge** Parallelise regional XML feed parse (plugin) once stable.
3. **Rego Support** Prototype OPA sidecar; target ≤100ms eval.
4. **Concurrency** Stresstest 100rps on 4node Redis cluster (Q42025).
---
##10Change Log
| Date | Note |
|------------|-------------------------------------------------------------------------|
| 20250714 | Added ΔSBOM & Policy Eval phases; updated targets & current results. |
| 20250712 | First public workbook (SBOMfirst, imageunpack, feed merge). |
---

View File

@@ -65,6 +65,9 @@ graph LR
`ops/devops/release/build_release.py` to build multi-arch images, attach
CycloneDX SBOMs and SLSA provenance with Cosign, and emit
`out/release/release.yaml` for downstream packaging (Helm, Compose, Offline Kit).
The `build-test-deploy` workflow also runs
`python ops/devops/release/test_verify_release.py` so release verifier
regressions fail fast during every CI pass.
---
@@ -100,11 +103,12 @@ ouk fetch \
--sign cosign.key
```
###4.2PipelineHook
* Runs on **first Friday** each month (cron).
* Generates tarball, signs it, uploads to **GitLab Release asset**.
* SHA256 + signature published alongside.
###4.2PipelineHook
* Runs on **first Friday** each month (cron).
* Generates tarball, signs it, uploads to **GitLab Release asset**.
* SHA256 + signature published alongside.
* Release job must emit `out/release/debug/` with `debug-manifest.json` and `.sha256` so `ops/offline-kit/mirror_debug_store.py` can mirror symbols into the Offline Kit (see `DEVOPS-REL-17-004`).
###4.3ActivationFlow (runtime)
@@ -123,12 +127,13 @@ CI job fails if token expiry <29days (guard against stale caches).
##5Artifact Signing & Transparency
| Artefact | Signer | Tool |
| ------------ | --------------- | --------------------- |
| Git tags | GPG (`0x90C4…`) | `git tag -s` |
| Containers | Cosign key pair | `cosign sign` |
| Helm Charts | prov file | `helm package --sign` |
| OUK tarballs | Cosign | `cosign sign-blob` |
| Artefact | Signer | Tool/Notes |
| ------------ | --------------- | ---------------------------------- |
| Git tags | GPG (`0x90C4…`) | `git tag -s` |
| Containers | Cosign key pair | `cosign sign` |
| Helm Charts | prov file | `helm package --sign` |
| OUK tarballs | Cosign | `cosign sign-blob` |
| Debug store | | `debug/debug-manifest.json` hashed |
**Rekor** integration is **TODO** once the internal Rekor mirror is online (`StellaOpsAttestor`) a postpublish job will submit transparency log entries.
@@ -141,9 +146,20 @@ CI job fails if token expiry <29days (guard against stale caches).
3. Tag `git tag -s X.Y.Z -m "Release X.Y.Z"` & push.
4. GitLab CI autopublishes images & charts.
5. Draft GitLab **Release Notes** using `tools/release-notes-gen`.
6. Verify SBOM attachment with `stella sbom verify stella/backend:X.Y.Z`.
7. Smoketest OUK tarball in offline lab.
8. Announce in `#stella-release` Mattermost channel.
6. Verify SBOM attachment with `stella sbom verify stella/backend:X.Y.Z`.
7. Run the release verifier locally if CI isnt available (mirrors the workflow step):
`python ops/devops/release/test_verify_release.py`
8. Mirror the release debug store into the Offline Kit staging tree and re-check the manifest:
```bash
./ops/offline-kit/mirror_debug_store.py \
--release-dir out/release \
--offline-kit-dir out/offline-kit
jq '.artifacts | length' out/offline-kit/debug/debug-manifest.json
readelf -n /app/... | grep -i 'Build ID'
```
Validate that the hash from `readelf` matches the `.build-id/<aa>/<rest>.debug` path created by the script.
9. Smoke-test OUK tarball in offline lab.
10. Announce in `#stella-release` Mattermost channel.
---

View File

@@ -147,15 +147,27 @@ If you paste YAML but enable **Strict Mode** (toggle), backend converts to Rego
Lists discovered UI plugins; each can inject routes/panels. Toggle on/off without reload.
###3.6Settings**Quota & Tokens** (new)
* View current **ClientJWT claims** (tier, maxScansPerDay, expiry).
* **Generate Offline Token** adminonly button → POST `/token/offline` (UI wraps the API).
* Upload new token file for manual refresh.
---
##4i18n & l10n
###3.6Settings**Quota & Tokens** (new)
* View current **ClientJWT claims** (tier, maxScansPerDay, expiry).
* **Generate Offline Token** adminonly button → POST `/token/offline` (UI wraps the API).
* Upload new token file for manual refresh.
###3.7Notifications Panel (new)
Route: **`/notify`** (header shortcut “Notify”). The panel now exposes every Notify control-plane primitive without depending on the backend being online.
| Area | What you can do |
| --- | --- |
| **Channels** | Create/edit Slack/Teams/Email/Webhook channels, toggle enablement, maintain labels/metadata, and execute **test send** previews. Channel health cards show mocked status + trace IDs so ops can validate wiring before Notify.WebService is reachable. |
| **Rules** | Manage routing rules (matchers, severity gates, throttles/digests, locale hints). A single-action form keeps Signal-style configuration quick while mirroring Notify schema (`match`, `actions[]`). |
| **Deliveries** | Browsable ledger with status filter (All/Sent/Failed/Throttled/…​), showing targets, kinds, and timestamps so operators confirm noise controls. |
The component leans on the mocked Notify API service in `src/app/testing/mock-notify-api.service.ts`, meaning Offline Kit demos run instantly yet the view stays API-shaped (same DTOs + tenant header expectations).
---
##4i18n & l10n
* JSON files under `/locales`.
* Russian (`ru`) ships firstclass, translated security terms align with **GOST RISO/IEC 270022020**.

View File

@@ -1,70 +1,70 @@
# StellaOps — Installation Guide (Docker &AirGap)
<!--
This file is processed by the Eleventy build.
Do **not** hardcode versions or quota numbers; inherit from
docs/_includes/CONSTANTS.md instead.
{{ dotnet }}     → ".NET 10 LTS"
{{ angular }}    → "20"
-->
> **Status — public α not yet published.**
> The commands below will work as soon as the first image is tagged
> `registry.stella-ops.org/stella-ops/stella-ops:0.1.0-alpha`
> (target date: **late2025**). Track progress on the
> [roadmap](/roadmap/).
---
## 0·Prerequisites
| Item | Minimum | Notes |
|------|---------|-------|
| Linux | Ubuntu22.04 LTS / Alma9 | x8664 or arm64 |
| CPU / RAM | 2 vCPU / 2GiB | Laptop baseline |
| Disk | 10GiB SSD | SBOM + vuln DB cache |
| Docker | **Engine25 + Composev2** | `docker -v` |
| TLS | OpenSSL 1.1+  | Selfsigned cert generated at first run |
---
## 1·Connectedhost install (Docker Compose)
```bash
# 1. Make a working directory
mkdir stella && cd stella
# 2. Download the signed Compose bundle + example .env
curl -LO https://get.stella-ops.org/releases/latest/.env.example
curl -LO https://get.stella-ops.org/releases/latest/.env.example.sig
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.infrastructure.yml
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.infrastructure.yml.sig
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.stella-ops.yml
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.stella-ops.yml.sig
# 3. Verify provenance (Cosign public key is stable)
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature .env.example.sig \
.env.example
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature docker-compose.infrastructure.yml.sig \
docker-compose.infrastructure.yml
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature docker-compose.stella-ops.yml.sig \
docker-compose.stella-ops.yml
# 4. Copy .env.example → .env and edit secrets
cp .env.example .env
$EDITOR .env
# 5. Launch databases (MongoDB + Redis)
docker compose --env-file .env -f docker-compose.infrastructure.yml up -d
# StellaOps — Installation Guide (Docker &AirGap)
<!--
This file is processed by the Eleventy build.
Do **not** hardcode versions or quota numbers; inherit from
docs/_includes/CONSTANTS.md instead.
{{ dotnet }}     → ".NET 10 LTS"
{{ angular }}    → "20"
-->
> **Status — public α not yet published.**
> The commands below will work as soon as the first image is tagged
> `registry.stella-ops.org/stella-ops/stella-ops:0.1.0-alpha`
> (target date: **late2025**). Track progress on the
> [roadmap](/roadmap/).
---
## 0·Prerequisites
| Item | Minimum | Notes |
|------|---------|-------|
| Linux | Ubuntu22.04 LTS / Alma9 | x8664 or arm64 |
| CPU / RAM | 2 vCPU / 2GiB | Laptop baseline |
| Disk | 10GiB SSD | SBOM + vuln DB cache |
| Docker | **Engine25 + Composev2** | `docker -v` |
| TLS | OpenSSL 1.1+  | Selfsigned cert generated at first run |
---
## 1·Connectedhost install (Docker Compose)
```bash
# 1. Make a working directory
mkdir stella && cd stella
# 2. Download the signed Compose bundle + example .env
curl -LO https://get.stella-ops.org/releases/latest/.env.example
curl -LO https://get.stella-ops.org/releases/latest/.env.example.sig
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.infrastructure.yml
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.infrastructure.yml.sig
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.stella-ops.yml
curl -LO https://get.stella-ops.org/releases/latest/docker-compose.stella-ops.yml.sig
# 3. Verify provenance (Cosign public key is stable)
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature .env.example.sig \
.env.example
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature docker-compose.infrastructure.yml.sig \
docker-compose.infrastructure.yml
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature docker-compose.stella-ops.yml.sig \
docker-compose.stella-ops.yml
# 4. Copy .env.example → .env and edit secrets
cp .env.example .env
$EDITOR .env
# 5. Launch databases (MongoDB + Redis)
docker compose --env-file .env -f docker-compose.infrastructure.yml up -d
# 6. Launch Stella Ops (first run pulls ~50MB merged vuln DB)
docker compose --env-file .env -f docker-compose.stella-ops.yml up -d
````
@@ -95,7 +95,13 @@ The Concelier container reads configuration from `etc/concelier.yaml` plus
CONCELIER_AUTHORITY__ISSUER="https://authority.internal"
CONCELIER_AUTHORITY__AUDIENCES__0="api://concelier"
CONCELIER_AUTHORITY__REQUIREDSCOPES__0="concelier.jobs.trigger"
CONCELIER_AUTHORITY__REQUIREDSCOPES__1="advisory:read"
CONCELIER_AUTHORITY__REQUIREDSCOPES__2="advisory:ingest"
CONCELIER_AUTHORITY__REQUIREDTENANTS__0="tenant-default"
CONCELIER_AUTHORITY__CLIENTID="concelier-jobs"
CONCELIER_AUTHORITY__CLIENTSCOPES__0="concelier.jobs.trigger"
CONCELIER_AUTHORITY__CLIENTSCOPES__1="advisory:read"
CONCELIER_AUTHORITY__CLIENTSCOPES__2="advisory:ingest"
CONCELIER_AUTHORITY__CLIENTSECRETFILE="/run/secrets/concelier_authority_client"
CONCELIER_AUTHORITY__BYPASSNETWORKS__0="127.0.0.1/32"
CONCELIER_AUTHORITY__BYPASSNETWORKS__1="::1/128"
@@ -132,53 +138,53 @@ The Concelier container reads configuration from `etc/concelier.yaml` plus
---
## 2·Optional: request a free quota token
Anonymous installs allow **{{ quota\_anon }} scans per UTC day**.
Email `token@stella-ops.org` to receive a signed JWT that raises the limit to
**{{ quota\_token }} scans/day**. Insert it into `.env`:
```bash
STELLA_JWT="pastetokenhere"
docker compose --env-file .env -f docker-compose.stella-ops.yml \
exec stella-ops stella set-jwt "$STELLA_JWT"
```
> The UI shows a reminder at 200 scans and throttles above the limit but will
> **never block** your pipeline.
---
## 3·Airgapped install (Offline Update Kit)
When running on an isolated network use the **Offline Update Kit (OUK)**:
```bash
# Download & verify on a connected host
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-v0.1a.tgz
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-v0.1a.tgz.sig
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature stella-ops-offline-kit-v0.1a.tgz.sig \
stella-ops-offline-kit-v0.1a.tgz
# Transfer → airgap → import
docker compose --env-file .env -f docker-compose.stella-ops.yml \
exec stella admin import-offline-usage-kit stella-ops-offline-kit-v0.1a.tgz
```
*Import is atomic; no service downtime.*
For details see the dedicated [Offline Kit guide](/offline/).
---
## 4·Next steps
* **5min QuickStart:** `/quickstart/`
* **CI recipes:** `docs/ci/20_CI_RECIPES.md`
* **Plugin SDK:** `/plugins/`
---
*Generated {{ "now" | date: "%Y%m%d" }} — build tags inserted at render time.*
Anonymous installs allow **{{ quota\_anon }} scans per UTC day**.
Email `token@stella-ops.org` to receive a signed JWT that raises the limit to
**{{ quota\_token }} scans/day**. Insert it into `.env`:
```bash
STELLA_JWT="pastetokenhere"
docker compose --env-file .env -f docker-compose.stella-ops.yml \
exec stella-ops stella set-jwt "$STELLA_JWT"
```
> The UI shows a reminder at 200 scans and throttles above the limit but will
> **never block** your pipeline.
---
## 3·Airgapped install (Offline Update Kit)
When running on an isolated network use the **Offline Update Kit (OUK)**:
```bash
# Download & verify on a connected host
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-v0.1a.tgz
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-v0.1a.tgz.sig
cosign verify-blob \
--key https://stella-ops.org/keys/cosign.pub \
--signature stella-ops-offline-kit-v0.1a.tgz.sig \
stella-ops-offline-kit-v0.1a.tgz
# Transfer → airgap → import
docker compose --env-file .env -f docker-compose.stella-ops.yml \
exec stella admin import-offline-usage-kit stella-ops-offline-kit-v0.1a.tgz
```
*Import is atomic; no service downtime.*
For details see the dedicated [Offline Kit guide](/offline/).
---
## 4·Next steps
* **5min QuickStart:** `/quickstart/`
* **CI recipes:** `docs/ci/20_CI_RECIPES.md`
* **Plugin SDK:** `/plugins/`
---
*Generated {{ "now" | date: "%Y%m%d" }} — build tags inserted at render time.*

View File

@@ -18,6 +18,8 @@ completely isolated network:
| **Attested manifest** | `offline-manifest.json` + detached JWS covering bundle metadata, signed during export. |
| **Delta patches** | Daily diff bundles keep size \<350MB |
| **Scanner plug-ins** | OS analyzers plus the Node.js, Go, .NET, and Python language analyzers packaged under `plugins/scanner/analyzers/**` with manifests so Workers load deterministically offline. |
| **Debug store** | `.debug` artefacts laid out under `debug/.build-id/<aa>/<rest>.debug` with `debug/debug-manifest.json` mapping build-ids to originating images for symbol retrieval. |
| **Telemetry collector bundle** | `telemetry/telemetry-offline-bundle.tar.gz` plus `.sha256`, containing OTLP collector config, Helm/Compose overlays, and operator instructions. |
**RU BDU note:** ship the official Russian Trusted Root/Sub CA bundle (`certificates/russian_trusted_bundle.pem`) inside the kit so `concelier:httpClients:source.bdu:trustedRootPaths` can resolve it when the service runs in an airgapped network. Drop the most recent `vulxml.zip` alongside the kit if operators need a cold-start cache.
@@ -25,11 +27,53 @@ completely isolated network:
*Scanner core:* C# 12 on **.NET{{ dotnet }}**.
*Imports are idempotent and atomic — no service downtime.*
---
## 1·Download & verify
## 0·Prepare the debug store
Before packaging the Offline Kit, mirror the release debug artefacts (GNU build-id `.debug` files and the associated manifest) into the staging directory:
```bash
./ops/offline-kit/mirror_debug_store.py \
--release-dir out/release \
--offline-kit-dir out/offline-kit
```
The helper copies `debug/.build-id/**`, validates `debug/debug-manifest.json` against its recorded SHA-256, and writes `out/offline-kit/metadata/debug-store.json` with a short summary (platforms, artefact counts, sample build-ids). The command exits non-zero if an artefact referenced by the manifest is missing or has the wrong digest, so run it as part of every kit build.
---
## 0.1·Automated packaging
The packaging workflow is scripted via `ops/offline-kit/build_offline_kit.py`.
It verifies the release artefacts, runs the Python analyzer smoke suite, mirrors the debug store, and emits a deterministic tarball + manifest set.
```bash
python ops/offline-kit/build_offline_kit.py \
--version 2025.10.0 \
--channel edge \
--release-dir out/release \
--staging-dir out/offline-kit/staging \
--output-dir out/offline-kit/dist
# Optional: regenerate the telemetry collector bundle prior to packaging.
python ops/devops/telemetry/package_offline_bundle.py --output out/telemetry/telemetry-offline-bundle.tar.gz
```
Outputs:
- `stella-ops-offline-kit-<version>-<channel>.tar.gz` — bundle (mtime/uid/gid forced to zero for reproducibility)
- `stella-ops-offline-kit-<version>-<channel>.tar.gz.sha256` — bundle digest
- `manifest/offline-manifest.json` + `.sha256` — inventories every file in the bundle
- `<bundle>.metadata.json` — descriptor consumed by the CLI/Console import tooling
- `telemetry/telemetry-offline-bundle.tar.gz` + `.sha256` — packaged OTLP collector assets for environments without upstream access
- `plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/*.sig` (+ `.sha256`) — Cosign signatures for the Python analyzer DLL and manifest
Provide `--cosign-key` / `--cosign-identity-token` (and optional `--cosign-password`) to generate Cosign signatures for both the tarball and manifest.
---
## 1·Download & verify
```bash
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-<DATE>.tgz
curl -LO https://get.stella-ops.org/ouk/stella-ops-offline-kit-<DATE>.tgz.sig
@@ -101,21 +145,21 @@ Example excerpt (2025-10-23 kit) showing the Go and .NET analyzer plug-in payloa
}
{
"name": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/StellaOps.Scanner.Analyzers.Lang.Python.dll",
"sha256": "28b6e06c7cabf3b78f13f801cbb14962093f3d42c4ae9ec01babbcd14cda4644",
"size": 53760,
"capturedAt": "2025-10-23T00:00:00Z"
"sha256": "a4f558f363394096e3dd6263f35b180b93b4112f9cf616c05872da8a8657d518",
"size": 47104,
"capturedAt": "2025-10-26T00:00:00Z"
}
{
"name": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/StellaOps.Scanner.Analyzers.Lang.Python.pdb",
"sha256": "be4e34b4dc9a790fe1299e84213343b7c8ea90a2d22e5d7d1aa7585b8fedc946",
"size": 34516,
"capturedAt": "2025-10-23T00:00:00Z"
"sha256": "ef2ad78bc2cd1d7e99bae000b92357aa9a9c32938501899e9033d001096196d0",
"size": 31896,
"capturedAt": "2025-10-26T00:00:00Z"
}
{
"name": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/manifest.json",
"sha256": "bceea1e7542aae860b0ec5ba7b8b3aa960b21edc4d1efe60afc98ce289341ac3",
"size": 671,
"capturedAt": "2025-10-23T00:00:00Z"
"sha256": "668ad9a1a35485628677b639db4d996d1e25f62021680a81a22482483800e557",
"size": 648,
"capturedAt": "2025-10-26T00:00:00Z"
}
```
@@ -153,6 +197,21 @@ tar -tzf stella-ops-offline-kit-<DATE>.tgz 'plugins/scanner/analyzers/lang/Stell
The manifest lookup above and this `tar` listing should both surface the Go analyzer DLL, PDB, and manifest entries before the kit is promoted.
> **Release guardrail.** The automated release pipeline now publishes the Python plug-in from source and executes `dotnet run --project tools/LanguageAnalyzerSmoke --configuration Release -- --repo-root <checkout>` to validate manifest integrity and cold/warm determinism within the <30s / <5s budgets (differences versus repository goldens are logged for triage). Run `ops/offline-kit/run-python-analyzer-smoke.sh` locally before shipping a refreshed kit if you rebuild artefacts outside CI or when preparing the air-gap bundle.
### Debug store mirror
Offline symbols (`debug/.build-id/**`) must accompany every Offline Kit to keep symbol lookup deterministic. The release workflow is expected to emit `out/release/debug/` containing the build-id tree plus `debug-manifest.json` and its `.sha256` companion. After a release completes:
```bash
python ops/offline-kit/mirror_debug_store.py \
--release-dir out/release \
--offline-dir out/offline-kit \
--summary out/offline-kit/metadata/debug-store.json
```
The script mirrors the debug tree into the Offline Kit staging directory, verifies SHA-256 values against the manifest, and writes a summary under `metadata/debug-store.json` for audit logs. If the release pipeline does not populate `out/release/debug`, the tooling now logs a warning (`DEVOPS-REL-17-004`)—treat it as a build failure and re-run the release once symbol extraction is enabled.
---
## 3·Delta patch workflow

View File

@@ -123,6 +123,23 @@ details // structured conflict explanation / merge reasoning
- Conflict explainers are serialized as deterministic `MergeConflictExplainerPayload` records (type, reason, source ranks, winning values); replay clients can parse the payload to render human-readable rationales without re-computing precedence.
- Concelier.WebService exposes the immutable log via `GET /concelier/advisories/{vulnerabilityKey}/replay[?asOf=UTC_ISO8601]`, returning the latest statements (with hex-encoded hashes) and any conflict explanations for downstream exporters and APIs.
**AdvisoryObservation (new in Sprint 24)**
```
observationId // deterministic id: {tenant}:{source}:{upstreamId}:{revision}
tenant // issuing tenant (lower-case)
source{vendor,stream,api,collectorVersion}
upstream{
upstreamId, documentVersion, contentHash,
fetchedAt, receivedAt, signature{present,format,keyId,signature}}
content{format,specVersion,raw,metadata}
linkset{aliases[], purls[], cpes[], references[{type,url}]}
createdAt // when Concelier recorded the observation
attributes // optional provenance metadata (e.g., batch, connector)
```
The observation is an immutable projection of the raw ingestion document (post provenance validation, pre-merge) that powers LinkNotMerge overlays and Vuln Explorer. Observations live in the `advisory_observations` collection, keyed by tenant + upstream identity. `linkset` provides normalized aliases/PURLs/CPES that downstream services (Graph/Vuln Explorer) join against without triggering merge logic. Concelier.Core exposes strongly-typed models (`AdvisoryObservation`, `AdvisoryObservationLinkset`, etc.) and a Mongo-backed store for filtered queries by tenant/alias; this keeps overlay consumers read-only while preserving AOC guarantees.
**ExportState**
```

View File

@@ -76,8 +76,14 @@ At startup, services **selfadvertise** their semver & channel; the UI surface
* **Unit/integration**: percomponent, plus **endtoend** flows (scan→vex→policy→sign→attest).
* **Perf SLOs**: hot paths (SBOM compose, diff, export) measured against budgets.
* **Security**: dependency audit vs Concelier export; container hardening tests; minimal caps.
* **Analyzer smoke**: restart-time language plug-ins (currently Python) verified via `dotnet run --project tools/LanguageAnalyzerSmoke` to ensure manifest integrity plus cold vs warm determinism (<30s / <5s budgets); the harness logs deviations from repository goldens for follow-up.
* **Canary cohort**: internal staging + selected customers; one week on **edge** before **stable** tag.
### 2.5 Debug-store artefacts
* Every release exports stripped debug information for ELF binaries discovered in service images. Debug files follow the GNU build-id layout (`debug/.build-id/<aa>/<rest>.debug`) and are generated via `objcopy --only-keep-debug`.
* `debug/debug-manifest.json` captures build-id component/image/source mappings with SHA-256 checksums so operators can mirror the directory into debuginfod or offline symbol stores. The manifest (and its `.sha256` companion) ships with every release bundle and Offline Kit.
---
## 3) Distribution & activation
@@ -92,7 +98,7 @@ At startup, services **selfadvertise** their semver & channel; the UI surface
**Gating policy**:
* **Core images** (Authority, Scanner, Concelier, Excititor, Attestor, UI): public **read**.
* **Enterprise addons** (if any) and **prerelease**: private repos via OAuth2 token service.
* **Enterprise addons** (if any) and **prerelease**: private repos via the **Registry Token Service** (`src/StellaOps.Registry.TokenService`) which exchanges Authority-issued OpToks for short-lived Docker registry bearer tokens.
> Monetization lever is **signing** (PoE gate), not image pulls, so the core remains simple to consume.
@@ -105,6 +111,8 @@ At startup, services **selfadvertise** their semver & channel; the UI surface
3. Registry allows pull for the requested repo.
* Tokens are **shortlived** (60300s) and **DPoPbound**.
The token service enforces plan gating via `registry-token.yaml` (see `docs/ops/registry-token-service.md`) and exposes Prometheus metrics (`registry_token_issued_total`, `registry_token_rejected_total`). Revoked licence identifiers halt issuance even when scope requirements are met.
### 3.3 Offline kits (airgapped)
* Tarball per release channel:

View File

@@ -242,12 +242,15 @@ When `scanner.events.enabled = true`, the WebService serialises the signed repor
### 5.5 SBOM assembly & emit
* **Perlayer SBOM fragments**: components introduced by the layer (+ relationships).
* **Per-layer SBOM fragments**: components introduced by the layer (+ relationships).
* **Image SBOMs**: merge fragments; refer back to them via **CycloneDX BOMLink** (or SPDX ExternalRef).
* Emit both **Inventory** & **Usage** views.
* When the native analyzer reports an ELF `buildId`, attach it to component metadata and surface it as `stellaops:buildId` in CycloneDX properties (and diff metadata). This keeps SBOM/diff output in lockstep with runtime events and the debug-store manifest.
* Serialize **CycloneDX JSON** and **CycloneDX Protobuf**; optionally **SPDX 3.0.1 JSON**.
* Build **BOMIndex** sidecar: purl table + roaring bitmap; flag `usedByEntrypoint` components for fast backend joins.
The emitted `buildId` metadata is preserved in component hashes, diff payloads, and `/policy/runtime` responses so operators can pivot from SBOM entries → runtime events → `debug/.build-id/<aa>/<rest>.debug` within the Offline Kit or release bundle.
### 5.6 DSSE attestation (via Signer/Attestor)
* WebService constructs **predicate** with `image_digest`, `stellaops_version`, `license_id`, `policy_digest?` (when emitting **final reports**), timestamps.

View File

@@ -141,10 +141,10 @@ stellaops/zastava-agent # System service; watch Docker events; observer on
* Image signature presence (if cosign policies are local; else ask backend).
* SBOM **referrers** presence (HEAD to registry, optional).
* Rekor UUID known (query Scanner.WebService by image digest).
* **Publish runtime events** to Scanner.WebService `/runtime/events` (batch & compress).
* **Request delta scan** if: no SBOM in catalog OR base differs from known baseline.
### 3.2 Privileges & mounts (K8s)
* **Publish runtime events** to Scanner.WebService `/runtime/events` (batch & compress).
* **Request delta scan** if: no SBOM in catalog OR base differs from known baseline.
### 3.2 Privileges & mounts (K8s)
* **SecurityContext:** `runAsUser: 0`, `readOnlyRootFilesystem: true`, `allowPrivilegeEscalation: false`.
* **Capabilities:** `CAP_SYS_PTRACE` (optional if using nsenter trace), `CAP_DAC_READ_SEARCH`.
@@ -154,12 +154,22 @@ stellaops/zastava-agent # System service; watch Docker events; observer on
* `/run/containerd/containerd.sock` (or CRIO socket)
* `/var/lib/containerd/io.containerd.runtime.v2.task` (rootfs paths & pids)
* **Networking:** clusterinternal egress to Scanner.WebService only.
* **Rate limits:** hard caps for bytes hashed and file count per container to avoid noisy tenants.
### 3.3 Event batching
* Buffer NDJSON; flush by **N events** or **2s**.
* Backpressure: local disk ring buffer (50MB default) if Scanner is temporarily unavailable; drop oldest after cap with **metrics** and **warning** event.
* **Rate limits:** hard caps for bytes hashed and file count per container to avoid noisy tenants.
### 3.3 Event batching
* Buffer NDJSON; flush by **N events** or **2s**.
* Backpressure: local disk ring buffer (50MB default) if Scanner is temporarily unavailable; drop oldest after cap with **metrics** and **warning** event.
### 3.4 Build-id capture & validation workflow
1. When Observer sees a `CONTAINER_START` it dereferences `/proc/<pid>/exe`, extracts the `NT_GNU_BUILD_ID` note, normalises it to lower-case hex, and sends it as `process.buildId` in the runtime envelope.
2. Scanner.WebService persists the observation and propagates the most recent hashes into `/policy/runtime` responses (`buildIds` list) and policy caches consumed by the webhook/CLI.
3. Release engineering copies the matching `.debug` files into the bundle (`debug/.build-id/<aa>/<rest>.debug`) and publishes `debug/debug-manifest.json` with per-hash digests. Offline Kit packaging reuses those artefacts verbatim (see `ops/offline-kit/mirror_debug_store.py`).
4. Operators resolve symbols by either:
* calling `stellaops-cli runtime policy test --image <digest>` to read the current `buildIds` and then fetching the corresponding `.debug` file from the bundle/offline mirror, or
* piping the hash into `debuginfod-find debuginfo <buildId>` when a `debuginfod` service is wired against the mirrored tree.
5. Missing hashes indicate stripped binaries without GNU notes; operators should trigger a rebuild with `-Wl,--build-id` or register a fallback symbol package as described in the runtime operations runbook.
---

View File

@@ -1,77 +1,94 @@
# Stella Ops
> **Selfhosted, SBOMfirst DevSecOps platform offlinefriendly, AGPL3.0, free up to {{ quota_token }} scans per UTC day (soft delay only, never blocks).**
StellaOps lets you discover container vulnerabilities in **<5s** without sending a single byte outside your network.
Everything here is opensource and versioned when you check out a git tag, the docs match the code you are running.
---
## 🚀 Start here (first 60minutes)
| Step | What you will learn | Doc |
|------|--------------------|-----|
| 1 | 90second elevator pitch & pillars | **[What IsStellaOps?](01_WHAT_IS_IT.md)** |
| 2 | Pain points it solves | **[Why DoesItExist?](02_WHY.md)** |
| 3 | Install & run a scan in 10min | **[Install Guide](21_INSTALL_GUIDE.md)** |
| 4 | Components & dataflow | **[HighLevel Architecture](07_HIGH_LEVEL_ARCHITECTURE.md)** |
| 5 | Integrate the CLI / REST API | **[API&CLI Reference](09_API_CLI_REFERENCE.md)** |
| 6 | Vocabulary used throughout the docs | **[Glossary](14_GLOSSARY_OF_TERMS.md)** |
---
## 📚 Complete Table of Contents
<details>
<summary>Click to expand the full docs index</summary>
### Overview
- **01[What IsStellaOps?](01_WHAT_IS_IT.md)**
- **02[Why DoesItExist?](02_WHY.md)**
- **03[Vision & Roadmap](03_VISION.md)**
- **04[Feature Matrix](04_FEATURE_MATRIX.md)**
# Stella Ops
> **Selfhosted, SBOMfirst DevSecOps platform offlinefriendly, AGPL3.0, free up to {{ quota_token }} scans per UTC day (soft delay only, never blocks).**
StellaOps lets you discover container vulnerabilities in **<5s** without sending a single byte outside your network.
Everything here is opensource and versioned when you check out a git tag, the docs match the code you are running.
---
## 🚀 Start here (first 60minutes)
| Step | What you will learn | Doc |
|------|--------------------|-----|
| 1 | 90second elevator pitch & pillars | **[What IsStellaOps?](01_WHAT_IS_IT.md)** |
| 2 | Pain points it solves | **[Why DoesItExist?](02_WHY.md)** |
| 3 | Install & run a scan in 10min | **[Install Guide](21_INSTALL_GUIDE.md)** |
| 4 | Components & dataflow | **[HighLevel Architecture](07_HIGH_LEVEL_ARCHITECTURE.md)** |
| 5 | Integrate the CLI / REST API | **[API&CLI Reference](09_API_CLI_REFERENCE.md)** |
| 6 | Vocabulary used throughout the docs | **[Glossary](14_GLOSSARY_OF_TERMS.md)** |
---
## 📚 Complete Table of Contents
<details>
<summary>Click to expand the full docs index</summary>
### Overview
- **01[What IsStellaOps?](01_WHAT_IS_IT.md)**
- **02[Why DoesItExist?](02_WHY.md)**
- **03[Vision & Roadmap](03_VISION.md)**
- **04[Feature Matrix](04_FEATURE_MATRIX.md)**
### Reference & concepts
- **05[System Requirements Specification](05_SYSTEM_REQUIREMENTS_SPEC.md)**
- **07[HighLevel Architecture](07_HIGH_LEVEL_ARCHITECTURE.md)**
- **08[Architecture Decision Records](adr/index.md)**
- **08Module Architecture Dossiers**
- [Architecture Overview](architecture/overview.md)
- [Scanner](ARCHITECTURE_SCANNER.md)
- [Concelier](ARCHITECTURE_CONCELIER.md)
- [Excititor](ARCHITECTURE_EXCITITOR.md)
- [Excititor Mirrors](ARCHITECTURE_EXCITITOR_MIRRORS.md)
- [Signer](ARCHITECTURE_SIGNER.md)
- [Attestor](ARCHITECTURE_ATTESTOR.md)
- [Authority](ARCHITECTURE_AUTHORITY.md)
- [Notify](ARCHITECTURE_NOTIFY.md)
- [Scheduler](ARCHITECTURE_SCHEDULER.md)
- [CLI](ARCHITECTURE_CLI.md)
- [WebUI](ARCHITECTURE_UI.md)
- [Zastava Runtime](ARCHITECTURE_ZASTAVA.md)
- [Release & Operations](ARCHITECTURE_DEVOPS.md)
- **09[API&CLI Reference](09_API_CLI_REFERENCE.md)**
- [Signer](ARCHITECTURE_SIGNER.md)
- [Attestor](ARCHITECTURE_ATTESTOR.md)
- [Authority](ARCHITECTURE_AUTHORITY.md)
- [Policy Engine](architecture/policy-engine.md)
- [Notify](ARCHITECTURE_NOTIFY.md)
- [Scheduler](ARCHITECTURE_SCHEDULER.md)
- [CLI](ARCHITECTURE_CLI.md)
- [WebUI](ARCHITECTURE_UI.md)
- [Zastava Runtime](ARCHITECTURE_ZASTAVA.md)
- [Release & Operations](ARCHITECTURE_DEVOPS.md)
- **09[API&CLI Reference](09_API_CLI_REFERENCE.md)**
- **10[Plugin SDK Guide](10_PLUGIN_SDK_GUIDE.md)**
- **10[Concelier CLI Quickstart](10_CONCELIER_CLI_QUICKSTART.md)**
- **10[BuildX Generator Quickstart](dev/BUILDX_PLUGIN_QUICKSTART.md)**
- **10[Scanner Cache Configuration](dev/SCANNER_CACHE_CONFIGURATION.md)**
- **30[Excititor Connector Packaging Guide](dev/30_EXCITITOR_CONNECTOR_GUIDE.md)**
- **30Developer Templates**
- [Excititor Connector Skeleton](dev/templates/excititor-connector/)
- **11[Authority Service](11_AUTHORITY.md)**
- **11[Data Schemas](11_DATA_SCHEMAS.md)**
- **12[Performance Workbook](12_PERFORMANCE_WORKBOOK.md)**
- **13[ReleaseEngineering Playbook](13_RELEASE_ENGINEERING_PLAYBOOK.md)**
- **30[Fixture Maintenance](dev/fixtures.md)**
### User & operator guides
- **14[Glossary](14_GLOSSARY_OF_TERMS.md)**
- **15[UI Guide](15_UI_GUIDE.md)**
- **17[Security Hardening Guide](17_SECURITY_HARDENING_GUIDE.md)**
- **18[Coding Standards](18_CODING_STANDARDS.md)**
- **19[TestSuite Overview](19_TEST_SUITE_OVERVIEW.md)**
- **21[Install Guide](21_INSTALL_GUIDE.md)**
- **22[CI/CD Recipes Library](ci/20_CI_RECIPES.md)**
- **23[FAQ](23_FAQ_MATRIX.md)**
- **30[Excititor Connector Packaging Guide](dev/30_EXCITITOR_CONNECTOR_GUIDE.md)**
- **31[Aggregation-Only Contract Reference](ingestion/aggregation-only-contract.md)**
- **30Developer Templates**
- [Excititor Connector Skeleton](dev/templates/excititor-connector/)
- **11[Authority Service](11_AUTHORITY.md)**
- **11[Data Schemas](11_DATA_SCHEMAS.md)**
- **12[Performance Workbook](12_PERFORMANCE_WORKBOOK.md)**
- **13[ReleaseEngineering Playbook](13_RELEASE_ENGINEERING_PLAYBOOK.md)**
- **20[CLI AOC Commands Reference](cli/cli-reference.md)**
- **60[Policy Engine Overview](policy/overview.md)**
- **61[Policy DSL Grammar](policy/dsl.md)**
- **62[Policy Lifecycle & Approvals](policy/lifecycle.md)**
- **63[Policy Runs & Orchestration](policy/runs.md)**
- **64[Policy Engine REST API](api/policy.md)**
- **65[Policy CLI Guide](cli/policy.md)**
- **66[Policy Editor Workspace](ui/policy-editor.md)**
- **67[Policy Observability](observability/policy.md)**
- **68[Policy Governance & Least Privilege](security/policy-governance.md)**
- **69[Policy Examples](examples/policies/README.md)**
- **70[Policy FAQ](faq/policy-faq.md)**
- **71[Policy Run DTOs](../src/StellaOps.Scheduler.Models/docs/SCHED-MODELS-20-001-POLICY-RUNS.md)**
- **30[Fixture Maintenance](dev/fixtures.md)**
### User & operator guides
- **14[Glossary](14_GLOSSARY_OF_TERMS.md)**
- **15[UI Guide](15_UI_GUIDE.md)**
- **16[Console AOC Dashboard](ui/console.md)**
- **17[Security Hardening Guide](17_SECURITY_HARDENING_GUIDE.md)**
- **18[Coding Standards](18_CODING_STANDARDS.md)**
- **19[TestSuite Overview](19_TEST_SUITE_OVERVIEW.md)**
- **21[Install Guide](21_INSTALL_GUIDE.md)**
- **22[CI/CD Recipes Library](ci/20_CI_RECIPES.md)**
- **23[FAQ](23_FAQ_MATRIX.md)**
- **24[Offline Update Kit Admin Guide](24_OFFLINE_KIT.md)**
- **25[Mirror Operations Runbook](ops/concelier-mirror-operations.md)**
- **26[Concelier Apple Connector Operations](ops/concelier-apple-operations.md)**
@@ -80,15 +97,35 @@ Everything here is opensource and versioned— when you check out a git ta
- **29[Concelier CISA ICS Connector Operations](ops/concelier-icscisa-operations.md)**
- **30[Concelier CERT-Bund Connector Operations](ops/concelier-certbund-operations.md)**
- **31[Concelier MSRC Connector AAD Onboarding](ops/concelier-msrc-operations.md)**
- **32[Scanner Analyzer Bench Operations](ops/scanner-analyzers-operations.md)**
- **33[Scanner Artifact Store Migration](ops/scanner-rustfs-migration.md)**
- **34[Zastava Runtime Operations Runbook](ops/zastava-runtime-operations.md)**
- **32[Scanner Analyzer Bench Operations](ops/scanner-analyzers-operations.md)**
- **33[Scanner Artifact Store Migration](ops/scanner-rustfs-migration.md)**
- **34[Zastava Runtime Operations Runbook](ops/zastava-runtime-operations.md)**
- **35[Launch Readiness Checklist](ops/launch-readiness.md)**
- **36[Launch Cutover Runbook](ops/launch-cutover.md)**
- **37[Registry Token Service](ops/registry-token-service.md)**
- **37[Deployment Upgrade & Rollback Runbook](ops/deployment-upgrade-runbook.md)**
- **38[Policy Schema Export Automation](devops/policy-schema-export.md)**
- **40[Observability Guide (AOC)](observability/observability.md)**
- **41[Telemetry Collector Deployment](ops/telemetry-collector.md)**
- **42[Telemetry Storage Deployment](ops/telemetry-storage.md)**
- **43[Authority Scopes & Tenancy](security/authority-scopes.md)**
- **44[Container Deployment (AOC)](deploy/containers.md)**
### Legal & licence
- **32[Legal & Quota FAQ](29_LEGAL_FAQ_QUOTA.md)**
</details>
---
© 2025 StellaOps contributors licensed AGPL3.0orlater
</details>
---
## 🧹 Backlog hygiene
> Imposed rule: Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
- **Aggregation-Only Contract (AOC).** Ingestion services aggregate and link facts only—derived precedence, severity, and safe-fix hints live in Policy overlays and dedicated explorers. Review [`../AGENTS.md`](../AGENTS.md) and the AOC guardrails in [`aoc/aoc-guardrails.md`](aoc/aoc-guardrails.md).
- **Cartographer owns graphs.** SBOM Service emits projections/events; Cartographer (`CARTO-GRAPH-21-00x`) builds graph storage, overlays, and tiles. See `ARCHITECTURE_CONCELIER.md` (Cartographer handshake section) for handoff boundaries.
- **Notifier replaces legacy Notify.** Sprint15 `StellaOps.Notify.*` tasks are frozen; use the Notifications Studio/Notifier backlogs (`NOTIFY-SVC-38..40`, `WEB-NOTIFY-3x-00x`, `CLI-NOTIFY-3x-00x`).
- **Dedicated services for Vuln & Policy.** Vuln Explorer work flows through `src/StellaOps.VulnExplorer.Api`/Console/CLI (Sprint 29); gateway routes proxy only. Policy Engine remains the sole source for precedence/suppression overlays.
- **Cleanup log.** The backlog consolidation summary lives in [`backlog/2025-10-cleanup.md`](backlog/2025-10-cleanup.md).
© 2025 StellaOps contributors licensed AGPL3.0orlater

View File

@@ -16,8 +16,326 @@
| PLATFORM-EVENTS-09-401 | DONE (2025-10-21) | Platform Events Guild | DOCS-EVENTS-09-003 | Embed canonical event samples into contract/integration tests and ensure CI validates payloads against published schemas. | Notify models tests now run schema validation against `docs/events/*.json`, event schemas allow optional `attributes`, and docs capture the new validation workflow. |
| RUNTIME-GUILD-09-402 | DONE (2025-10-19) | Runtime Guild | SCANNER-POLICY-09-107 | Confirm Scanner WebService surfaces `quietedFindingCount` and progress hints to runtime consumers; document readiness checklist. | Runtime verification run captures enriched payload; checklist/doc updates merged; stakeholders acknowledge availability. |
| DOCS-CONCELIER-07-201 | DONE (2025-10-22) | Docs Guild, Concelier WebService | FEEDWEB-DOCS-01-001 | Final editorial review and publish pass for Concelier authority toggle documentation (Quickstart + operator guide). | Review feedback resolved, publish PR merged, release notes updated with documentation pointer. |
| DOCS-RUNTIME-17-004 | TODO | Docs Guild, Runtime Guild | SCANNER-EMIT-17-701, ZASTAVA-OBS-17-005, DEVOPS-REL-17-002 | Document build-id workflows: SBOM exposure, runtime event payloads, debug-store layout, and operator guidance for symbol retrieval. | Architecture + operator docs updated with build-id sections, examples show `readelf` output + debuginfod usage, references linked from Offline Kit/Release guides. |
| DOCS-RUNTIME-17-004 | DONE (2025-10-26) | Docs Guild, Runtime Guild | SCANNER-EMIT-17-701, ZASTAVA-OBS-17-005, DEVOPS-REL-17-002 | Document build-id workflows: SBOM exposure, runtime event payloads (`process.buildId`), Scanner `/policy/runtime` response (`buildIds` list), debug-store layout, and operator guidance for symbol retrieval. | Architecture + operator docs updated with build-id sections (Observer, Scanner, CLI), examples show `readelf` output + debuginfod usage, references linked from Offline Kit/Release guides + CLI help. |
| DOCS-OBS-50-001 | BLOCKED (2025-10-26) | Docs Guild, Observability Guild | TELEMETRY-OBS-50-001 | Publish `/docs/observability/overview.md` introducing scope, imposed rule banner, architecture diagram, and tenant guarantees. | Doc merged with imposed rule banner; diagram committed; cross-links to telemetry stack + evidence locker docs. |
> Blocked: waiting on telemetry core deliverable (TELEMETRY-OBS-50-001) to finalise architecture details and diagrams.
| DOCS-OBS-50-002 | TODO | Docs Guild, Security Guild | TELEMETRY-OBS-50-002 | Author `/docs/observability/telemetry-standards.md` detailing common fields, scrubbing policy, sampling defaults, and redaction override procedure. | Doc merged; imposed rule banner present; examples validated with telemetry fixtures; security review sign-off captured. |
| DOCS-OBS-50-003 | TODO | Docs Guild, Observability Guild | TELEMETRY-OBS-50-001 | Create `/docs/observability/logging.md` covering structured log schema, dos/don'ts, tenant isolation, and copyable examples. | Doc merged with banner; sample logs redacted; lint passes; linked from coding standards. |
| DOCS-OBS-50-004 | TODO | Docs Guild, Observability Guild | TELEMETRY-OBS-50-002 | Draft `/docs/observability/tracing.md` explaining context propagation, async linking, CLI header usage, and sampling strategies. | Doc merged; imposed rule banner included; diagrams updated; references to CLI/Console features added. |
| DOCS-OBS-51-001 | TODO | Docs Guild, DevOps Guild | WEB-OBS-51-001, DEVOPS-OBS-51-001 | Publish `/docs/observability/metrics-and-slos.md` cataloging metrics, SLO targets, burn rate policies, and alert runbooks. | Doc merged with banner; SLO tables verified; alert workflows linked to incident runbook. |
| DOCS-SEC-OBS-50-001 | TODO | Docs Guild, Security Guild | TELEMETRY-OBS-51-002 | Update `/docs/security/redaction-and-privacy.md` to cover telemetry privacy controls, tenant opt-in debug, and imposed rule reminder. | Doc merged; redaction matrix updated; banner present; security sign-off recorded. |
| DOCS-INSTALL-50-001 | TODO | Docs Guild, DevOps Guild | DEVOPS-OBS-50-003 | Add `/docs/install/telemetry-stack.md` with collector deployment, exporter options, offline kit notes, and imposed rule banner. | Doc merged; install steps verified on air-gapped profile; banner present; screenshots attached. |
| DOCS-FORENSICS-53-001 | TODO | Docs Guild, Evidence Locker Guild | EVID-OBS-53-003 | Publish `/docs/forensics/evidence-locker.md` describing bundle formats, WORM options, retention, legal hold, and imposed rule banner. | Doc merged; manifest examples validated; banner present; legal hold steps aligned with API. |
| DOCS-FORENSICS-53-002 | TODO | Docs Guild, Provenance Guild | PROV-OBS-54-001 | Release `/docs/forensics/provenance-attestation.md` covering DSSE schema, signing process, verification workflow, and imposed rule banner. | Doc merged; sample statements reference fixtures; banner included; verification steps tested. |
| DOCS-FORENSICS-53-003 | TODO | Docs Guild, Timeline Indexer Guild | TIMELINE-OBS-52-003 | Publish `/docs/forensics/timeline.md` with schema, event kinds, filters, query examples, and imposed rule banner. | Doc merged; query examples validated; banner present; linked from Console/CLI docs. |
| DOCS-CONSOLE-OBS-52-001 | TODO | Docs Guild, Console Guild | CONSOLE-OBS-51-001 | Document `/docs/console/observability.md` showcasing Observability Hub widgets, trace/log search, imposed rule banner, and accessibility tips. | Doc merged; screenshots updated; banner present; navigation steps verified. |
| DOCS-CONSOLE-OBS-52-002 | TODO | Docs Guild, Console Guild | CONSOLE-OBS-52-002, CONSOLE-OBS-53-001 | Publish `/docs/console/forensics.md` covering timeline explorer, evidence viewer, attestation verifier, imposed rule banner, and troubleshooting. | Doc merged; banner included; workflows validated via Playwright capture; troubleshooting section populated. |
| DOCS-CLI-OBS-52-001 | TODO | Docs Guild, DevEx/CLI Guild | CLI-OBS-52-001 | Create `/docs/cli/observability.md` detailing `stella obs` commands, examples, exit codes, imposed rule banner, and scripting tips. | Doc merged; examples tested; banner included; CLI parity matrix updated. |
| DOCS-CLI-FORENSICS-53-001 | TODO | Docs Guild, DevEx/CLI Guild | CLI-FORENSICS-54-001 | Publish `/docs/cli/forensics.md` for snapshot/verify/attest commands with sample outputs, imposed rule banner, and offline workflows. | Doc merged; sample bundles verified; banner present; offline notes cross-linked. |
| DOCS-RUNBOOK-55-001 | TODO | Docs Guild, Ops Guild | DEVOPS-OBS-55-001, WEB-OBS-55-001 | Author `/docs/runbooks/incidents.md` describing incident mode activation, escalation steps, retention impact, verification checklist, and imposed rule banner. | Doc merged; runbook rehearsed; banner included; linked from alerts. |
| DOCS-AOC-19-001 | DONE (2025-10-26) | Docs Guild, Concelier Guild | CONCELIER-WEB-AOC-19-001, EXCITITOR-WEB-AOC-19-001 | Author `/docs/ingestion/aggregation-only-contract.md` covering philosophy, invariants, schemas, error codes, migration, observability, and security checklist. | New doc published with compliance checklist; cross-links from existing docs added. |
| DOCS-AOC-19-002 | DONE (2025-10-26) | Docs Guild, Architecture Guild | DOCS-AOC-19-001 | Update `/docs/architecture/overview.md` to include AOC boundary, raw stores, and sequence diagram (fetch → guard → raw insert → policy evaluation). | Overview doc updated with diagrams/text; lint passes; stakeholders sign off. |
| DOCS-AOC-19-003 | DONE (2025-10-26) | Docs Guild, Policy Guild | POLICY-AOC-19-003 | Refresh `/docs/architecture/policy-engine.md` clarifying ingestion boundary, raw inputs, and policy-only derived data. | Doc highlights raw-only ingestion contract, updated diagrams merge, compliance checklist added. |
| DOCS-AOC-19-004 | DONE (2025-10-26) | Docs Guild, UI Guild | UI-AOC-19-001 | Extend `/docs/ui/console.md` with Sources dashboard tiles, violation drill-down workflow, and verification action. | UI doc updated with screenshots/flow descriptions, compliance checklist appended. |
> DOCS-AOC-19-004: Architecture overview & policy-engine updates landed 2025-10-26; incorporate the new AOC boundary diagrams and metrics references.
| DOCS-AOC-19-005 | DONE (2025-10-26) | Docs Guild, CLI Guild | CLI-AOC-19-003 | Update `/docs/cli/cli-reference.md` with `stella sources ingest --dry-run` and `stella aoc verify` usage, exit codes, and offline notes. | CLI reference + quickstart sections updated; examples validated; compliance checklist added. |
> DOCS-AOC-19-005: New ingestion reference + architecture overview published 2025-10-26; ensure CLI docs link to both and surface AOC exit codes mapping.
| DOCS-AOC-19-006 | DONE (2025-10-26) | Docs Guild, Observability Guild | CONCELIER-WEB-AOC-19-002, EXCITITOR-WEB-AOC-19-002 | Document new metrics/traces/log keys in `/docs/observability/observability.md`. | Observability doc lists new metrics/traces/log fields; dashboards referenced; compliance checklist appended. |
| DOCS-AOC-19-007 | DONE (2025-10-26) | Docs Guild, Authority Core | AUTH-AOC-19-001 | Update `/docs/security/authority-scopes.md` with new ingestion scopes and tenancy enforcement notes. | Doc reflects new scopes, sample policies updated, compliance checklist added. |
| DOCS-AOC-19-008 | DONE (2025-10-26) | Docs Guild, DevOps Guild | DEVOPS-AOC-19-002 | Refresh `/docs/deploy/containers.md` to cover validator enablement, guard env flags, and read-only verify user. | Deploy doc updated; offline kit section mentions validator scripts; compliance checklist appended. |
| DOCS-AOC-19-009 | DONE (2025-10-26) | Docs Guild, Authority Core | AUTH-AOC-19-001 | Update AOC docs/samples to reflect new `advisory:*`, `vex:*`, and `aoc:verify` scopes. | Docs reference new scopes, samples aligned, compliance checklist updated. |
## Air-Gapped Mode (Epic 16)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-AIRGAP-56-001 | TODO | Docs Guild, AirGap Controller Guild | AIRGAP-CTL-56-002 | Publish `/docs/airgap/overview.md` outlining modes, lifecycle, responsibilities, and imposed rule banner. | Doc merged; banner present; diagrams included. |
| DOCS-AIRGAP-56-002 | TODO | Docs Guild, DevOps Guild | DEVOPS-AIRGAP-56-001 | Author `/docs/airgap/sealing-and-egress.md` covering network policies, EgressPolicy facade usage, and verification steps. | Doc merged; examples validated; banner included. |
| DOCS-AIRGAP-56-003 | TODO | Docs Guild, Exporter Guild | EXPORT-AIRGAP-56-001 | Create `/docs/airgap/mirror-bundles.md` describing bundle format, DSSE/TUF/Merkle validation, creation/import workflows. | Doc merged; sample commands verified; banner present. |
| DOCS-AIRGAP-56-004 | TODO | Docs Guild, Deployment Guild | DEVOPS-AIRGAP-56-003 | Publish `/docs/airgap/bootstrap.md` detailing Bootstrap Pack creation, validation, and install procedures. | Doc merged; checklist appended; screenshots verified. |
| DOCS-AIRGAP-57-001 | TODO | Docs Guild, AirGap Time Guild | AIRGAP-TIME-58-001 | Write `/docs/airgap/staleness-and-time.md` explaining time anchors, drift policies, staleness budgets, and UI indicators. | Doc merged; math checked; banner included. |
| DOCS-AIRGAP-57-002 | TODO | Docs Guild, Console Guild | CONSOLE-AIRGAP-57-001 | Publish `/docs/console/airgap.md` covering sealed badge, import wizard, staleness dashboards. | Doc merged; screenshots captured; banner present. |
| DOCS-AIRGAP-57-003 | TODO | Docs Guild, CLI Guild | CLI-AIRGAP-57-001 | Publish `/docs/cli/airgap.md` documenting commands, examples, exit codes. | Doc merged; examples validated; banner present. |
| DOCS-AIRGAP-57-004 | TODO | Docs Guild, Ops Guild | DEVOPS-AIRGAP-56-002 | Create `/docs/airgap/operations.md` with runbooks for imports, failure recovery, and auditing. | Doc merged; runbooks rehearsed; banner included. |
| DOCS-AIRGAP-58-001 | TODO | Docs Guild, Product Guild | CONSOLE-AIRGAP-58-002 | Provide `/docs/airgap/degradation-matrix.md` enumerating feature availability, fallbacks, remediation. | Doc merged; matrix reviewed; banner included. |
| DOCS-AIRGAP-58-002 | TODO | Docs Guild, Security Guild | PROV-OBS-54-001 | Update `/docs/security/trust-and-signing.md` with DSSE/TUF roots, rotation, and signed time tokens. | Doc merged; security sign-off recorded; banner present. |
| DOCS-AIRGAP-58-003 | TODO | Docs Guild, DevEx Guild | AIRGAP-POL-56-001 | Publish `/docs/dev/airgap-contracts.md` describing EgressPolicy usage, sealed-mode tests, linting. | Doc merged; sample code validated; banner included. |
| DOCS-AIRGAP-58-004 | TODO | Docs Guild, Evidence Locker Guild | EVID-OBS-55-001 | Document `/docs/airgap/portable-evidence.md` for exporting/importing portable evidence bundles across enclaves. | Doc merged; verification steps tested; banner present. |
## SDKs & OpenAPI (Epic 17)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-OAS-61-001 | TODO | Docs Guild, API Contracts Guild | OAS-61-002 | Publish `/docs/api/overview.md` covering auth, tenancy, pagination, idempotency, rate limits with banner. | Doc merged; examples validated; banner present. |
| DOCS-OAS-61-002 | TODO | Docs Guild, API Governance Guild | APIGOV-61-001 | Author `/docs/api/conventions.md` capturing naming, errors, filters, sorting, examples. | Doc merged; lint passes; banner included. |
| DOCS-OAS-61-003 | TODO | Docs Guild, API Governance Guild | APIGOV-63-001 | Publish `/docs/api/versioning.md` describing SemVer, deprecation headers, migration playbooks. | Doc merged; example headers validated; banner present. |
| DOCS-OAS-62-001 | TODO | Docs Guild, Developer Portal Guild | DEVPORT-62-002 | Stand up `/docs/api/reference/` auto-generated site; integrate with portal nav. | Reference site builds; search works; banner included. |
| DOCS-SDK-62-001 | TODO | Docs Guild, SDK Generator Guild | SDKGEN-63-001 | Publish `/docs/sdks/overview.md` plus language guides (`typescript.md`, `python.md`, `go.md`, `java.md`). | Docs merged; code samples pulled from tested examples; banner present. |
| DOCS-DEVPORT-62-001 | TODO | Docs Guild, Developer Portal Guild | DEVPORT-62-001 | Document `/docs/devportal/publishing.md` for build pipeline, offline bundle steps. | Doc merged; cross-links validated; banner included. |
| DOCS-CONTRIB-62-001 | TODO | Docs Guild, API Governance Guild | APIGOV-61-001 | Publish `/docs/contributing/api-contracts.md` detailing how to edit OAS, lint rules, compatibility checks. | Doc merged; banner present; examples validated. |
| DOCS-TEST-62-001 | TODO | Docs Guild, Contract Testing Guild | CONTR-62-001 | Author `/docs/testing/contract-testing.md` covering mock server, replay tests, golden fixtures. | Doc merged; references to tooling validated; banner present. |
| DOCS-SEC-62-001 | TODO | Docs Guild, Authority Core | AUTH-AIRGAP-56-001 | Update `/docs/security/auth-scopes.md` with OAuth2/PAT scopes, tenancy header usage. | Doc merged; scope tables verified; banner included. |
| DOCS-AIRGAP-DEVPORT-64-001 | TODO | Docs Guild, DevPortal Offline Guild | DVOFF-64-001 | Create `/docs/airgap/devportal-offline.md` describing offline bundle usage and verification. | Doc merged; verification steps tested; banner present. |
## Risk Profiles (Epic 18)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-RISK-66-001 | TODO | Docs Guild, Risk Profile Schema Guild | POLICY-RISK-66-001 | Publish `/docs/risk/overview.md` covering concepts and glossary. | Doc merged with banner; terminology reviewed. |
| DOCS-RISK-66-002 | TODO | Docs Guild, Policy Guild | POLICY-RISK-66-003 | Author `/docs/risk/profiles.md` (authoring, versioning, scope). | Doc merged; schema examples validated; banner present. |
| DOCS-RISK-66-003 | TODO | Docs Guild, Risk Engine Guild | RISK-ENGINE-67-001 | Publish `/docs/risk/factors.md` cataloging signals, transforms, reducers, TTLs. | Document merged; tables verified; banner included. |
| DOCS-RISK-66-004 | TODO | Docs Guild, Risk Engine Guild | RISK-ENGINE-66-002 | Create `/docs/risk/formulas.md` detailing math, normalization, gating, severity. | Doc merged; equations rendered; banner present. |
| DOCS-RISK-67-001 | TODO | Docs Guild, Risk Engine Guild | RISK-ENGINE-68-001 | Publish `/docs/risk/explainability.md` showing artifact schema and UI screenshots. | Doc merged; CLI examples validated; banner included. |
| DOCS-RISK-67-002 | TODO | Docs Guild, API Guild | POLICY-RISK-67-002 | Produce `/docs/risk/api.md` with endpoint reference/examples. | Doc merged; OAS examples synced; banner present. |
| DOCS-RISK-67-003 | TODO | Docs Guild, Console Guild | CONSOLE-RISK-66-001 | Document `/docs/console/risk-ui.md` for authoring, simulation, dashboards. | Doc merged; screenshots updated; banner included. |
| DOCS-RISK-67-004 | TODO | Docs Guild, CLI Guild | CLI-RISK-66-001 | Publish `/docs/cli/risk.md` covering CLI workflows. | Doc merged; command examples validated; banner present. |
| DOCS-RISK-68-001 | TODO | Docs Guild, Export Guild | RISK-BUNDLE-69-001 | Add `/docs/airgap/risk-bundles.md` for offline factor bundles. | Doc merged; verification steps confirmed; banner included. |
| DOCS-RISK-68-002 | TODO | Docs Guild, Security Guild | POLICY-RISK-66-003 | Update `/docs/security/aoc-invariants.md` with risk scoring provenance guarantees. | Doc merged; audit references updated; banner present. |
## Attestor Console (Epic 19)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-ATTEST-73-001 | TODO | Docs Guild, Attestor Service Guild | ATTEST-TYPES-73-001 | Publish `/docs/attestor/overview.md` with imposed rule banner. | Doc merged; terminology validated. |
| DOCS-ATTEST-73-002 | TODO | Docs Guild, Attestation Payloads Guild | ATTEST-TYPES-73-002 | Write `/docs/attestor/payloads.md` with schemas/examples. | Doc merged; examples validated via tests. |
| DOCS-ATTEST-73-003 | TODO | Docs Guild, Policy Guild | POLICY-ATTEST-73-002 | Publish `/docs/attestor/policies.md` covering verification policies. | Doc merged; policy examples validated. |
| DOCS-ATTEST-73-004 | TODO | Docs Guild, Attestor Service Guild | ATTESTOR-73-002 | Add `/docs/attestor/workflows.md` detailing ingest, verify, bulk operations. | Doc merged; workflows tested. |
| DOCS-ATTEST-74-001 | TODO | Docs Guild, KMS Guild | KMS-73-001 | Publish `/docs/attestor/keys-and-issuers.md`. | Doc merged; rotation guidance verified. |
| DOCS-ATTEST-74-002 | TODO | Docs Guild, Transparency Guild | TRANSP-74-001 | Document `/docs/attestor/transparency.md` with witness usage/offline validation. | Doc merged; proofs validated. |
| DOCS-ATTEST-74-003 | TODO | Docs Guild, Attestor Console Guild | CONSOLE-ATTEST-73-001 | Write `/docs/console/attestor-ui.md` with screenshots/workflows. | Doc merged; screenshots captured; banner present. |
| DOCS-ATTEST-74-004 | TODO | Docs Guild, CLI Attestor Guild | CLI-ATTEST-73-001 | Publish `/docs/cli/attest.md` covering CLI usage. | Doc merged; commands validated. |
| DOCS-ATTEST-75-001 | TODO | Docs Guild, Export Attestation Guild | EXPORT-ATTEST-75-002 | Add `/docs/attestor/airgap.md` for attestation bundles. | Doc merged; verification steps confirmed. |
| DOCS-ATTEST-75-002 | TODO | Docs Guild, Security Guild | ATTESTOR-73-002 | Update `/docs/security/aoc-invariants.md` with attestation invariants. | Doc merged; invariants detailed. |
## Policy Engine v2
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-POLICY-20-001 | DONE (2025-10-26) | Docs Guild, Policy Guild | POLICY-ENGINE-20-000 | Author `/docs/policy/overview.md` covering concepts, inputs/outputs, determinism, and compliance checklist. | Doc published with diagrams + glossary; lint passes; checklist included. |
| DOCS-POLICY-20-002 | DONE (2025-10-26) | Docs Guild, Policy Guild | POLICY-ENGINE-20-001 | Write `/docs/policy/dsl.md` with grammar, built-ins, examples, anti-patterns. | DSL doc includes grammar tables, examples, compliance checklist; validated against parser tests. |
| DOCS-POLICY-20-003 | DONE (2025-10-26) | Docs Guild, Authority Core | AUTH-POLICY-20-001 | Publish `/docs/policy/lifecycle.md` describing draft→approve workflow, roles, audit, compliance list. | Lifecycle doc linked from UI/CLI help; approvals roles documented; checklist appended. |
| DOCS-POLICY-20-004 | DONE (2025-10-26) | Docs Guild, Scheduler Guild | SCHED-MODELS-20-001 | Create `/docs/policy/runs.md` detailing run modes, incremental mechanics, cursors, replay. | Run doc includes sequence diagrams + compliance checklist; cross-links to scheduler docs. |
| DOCS-POLICY-20-005 | DONE (2025-10-26) | Docs Guild, BE-Base Platform Guild | WEB-POLICY-20-001 | Draft `/docs/api/policy.md` describing endpoints, schemas, error codes. | API doc validated against OpenAPI; examples included; checklist appended. |
| DOCS-POLICY-20-006 | DONE (2025-10-26) | Docs Guild, DevEx/CLI Guild | CLI-POLICY-20-002 | Produce `/docs/cli/policy.md` with command usage, exit codes, JSON output contracts. | CLI doc includes examples, exit codes, compliance checklist. |
| DOCS-POLICY-20-007 | DONE (2025-10-26) | Docs Guild, UI Guild | UI-POLICY-20-001 | Document `/docs/ui/policy-editor.md` covering editor, simulation, diff workflows, approvals. | UI doc includes screenshots/placeholders, accessibility notes, compliance checklist. |
| DOCS-POLICY-20-008 | DONE (2025-10-26) | Docs Guild, Architecture Guild | POLICY-ENGINE-20-003 | Write `/docs/architecture/policy-engine.md` (new epic content) with sequence diagrams, selection strategy, schema. | Architecture doc merged with diagrams; compliance checklist appended; references updated. |
| DOCS-POLICY-20-009 | DONE (2025-10-26) | Docs Guild, Observability Guild | POLICY-ENGINE-20-007 | Add `/docs/observability/policy.md` for metrics/traces/logs, sample dashboards. | Observability doc includes metrics tables, dashboard screenshots, checklist. |
| DOCS-POLICY-20-010 | DONE (2025-10-26) | Docs Guild, Security Guild | AUTH-POLICY-20-002 | Publish `/docs/security/policy-governance.md` covering scopes, approvals, tenancy, least privilege. | Security doc merged; compliance checklist appended; reviewed by Security Guild. |
| DOCS-POLICY-20-011 | DONE (2025-10-26) | Docs Guild, Policy Guild | POLICY-ENGINE-20-001 | Populate `/docs/examples/policies/` with baseline/serverless/internal-only samples and commentary. | Example policies committed with explanations; lint passes; compliance checklist per file. |
| DOCS-POLICY-20-012 | DONE (2025-10-26) | Docs Guild, Support Guild | WEB-POLICY-20-003 | Draft `/docs/faq/policy-faq.md` addressing common pitfalls, VEX conflicts, determinism issues. | FAQ published with Q/A entries, cross-links, compliance checklist. |
## Graph Explorer v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
## Link-Not-Merge v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-LNM-22-001 | TODO | Docs Guild, Concelier Guild | CONCELIER-LNM-21-001..003 | Author `/docs/advisories/aggregation.md` covering observation vs linkset, conflict handling, AOC requirements, and reviewer checklist. | Doc merged with examples + checklist; lint passes. |
| DOCS-LNM-22-002 | TODO | Docs Guild, Excititor Guild | EXCITITOR-LNM-21-001..003 | Publish `/docs/vex/aggregation.md` describing VEX observation/linkset model, product matching, conflicts. | Doc merged with fixtures; checklist appended. |
| DOCS-LNM-22-003 | TODO | Docs Guild, BE-Base Platform Guild | WEB-LNM-21-001..003 | Update `/docs/api/advisories.md` and `/docs/api/vex.md` for new endpoints, parameters, errors, exports. | API docs aligned with OpenAPI; examples validated. |
| DOCS-LNM-22-004 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-40-001 | Create `/docs/policy/effective-severity.md` detailing severity selection strategies from multiple sources. | Doc merged with policy examples; checklist included. |
| DOCS-LNM-22-005 | TODO | Docs Guild, UI Guild | UI-LNM-22-001..003 | Document `/docs/ui/evidence-panel.md` with screenshots, conflict badges, accessibility guidance. | UI doc merged; accessibility checklist completed. |
## StellaOps Console (Sprint 23)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-CONSOLE-23-001 | DONE (2025-10-26) | Docs Guild, Console Guild | CONSOLE-CORE-23-004 | Publish `/docs/ui/console-overview.md` covering IA, tenant model, global filters, and AOC alignment with compliance checklist. | Doc merged with diagrams + overview tables; checklist appended; Console Guild sign-off. |
| DOCS-CONSOLE-23-002 | DONE (2025-10-26) | Docs Guild, Console Guild | DOCS-CONSOLE-23-001 | Author `/docs/ui/navigation.md` detailing routes, breadcrumbs, keyboard shortcuts, deep links, and tenant context switching. | Navigation doc merged with shortcut tables and screenshots; accessibility checklist satisfied. |
| DOCS-CONSOLE-23-003 | DONE (2025-10-26) | Docs Guild, SBOM Service Guild, Console Guild | SBOM-CONSOLE-23-001, CONSOLE-FEAT-23-102 | Document `/docs/ui/sbom-explorer.md` (catalog, detail, graph overlays, exports) including compliance checklist and performance tips. | Doc merged with annotated screenshots, export instructions, and overlay examples; checklist appended. |
| DOCS-CONSOLE-23-004 | DONE (2025-10-26) | Docs Guild, Concelier Guild, Excititor Guild | CONCELIER-CONSOLE-23-001, EXCITITOR-CONSOLE-23-001 | Produce `/docs/ui/advisories-and-vex.md` explaining aggregation-not-merge, conflict indicators, raw viewers, and provenance banners. | Doc merged; raw JSON examples included; compliance checklist complete. |
| DOCS-CONSOLE-23-005 | DONE (2025-10-26) | Docs Guild, Policy Guild | POLICY-CONSOLE-23-001, CONSOLE-FEAT-23-104 | Write `/docs/ui/findings.md` describing filters, saved views, explain drawer, exports, and CLI parity callouts. | Doc merged with filter matrix + explain walkthrough; checklist appended. |
| DOCS-CONSOLE-23-006 | DONE (2025-10-26) | Docs Guild, Policy Guild, Product Ops | POLICY-CONSOLE-23-002, CONSOLE-FEAT-23-105 | Publish `/docs/ui/policies.md` with editor, simulation, approvals, compliance checklist, and RBAC mapping. | Doc merged; Monaco screenshots + simulation diff examples included; approval flow described; checklist appended. |
| DOCS-CONSOLE-23-007 | DONE (2025-10-26) | Docs Guild, Scheduler Guild | SCHED-CONSOLE-23-001, CONSOLE-FEAT-23-106 | Document `/docs/ui/runs.md` covering queues, live progress, diffs, retries, evidence downloads, and troubleshooting. | Doc merged with SSE troubleshooting, metrics references, compliance checklist. |
| DOCS-CONSOLE-23-008 | DONE (2025-10-26) | Docs Guild, Authority Guild | AUTH-CONSOLE-23-002, CONSOLE-FEAT-23-108 | Draft `/docs/ui/admin.md` describing users/roles, tenants, tokens, integrations, fresh-auth prompts, and RBAC mapping. | Doc merged with tables for scopes vs roles, screenshots, compliance checklist. |
| DOCS-CONSOLE-23-009 | DONE (2025-10-27) | Docs Guild, DevOps Guild | DOWNLOADS-CONSOLE-23-001, CONSOLE-FEAT-23-109 | Publish `/docs/ui/downloads.md` listing product images, commands, offline instructions, parity with CLI, and compliance checklist. | Doc merged; manifest sample included; copy-to-clipboard guidance documented; checklist complete. |
| DOCS-CONSOLE-23-010 | DONE (2025-10-27) | Docs Guild, Deployment Guild, Console Guild | DEVOPS-CONSOLE-23-002, CONSOLE-REL-23-301 | Write `/docs/deploy/console.md` (Helm, ingress, TLS, CSP, env vars, health checks) with compliance checklist. | Deploy doc merged; templates validated; CSP guidance included; checklist appended. |
| DOCS-CONSOLE-23-011 | DOING (2025-10-27) | Docs Guild, Deployment Guild | DOCS-CONSOLE-23-010 | Update `/docs/install/docker.md` to cover Console image, Compose/Helm usage, offline tarballs, parity with CLI. | Doc updated with new sections; commands validated; compliance checklist appended. |
| DOCS-CONSOLE-23-012 | TODO | Docs Guild, Security Guild | AUTH-CONSOLE-23-003, WEB-CONSOLE-23-002 | Publish `/docs/security/console-security.md` detailing OIDC flows, scopes, CSP, fresh-auth, evidence handling, and compliance checklist. | Security doc merged; threat model notes included; checklist appended. |
| DOCS-CONSOLE-23-013 | TODO | Docs Guild, Observability Guild | TELEMETRY-CONSOLE-23-001, CONSOLE-QA-23-403 | Write `/docs/observability/ui-telemetry.md` cataloguing metrics/logs/traces, dashboards, alerts, and feature flags. | Doc merged with instrumentation tables, dashboard screenshots, checklist appended. |
| DOCS-CONSOLE-23-014 | TODO | Docs Guild, Console Guild, CLI Guild | CONSOLE-DOC-23-502 | Maintain `/docs/cli-vs-ui-parity.md` matrix and integrate CI check guidance. | Matrix published with parity status, CI workflow documented, compliance checklist appended. |
| DOCS-CONSOLE-23-015 | TODO | Docs Guild, Architecture Guild | CONSOLE-CORE-23-001, WEB-CONSOLE-23-001 | Produce `/docs/architecture/console.md` describing frontend packages, data flow diagrams, SSE design, performance budgets. | Architecture doc merged with diagrams + compliance checklist; reviewers approve. |
| DOCS-CONSOLE-23-016 | TODO | Docs Guild, Accessibility Guild | CONSOLE-QA-23-402, CONSOLE-FEAT-23-102 | Refresh `/docs/accessibility.md` with Console-specific keyboard flows, color tokens, testing tools, and compliance checklist updates. | Accessibility doc updated; audits referenced; checklist appended. |
| DOCS-CONSOLE-23-017 | TODO | Docs Guild, Console Guild | CONSOLE-FEAT-23-101..109 | Create `/docs/examples/ui-tours.md` providing triage, audit, policy rollout walkthroughs with annotated screenshots and GIFs. | UI tours doc merged; media assets stored; compliance checklist appended. |
| DOCS-LNM-22-006 | TODO | Docs Guild, Architecture Guild | CONCELIER-LNM-21-001..005, EXCITITOR-LNM-21-001..005 | Refresh `/docs/architecture/conseiller.md` and `/docs/architecture/excitator.md` describing observation/linkset pipelines and event contracts. | Architecture docs updated with diagrams; checklist appended. |
| DOCS-LNM-22-007 | TODO | Docs Guild, Observability Guild | CONCELIER-LNM-21-005, EXCITITOR-LNM-21-005, DEVOPS-LNM-22-002 | Publish `/docs/observability/aggregation.md` with metrics/traces/logs/SLOs. | Observability doc merged; dashboards referenced; checklist appended. |
| DOCS-LNM-22-008 | TODO | Docs Guild, DevOps Guild | MERGE-LNM-21-001, CONCELIER-LNM-21-102 | Write `/docs/migration/no-merge.md` describing migration plan, backfill steps, rollback, feature flags. | Migration doc approved by stakeholders; checklist appended. |
## Policy Engine + Editor v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-POLICY-23-001 | TODO | Docs Guild, Policy Guild | POLICY-SPL-23-001..003 | Author `/docs/policy/overview.md` describing SPL philosophy, layering, and glossary with reviewer checklist. | Doc merged; lint passes; checklist appended. |
| DOCS-POLICY-23-002 | TODO | Docs Guild, Policy Guild | POLICY-SPL-23-001 | Write `/docs/policy/spl-v1.md` (language reference, JSON Schema, examples). | Reference published with schema snippets; checklist completed. |
| DOCS-POLICY-23-003 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-50-001..004 | Produce `/docs/policy/runtime.md` covering compiler, evaluator, caching, events, SLOs. | Runtime doc merged with diagrams; observability references included. |
| DOCS-POLICY-23-004 | TODO | Docs Guild, UI Guild | UI-POLICY-23-001..006 | Document `/docs/policy/editor.md` (UI walkthrough, validation, simulation, approvals). | Editor doc merged with screenshots; accessibility checklist satisfied. |
| DOCS-POLICY-23-005 | TODO | Docs Guild, Security Guild | AUTH-POLICY-23-001..002 | Publish `/docs/policy/governance.md` (roles, scopes, approvals, signing, exceptions). | Governance doc merged; checklist appended. |
| DOCS-POLICY-23-006 | TODO | Docs Guild, BE-Base Platform Guild | WEB-POLICY-23-001..004 | Update `/docs/api/policy.md` with new endpoints, schemas, errors, pagination. | API doc aligns with OpenAPI; examples validated; checklist included. |
| DOCS-POLICY-23-007 | TODO | Docs Guild, DevEx/CLI Guild | CLI-POLICY-23-004..006 | Update `/docs/cli/policy.md` for lint/simulate/activate/history commands, exit codes. | CLI doc updated; samples verified; checklist appended. |
| DOCS-POLICY-23-008 | TODO | Docs Guild, Architecture Guild | POLICY-ENGINE-50-005..006 | Refresh `/docs/architecture/policy-engine.md` with data model, sequence diagrams, event flows. | Architecture doc merged with diagrams; checklist appended. |
| DOCS-POLICY-23-009 | TODO | Docs Guild, DevOps Guild | MERGE-LNM-21-001, DEVOPS-LNM-22-001 | Create `/docs/migration/policy-parity.md` covering dual-run parity plan and rollback. | Migration doc approved; checklist appended. |
| DOCS-POLICY-23-010 | TODO | Docs Guild, UI Guild | UI-POLICY-23-006 | Write `/docs/ui/explainers.md` showing explain trees, evidence overlays, interpretation guidance. | Doc merged with annotated screenshots; checklist appended. |
## Graph & Vuln Explorer v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-GRAPH-24-001 | TODO | Docs Guild, UI Guild | UI-GRAPH-24-001..006 | Author `/docs/ui/sbom-graph-explorer.md` detailing overlays, filters, saved views, accessibility, and AOC visibility. | Doc merged; screenshots included; checklist appended. |
| DOCS-GRAPH-24-002 | TODO | Docs Guild, UI Guild | UI-GRAPH-24-005 | Publish `/docs/ui/vulnerability-explorer.md` covering table usage, grouping, fix suggestions, Why drawer. | Doc merged with annotated images; accessibility checklist satisfied. |
| DOCS-GRAPH-24-003 | TODO | Docs Guild, SBOM Service Guild | SBOM-GRAPH-24-001..003 | Create `/docs/architecture/graph-index.md` describing data model, ingestion pipeline, caches, events. | Architecture doc merged with diagrams; checklist appended. |
| DOCS-GRAPH-24-004 | TODO | Docs Guild, BE-Base Platform Guild | WEB-GRAPH-24-001..003 | Document `/docs/api/graph.md` and `/docs/api/vuln.md` avec endpoints, parameters, errors, RBAC. | API docs aligned with OpenAPI; examples validated; checklist appended. |
| DOCS-GRAPH-24-005 | TODO | Docs Guild, DevEx/CLI Guild | CLI-GRAPH-24-001..003 | Update `/docs/cli/graph-and-vuln.md` covering new CLI commands, exit codes, scripting. | CLI doc merged; examples tested; checklist appended. |
| DOCS-GRAPH-24-006 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-60-001..002 | Write `/docs/policy/ui-integration.md` explaining overlays, cache usage, simulator contracts. | Doc merged; references cross-linked; checklist appended. |
| DOCS-GRAPH-24-007 | TODO | Docs Guild, DevOps Guild | DEVOPS-GRAPH-24-001..003 | Produce `/docs/migration/graph-parity.md` with rollout plan, parity checks, fallback guidance. | Migration doc approved; checklist appended. |
## Exceptions v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-EXC-25-001 | TODO | Docs Guild, Governance Guild | WEB-EXC-25-001 | Author `/docs/governance/exceptions.md` covering lifecycle, scope patterns, examples, compliance checklist. | Doc merged; reviewers sign off; checklist included. |
| DOCS-EXC-25-002 | TODO | Docs Guild, Authority Core | AUTH-EXC-25-001 | Publish `/docs/governance/approvals-and-routing.md` detailing roles, routing matrix, MFA rules, audit trails. | Doc merged; routing examples validated; checklist appended. |
| DOCS-EXC-25-003 | TODO | Docs Guild, BE-Base Platform Guild | WEB-EXC-25-001..003 | Create `/docs/api/exceptions.md` with endpoints, payloads, errors, idempotency notes. | API doc aligned with OpenAPI; examples tested; checklist appended. |
| DOCS-EXC-25-004 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-70-001 | Document `/docs/policy/exception-effects.md` explaining evaluation order, conflicts, simulation. | Doc merged; tests cross-referenced; checklist appended. |
| DOCS-EXC-25-005 | TODO | Docs Guild, UI Guild | UI-EXC-25-001..004 | Write `/docs/ui/exception-center.md` with UI walkthrough, badges, accessibility, shortcuts. | Doc merged with screenshots; accessibility checklist completed. |
| DOCS-EXC-25-006 | TODO | Docs Guild, DevEx/CLI Guild | CLI-EXC-25-001..002 | Update `/docs/cli/exceptions.md` covering command usage and exit codes. | CLI doc updated; examples validated; checklist appended. |
| DOCS-EXC-25-007 | TODO | Docs Guild, DevOps Guild | SCHED-WORKER-25-101, DEVOPS-GRAPH-24-003 | Publish `/docs/migration/exception-governance.md` describing cutover from legacy suppressions, notifications, rollback. | Migration doc approved; checklist included. |
> Update statuses (TODO/DOING/REVIEW/DONE/BLOCKED) as progress changes. Keep guides in sync with configuration samples under `etc/`.
> Remark (2025-10-13, DOC4.AUTH-PDG): Rate limit guide published (`docs/security/rate-limits.md`) and handed to plugin docs team for diagram uplift once PLG6.DIAGRAM lands.
## Orchestrator Dashboard (Epic 9)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-ORCH-32-001 | TODO | Docs Guild | ORCH-SVC-32-001, AUTH-ORCH-32-001 | Author `/docs/orchestrator/overview.md` covering mission, roles, AOC alignment, governance, with imposed rule reminder. | Doc merged with diagrams; imposed rule statement included; entry linked from docs index. |
| DOCS-ORCH-32-002 | TODO | Docs Guild | ORCH-SVC-32-002 | Author `/docs/orchestrator/architecture.md` detailing scheduler, DAGs, rate limits, data model, message bus, storage layout, restating imposed rule. | Architecture doc merged; diagrams reviewed; imposed rule noted. |
| DOCS-ORCH-33-001 | TODO | Docs Guild | ORCH-SVC-33-001..004, WEB-ORCH-33-001 | Publish `/docs/orchestrator/api.md` (REST/WebSocket endpoints, payloads, error codes) with imposed rule note. | API doc merged; examples validated; imposed rule appended. |
| DOCS-ORCH-33-002 | TODO | Docs Guild | CONSOLE-ORCH-32-002, CONSOLE-ORCH-33-001..002 | Publish `/docs/orchestrator/console.md` covering screens, a11y, live updates, control actions, reiterating imposed rule. | Console doc merged with screenshots; accessibility checklist done; imposed rule statement present. |
| DOCS-ORCH-33-003 | TODO | Docs Guild | CLI-ORCH-33-001 | Publish `/docs/orchestrator/cli.md` documenting commands, options, exit codes, streaming output, offline usage, and imposed rule. | CLI doc merged; examples tested; imposed rule appended. |
| DOCS-ORCH-34-001 | TODO | Docs Guild | ORCH-SVC-34-002, LEDGER-34-101 | Author `/docs/orchestrator/run-ledger.md` covering ledger schema, provenance chain, audit workflows, with imposed rule reminder. | Run-ledger doc merged; payload samples validated; imposed rule included; cross-links added. |
| DOCS-ORCH-34-002 | TODO | Docs Guild | AUTH-ORCH-32-001, AUTH-ORCH-34-001 | Update `/docs/security/secrets-handling.md` for orchestrator KMS refs, redaction badges, operator hygiene, reiterating imposed rule. | Security doc merged; checklists updated; imposed rule restated; references from Console/CLI docs added. |
| DOCS-ORCH-34-003 | TODO | Docs Guild | ORCH-SVC-33-003, ORCH-SVC-34-001, DEVOPS-ORCH-34-001 | Publish `/docs/operations/orchestrator-runbook.md` (incident playbook, backfill guide, circuit breakers, throttling) with imposed rule statement. | Runbook merged; steps validated with DevOps; imposed rule included; runbook linked from ops index. |
| DOCS-ORCH-34-004 | TODO | Docs Guild | ORCH-SVC-32-005, WORKER-GO-33-001, WORKER-PY-33-001 | Document `/docs/schemas/artifacts.md` describing artifact kinds, schema versions, hashing, storage layout, restating imposed rule. | Schema doc merged; JSON schema provided; imposed rule included; sample payload validated. |
| DOCS-ORCH-34-005 | TODO | Docs Guild | ORCH-SVC-34-001, DEVOPS-ORCH-34-001 | Author `/docs/slo/orchestrator-slo.md` defining SLOs, burn alerts, measurement, and reiterating imposed rule. | SLO doc merged; dashboard screenshots embedded; imposed rule appended; alerts documented. |
## Export Center (Epic 10)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-EXPORT-35-001 | TODO | Docs Guild | EXPORT-SVC-35-001..006 | Author `/docs/export-center/overview.md` covering purpose, profiles, security, AOC alignment, surfaces, ending with imposed rule statement. | Doc merged with diagrams/examples; imposed rule line present; index updated. |
| DOCS-EXPORT-35-002 | TODO | Docs Guild | EXPORT-SVC-35-002..005 | Publish `/docs/export-center/architecture.md` describing planner, adapters, manifests, signing, distribution flows, restating imposed rule. | Architecture doc merged; sequence diagrams included; rule statement appended. |
| DOCS-EXPORT-35-003 | TODO | Docs Guild | EXPORT-SVC-35-003..004 | Publish `/docs/export-center/profiles.md` detailing schema fields, examples, compatibility, and imposed rule reminder. | Profiles doc merged; JSON schemas linked; imposed rule noted. |
| DOCS-EXPORT-36-004 | TODO | Docs Guild | EXPORT-SVC-36-001..004, WEB-EXPORT-36-001 | Publish `/docs/export-center/api.md` covering endpoints, payloads, errors, and mention imposed rule. | API doc merged; examples validated; rule included. |
| DOCS-EXPORT-36-005 | TODO | Docs Guild | CLI-EXPORT-35-001, CLI-EXPORT-36-001 | Publish `/docs/export-center/cli.md` with command reference, CI scripts, verification steps, restating imposed rule. | CLI doc merged; script snippets tested; rule appended. |
| DOCS-EXPORT-36-006 | TODO | Docs Guild | EXPORT-SVC-36-001, DEVOPS-EXPORT-36-001 | Publish `/docs/export-center/trivy-adapter.md` covering field mappings, compatibility matrix, and imposed rule reminder. | Doc merged; mapping tables validated; rule included. |
| DOCS-EXPORT-37-001 | TODO | Docs Guild | EXPORT-SVC-37-001, DEVOPS-EXPORT-37-001 | Publish `/docs/export-center/mirror-bundles.md` describing filesystem/OCI layouts, delta/encryption, import guide, ending with imposed rule. | Doc merged; diagrams provided; verification steps tested; rule stated. |
| DOCS-EXPORT-37-002 | TODO | Docs Guild | EXPORT-SVC-35-005, EXPORT-SVC-37-002 | Publish `/docs/export-center/provenance-and-signing.md` detailing manifests, attestation flow, verification, reiterating imposed rule. | Doc merged; signature examples validated; rule appended. |
| DOCS-EXPORT-37-003 | TODO | Docs Guild | DEVOPS-EXPORT-37-001 | Publish `/docs/operations/export-runbook.md` covering failures, tuning, capacity planning, with imposed rule reminder. | Runbook merged; procedures validated; rule included. |
| DOCS-EXPORT-37-004 | TODO | Docs Guild | AUTH-EXPORT-37-001, EXPORT-SVC-37-002 | Publish `/docs/security/export-hardening.md` outlining RBAC, tenancy, encryption, redaction, restating imposed rule. | Security doc merged; checklist updated; rule appended. |
## Reachability v1
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-SIG-26-001 | TODO | Docs Guild, Signals Guild | SIGNALS-24-004 | Write `/docs/signals/reachability.md` covering states, scores, provenance, retention. | Doc merged with diagrams/examples; checklist appended. |
| DOCS-SIG-26-002 | TODO | Docs Guild, Signals Guild | SIGNALS-24-002 | Publish `/docs/signals/callgraph-formats.md` with schemas and validation errors. | Doc merged; examples tested; checklist included. |
| DOCS-SIG-26-003 | TODO | Docs Guild, Runtime Guild | SIGNALS-24-003 | Create `/docs/signals/runtime-facts.md` detailing agent capabilities, privacy safeguards, opt-in flags. | Doc merged; privacy review done; checklist appended. |
| DOCS-SIG-26-004 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-80-001 | Document `/docs/policy/signals-weighting.md` for SPL predicates and weighting strategies. | Doc merged; sample policies validated; checklist appended. |
| DOCS-SIG-26-005 | TODO | Docs Guild, UI Guild | UI-SIG-26-001..003 | Draft `/docs/ui/reachability-overlays.md` with badges, timelines, shortcuts. | Doc merged with screenshots; accessibility checklist completed. |
| DOCS-SIG-26-006 | TODO | Docs Guild, DevEx/CLI Guild | CLI-SIG-26-001..002 | Update `/docs/cli/reachability.md` for new commands and automation recipes. | Doc merged; examples verified; checklist appended. |
| DOCS-SIG-26-007 | TODO | Docs Guild, BE-Base Platform Guild | WEB-SIG-26-001..003 | Publish `/docs/api/signals.md` covering endpoints, payloads, ETags, errors. | API doc aligned with OpenAPI; examples tested; checklist appended. |
| DOCS-SIG-26-008 | TODO | Docs Guild, DevOps Guild | DEVOPS-SIG-26-001..002 | Write `/docs/migration/enable-reachability.md` guiding rollout, fallbacks, monitoring. | Migration doc approved; checklist appended. |
## Policy Studio (Sprint 27)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-POLICY-27-001 | TODO | Docs Guild, Policy Guild | REGISTRY-API-27-001, POLICY-ENGINE-27-001 | Publish `/docs/policy/studio-overview.md` covering lifecycle, roles, glossary, and compliance checklist. | Doc merged with diagrams + lifecycle table; checklist appended; stakeholders sign off. |
| DOCS-POLICY-27-002 | TODO | Docs Guild, Console Guild | CONSOLE-STUDIO-27-001 | Write `/docs/policy/authoring.md` detailing workspace templates, snippets, lint rules, IDE shortcuts, and best practices. | Authoring doc includes annotated screenshots, snippet catalog, compliance checklist. |
| DOCS-POLICY-27-003 | TODO | Docs Guild, Policy Registry Guild | REGISTRY-API-27-007 | Document `/docs/policy/versioning-and-publishing.md` (semver rules, attestations, rollback) with compliance checklist. | Doc merged with flow diagrams; attestation steps documented; checklist appended. |
| DOCS-POLICY-27-004 | TODO | Docs Guild, Scheduler Guild | REGISTRY-API-27-005, SCHED-WORKER-27-301 | Write `/docs/policy/simulation.md` covering quick vs batch sim, thresholds, evidence bundles, CLI examples. | Simulation doc includes charts, sample manifests, checklist appended. |
| DOCS-POLICY-27-005 | TODO | Docs Guild, Product Ops | REGISTRY-API-27-006 | Publish `/docs/policy/review-and-approval.md` with approver requirements, comments, webhooks, audit trail guidance. | Doc merged with role matrix + webhook schema; checklist appended. |
| DOCS-POLICY-27-006 | TODO | Docs Guild, Policy Guild | REGISTRY-API-27-008 | Author `/docs/policy/promotion.md` covering environments, canary, rollback, and monitoring steps. | Promotion doc includes examples + checklist; verified by Policy Ops. |
| DOCS-POLICY-27-007 | TODO | Docs Guild, DevEx/CLI Guild | CLI-POLICY-27-001..004 | Update `/docs/policy/cli.md` with new commands, JSON schemas, CI usage, and compliance checklist. | CLI doc merged with transcripts; schema references validated; checklist appended. |
| DOCS-POLICY-27-008 | TODO | Docs Guild, Policy Registry Guild | REGISTRY-API-27-001..008 | Publish `/docs/policy/api.md` describing Registry endpoints, request/response schemas, errors, and feature flags. | API doc aligned with OpenAPI; examples validated; checklist appended. |
| DOCS-POLICY-27-009 | TODO | Docs Guild, Security Guild | AUTH-POLICY-27-002 | Create `/docs/security/policy-attestations.md` covering signing, verification, key rotation, and compliance checklist. | Security doc approved by Security Guild; verifier steps documented; checklist appended. |
| DOCS-POLICY-27-010 | TODO | Docs Guild, Architecture Guild | REGISTRY-API-27-001, SCHED-WORKER-27-301 | Author `/docs/architecture/policy-registry.md` (service design, schemas, queues, failure modes) with diagrams and checklist. | Architecture doc merged; diagrams committed; checklist appended. |
| DOCS-POLICY-27-011 | TODO | Docs Guild, Observability Guild | DEVOPS-POLICY-27-004 | Publish `/docs/observability/policy-telemetry.md` with metrics/log tables, dashboards, alerts, and compliance checklist. | Observability doc merged; dashboards linked; checklist appended. |
| DOCS-POLICY-27-012 | TODO | Docs Guild, Ops Guild | DEPLOY-POLICY-27-002 | Write `/docs/runbooks/policy-incident.md` detailing rollback, freeze, forensic steps, notifications. | Runbook merged; rehearsal recorded; checklist appended. |
| DOCS-POLICY-27-013 | TODO | Docs Guild, Policy Guild | CONSOLE-STUDIO-27-001, REGISTRY-API-27-002 | Update `/docs/examples/policy-templates.md` with new templates, snippets, and sample policies. | Examples committed with commentary; lint passes; checklist appended. |
| DOCS-POLICY-27-014 | TODO | Docs Guild, Policy Registry Guild | REGISTRY-API-27-003, WEB-POLICY-27-001 | Refresh `/docs/aoc/aoc-guardrails.md` to include Studio-specific guardrails and validation scenarios. | Doc updated with Studio guardrails; compliance checklist appended. |
## Vulnerability Explorer (Sprint 29)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-VULN-29-001 | TODO | Docs Guild, Vuln Explorer Guild | VULN-API-29-001 | Publish `/docs/vuln/explorer-overview.md` covering domain model, identities, AOC guarantees, workflow summary. | Doc merged with diagrams/table; compliance checklist appended. |
| DOCS-VULN-29-002 | TODO | Docs Guild, Console Guild | CONSOLE-VULN-29-001..006 | Write `/docs/vuln/explorer-using-console.md` with workflows, screenshots, keyboard shortcuts, saved views, deep links. | Doc merged; images stored; WCAG notes included; checklist appended. |
| DOCS-VULN-29-003 | TODO | Docs Guild, Vuln Explorer API Guild | VULN-API-29-001..009 | Author `/docs/vuln/explorer-api.md` (endpoints, query schema, grouping, errors, rate limits). | Doc aligned with OpenAPI; examples validated; checklist appended. |
| DOCS-VULN-29-004 | TODO | Docs Guild, DevEx/CLI Guild | CLI-VULN-29-001..005 | Publish `/docs/vuln/explorer-cli.md` with command reference, samples, exit codes, CI snippets. | CLI doc merged; transcripts/JSON outputs validated; checklist appended. |
| DOCS-VULN-29-005 | TODO | Docs Guild, Findings Ledger Guild | LEDGER-29-001..009 | Write `/docs/vuln/findings-ledger.md` detailing event schema, hashing, Merkle roots, replay tooling. | Doc merged; compliance checklist appended; audit team sign-off. |
| DOCS-VULN-29-006 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-29-001..003 | Update `/docs/policy/vuln-determinations.md` for new rationale, signals, simulation semantics. | Doc updated; examples validated; checklist appended. |
| DOCS-VULN-29-007 | TODO | Docs Guild, Excititor Guild | EXCITITOR-VULN-29-001..004 | Publish `/docs/vex/explorer-integration.md` covering CSAF mapping, suppression precedence, status semantics. | Doc merged; compliance checklist appended. |
| DOCS-VULN-29-008 | TODO | Docs Guild, Concelier Guild | CONCELIER-VULN-29-001..004 | Publish `/docs/advisories/explorer-integration.md` covering key normalization, withdrawn handling, provenance. | Doc merged; checklist appended. |
| DOCS-VULN-29-009 | TODO | Docs Guild, SBOM Service Guild | SBOM-VULN-29-001..002 | Author `/docs/sbom/vuln-resolution.md` detailing version semantics, scope, paths, safe version hints. | Doc merged; ecosystem tables validated; checklist appended. |
| DOCS-VULN-29-010 | TODO | Docs Guild, Observability Guild | VULN-API-29-009, DEVOPS-VULN-29-002 | Publish `/docs/observability/vuln-telemetry.md` (metrics, logs, tracing, dashboards, SLOs). | Doc merged; dashboards linked; checklist appended. |
| DOCS-VULN-29-011 | TODO | Docs Guild, Security Guild | AUTH-VULN-29-001..003 | Create `/docs/security/vuln-rbac.md` for roles, ABAC policies, attachment encryption, CSRF. | Security doc approved; checklist appended. |
| DOCS-VULN-29-012 | TODO | Docs Guild, Ops Guild | DEVOPS-VULN-29-002, SCHED-WORKER-29-003 | Write `/docs/runbooks/vuln-ops.md` (projector lag, resolver storms, export failures, policy activation). | Runbook merged; rehearsal recorded; checklist appended. |
| DOCS-VULN-29-013 | TODO | Docs Guild, Deployment Guild | DEPLOY-VULN-29-001..002 | Update `/docs/install/containers.md` with Findings Ledger & Vuln Explorer API images, manifests, resource sizing, health checks. | Install doc updated; validation commands included; checklist appended. |
## VEX Lens (Sprint 30)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-VEX-30-001 | TODO | Docs Guild, VEX Lens Guild | VEXLENS-30-005 | Publish `/docs/vex/consensus-overview.md` describing purpose, scope, AOC guarantees. | Doc merged with diagrams/terminology tables; compliance checklist appended. |
| DOCS-VEX-30-002 | TODO | Docs Guild, VEX Lens Guild | VEXLENS-30-005 | Author `/docs/vex/consensus-algorithm.md` covering normalization, weighting, thresholds, examples. | Doc merged; math reviewed by Policy; checklist appended. |
| DOCS-VEX-30-003 | TODO | Docs Guild, Issuer Directory Guild | ISSUER-30-001..003 | Document `/docs/vex/issuer-directory.md` (issuer management, keys, trust overrides, audit). | Doc merged; security review done; checklist appended. |
| DOCS-VEX-30-004 | TODO | Docs Guild, VEX Lens Guild | VEXLENS-30-007 | Publish `/docs/vex/consensus-api.md` with endpoint specs, query params, rate limits. | API doc aligned with OpenAPI; examples validated; checklist appended. |
| DOCS-VEX-30-005 | TODO | Docs Guild, Console Guild | CONSOLE-VEX-30-001 | Write `/docs/vex/consensus-console.md` covering UI workflows, filters, conflicts, accessibility. | Doc merged; screenshots added; checklist appended. |
| DOCS-VEX-30-006 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-29-001, VEXLENS-30-004 | Add `/docs/policy/vex-trust-model.md` detailing policy knobs, thresholds, simulation. | Doc merged; policy review completed; checklist appended. |
| DOCS-VEX-30-007 | TODO | Docs Guild, SBOM Service Guild | VEXLENS-30-002 | Publish `/docs/sbom/vex-mapping.md` (CPE→purl strategy, edge cases, overrides). | Doc merged; mapping tables validated; checklist appended. |
| DOCS-VEX-30-008 | TODO | Docs Guild, Security Guild | ISSUER-30-002, VEXLENS-30-003 | Deliver `/docs/security/vex-signatures.md` (verification flow, key rotation, audit). | Doc approved by Security; checklist appended. |
| DOCS-VEX-30-009 | TODO | Docs Guild, DevOps Guild | VEXLENS-30-009, DEVOPS-VEX-30-001 | Create `/docs/runbooks/vex-ops.md` for recompute storms, mapping failures, signature errors. | Runbook merged; rehearsal logged; checklist appended. |
## Advisory AI (Sprint 31)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-AIAI-31-001 | TODO | Docs Guild, Advisory AI Guild | AIAI-31-006 | Publish `/docs/advisory-ai/overview.md` covering capabilities, guardrails, RBAC. | Doc merged with diagrams; compliance checklist appended. |
| DOCS-AIAI-31-002 | TODO | Docs Guild, Advisory AI Guild | AIAI-31-004 | Author `/docs/advisory-ai/architecture.md` detailing RAG pipeline, deterministics, caching, model options. | Doc merged; architecture review done; checklist appended. |
| DOCS-AIAI-31-003 | TODO | Docs Guild, Advisory AI Guild | AIAI-31-006 | Write `/docs/advisory-ai/api.md` describing endpoints, schemas, errors, rate limits. | API doc aligned with OpenAPI; examples validated; checklist appended. |
| DOCS-AIAI-31-004 | TODO | Docs Guild, Console Guild | CONSOLE-VULN-29-001, CONSOLE-VEX-30-001 | Create `/docs/advisory-ai/console.md` with screenshots, a11y notes, copy-as-ticket instructions. | Doc merged; images stored; checklist appended. |
| DOCS-AIAI-31-005 | TODO | Docs Guild, DevEx/CLI Guild | CLI-VULN-29-001, CLI-VEX-30-001 | Publish `/docs/advisory-ai/cli.md` covering commands, exit codes, scripting patterns. | Doc merged; examples tested; checklist appended. |
| DOCS-AIAI-31-006 | TODO | Docs Guild, Policy Guild | POLICY-ENGINE-31-001 | Update `/docs/policy/assistant-parameters.md` covering temperature, token limits, ranking weights, TTLs. | Doc merged; policy review done; checklist appended. |
| DOCS-AIAI-31-007 | TODO | Docs Guild, Security Guild | AIAI-31-005 | Write `/docs/security/assistant-guardrails.md` detailing redaction, injection defense, logging. | Doc approved by Security; checklist appended. |
| DOCS-AIAI-31-008 | TODO | Docs Guild, SBOM Service Guild | SBOM-AIAI-31-001 | Publish `/docs/sbom/remediation-heuristics.md` (feasibility scoring, blast radius). | Doc merged; heuristics reviewed; checklist appended. |
| DOCS-AIAI-31-009 | TODO | Docs Guild, DevOps Guild | DEVOPS-AIAI-31-001 | Create `/docs/runbooks/assistant-ops.md` for warmup, cache priming, model outages, scaling. | Runbook merged; rehearsal logged; checklist appended. |
## Notifications Studio
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-NOTIFY-38-001 | TODO | Docs Guild, Notifications Service Guild | NOTIFY-SVC-38-001..004 | Publish `/docs/notifications/overview.md` and `/docs/notifications/architecture.md`, each ending with imposed rule reminder. | Docs merged; diagrams verified; imposed rule appended. |
| DOCS-NOTIFY-39-002 | TODO | Docs Guild, Notifications Service Guild | NOTIFY-SVC-39-001..004 | Publish `/docs/notifications/rules.md`, `/docs/notifications/templates.md`, `/docs/notifications/digests.md` with examples and imposed rule line. | Docs merged; examples validated; imposed rule appended. |
| DOCS-NOTIFY-40-001 | TODO | Docs Guild, Security Guild | AUTH-NOTIFY-38-001, NOTIFY-SVC-40-001..004 | Publish `/docs/notifications/channels.md`, `/docs/notifications/escalations.md`, `/docs/notifications/api.md`, `/docs/operations/notifier-runbook.md`, `/docs/security/notifications-hardening.md`; each ends with imposed rule line. | Docs merged; accessibility checks passed; imposed rule appended. |
## CLI Parity & Task Packs
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-CLI-41-001 | TODO | Docs Guild, DevEx/CLI Guild | CLI-CORE-41-001 | Publish `/docs/cli/overview.md`, `/docs/cli/configuration.md`, `/docs/cli/output-and-exit-codes.md` with imposed rule statements. | Docs merged; examples verified; imposed rule appended. |
| DOCS-CLI-42-001 | TODO | Docs Guild | DOCS-CLI-41-001, CLI-PARITY-41-001 | Publish `/docs/cli/parity-matrix.md` and command guides under `/docs/cli/commands/*.md` (policy, sbom, vuln, vex, advisory, export, orchestrator, notify, aoc, auth). | Guides merged; parity automation documented; imposed rule appended. |
| DOCS-PACKS-43-001 | TODO | Docs Guild, Task Runner Guild | PACKS-REG-42-001, TASKRUN-42-001 | Publish `/docs/task-packs/spec.md`, `/docs/task-packs/authoring-guide.md`, `/docs/task-packs/registry.md`, `/docs/task-packs/runbook.md`, `/docs/security/pack-signing-and-rbac.md`, `/docs/operations/cli-release-and-packaging.md` with imposed rule statements. | Docs merged; tutorials validated; imposed rule appended; cross-links added. |
## Containerized Distribution (Epic 13)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-INSTALL-44-001 | TODO | Docs Guild, Deployment Guild | COMPOSE-44-001 | Publish `/docs/install/overview.md` and `/docs/install/compose-quickstart.md` with imposed rule line and copy-ready commands. | Docs merged; screenshots/commands verified; imposed rule appended. |
| DOCS-INSTALL-45-001 | TODO | Docs Guild, Deployment Guild | HELM-45-001 | Publish `/docs/install/helm-prod.md` and `/docs/install/configuration-reference.md` with values tables and imposed rule reminder. | Docs merged; configuration matrix verified; imposed rule appended. |
| DOCS-INSTALL-46-001 | TODO | Docs Guild, Security Guild | DEPLOY-PACKS-43-001, CLI-PACKS-43-001 | Publish `/docs/install/airgap.md`, `/docs/security/supply-chain.md`, `/docs/operations/health-and-readiness.md`, `/docs/release/image-catalog.md`, `/docs/console/onboarding.md` (each with imposed rule). | Docs merged; checksum/signature sections validated; imposed rule appended. |
## Authority-Backed Scopes & Tenancy (Epic 14)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| DOCS-TEN-47-001 | TODO | Docs Guild, Authority Core | AUTH-TEN-47-001 | Publish `/docs/security/tenancy-overview.md` and `/docs/security/scopes-and-roles.md` outlining scope grammar, tenant model, imposed rule reminder. | Docs merged; diagrams included; imposed rule appended. |
| DOCS-TEN-48-001 | TODO | Docs Guild, Platform Ops | WEB-TEN-48-001 | Publish `/docs/operations/multi-tenancy.md`, `/docs/operations/rls-and-data-isolation.md`, `/docs/console/admin-tenants.md`. | Docs merged; examples validated; imposed rule appended. |
| DOCS-TEN-49-001 | TODO | Docs & DevEx Guilds | CLI-TEN-47-001, AUTH-TEN-49-001 | Publish `/docs/cli/authentication.md`, `/docs/api/authentication.md`, `/docs/policy/examples/abac-overlays.md`, update `/docs/install/configuration-reference.md` with new env vars, all ending with imposed rule line. | Docs merged; command examples verified; imposed rule appended. |

View File

@@ -0,0 +1,429 @@
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 16: AirGapped Mode
**Short name:** AirGapped Mode
**Primary components:** Web Services API, Console, CLI, Orchestrator, Task Runner, Conseiller (Feedser), Excitator (VEXer), Policy Engine, Findings Ledger, Export Center, Authority & Tenancy, Notifications, Observability & Forensics
**Surfaces:** offline bootstrap, update ingestion via mirror bundles, sealed egress, deterministic jobs, offline advisories/VEX, offline policy packs, offline notifications, evidence exports
**Dependencies:** Export Center, Containerized Distribution, AuthorityBacked Scopes & Tenancy, Observability & Forensics, Policy Studio
**AOC ground rule reminder:** Conseiller and Excitator aggregate and link advisories/VEX. They never merge or mutate source records. AirGapped Mode must preserve this invariant even when mirroring and importing updates.
---
## 1) What it is
A fully supported operating profile where StellaOps runs in a disconnected environment with:
* **Zero external egress** from platform services and jobs.
* **Deterministic inputs** provided via signed, offline **Mirror Bundles** (advisories, VEX, policy packs, vendor feeds, Stella metadata, container images, dashboards).
* **Offline bootstrap** for images and charts, plus reproducible configuration and cryptographically verifiable updates.
* **Graceful feature degradation** with explicit UX: features that require external connectivity are either backed by local artifacts or clearly disabled with an explanation.
* **Auditable import/export** including provenance attestations, evidence bundles, and chainofcustody for all offline exchanges.
AirGapped Mode is selectable at install time and enforceable at runtime. When enabled, all components operate under an “egress sealed” policy and only consume data from local stores.
---
## 2) Why
Many users operate in classified, regulated, or highsensitivity networks where egress is prohibited. They still need SBOM analysis, policy evaluation, advisory/VEX mapping, and reporting. AirGapped Mode provides the same core outcomes with verifiable offline inputs and explicit operational guardrails.
---
## 3) How it should work
### 3.1 Modes and lifecycle
* **Connected Mode:** normal operation; can create Mirror Bundles on a staging host.
* **Sealed AirGapped Mode:** platform enforces no egress. Only local resources are allowed.
* **Transition flow:**
1. Prepare an offline **Bootstrap Pack** with all container images, Helm/compose charts, seed database, and initial Mirror Bundle.
2. Install in the airgapped enclave and **seal** egress.
3. Periodically import new **Mirror Bundles** via removable media.
4. Export evidence/reports as needed.
### 3.2 Egress sealing
* **Static guardrails:**
* Platform flag `STELLA_AIRGAP=sealed` and database feature flag `env.mode='sealed'`.
* NetworkPolicy/iptables/eBPF denyall egress for namespaces/pods except loopback and the internal object store.
* Outbound DNS blocked.
* HTTP clients in code use a single `EgressPolicy` facade. When sealed, it panics on direct network calls and returns a typed error with remediation (“import a Mirror Bundle”).
* **Verification:** `GET /system/airgap/status` returns `sealed: true|false`, current policy hash, and last import timestamp. CLI prints warning if not sealed in declared airgapped install.
### 3.3 Trusted time
* Airgapped systems cannot NTP. Each Mirror Bundle includes a **signed time token** (Roughtimestyle or RFC 3161) from a trusted authority. On import, platform stores `time_anchor` for drift calculations and staleness checks.
* If time drift exceeds policy threshold, UI shows “stale view” badges and some jobs are blocked until a new bundle provides a fresh anchor.
### 3.4 Mirror Bundles (offline updates)
* **Content types:**
* Public advisories (OSV, GHSA, vendor advisories), NVD mappings, CPE/Package metadata.
* VEX statements from vendors/communities.
* Policy packs (templates, baselines, versioned rule sets).
* StellaOps engine metadata and schema migrations.
* Optional: **OCI image set** for platform and recommended runners.
* Optional: dashboards and alert rule packs.
* **Format:** a TUFlike layout:
```
root.json, snapshot.json, timestamp.json, targets/
advisories/*.jsonl.zst
vex/*.jsonl.zst
policy/*.tar.zst
images/* (OCI layout or oci-archive)
meta/engine/*.tgz
meta/time-anchor.json (signed)
```
* **Integrity & trust:**
* DSSEsigned target manifests.
* Root of trust rotated via `root.json` within strict policy; rotation requires manual dual approval in sealed mode.
* Each content artifact has a content digest and a **Merkle root** for the overall bundle.
* **Creation:** in connected networks, `stella mirror create --content advisories,vex,policy,images --since 2025-01-01 --out bundle.tgz`.
* **Import:** in airgap, `stella airgap import bundle.tgz`. The importer verifies DSSE, TUF metadata, Merkle root, then writes to local object store and updates catalog tables.
* **Idempotence:** imports are contentaddressed; reimports deduplicate.
### 3.5 Deterministic jobs and sources
* **Allowed sources:** filesystem, internal object store, tenant private registry, and preapproved connectors that dont require external egress.
* **Disallowed in sealed mode:** remote package registries, web scrapers, outbound webhooks, cloud KMS unless on the enclave network.
* **Runner policy:** the Task Runner verifies job descriptors contain no network calls unless marked `internal:` with allowlisted destinations. Violations fail at plan time with an explainable error.
### 3.6 Conseiller and Excitator in airgap
* **Conseiller (Feedser):** ingests advisories only from imported bundles or tenant local feeds. It preserves source identities and never merges. Linkage uses bundleprovided crossrefs and local heuristics.
* **Excitator (VEXer):** imports VEX records asis, links them to components and advisories, and records the origin bundle and statement digests. Consensus Lens (Epic 7) operates offline across the imported sources.
### 3.7 Policy Engine and Studio
* Policy packs are versioned and imported via bundles.
* Simulation and authoring work locally. Exports of new or updated policies can be packaged as **Policy SubBundles** for transfer back to connected environments if needed.
* Engine shows which rules depend on external evidence and how they degrade in sealed mode (e.g., “No external EPSS; using cached percentile from last bundle.”).
### 3.8 Notifications in sealed mode
* Default to **local delivery** only: SMTP relay inside enclave, syslog, file sink.
* External webhooks are disabled.
* Notification templates show “airgap compliant channel” tags to avoid misconfiguration.
### 3.9 Observability & Forensics
* Traces, logs, metrics remain local.
* Evidence Locker supports **portable evidence packages** for crossdomain transfer: `stella forensic snapshot create --portable`.
* Importing an evidence bundle in another enclave verifies signatures and maintains chainofcustody.
### 3.10 Console and CLI behavior
* Console shows a prominent **AirGapped: Sealed** badge with last import time and staleness indicators for advisories, VEX, and policy packs.
* CLI commands gain `--sealed` awareness: any operation that would egress prints a refusal with remediation suggesting the appropriate import.
### 3.11 Multitenant and scope
* Tenancy works unchanged. Bundle imports can target:
* `--tenant-global`: shared catalogs (advisories, VEX, policy baselines).
* `--tenant=<id>`: tenantspecific content (e.g., private advisories).
* Authority scopes gain `airgap:import`, `airgap:status:read`, `airgap:seal` (adminonly).
### 3.12 Feature degradation matrix
* **AI Assistant:** offline variants use local models if installed; otherwise feature is disabled with a message.
* **External reputation feeds (e.g., EPSSlike):** replaced by cached values from the bundle.
* **Container base image lookups:** rely on imported metadata or tenant private registry.
---
## 4) Architecture
### 4.1 New modules
* `airgap/controller`
* Sealing state machine; status API; guardrails wiring into HTTP clients and runner.
* `airgap/importer`
* TUF/DSSE verification, Merkle validation, object store loader, catalog updater.
* `mirror/creator`
* Connectedside builder for bundles; content plugins for advisories/VEX/policy/images.
* `airgap/policy`
* Enforcement library exposing `EgressPolicy` facade and job plan validators.
* `airgap/time`
* Time anchor parser, drift checks, staleness annotations.
* `console/airgap`
* Sealed badge, import UI, staleness dashboards, degradation notices.
* `cli/airgap`
* `stella airgap seal|status|import|verify` commands; `stella mirror create|verify`.
### 4.2 Data model additions
* `airgap_state(id, sealed BOOLEAN, policy_hash TEXT, last_import_at TIMESTAMP, time_anchor JSONB)`
* `bundle_catalog(id, kind ENUM, merkle_root TEXT, dsse_signer TEXT, created_at TIMESTAMP, imported_at TIMESTAMP, scope ENUM('global','tenant'), tenant_id NULLABLE, labels JSONB)`
* `bundle_items(bundle_id, path TEXT, sha256 TEXT, size BIGINT, type TEXT, meta JSONB)`
* `import_audit(id, bundle_id, actor, tenant_scope, verify_result, trace_id, created_at)`
RLS: tenantscoped rows when `scope='tenant'`; global rows readable only with `stella:airgap:status:read`.
### 4.3 Storage layout
Object store paths:
```
tenants/_global/mirror/<bundle_id>/targets/...
tenants/<tenant>/mirror/<bundle_id>/targets/...
tenants/_global/images/<digest>/...
```
Evidence locker remains separate. Imported images use **OCI layout** for local registry sync.
### 4.4 Message topics
* `stella.<tenant>.airgap.imported` with bundle metadata.
* `stella.<tenant>.airgap.staleness` periodic events emitted for UX.
* `stella.<tenant>.policy.degraded` when rules fall back due to sealed mode.
---
## 5) APIs and contracts
### 5.1 Status and control
* `GET /system/airgap/status` → `{ sealed, policy_hash, last_import_at, time_anchor, drift_seconds, staleness: { advisories_days, vex_days, policy_days } }`
* `POST /system/airgap/seal` → seals environment; requires `stella:airgap:seal#tenant/<id or global>`.
* `POST /system/airgap/unseal` → only allowed if installed mode is not declared “permanently sealed” at bootstrap. Typically disabled.
### 5.2 Import & verify
* `POST /airgap/import` multipart or file reference → runs verify, writes catalog, returns bundle summary and warnings.
* `POST /airgap/verify` dryrun verification returning DSSE/TUF and Merkle results.
* `GET /airgap/bundles` list imported bundles with filters.
### 5.3 Conseiller/Excitator sources
* `POST /feeds/register` supports `kind=mirror` with `bundle_id` and paths; disallowed to point to external URLs in sealed mode.
* `GET /feeds/status` shows persource staleness and last artifact version.
### 5.4 Errors
Standardized sealedmode error:
```
{
"code": "AIRGAP_EGRESS_BLOCKED",
"message": "Egress is sealed. Import a Mirror Bundle with advisories.",
"remediation": "Run: stella airgap import bundle.tgz",
"trace_id": "..."
}
```
---
## 6) Documentation changes
Create or update:
1. `/docs/airgap/overview.md`
* Modes, lifecycle, responsibilities, threat model, what degrades.
2. `/docs/airgap/bootstrap.md`
* Offline Bootstrap Pack creation, validation, install steps for Helm/compose, local registry seeding.
3. `/docs/airgap/mirror-bundles.md`
* Bundle format, DSSE/TUF/Merkle, signed time, creation on connected host, import in sealed environment, rotation of roots.
4. `/docs/airgap/sealing-and-egress.md`
* Network policies, EgressPolicy facade, runner validation, verifying sealed status.
5. `/docs/airgap/staleness-and-time.md`
* Time anchor, drift, staleness budgets and UI behavior.
6. `/docs/airgap/operations.md`
* Periodic update cadence, runbooks, failure scenarios, disaster recovery.
7. `/docs/airgap/degradation-matrix.md`
* Feature map: available, degraded, disabled; with remediation.
8. `/docs/console/airgap.md`
* Status badges, import wizard, staleness indicators.
9. `/docs/cli/airgap.md`
* Commands, examples, exit codes.
10. `/docs/security/trust-and-signing.md`
* Roots of trust, key rotation, DSSE, TUF model.
11. `/docs/dev/airgap-contracts.md`
* EgressPolicy usage, testing patterns, sealedmode CI gates.
Add the banner at the top of each page:
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Implementation plan
### Phase 1 — Foundations
* Add `airgap/controller` with sealed state and status API.
* Integrate `EgressPolicy` facade in all outbound network call sites.
* Provide default NetworkPolicy/iptables templates and Helm values to block egress.
* Console shows sealed badge and status.
### Phase 2 — Mirror Bundles
* Implement `mirror/creator` in connected mode with content plugins.
* Implement `airgap/importer` with DSSE/TUF/Merkle verification and catalog updates.
* Export Center gains **Mirror bundle** build and verify commands (connected side).
### Phase 3 — Deterministic jobs
* Add job plan validation in the Task Runner.
* Restrict sources in sealed mode.
* Conseiller/Excitator add “mirror source” adapters.
### Phase 4 — Staleness and time
* Parse time anchors; enforce staleness budgets; add UI indicators and task refusal when budgets exceeded.
* Notifications for expiring anchors.
### Phase 5 — Degradation matrix and UX
* Wire feature flags and fallbacks in Console and APIs.
* Improve error messages with remediation guidance.
### Phase 6 — Evidence portability
* Portable evidence packages: export/import with full verification.
* Document crossdomain workflows.
---
## 8) Engineering tasks
**Airgap controller and sealing**
* [ ] Implement `airgap/controller` with persistent state and RBAC.
* [ ] Add `GET /system/airgap/status`, `POST /system/airgap/seal`.
* [ ] Provide cluster egress templates for Kubernetes and for dockercompose.
* [ ] Instrument startup checks to refuse running in sealed mode if egress rules arent applied.
**EgressPolicy integration**
* [ ] Create `pkg/egress` facade and replace all direct HTTP client constructions in services.
* [ ] Add linter rule and CI check forbidding raw `http.NewClient` in server code.
* [ ] Add unit tests for sealed and unsealed behavior.
**Mirror bundles**
* [ ] Implement TUF/DSSE verifiers and Merkle root builder.
* [ ] Build content plugins: advisories, VEX, policy packs, images.
* [ ] Write `bundle_catalog` and `bundle_items` tables with RLS.
* [ ] CLI: `stella mirror create|verify`, `stella airgap import|verify`.
**Conseiller/Excitator**
* [ ] Add mirror adapters for readonly ingestion from bundle paths.
* [ ] Persist source digests and bundle IDs on each linked record.
* [ ] Unit tests to ensure no merge behavior is introduced by bundle ingestion.
**Policy Engine & Studio**
* [ ] Accept policy packs from bundles; track `policy_version` and `bundle_id`.
* [ ] Add degradation notices for rules requiring external reputation; provide cached fallbacks.
**Task Runner & Orchestrator**
* [ ] Plantime validation against network calls; add `internal:` allowlist mapping.
* [ ] Emit sealedmode violations to Timeline with remediation text.
**Console**
* [ ] Status panel: sealed badge, last import, staleness meters.
* [ ] Import wizard with verify results and catalog diff preview.
* [ ] Degradation matrix UI and contextual tooltips.
**Observability & Forensics**
* [ ] Mark sealed mode in telemetry attributes.
* [ ] Add portable evidence package export/import; verify on read.
**Authority & Tenancy**
* [ ] New scopes: `airgap:seal`, `airgap:import`, `airgap:status:read`.
* [ ] Audit import actions with actor and trace ID.
**Docs**
* [ ] Author all pages listed in section 6, include signedtime workflow diagrams.
* [ ] Insert banner statement in each page.
**Testing**
* [ ] Sealedmode e2e: attempt egress; ensure refusal and remediation.
* [ ] Bundle import e2e: corrupt DSSE, wrong root, tampered artifact → rejected.
* [ ] Performance: large advisory bundle import within target time (see Acceptance).
* [ ] Time drift scenarios and staleness budget enforcement.
* [ ] Regression: ensure AOC rules unchanged in sealed mode.
---
## 9) Feature changes required in other components
* **Export Center:** add mirror bundle export profile, signedtime token inclusion, and portable evidence packages.
* **Notifications:** remove external webhooks by default in sealed mode; add local SMTP/syslog sinks.
* **CLI Parity:** ensure all admin and import operations are exposed; add sealedmode safety prompts.
* **Containerized Distribution:** ship **Bootstrap Pack** that includes all images and charts in a single ociarchive set with index manifest.
* **Observability:** disable remote exporters; include local dashboards; mark sealed mode in UI.
* **Policy Studio:** enable offline authoring and export of policy subbundles.
* **VEX Consensus Lens:** ensure it operates solely on imported VEX statements; highlight coverage vs. stale.
> **Imposed rule reminder:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 10) Acceptance criteria
* Environment can be **sealed** and verified via API, CLI, and network policies.
* Import of a valid Mirror Bundle succeeds; DSSE, TUF, and Merkle validations recorded in `import_audit`.
* Conseiller and Excitator operate only on imported sources; linkage reflects original source identities.
* Policy packs are importable and versioned; rules that depend on external evidence show clear degradation.
* Large bundle (e.g., 812 GB with images) imports in under 20 minutes on SSD storage and indexes advisories in under 5 minutes on a 4core node.
* Console displays sealed badge, last import, staleness, and degradation matrix.
* Attempted egress in sealed mode fails with `AIRGAP_EGRESS_BLOCKED` and remediation.
* Portable evidence packages export and verify across separate enclaves.
* All changes documented with the banner statement.
---
## 11) Risks and mitigations
* **Key management complexity:** rotate TUF roots with dualcontrol workflow and explicit docs; failsafe to previous root if rotation bundle absent.
* **Staleness risk:** enforce budgets and block riskcritical jobs when expired; provide monitoring and notifications for impending staleness.
* **Operator error during import:** dryrun verification, diff preview of catalog changes, and ability to roll back via content address.
* **Hidden egress paths:** CI lints and runtime guardrails; network policies enforced at cluster layer.
* **Bundle size bloat:** Zstandard compression, delta bundles, and selective content flags for creation.
---
## 12) Philosophy
* **Predictable over perfect:** deterministic, explainable results beat unknown “live” results in sensitive networks.
* **Trust is earned:** every offline exchange is signed, verifiable, and auditable.
* **Degrade transparently:** when features reduce capability, explain it and guide remediation.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.

View File

@@ -0,0 +1,13 @@
# Aggregation-Only Contract (AOC) Guardrails
The Aggregation-Only Contract keeps ingestion services deterministic and policy-neutral. Use these checkpoints whenever you add or modify backlog items:
1. **Ingestion writes raw facts only.** Concelier and Excititor append immutable observations/linksets. No precedence, severity, suppression, or "safe fix" hints may be computed at ingest time.
2. **Derived semantics live elsewhere.** Policy Engine overlays, Vuln Explorer composition, and downstream reporting layers attach severity, precedence, policy verdicts, and UI hints.
3. **Provenance is mandatory.** Every ingestion write must include original source metadata, digests, and signing/provenance evidence when available. Reject writes lacking provenance.
4. **Deterministic outputs.** Given the same inputs, ingestion must produce identical documents, hashes, and event payloads across reruns.
5. **Guardrails everywhere.** Roslyn analyzers, schema validators, and CI smoke tests should fail builds that attempt forbidden writes.
For detailed roles and ownership boundaries, see `AGENTS.md` at the repo root and the module-specific `ARCHITECTURE_*.md` dossiers.
Need the full contract? Read the [Aggregation-Only Contract reference](../ingestion/aggregation-only-contract.md) for schemas, error codes, and migration guidance.

View File

@@ -0,0 +1,356 @@
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 17: SDKs and OpenAPI Docs
**Short name:** SDKs & OpenAPI
**Primary components:** API Gateway, Web Services, Policy Engine, Conseiller (Feedser), Excitator (VEXer), Orchestrator, Findings Ledger, Export Center, Authority & Tenancy, Console, CLI
**Surfaces:** OpenAPI 3.1 contracts, language SDKs (TS/Node, Python, Go, Java, C#), dev portal, examples, mock server, conformance tests, changelogs, versioning, deprecations
**Dependencies:** AuthorityBacked Scopes & Tenancy, CLI Parity, Export Center, Notifications Studio, AirGapped Mode, Observability
**AOC ground rule reminder:** Conseiller and Excitator aggregate and link advisories/VEX. They never merge or mutate source records. SDKs must preserve this invariant and expose source identity in all models.
---
## 1) What it is
A contractfirst program that standardizes StellaOps APIs with OpenAPI 3.1 and ships official, versioned SDKs for popular languages. It includes:
* A single **sourceoftruth OpenAPI** for each service and a canonical aggregate spec.
* **Generated SDKs** with idiomatic ergonomics, retries, auth helpers, pagination cursors, streaming downloads, and typed error envelopes.
* A **developer portal** with interactive reference, runnable examples, and “copycurl” snippets.
* **Mock server & conformance tests** so changes are validated against the contract before code ships.
* **Versioning & deprecation policy**, automated changelogs, and notification hooks.
* **Airgapped bundles** of docs and SDKs for disconnected environments.
Net result: partners and internal teams integrate quickly without reverseengineering request bodies from error logs.
---
## 2) Why
* Reduce friction and support load with a single, accurate contract.
* Make the platform extensible: third parties can build automation, dashboards, and policy pipelines without trawling source code.
* Enforce stability: contract linting and backwardscompat checks prevent accidental breakage.
* Bring CLI and Console parity to programmatic users through firstclass clients.
---
## 3) How it should work
### 3.1 Source of truth and layout
* Each service owns a **modulescoped OAS** file: `src/StellaOps.Api.OpenApi/<service>/openapi.yaml`.
* An aggregate spec `src/StellaOps.Api.OpenApi/stella.yaml` is produced by build tooling that composes perservice specs, resolves `$ref`s, and validates crossservice schemas.
* JSON Schema dialect: 202012 (OpenAPI 3.1). No vendorspecific features for core models.
* Every response and error has at least one **validated example**.
### 3.2 API conventions (normative)
* **Paths:** `/v1/{resource}`, plural nouns. Subresources use `/v1/resources/{id}/subresources`.
* **Identifiers:** `id` fields are ULID/UUIDv7 as strings.
* **Pagination:** cursorbased: `?cursor=<token>&limit=<n>`, response envelope includes `next_cursor`.
* **Sorting/filtering:** `?sort=field:asc|desc`, `?filter[field]=op:value` with documented operators.
* **Idempotency:** POST operations that create or mutate accept `Idempotency-Key`.
* **Errors:** single envelope:
```json
{
"error": {
"code": "STRING_CODE",
"message": "human friendly",
"details": { "field": "value" },
"trace_id": "..."
}
}
```
Standard codes include `AIRGAP_EGRESS_BLOCKED`, `POLICY_VIOLATION`, `NOT_FOUND`, `RATE_LIMITED`.
* **Auth:** OAuth2 client credentials and PAT. Scopes are explicit (see 14: AuthorityBacked Scopes). Tenancy via claims; optional override header: `X-Stella-Scope: tenant/<id>` if the token permits delegation.
* **Content negotiation:** JSON only for request/response unless endpoint is a stream or file download (`application/octet-stream`).
* **Longrunning operations:** either webhooks (if enabled) or polling via `operation_id` resource.
### 3.3 Versioning and deprecation
* **SemVer** for the aggregate API: `v1`, `v2` in base path.
* Backwardscompatible changes allowed in minor versions (add fields, new optional params).
* Breaking changes require new major version and coexistence for a **deprecation window** (min 12 months) with:
* Deprecation headers: `Deprecation: true`, `Sunset: <rfc1123 date>`, `Link: <doc rel="deprecation">`.
* Portal banners and Notifications Studio broadcast.
### 3.4 Governance and linting
* Enforce naming, pagination, error envelope, and example requirements via an OAS linter.
* CI gate: no PR merges if OAS validation fails or coverage < 100% for operation examples.
* **Compatibility check:** diff new OAS vs previous release, fail on breaking changes unless explicitly flagged.
### 3.5 SDK generation
* Initial languages: **TypeScript/Node**, **Python**, **Go**, **Java**. C# and Rust are followups.
* Generated via a stable, reproducible toolchain. Postgeneration patches are applied by templates, not hand edits.
* **Capabilities:**
* Auth helpers: PAT and OAuth2.
* Retries with decorrelated jitter and `RetryAfter` respect.
* Pluggable HTTP transport for proxies and airgapped environments.
* Binary download helpers and upload helpers for multipart endpoints.
* Paginators that yield items and handle `next_cursor`.
* Rich error types mapping `error.code` to language enums.
* Telemetry hooks (before/after request callbacks).
* **Packaging:**
* TS: npm package with ESM and CJS builds, types included.
* Python: PyPI package, Pydanticfriendly models, type hints.
* Go: module with contextaware methods and `io.Reader` streaming.
* Java: Maven coordinates, builder pattern, OkHttp/HTTP client provider.
* **Versioning:** SDK major matches API major. Minor/patch track generator changes only.
### 3.6 Dev portal and artifacts
* **Reference docs** autobuilt from the aggregate OAS with searchable nav, schema diagrams, and example blocks.
* **Tryit** panel wired to the sandbox environment (disabled in airgap).
* **Download center:** links to SDKs, changelogs, and Postman/HTTP collection exports.
* **.wellknown discovery:** `GET /.well-known/openapi` returns the canonical spec.
### 3.7 Conformance testing
* **Mock server** generated from OAS for contract tests.
* **Replay tests**: real services are validated against the OAS via request/response capture; deviations fail CI.
* **Golden examples**: every endpoint has recorded examples exercised in tests.
### 3.8 AirGapped support
* Export Center can build a **Docs & SDKs bundle**: `stella export devportal --offline`, including HTML docs, specs, and packages.
* SDKs avoid network discovery and accept explicit base URLs; no autoupdates.
### 3.9 Domainspecific notes
* **Conseiller/Excitator:** models expose `source_id`, `source_type`, `source_digest`. SDKs never hide source multiplicity.
* **Policy Engine:** policy documents are versioned; SDK supports dryrun/simulate endpoints with structured explanations.
* **Findings Ledger:** paginated listing includes stable, filterable fields for evidence export.
---
## 4) Architecture
### 4.1 New modules
* `src/StellaOps.Api.OpenApi/*` per service and aggregate composer
* `src/StellaOps.Api.Governance` OAS linter rules and compatibility checker
* `src/StellaOps.Sdk.Generator` codegen drivers, postprocessing templates, smoke tests
* `src/StellaOps.Sdk.Release` packaging, signing, publishing
* `src/StellaOps.DevPortal.Site` static generator and assets
* `test/contract` mock server config, golden examples
* `src/StellaOps.ExportCenter.DevPortalOffline` bundler
### 4.2 Build flow
1. Validate perservice specs → compose aggregate → lint → compatibility diff.
2. Generate SDKs → build → run languagelevel tests → publish to internal registry.
3. Build dev portal and publish.
4. Optionally build offline bundle.
### 4.3 Runtime contracts
* `GET /.well-known/openapi` per service and at the gateway.
* All services embed `x-stella-service` and `x-stella-version` extensions for traceability.
---
## 5) APIs and contracts (select)
* **Discovery**: `GET /.well-known/openapi` → JSON or YAML.
* **Errors**: standard envelope (see 3.2).
* **Rate limits**: expose `X-RateLimit-Limit`, `X-RateLimit-Remaining`, `X-RateLimit-Reset`.
* **Operations**: longrunning ops expose `operation_id` and `status` via `GET /v1/operations/{id}`.
---
## 6) Documentation changes
Create or update:
1. `/docs/api/overview.md`
* API surface, auth, tenancy, pagination, idempotency, rate limits.
2. `/docs/api/conventions.md`
* Path, naming, errors, filters, sorting, examples.
3. `/docs/api/versioning.md`
* SemVer policy, deprecation windows, headers, migration playbooks.
4. `/docs/api/reference/`
* Autogenerated OAS site; link into service pages.
5. `/docs/sdks/overview.md`
* Supported languages, install, helloworld, retry/auth patterns.
6. `/docs/sdks/typescript.md`, `/python.md`, `/go.md`, `/java.md`
* Languagespecific guides, snippets, paginator usage, streaming.
7. `/docs/devportal/publishing.md`
* Build pipeline, offline bundle steps.
8. `/docs/contributing/api-contracts.md`
* How to edit OAS, lint rules, compatibility checks, examples.
9. `/docs/testing/contract-testing.md`
* Mock server, golden examples, replay tests.
10. `/docs/security/auth-scopes.md`
* OAuth2, PAT, scope mapping, tenancy header.
11. `/docs/airgap/devportal-offline.md`
* Airgapped docs and SDK bundle usage.
Add the banner at the top of each page:
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 7) Implementation plan
### Phase 1 — Foundations
* Establish perservice OAS skeletons and the aggregate composer.
* Introduce linting and compatibility checks in CI.
* Define the standard error envelope and migrate services.
### Phase 2 — Reference & discovery
* Implement `/.well-known/openapi` for gateway and services.
* Build the dev portal with search, schema diagrams, and examples.
### Phase 3 — SDKs (TS, Python, Go, Java)
* Implement generator drivers and templates.
* Publish alpha packages internally; integrate in CLI and Console integration tests.
* Add paginators, retries, auth helpers, and streaming.
### Phase 4 — Conformance & examples
* Wire mock server into PR CI.
* Record golden example fixtures and replay tests against staging.
* Automate example extraction into docs.
### Phase 5 — Release automation & deprecation
* Automate changelogs from OAS diffs.
* Notifications Studio integration for API deprecations.
* Offline dev portal bundle through Export Center.
### Phase 6 — Followups
* C# and Rust SDKs, Postman/HTTP collections, sample apps repo.
---
## 8) Engineering tasks
**OAS & governance**
* [ ] Create `src/StellaOps.Api.OpenApi/<service>/openapi.yaml` for all services with minimal paths and shared components.
* [ ] Implement aggregate composer and `$ref` resolver.
* [ ] Add CI job: lint, validate, compatibility diff; block merges on failure.
* [ ] Migrate all endpoints to standard error envelope and provide examples.
**Discovery & portal**
* [ ] Implement `GET /.well-known/openapi` at service and gateway.
* [ ] Build dev portal: nav, search, schema viewer, tryit (nonprod), copycurl.
* [ ] Add version selector for v1/v2 specs.
**SDKs**
* [ ] Generator driver with pinned templates; forbid manual edits in generated folders.
* [ ] TS SDK: ESM/CJS build, treeshaking, paginator, middleware hooks.
* [ ] Python SDK: async and sync clients, type hints, file upload/download helpers.
* [ ] Go SDK: contextfirst API, streaming, error type mapping.
* [ ] Java SDK: builder pattern, HTTP client provider abstraction.
* [ ] Common: retries, `RetryAfter` handling, idempotency key helper, auth helpers, telemetry hooks.
* [ ] Languagespecific tests and smoke examples.
**Conformance**
* [ ] Mock server config with operation examples.
* [ ] Replay tests against staging; fail on schema drift.
* [ ] Golden example extraction pipeline.
**AirGapped**
* [ ] Export Center job: `devportal --offline` producing HTML docs, specs, and package artifacts.
* [ ] SDKs accept explicit base URLs; disable online discovery.
**Authority & Tenancy**
* [ ] Document scopes per endpoint in OAS (`securitySchemes` + `security` blocks).
* [ ] Implement optional `X-Stella-Scope` override with validation.
**Release automation**
* [ ] Version bump tooling for OAS and SDKs; SemVer aligned.
* [ ] Autogenerate `CHANGELOG.md` from OAS diffs.
* [ ] Publish to registries with signed artifacts and provenance.
**Docs**
* [ ] Author all pages listed in section 6; embed code snippets pulled from tested examples.
* [ ] Insert banner statement in each page.
**Testing**
* [ ] Contract tests in PR CI; 100% operation coverage with at least one example.
* [ ] Language SDK integration tests against mock server and staging.
* [ ] Backwardscompat test suite comparing last N releases.
---
## 9) Feature changes required in other components
* **Web Services:** unify on error envelope, pagination, idempotency handling, and deprecation headers.
* **CLI:** consume the official TS or Go SDK instead of bespoke HTTP calls; this enforces parity.
* **Console:** use SDKs for backend calls where appropriate; helps dogfood the clients.
* **Export Center:** add `devportal --offline` and package signing.
* **Observability:** include `x-stella-service` and API version attributes in spans; trace IDs mirrored in error responses.
* **Notifications Studio:** templates for API deprecations and SDK updates.
* **AirGapped Mode:** ship offline dev portal and SDKs bundle; console disables tryit.
> **Imposed rule reminder:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
## 10) Acceptance criteria
* Aggregate OpenAPI validates, lints cleanly, and covers 100% of public endpoints with examples.
* `/.well-known/openapi` available at gateway and service level.
* Dev portal builds with search, example blocks, and version selector.
* TS/Python/Go/Java SDKs publish successfully; each has paginators, retries, auth helpers, streaming, and typed errors.
* CLI integrations pass using SDKs.
* Contract tests run in PR CI; schema drift causes failures.
* Deprecation headers and Notifications Studio flow proven in a staged deprecation.
* Offline dev portal bundle exports and renders in a sealed environment.
---
## 11) Risks and mitigations
* **Spec drift vs code reality:** mockfirst development and replay tests keep services aligned with OAS.
* **Generator churn:** pin generator and templates; only update via planned minor releases.
* **Breaking changes under pressure:** enforce compatibility gate and documented exception process.
* **SDK ergonomics mismatch:** run languagenative design reviews with maintainers before GA.
* **Airgapped constraints:** prebuild full offline bundles; avoid dynamic CDN assets in docs.
---
## 12) Philosophy
* **Contract first, code second.** The spec is the product; servers and SDKs are implementations.
* **Stability over cleverness.** Boring, predictable APIs beat “magical” behavior.
* **Truth preservation.** Never hide or merge advisory/VEX sources; surface provenance everywhere.
* **Automation everywhere.** Humans shouldnt manually edit generated code or publish packages.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.

402
docs/api/policy.md Normal file
View File

@@ -0,0 +1,402 @@
# Policy Engine REST API
> **Audience:** Backend integrators, platform operators, and CI engineers invoking Policy Engine services programmatically.
> **Base URL:** `/api/policy/*` (internal gateway route) requires OAuth 2.0 bearer token issued by Authority with scopes listed below.
This document is the canonical reference for the Policy Engine REST surface described in Epic2 (Policy Engine v2). Use it alongside the [DSL](../policy/dsl.md), [Lifecycle](../policy/lifecycle.md), and [Runs](../policy/runs.md) guides for end-to-end implementations.
---
## 1·Authentication & Headers
- **Auth:** Bearer tokens (`Authorization: Bearer <token>`) with the following scopes as applicable:
- `policy:read`, `policy:write`, `policy:submit`, `policy:approve`, `policy:run`, `policy:activate`, `policy:archive`, `policy:simulate`, `policy:runs`
- `findings:read` (for effective findings APIs)
- `effective:write` (service identity only; not exposed to clients)
- **Service identity:** Authority marks the Policy Engine client with `properties.serviceIdentity: policy-engine`. Tokens missing this marker cannot obtain `effective:write`.
- **Tenant:** Supply tenant context via `X-Stella-Tenant`. Tokens without multi-tenant claims default to `default`.
- **Idempotency:** For mutating endpoints, include `Idempotency-Key` (UUID). Retries with same key return original result.
- **Content type:** All request/response bodies are `application/json; charset=utf-8` unless otherwise noted.
---
## 2·Error Model
All errors use HTTP semantics plus a structured payload:
```json
{
"code": "ERR_POL_001",
"message": "Policy syntax error",
"details": [
{"path": "rules[0].when", "error": "Unknown function foo()"}
],
"traceId": "01HDV1C4E9Z4T5G6H7J8",
"timestamp": "2025-10-26T14:07:03Z"
}
```
| Code | Meaning | Notes |
|------|---------|-------|
| `ERR_POL_001` | Policy syntax/compile error | Returned by `compile`, `submit`, `simulate`, `run` when DSL invalid. |
| `ERR_POL_002` | Policy not approved | Attempted to run or activate unapproved version. |
| `ERR_POL_003` | Missing inputs | Downstream service unavailable (Concelier/Excititor/SBOM). |
| `ERR_POL_004` | Determinism violation | Illegal API usage (wall-clock, RNG). Triggers incident mode. |
| `ERR_POL_005` | Unauthorized materialisation | Identity lacks `effective:write`. |
| `ERR_POL_006` | Run canceled or timed out | Includes cancellation metadata. |
---
## 3·Policy Management
### 3.1 Create Draft
```
POST /api/policy/policies
Scopes: policy:write
```
**Request**
```json
{
"policyId": "P-7",
"name": "Default Org Policy",
"description": "Baseline severity + VEX precedence",
"dsl": {
"syntax": "stella-dsl@1",
"source": "policy \"Default Org Policy\" syntax \"stella-dsl@1\" { ... }"
},
"tags": ["baseline","vex"]
}
```
**Response 201**
```json
{
"policyId": "P-7",
"version": 1,
"status": "draft",
"digest": "sha256:7e1d…",
"createdBy": "user:ali",
"createdAt": "2025-10-26T13:40:00Z"
}
```
### 3.2 List Policies
```
GET /api/policy/policies?status=approved&tenant=default&page=1&pageSize=25
Scopes: policy:read
```
Returns paginated list with `X-Total-Count` header.
### 3.3 Fetch Version
```
GET /api/policy/policies/{policyId}/versions/{version}
Scopes: policy:read
```
Returns full DSL, metadata, provenance, simulation artefact references.
### 3.4 Update Draft Version
```
PUT /api/policy/policies/{policyId}/versions/{version}
Scopes: policy:write
```
Body identical to create. Only permitted while `status=draft`.
---
## 4·Lifecycle Transitions
### 4.1 Submit for Review
```
POST /api/policy/policies/{policyId}/versions/{version}:submit
Scopes: policy:submit
```
**Request**
```json
{
"reviewers": ["user:kay","group:sec-reviewers"],
"notes": "Simulated on golden SBOM set (diff attached)",
"simulationArtifacts": [
"blob://policy/P-7/v3/simulations/2025-10-26.json"
]
}
```
**Response 202** submission recorded. `Location` header points to review resource.
### 4.2 Review Feedback
```
POST /api/policy/policies/{policyId}/versions/{version}/reviews
Scopes: policy:review
```
**Request**
```json
{
"decision": "approve", // approve | request_changes | comment
"note": "Looks good, ensure incident playbook covers reachability data.",
"blocking": false
}
```
### 4.3 Approve
```
POST /api/policy/policies/{policyId}/versions/{version}:approve
Scopes: policy:approve
```
Body requires approval note and confirmation of compliance gates:
```json
{
"note": "All simulations and determinism checks passed.",
"acknowledgeDeterminism": true,
"acknowledgeSimulation": true
}
```
### 4.4 Activate
```
POST /api/policy/policies/{policyId}/versions/{version}:activate
Scopes: policy:activate, policy:run
```
Marks version as active for tenant; triggers optional immediate full run (`"runNow": true`).
### 4.5 Archive
```
POST /api/policy/policies/{policyId}/versions/{version}:archive
Scopes: policy:archive
```
Request includes `reason` and optional `incidentId`.
---
## 5·Compilation & Validation
### 5.1 Compile
```
POST /api/policy/policies/{policyId}/versions/{version}:compile
Scopes: policy:write
```
**Response 200**
```json
{
"digest": "sha256:7e1d…",
"warnings": [],
"rules": {
"count": 24,
"actions": {
"block": 5,
"warn": 4,
"ignore": 3,
"requireVex": 2
}
}
}
```
### 5.2 Lint / Simulate Quick Check
```
POST /api/policy/policies/{policyId}/lint
Scopes: policy:write
```
Slim wrapper used by CLI; returns 204 on success or `ERR_POL_001` payload.
---
## 6·Run & Simulation APIs
> Schema reference: canonical policy run request/status/diff payloads ship with the Scheduler Models guide (`src/StellaOps.Scheduler.Models/docs/SCHED-MODELS-20-001-POLICY-RUNS.md`) and JSON fixtures under `samples/api/scheduler/policy-*.json`.
### 6.1 Trigger Run
```
POST /api/policy/policies/{policyId}/runs
Scopes: policy:run
```
**Request**
```json
{
"mode": "incremental", // full | incremental
"runId": "run:P-7:2025-10-26:auto", // optional idempotency key
"sbomSet": ["sbom:S-42","sbom:S-318"],
"env": {"exposure": "internet"},
"priority": "normal" // normal | high | emergency
}
```
**Response 202**
```json
{
"runId": "run:P-7:2025-10-26:auto",
"status": "queued",
"queuedAt": "2025-10-26T14:05:00Z"
}
```
### 6.2 Get Run Status
```
GET /api/policy/policies/{policyId}/runs/{runId}
Scopes: policy:runs
```
Includes status, stats, determinism hash, failure diagnostics.
### 6.3 List Runs
```
GET /api/policy/policies/{policyId}/runs?mode=incremental&status=failed&page=1&pageSize=20
Scopes: policy:runs
```
Supports filtering by `mode`, `status`, `from`/`to` timestamps, `tenant`.
### 6.4 Simulate
```
POST /api/policy/policies/{policyId}/simulate
Scopes: policy:simulate
```
**Request**
```json
{
"baseVersion": 3,
"candidateVersion": 4,
"sbomSet": ["sbom:S-42","sbom:S-318"],
"env": {"sealed": false},
"explain": true
}
```
**Response 200**
```json
{
"diff": {
"added": 12,
"removed": 8,
"unchanged": 657,
"bySeverity": {
"Critical": {"up": 1, "down": 0},
"High": {"up": 3, "down": 4}
}
},
"explainUri": "blob://policy/P-7/simulations/2025-10-26-4-vs-3.json"
}
```
### 6.5 Replay Run
```
POST /api/policy/policies/{policyId}/runs/{runId}:replay
Scopes: policy:runs, policy:simulate
```
Produces sealed bundle for determinism verification; returns location of bundle.
---
## 7·Effective Findings APIs
### 7.1 List Findings
```
GET /api/policy/findings/{policyId}?sbomId=S-42&status=affected&severity=High,Critical&page=1&pageSize=100
Scopes: findings:read
```
Response includes cursor-based pagination:
```json
{
"items": [
{
"findingId": "P-7:S-42:pkg:npm/lodash@4.17.21:CVE-2021-23337",
"status": "affected",
"severity": {"normalized": "High", "score": 7.5},
"sbomId": "sbom:S-42",
"advisoryIds": ["CVE-2021-23337"],
"vex": {"winningStatementId": "VendorX-123"},
"policyVersion": 4,
"updatedAt": "2025-10-26T14:06:01Z"
}
],
"nextCursor": "eyJwYWdlIjoxfQ=="
}
```
### 7.2 Fetch Explain Trace
```
GET /api/policy/findings/{policyId}/{findingId}/explain?mode=verbose
Scopes: findings:read
```
Returns rule hit sequence:
```json
{
"findingId": "P-7:S-42:pkg:npm/lodash@4.17.21:CVE-2021-23337",
"policyVersion": 4,
"steps": [
{"rule": "vex_precedence", "status": "not_affected", "inputs": {"statementId": "VendorX-123"}},
{"rule": "severity_baseline", "severity": {"normalized": "Low", "score": 3.4}}
],
"sealedHints": [{"message": "Using cached EPSS percentile from bundle 2025-10-20"}]
}
```
---
## 8·Events & Webhooks
- `policy.run.completed` emitted with `runId`, `policyId`, `mode`, `stats`, `determinismHash`.
- `policy.run.failed` includes error code, retry count, guidance.
- `policy.lifecycle.*` mirrored from lifecycle APIs (see [Lifecycle guide](../policy/lifecycle.md)).
- Webhook registration occurs via `/api/policy/webhooks` (future work, reserved). For now, integrate with Notifier streams documented in `/docs/notifications/*`.
---
## 9·Compliance Checklist
- [ ] **Scopes enforced:** Endpoint access requires correct Authority scope mapping (see `/src/StellaOps.Authority/TASKS.md`).
- [ ] **Schemas current:** JSON examples align with Scheduler Models (`SCHED-MODELS-20-001`) and Policy Engine DTOs; update when contracts change.
- [ ] **Error codes mapped:** `ERR_POL_*` table reflects implementation and CI tests cover edge cases.
- [ ] **Pagination documented:** List endpoints specify page/size and cursor semantics; responses include `X-Total-Count` or `nextCursor`.
- [ ] **Idempotency described:** Mutating endpoints mandate `Idempotency-Key`.
- [ ] **Offline parity noted:** Simulate/run endpoints explain `--sealed` behaviour and bundle generation.
- [ ] **Cross-links added:** References to lifecycle, runs, DSL, and observability docs verified.
---
*Last updated: 2025-10-26 (Sprint 20).*

View File

@@ -0,0 +1,168 @@
# StellaOps Architecture Overview (Sprint19)
> **Ownership:** Architecture Guild • Docs Guild
> **Audience:** Service owners, platform engineers, solution architects
> **Related:** [High-Level Architecture](../07_HIGH_LEVEL_ARCHITECTURE.md), [Concelier Architecture](../ARCHITECTURE_CONCELIER.md), [Policy Engine Architecture](policy-engine.md), [Aggregation-Only Contract](../ingestion/aggregation-only-contract.md)
This dossier summarises the end-to-end runtime topology after the Aggregation-Only Contract (AOC) rollout. It highlights where raw facts live, how ingest services enforce guardrails, and how downstream components consume those facts to derive policy decisions and user-facing experiences.
---
## 1·System landscape
```mermaid
graph TD
subgraph Edge["Clients & Automation"]
CLI[stella CLI]
UI[Console SPA]
APIClients[CI / API Clients]
end
Gateway[API Gateway<br/>(JWT + DPoP scopes)]
subgraph Scanner["Fact Collection"]
ScannerWeb[Scanner.WebService]
ScannerWorkers[Scanner.Workers]
Agent[Agent Runtime]
end
subgraph Ingestion["Aggregation-Only Ingestion (AOC)"]
Concelier[Concelier.WebService]
Excititor[Excititor.WebService]
RawStore[(MongoDB<br/>advisory_raw / vex_raw)]
end
subgraph Derivation["Policy & Overlay"]
Policy[Policy Engine]
Scheduler[Scheduler Services]
Notify[Notifier]
end
subgraph Experience["UX & Export"]
UIService[Console Backend]
Exporters[Export / Offline Kit]
end
Observability[Telemetry Stack]
CLI --> Gateway
UI --> Gateway
APIClients --> Gateway
Gateway --> ScannerWeb
ScannerWeb --> ScannerWorkers
ScannerWorkers --> Concelier
ScannerWorkers --> Excititor
Concelier --> RawStore
Excititor --> RawStore
RawStore --> Policy
Policy --> Scheduler
Policy --> Notify
Policy --> UIService
Scheduler --> UIService
UIService --> Exporters
Exporters --> CLI
Exporters --> Offline[Offline Kit]
Observability -.-> ScannerWeb
Observability -.-> Concelier
Observability -.-> Excititor
Observability -.-> Policy
Observability -.-> Scheduler
Observability -.-> Notify
```
Key boundaries:
- **AOC border.** Everything inside the Ingestion subgraph writes only immutable raw facts plus link hints. Derived severity, consensus, and risk remain outside the border.
- **Policy-only derivation.** Policy Engine materialises `effective_finding_*` collections and emits overlays; other services consume but never mutate them.
- **Tenant enforcement.** Authority-issued DPoP scopes flow through Gateway to every service; raw stores and overlays include `tenant` strictly.
---
## 2·Aggregation-Only Contract focus
### 2.1 Responsibilities at the boundary
| Area | Services | Responsibilities under AOC | Forbidden under AOC |
|------|----------|-----------------------------|---------------------|
| **Ingestion (Concelier / Excititor)** | `StellaOps.Concelier.WebService`, `StellaOps.Excititor.WebService` | Fetch upstream advisories/VEX, verify signatures, compute linksets, append immutable documents to `advisory_raw` / `vex_raw`, emit observability signals, expose raw read APIs. | Computing severity, consensus, suppressions, or policy hints; merging upstream sources into a single derived record; mutating existing documents. |
| **Policy & Overlay** | `StellaOps.Policy.Engine`, Scheduler | Join SBOM inventory with raw advisories/VEX, evaluate policies, issue `effective_finding_*` overlays, drive remediation workflows. | Writing to raw collections; bypassing guard scopes; running without recorded provenance. |
| **Experience layers** | Console, CLI, Exporters | Surface raw facts + policy overlays; run `stella aoc verify`; render AOC dashboards and reports. | Accepting ingestion payloads that lack provenance or violate guard results. |
### 2.2 Raw stores
| Collection | Purpose | Key fields | Notes |
|------------|---------|------------|-------|
| `advisory_raw` | Immutable vendor/ecosystem advisory documents. | `_id`, `tenant`, `source.*`, `upstream.*`, `content.raw`, `linkset`, `supersedes`. | Idempotent by `(source.vendor, upstream.upstream_id, upstream.content_hash)`. |
| `vex_raw` | Immutable vendor VEX statements. | Mirrors `advisory_raw`; `identifiers.statements` summarises affected components. | Maintains supersedes chain identical to advisory flow. |
| Change streams (`advisory_raw_stream`, `vex_raw_stream`) | Feed Policy Engine and Scheduler. | `operationType`, `documentKey`, `fullDocument`, `tenant`, `traceId`. | Scope filtered per tenant before delivery. |
### 2.3 Guarded ingestion sequence
```mermaid
sequenceDiagram
participant Upstream as Upstream Source
participant Connector as Concelier/Excititor Connector
participant Guard as AOCWriteGuard
participant Mongo as MongoDB (advisory_raw / vex_raw)
participant Stream as Change Stream
participant Policy as Policy Engine
Upstream-->>Connector: CSAF / OSV / VEX document
Connector->>Connector: Normalize transport, compute content_hash
Connector->>Guard: Candidate raw doc (source + upstream + content + linkset)
Guard-->>Connector: ERR_AOC_00x on violation
Guard->>Mongo: Append immutable document (with tenant & supersedes)
Mongo-->>Stream: Change event (tenant scoped)
Stream->>Policy: Raw delta payload
Policy->>Policy: Evaluate policies, compute effective findings
```
---
### 2.4 Authority scopes & tenancy
| Scope | Holder | Purpose | Notes |
|-------|--------|---------|-------|
| `advisory:write` / `vex:write` | Concelier / Excititor collectors | Append raw documents through ingestion endpoints. | Paired with tenant claims; requests without tenant are rejected. |
| `advisory:verify` / `vex:verify` | DevOps verify identity, CLI | Run `stella aoc verify` or call `/aoc/verify`. | Read-only; cannot mutate raw docs. |
| `effective:write` | Policy Engine | Materialise `effective_finding_*` overlays. | Only Policy Engine identity may hold; ingestion contexts receive `ERR_AOC_006` if they attempt. |
| `effective:read` | Console, CLI, exports | Consume derived findings. | Enforced by Gateway and downstream services. |
---
## 3·Data & control flow highlights
1. **Ingestion:** Concelier / Excititor connectors fetch upstream documents, compute linksets, and hand payloads to `AOCWriteGuard`. Guards validate schema, provenance, forbidden fields, supersedes pointers, and append-only rules before writing to Mongo.
2. **Verification:** `stella aoc verify` (CLI/CI) and `/aoc/verify` endpoints replay guard checks against stored documents, mapping `ERR_AOC_00x` codes to exit codes for automation.
3. **Policy evaluation:** Mongo change streams deliver tenant-scoped raw deltas. Policy Engine joins SBOM inventory (via BOM Index), executes deterministic policies, writes overlays, and emits events to Scheduler/Notify.
4. **Experience surfaces:** Console renders an AOC dashboard showing ingestion latency, guard violations, and supersedes depth. CLI exposes raw-document fetch helpers for auditing. Offline Kit bundles raw collections alongside guard configs to keep air-gapped installs verifiable.
5. **Observability:** All services emit `ingestion_write_total`, `aoc_violation_total{code}`, `ingestion_latency_seconds`, and trace spans `ingest.fetch`, `ingest.transform`, `ingest.write`, `aoc.guard`. Logs correlate via `traceId`, `tenant`, `source.vendor`, and `content_hash`.
---
## 4·Offline & disaster readiness
- **Offline Kit:** Packages raw Mongo snapshots (`advisory_raw`, `vex_raw`) plus guard configuration and CLI verifier binaries so air-gapped sites can re-run AOC checks before promotion.
- **Recovery:** Supersedes chains allow rollback to prior revisions without mutating documents. Disaster exercises must rehearse restoring from snapshot, replaying change streams into Policy Engine, and re-validating guard compliance.
- **Migration:** Legacy normalised fields are moved to temporary views during cutover; ingestion runtime removes writes once guard-enforced path is live (see [Migration playbook](../ingestion/aggregation-only-contract.md#8-migration-playbook)).
---
## 5·References
- [Aggregation-Only Contract reference](../ingestion/aggregation-only-contract.md)
- [Concelier architecture](../ARCHITECTURE_CONCELIER.md)
- [Excititor architecture](../ARCHITECTURE_EXCITITOR.md)
- [Policy Engine architecture](policy-engine.md)
- [Authority service](../ARCHITECTURE_AUTHORITY.md)
- [Observability standards (upcoming)](../observability/policy.md) interim reference for telemetry naming.
---
## 6·Compliance checklist
- [ ] AOC guard enabled for all Concelier and Excititor write paths in production.
- [ ] Mongo schema validators deployed for `advisory_raw` and `vex_raw`; change streams scoped per tenant.
- [ ] Authority scopes (`advisory:*`, `vex:*`, `effective:*`) configured in Gateway and validated via integration tests.
- [ ] `stella aoc verify` wired into CI/CD pipelines with seeded violation fixtures.
- [ ] Console AOC dashboard and CLI documentation reference the new ingestion contract.
- [ ] Offline Kit bundles include guard configs, verifier tooling, and documentation updates.
- [ ] Observability dashboards include violation, latency, and supersedes depth metrics with alert thresholds.
---
*Last updated: 2025-10-26 (Sprint19).*

View File

@@ -0,0 +1,243 @@
# Policy Engine Architecture (v2)
> **Ownership:** Policy Guild • Platform Guild
> **Services:** `StellaOps.Policy.Engine` (Minimal API + worker host)
> **Data Stores:** MongoDB (`policies`, `policy_runs`, `effective_finding_*`), Object storage (explain bundles), optional NATS/Mongo queue
> **Related docs:** [Policy overview](../policy/overview.md), [DSL](../policy/dsl.md), [Lifecycle](../policy/lifecycle.md), [Runs](../policy/runs.md), [REST API](../api/policy.md), [Policy CLI](../cli/policy.md), [Architecture overview](../architecture/overview.md), [AOC reference](../ingestion/aggregation-only-contract.md)
This dossier describes the internal structure of the Policy Engine service delivered in Epic2. It focuses on module boundaries, deterministic evaluation, orchestration, and integration contracts with Concelier, Excititor, SBOM Service, Authority, Scheduler, and Observability stacks.
The service operates strictly downstream of the **Aggregation-Only Contract (AOC)**. It consumes immutable `advisory_raw` and `vex_raw` documents emitted by Concelier and Excititor, derives findings inside Policy-owned collections, and never mutates ingestion stores. Refer to the architecture overview and AOC reference for system-wide guardrails and provenance obligations.
---
## 1·Responsibilities & Constraints
- Compile and evaluate `stella-dsl@1` policy packs into deterministic verdicts.
- Join SBOM inventory, Concelier advisories, and Excititor VEX evidence via canonical linksets and equivalence tables.
- Materialise effective findings (`effective_finding_{policyId}`) with append-only history and produce explain traces.
- Operate incrementally: react to change streams (advisory/vex/SBOM deltas) with ≤5min SLA.
- Provide simulations with diff summaries for UI/CLI workflows without modifying state.
- Enforce strict determinism guard (no wall-clock, RNG, network beyond allow-listed services) and RBAC + tenancy via Authority scopes.
- Support sealed/air-gapped deployments with offline bundles and sealed-mode hints.
Non-goals: policy authoring UI (handled by Console), ingestion or advisory normalisation (Concelier), VEX consensus (Excititor), runtime enforcement (Zastava).
---
## 2·High-Level Architecture
```mermaid
graph TD
subgraph Clients
CLI[stella CLI]
UI[Console Policy Editor]
CI[CI Pipelines]
end
subgraph PolicyEngine["StellaOps.Policy.Engine"]
API[Minimal API Host]
Orchestrator[Run Orchestrator]
WorkerPool[Evaluation Workers]
Compiler[DSL Compiler Cache]
Materializer[Effective Findings Writer]
end
subgraph RawStores["Raw Stores (AOC)"]
AdvisoryRaw[(MongoDB<br/>advisory_raw)]
VexRaw[(MongoDB<br/>vex_raw)]
end
subgraph Derived["Derived Stores"]
Mongo[(MongoDB<br/>policies / policy_runs / effective_finding_*)]
Blob[(Object Store / Evidence Locker)]
Queue[(Mongo Queue / NATS)]
end
Concelier[(Concelier APIs)]
Excititor[(Excititor APIs)]
SBOM[(SBOM Service)]
Authority[(Authority / DPoP Gateway)]
CLI --> API
UI --> API
CI --> API
API --> Compiler
API --> Orchestrator
Orchestrator --> Queue
Queue --> WorkerPool
Concelier --> AdvisoryRaw
Excititor --> VexRaw
WorkerPool --> AdvisoryRaw
WorkerPool --> VexRaw
WorkerPool --> SBOM
WorkerPool --> Materializer
Materializer --> Mongo
WorkerPool --> Blob
API --> Mongo
API --> Blob
API --> Authority
Orchestrator --> Mongo
Authority --> API
```
Key notes:
- API host exposes lifecycle, run, simulate, findings endpoints with DPoP-bound OAuth enforcement.
- Orchestrator manages run scheduling/fairness; writes run tickets to queue, leases jobs to worker pool.
- Workers evaluate policies using cached IR; join external services via tenant-scoped clients; pull immutable advisories/VEX from the raw stores; write derived overlays to Mongo and optional explain bundles to blob storage.
- Observability (metrics/traces/logs) integrated via OpenTelemetry (not shown).
---
### 2.1·AOC inputs & immutability
- **Raw-only reads.** Evaluation workers access `advisory_raw` / `vex_raw` via tenant-scoped Mongo clients or the Concelier/Excititor raw APIs. No Policy Engine component is permitted to mutate these collections.
- **Guarded ingestion.** `AOCWriteGuard` rejects forbidden fields before data reaches the raw stores. Policy tests replay known `ERR_AOC_00x` violations to confirm ingestion compliance.
- **Change streams as contract.** Run orchestration stores resumable cursors for raw change streams. Replays of these cursors (e.g., after failover) must yield identical materialisation outcomes.
- **Derived stores only.** All severity, consensus, and suppression state lives in `effective_finding_*` collections and explain bundles owned by Policy Engine. Provenance fields link back to raw document IDs so auditors can trace every verdict.
- **Authority scopes.** Only the Policy Engine service identity holds `effective:write`. Ingestion identities retain `advisory:*`/`vex:*` scopes, ensuring separation of duties enforced by Authority and the API Gateway.
---
## 3·Module Breakdown
| Module | Responsibility | Notes |
|--------|----------------|-------|
| **Configuration** (`Configuration/`) | Bind settings (Mongo URIs, queue options, service URLs, sealed mode), validate on start. | Strict schema; fails fast on missing secrets. |
| **Authority Client** (`Authority/`) | Acquire tokens, enforce scopes, perform DPoP key rotation. | Only service identity uses `effective:write`. |
| **DSL Compiler** (`Dsl/`) | Parse, canonicalise, IR generation, checksum caching. | Uses Roslyn-like pipeline; caches by `policyId+version+hash`. |
| **Selection Layer** (`Selection/`) | Batch SBOM ↔ advisory ↔ VEX joiners; apply equivalence tables; support incremental cursors. | Deterministic ordering (SBOM → advisory → VEX). |
| **Evaluator** (`Evaluation/`) | Execute IR with first-match semantics, compute severity/trust/reachability weights, record rule hits. | Stateless; all inputs provided by selection layer. |
| **Materialiser** (`Materialization/`) | Upsert effective findings, append history, manage explain bundle exports. | Mongo transactions per SBOM chunk. |
| **Orchestrator** (`Runs/`) | Change-stream ingestion, fairness, retry/backoff, queue writer. | Works with Scheduler Models DTOs. |
| **API** (`Api/`) | Minimal API endpoints, DTO validation, problem responses, idempotency. | Generated clients for CLI/UI. |
| **Observability** (`Telemetry/`) | Metrics (`policy_run_seconds`, `rules_fired_total`), traces, structured logs. | Sampled rule-hit logs with redaction. |
| **Offline Adapter** (`Offline/`) | Bundle export/import (policies, simulations, runs), sealed-mode enforcement. | Uses DSSE signing via Signer service. |
---
## 4·Data Model & Persistence
### 4.1 Collections
- `policies` policy versions, metadata, lifecycle states, simulation artefact references.
- `policy_runs` run records, inputs (cursors, env), stats, determinism hash, run status.
- `policy_run_events` append-only log (queued, leased, completed, failed, canceled, replay).
- `effective_finding_{policyId}` current verdict snapshot per finding.
- `effective_finding_{policyId}_history` append-only history (previous verdicts, timestamps, runId).
- `policy_reviews` review comments/decisions.
### 4.2 Schema Highlights
- Run records include `changeDigests` (hash of advisory/VEX inputs) for replay verification.
- Effective findings store provenance references (`advisory_raw_ids`, `vex_raw_ids`, `sbom_component_id`).
- All collections include `tenant`, `policyId`, `version`, `createdAt`, `updatedAt`, `traceId` for audit.
### 4.3 Indexing
- Compound indexes: `{tenant, policyId, status}` on `policies`; `{tenant, policyId, status, startedAt}` on `policy_runs`; `{policyId, sbomId, findingKey}` on findings.
- TTL indexes on transient explain bundle references (configurable).
---
## 5·Evaluation Pipeline
```mermaid
sequenceDiagram
autonumber
participant Worker as EvaluationWorker
participant Compiler as CompilerCache
participant Selector as SelectionLayer
participant Eval as Evaluator
participant Mat as Materialiser
participant Expl as ExplainStore
Worker->>Compiler: Load IR (policyId, version, digest)
Compiler-->>Worker: CompiledPolicy (cached or compiled)
Worker->>Selector: Fetch tuple batches (sbom, advisory, vex)
Selector-->>Worker: Deterministic batches (1024 tuples)
loop For each batch
Worker->>Eval: Execute rules (batch, env)
Eval-->>Worker: Verdicts + rule hits
Worker->>Mat: Upsert effective findings
Mat-->>Worker: Success
Worker->>Expl: Persist sampled explain traces (optional)
end
Worker->>Mat: Append history + run stats
Worker-->>Worker: Compute determinism hash
Worker->>+Mat: Finalize transaction
Mat-->>Worker: Ack
```
Determinism guard instrumentation wraps the evaluator, rejecting access to forbidden APIs and ensuring batch ordering remains stable.
---
## 6·Run Orchestration & Incremental Flow
- **Change streams:** Concelier and Excititor publish document changes to the scheduler queue (`policy.trigger.delta`). Payload includes `tenant`, `source`, `linkset digests`, `cursor`.
- **Orchestrator:** Maintains per-tenant backlog; merges deltas until time/size thresholds met, then enqueues `PolicyRunRequest`.
- **Queue:** Mongo queue with lease; each job assigned `leaseDuration`, `maxAttempts`.
- **Workers:** Lease jobs, execute evaluation pipeline, report status (success/failure/canceled). Failures with recoverable errors requeue with backoff; determinism or schema violations mark job `failed` and raise incident event.
- **Fairness:** Round-robin per `{tenant, policyId}`; emergency jobs (`priority=emergency`) jump queue but limited via circuit breaker.
- **Replay:** On demand, orchestrator rehydrates run via stored cursors and exports sealed bundle for audit/CI determinism checks.
---
## 7·Security & Tenancy
- **Auth:** All API calls pass through Authority gateway; DPoP tokens enforced for service-to-service (Policy Engine service principal). CLI/UI tokens include scope claims.
- **Scopes:** Mutations require `policy:*` scopes corresponding to action; `effective:write` restricted to service identity.
- **Tenancy:** All queries filter by `tenant`. Service identity uses `tenant-global` for shared policies; cross-tenant reads prohibited unless `policy:tenant-admin` scope present.
- **Secrets:** Configuration loaded via environment variables or sealed secrets; runtime avoids writing secrets to logs.
- **Determinism guard:** Static analyzer prevents referencing forbidden namespaces; runtime guard intercepts `DateTime.Now`, `Random`, `Guid`, HTTP clients beyond allow-list.
- **Sealed mode:** Global flag disables outbound network except allow-listed internal hosts; watchers fail fast if unexpected egress attempted.
---
## 8·Observability
- Metrics:
- `policy_run_seconds{mode,tenant,policy}` (histogram)
- `policy_run_queue_depth{tenant}`
- `policy_rules_fired_total{policy,rule}`
- `policy_vex_overrides_total{policy,vendor}`
- Logs: Structured JSON with `traceId`, `policyId`, `version`, `runId`, `tenant`, `phase`. Guard ensures no sensitive data leakage.
- Traces: Spans `policy.select`, `policy.evaluate`, `policy.materialize`, `policy.simulate`. Trace IDs surfaced to CLI/UI.
- Incident mode toggles 100% sampling and extended retention windows.
---
## 9·Offline / Bundle Integration
- **Imports:** Offline Kit delivers policy packs, advisory/VEX snapshots, SBOM updates. Policy Engine ingests bundles via `offline import`.
- **Exports:** `stella policy bundle export` packages policy, IR digest, simulations, run metadata; UI provides export triggers.
- **Sealed hints:** Explain traces annotate when cached values used (EPSS, KEV). Run records mark `env.sealed=true`.
- **Sync cadence:** Operators perform monthly bundle sync; Policy Engine warns when snapshots > configured staleness (default 14days).
---
## 10·Testing & Quality
- **Unit tests:** DSL parsing, evaluator semantics, guard enforcement.
- **Integration tests:** Joiners with sample SBOM/advisory/VEX data; materialisation with deterministic ordering; API contract tests generated from OpenAPI.
- **Property tests:** Ensure rule evaluation deterministic across permutations.
- **Golden tests:** Replay recorded runs, compare determinism hash.
- **Performance tests:** Evaluate 100k component / 1M advisory dataset under warmed caches (<30s full run).
- **Chaos hooks:** Optional toggles to simulate upstream latency/failures; used in staging.
---
## 11·Compliance Checklist
- [ ] **Determinism guard enforced:** Static analyzer + runtime guard block wall-clock, RNG, unauthorized network calls.
- [ ] **Incremental correctness:** Change-stream cursors stored and replayed during tests; unit/integration coverage for dedupe.
- [ ] **RBAC validated:** Endpoint scope requirements match Authority configuration; integration tests cover deny/allow.
- [ ] **AOC separation enforced:** No code path writes to `advisory_raw` / `vex_raw`; integration tests capture `ERR_AOC_00x` handling; read-only clients verified.
- [ ] **Effective findings ownership:** Only Policy Engine identity holds `effective:write`; unauthorized callers receive `ERR_AOC_006`.
- [ ] **Observability wired:** Metrics/traces/logs exported with correlation IDs; dashboards include `aoc_violation_total` and ingest latency panels.
- [ ] **Offline parity:** Sealed-mode tests executed; bundle import/export flows documented and validated.
- [ ] **Schema docs synced:** DTOs match Scheduler Models (`SCHED-MODELS-20-001`); JSON schemas committed.
- [ ] **Security reviews complete:** Threat model (including queue poisoning, determinism bypass, data exfiltration) documented; mitigations in place.
- [ ] **Disaster recovery rehearsed:** Run replay+rollback procedures tested and recorded.
---
*Last updated: 2025-10-26 (Sprint 19).*

View File

@@ -0,0 +1,135 @@
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
---
# Epic 19: Attestor Console
**Short name:** Attestor Console
**Primary components:** Attestation Service, Policy Studio, Authority & Tenancy, Export Center, Observability, CLI, StellaOps Console
**Surfaces:** Web UI, REST APIs, CLI, SDKs, docs
**AOC ground rule reminder:** Conseiller and Excitator only aggregate and link. They never merge sources. Attestations must reference the exact subject digests and their source provenance. No collapsing of distinct advisories or VEX statements.
---
## 1) What it is
Attestor Console centralizes creation, verification, browsing, and operationalization of software supply chain attestations within StellaOps. It covers build provenance, SBOM attestations, VEX statements, scan results, policy evaluations, risk profile evidence, and custom facts—all signed and verifiable end-to-end.
---
## 2) Why
Trustworthy pipelines need signed evidence, repeatable verification, explainability, and policy enforcement. Attestor Console weaves attestations into the rest of Stella (AOC, Policy Studio gates, Risk Profiles, Notifications, Export Center) to reduce blind spots and accelerate compliance.
---
## 3) How it should work
### 3.1 Roles, identities, scopes
* Subjects: immutable digests for artifacts (images, packages, SBOMs, findings).
* Issuers: identities that sign attestations (builders, scanners, policy engines).
* Scopes: tenant/project/environment context enforced by Authority & Tenancy.
### 3.2 Payload types
Supports DSSE envelopes for BuildProvenance v1, SBOMAttestation v1, VEXAttestation v1, ScanResults v1, PolicyEvaluation v1, RiskProfileEvidence v1, and CustomEvidence v1. All payloads include subject, issuer, scope, materials, provenance, policy context, and versioned schemas.
### 3.3 Envelope & signature model
DSSE envelopes with multi-signature support, Ed25519/ECDSA keys, KMS/HSM/FIDO2 drivers, transparency log witnesses, and detached payload storage. Identity policies ensure least privilege and traceability.
### 3.4 Verification pipeline
Runs at ingest, policy gates, and interactively. Steps: resolve subject, fetch envelopes + witness proofs, validate DSSE structure/signatures, evaluate issuer trust and policies, produce cached verification reports.
### 3.5 Verification policies
Policy Studio authored rules covering required evidence types, allowed issuers, freshness, transparency requirements, signature counts, and waivers. Supports scoped overrides and defaults.
### 3.6 UI workflows
Evidence Browser, Verification Reports, Chain of Custody Graph, Key & Issuer Management, Attestation Workbench, and Bulk Verification views in Console.
### 3.7 CLI & SDK
Commands: `stella attest sign`, `verify`, `list`, `fetch`, `key create/import/rotate/revoke`. SDKs expose Sign/Verify/List/Fetch APIs.
### 3.8 Data model
Tables for attestations, issuers, verification reports, transparency index, key store. Indexed by subject digest, type, issuer, and timestamps.
### 3.9 Storage & air gap
Store envelopes in CAS object storage; optionally mirror transparency logs. `stella export attestation-bundle` enables offline transfer. Policies can relax witness requirements for sealed environments while logging the gap.
### 3.10 Observability & security
Spans, metrics, logs for signing and verification. Private keys never leave KMS/HSM. Revocation/rotation supported. Verification rejects mismatched subjects and ensures AOC invariants for scan/VEX evidence.
### 3.11 Performance
Use compressed JSON payloads, cached verification results, batched operations, and concurrency controls. P95 target: 1k envelopes/min on a single worker.
---
## 4) Architecture
New services (`src/StellaOps.Attestor/`), libraries (`src/StellaOps.Attestor.Envelope/`, `src/StellaOps.Attestor.Types/`, `src/StellaOps.Attestor.Verify/`), CLI (`src/StellaOps.Cli/`), export tooling (`src/StellaOps.ExportCenter.AttestationBundles/`), and shared KMS providers (`src/StellaOps.Cryptography.Kms/`). REST endpoints documented in OpenAPI.
---
## 5) Documentation changes
Requires new pages for overview, payloads, policies, workflows, key management, transparency, air gap, console UI, CLI, and updated security invariants, all with the imposed rule banner.
---
## 6) Implementation plan
Six phases: Foundations; Policies & UI; Scan & VEX support; Transparency & keys; Bulk & air gap; Performance & hardening.
---
## 7) Engineering tasks
Detailed tasks across envelope/crypto, payload schemas, APIs, Policy Studio integration, Console UI, CLI, transparency, bulk operations, observability, security, docs, and testing.
---
## 8) Feature changes required elsewhere
Policy Studio (VerificationPolicy), Export Center (attestation bundles), Authority & Tenancy, SBOM/Vulnerability explorers, Notifications, Observability. All must inherit the imposed rule and update docs accordingly.
---
## 9) Acceptance criteria
Signing and verification for all payloads, policy enforcement, Console views, bulk verification, export/import for air gap, observability coverage, AOC invariants respected, documented OpenAPI endpoints. Tests and performance targets met.
---
## 10) Risks & mitigations
Key compromise, parsing bugs, policy complexity, transparency outages addressed via rotation workflows, fuzz tests, curated starter policies, and fallback/mirroring strategies.
---
## 11) Philosophy
Evidence first, scoped identities, cheap verification, portable attestations, truth preservation.
---
## 12) Examples
Includes abbreviated SBOM attestation and verification report JSON samples illustrating required fields and outcomes.
---
## 13) Cross-epic documentation updates
Cross-link Attestor docs from Policy Studio, Export Center, Air-Gapped, Observability, Risk Profiles, SBOM Graph, and Vulnerability Explorer pages, maintaining the imposed rule banner.
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.

View File

@@ -0,0 +1,17 @@
# Backlog Cleanup — 26 October 2025
This note captures the Sprint backlog hygiene pass applied on 26 October 2025. The goal was to eliminate legacy tasks that violated the aggregation-only contract (AOC), duplicated scope, or conflicted with the current module ownership map.
## Summary
- **Console replaces legacy Angular UI.** Sprint 13 UI tasks (`UI-SCANS-13-002`, `UI-VEX-13-003`, `UI-ADMIN-13-004`, `UI-SCHED-13-005`) are retired. Console Sprint 23 (`CONSOLE-CORE-23-001..005`, `CONSOLE-FEAT-23-101..109`, `CONSOLE-REL-23-301..303`) owns the experience.
- **Policy CLI runtime verbs consolidated.** `CLI-RUNTIME-13-005` is superseded by `CLI-POLICY-20-002` and Policy Studio flows (`CLI-POLICY-27-00x`).
- **Notifier supersedes legacy Notify.* modules.** All Sprint 15 `StellaOps.Notify.*` tasks are archived. Replacement work lives in Notifications Studio / Notifier Sprints 3840 (`NOTIFY-SVC-38-00x`, `NOTIFY-SVC-39-00x`, `NOTIFY-SVC-40-00x`, plus `WEB/CLI-NOTIFY-3x-00x`).
- **Cartographer owns graph construction.** `SBOM-GRAPH-24-00{1..4}` tasks are deleted from SBOM Service; Cartographer backlog (`CARTO-GRAPH-21-001..009`) covers graph storage, overlays, and tiling.
- **Dedicated Vuln Explorer service.** Gateway/UI/CLI entries that attempted to inline Vuln Explorer logic (`WEB-GRAPH-24-003`, `UI-GRAPH-24-005`, `CLI-VULN-24-003`) now defer to Sprint 29 Vuln Explorer (`VULN-API-29-00x`, `CONSOLE-VULN-29-00x`, `CLI-VULN-29-00x`).
- **AOC enforcement.** Ingestion-layer tasks attempting to compute derived severity/safe-fix metadata (`CONCELIER-VULN-29-003`, `EXCITITOR-VULN-29-003`) were removed; the Policy Engine overlay backlog (`POLICY-ENGINE-29-001..003`) is the canonical home.
- **CI/Offline adjustments.** `DEVOPS-UI-13-006` and `DEVOPS-OFFLINE-18-003` moved under Console release tasks (`CONSOLE-QA-23-401`, `DEVOPS-CONSOLE-23-001`, `CONSOLE-REL-23-302`).
## Follow-up
- Update module task boards only under their active backlogs (`src/StellaOps.Notifier`, Cartographer, Vuln Explorer).
- Ensure future ingestion tasks reference AOC guardrails and avoid derived semantics.
- Cross-check `SPRINTS.md` after adding new tasks to keep tables consistent with module `TASKS.md` files.

View File

@@ -8,7 +8,9 @@
| `DOCKER_HOST` | How containers reach your Docker daemon (because we no longer mount `/var/run/docker.sock`) | `tcp://docker:2375` |
| `WORKSPACE` | Directory where the pipeline stores artefacts (SBOM file) | `$(pwd)` |
| `IMAGE` | The image you are building & scanning | `acme/backend:sha-${COMMIT_SHA}` |
| `SBOM_FILE` | Immutable SBOM name `<image-ref>YYYYMMDDThhmmssZ.sbom.json` | `acme_backend_shaabc12320250804T153050Z.sbom.json` |
| `SBOM_FILE` | Immutable SBOM name `<image-ref>YYYYMMDDThhmmssZ.sbom.json` | `acme_backend_shaabc12320250804T153050Z.sbom.json` |
> **Authority graph scopes note (20251027):** CI stages that spin up the Authority compose profile now rely on the checked-in `etc/authority.yaml`. Before running integration smoke jobs, inject real secrets for every `etc/secrets/*.secret` file (Cartographer, Graph API, Policy Engine, Concelier, Excititor). The repository defaults contain `*-change-me` placeholders and Authority will reject tokens if those secrets are not overridden.
```bash
export STELLA_URL="stella-ops.ci.acme.example"
@@ -291,6 +293,40 @@ Host the resulting bundle via any static file server for review (for example `py
- [ ] Markdown link check (`npx markdown-link-check`) reports no broken references.
- [ ] Preview bundle archived (or attached) for stakeholders.
### 4.5 Policy DSL lint stage
Policy Engine v2 pipelines now fail fast if policy documents are malformed. After checkout and dotnet restore, run:
```bash
dotnet run \
--project tools/PolicyDslValidator/PolicyDslValidator.csproj \
-- \
--strict docs/examples/policies/*.yaml
```
- `--strict` treats warnings as errors so missing metadata doesnt slip through.
- The validator accepts globs, so you can point it at tenant policy directories later (`policies/**/*.yaml`).
- Exit codes follow UNIX conventions: `0` success, `1` parse/errors, `2` warnings when `--strict` is set, `64` usage mistakes.
Capture the validator output as part of your build logs; Support uses it when triaging policy rollout issues.
### 4.6 Policy simulation smoke
Catch unexpected policy regressions by exercising a small set of golden SBOM findings via the simulation smoke tool:
```bash
dotnet run \
--project tools/PolicySimulationSmoke/PolicySimulationSmoke.csproj \
-- \
--scenario-root samples/policy/simulations \
--output artifacts/policy-simulations
```
- The tool loads each `scenario.json` under `samples/policy/simulations`, evaluates the referenced policy, and fails the build if projected verdicts change.
- In CI the command runs twice (to `run1/` and `run2/`) and `diff -u` compares the summaries—any mismatch signals a determinism regression.
- Artifacts land in `artifacts/policy-simulations/policy-simulation-summary.json`; upload them for later inspection (see CI workflow).
- Expand scenarios by copying real-world findings into the samples directory—ensure expected statuses are recorded so regressions trip the pipeline.
---
## 5·Troubleshooting cheatsheet

284
docs/cli/cli-reference.md Normal file
View File

@@ -0,0 +1,284 @@
# CLI AOC Commands Reference
> **Audience:** DevEx engineers, operators, and CI authors integrating the `stella` CLI with Aggregation-Only Contract (AOC) workflows.
> **Scope:** Command synopsis, options, exit codes, and offline considerations for `stella sources ingest --dry-run` and `stella aoc verify` as introduced in Sprint19.
Both commands are designed to enforce the AOC guardrails documented in the [aggregation-only reference](../ingestion/aggregation-only-contract.md) and the [architecture overview](../architecture/overview.md). They consume Authority-issued tokens with tenant scopes and never mutate ingestion stores.
---
## 1·Prerequisites
- CLI version: `stella`0.19.0 (AOC feature gate enabled).
- Required scopes (DPoP-bound):
- `advisory:verify` for Concelier sources.
- `vex:verify` for Excititor sources (optional but required for VEX checks).
- `tenant:select` if your deployment uses tenant switching.
- Connectivity: direct access to Concelier/Excititor APIs or Offline Kit snapshot (see §4).
- Environment: set `STELLA_AUTHORITY_URL`, `STELLA_TENANT`, and export a valid OpTok via `stella auth login` or existing token cache.
---
## 2·`stella sources ingest --dry-run`
### 2.1Synopsis
```bash
stella sources ingest --dry-run \
--source <source-key> \
--input <path-or-uri> \
[--tenant <tenant-id>] \
[--format json|table] \
[--no-color] \
[--output <file>]
```
### 2.2Description
Previews an ingestion write without touching MongoDB. The command loads an upstream advisory or VEX document, computes the would-write payload, runs it through the `AOCWriteGuard`, and reports any forbidden fields, provenance gaps, or idempotency issues. Use it during connector development, CI validation, or while triaging incidents.
### 2.3Options
| Option | Description |
|--------|-------------|
| `--source <source-key>` | Logical source name (`redhat`, `ubuntu`, `osv`, etc.). Mirrors connector configuration. |
| `--input <path-or-uri>` | Path to local CSAF/OSV/VEX file or HTTPS URI. CLI normalises transport (gzip/base64) before guard evaluation. |
| `--tenant <tenant-id>` | Overrides default tenant for multi-tenant deployments. Mandatory when `STELLA_TENANT` is not set. |
| `--format json|table` | Output format. `table` (default) prints summary with highlighted violations; `json` emits machine-readable report (see below). |
| `--no-color` | Disables ANSI colour output for CI logs. |
| `--output <file>` | Writes the JSON report to file while still printing human-readable summary to stdout. |
### 2.4Output schema (JSON)
```json
{
"source": "redhat",
"tenant": "default",
"guardVersion": "1.0.0",
"status": "ok",
"document": {
"contentHash": "sha256:…",
"supersedes": null,
"provenance": {
"signature": { "format": "pgp", "present": true }
}
},
"violations": []
}
```
When violations exist, `status` becomes `error` and `violations` contains entries with `code` (`ERR_AOC_00x`), a short `message`, and JSON Pointer `path` values indicating offending fields.
### 2.5Exit codes
| Exit code | Meaning |
|-----------|---------|
| `0` | Guard passed; would-write payload is AOC compliant. |
| `11` | `ERR_AOC_001` Forbidden field (`severity`, `cvss`, etc.) detected. |
| `12` | `ERR_AOC_002` Merge attempt (multiple upstream sources fused). |
| `13` | `ERR_AOC_003` Idempotency violation (duplicate without supersedes). |
| `14` | `ERR_AOC_004` Missing provenance fields. |
| `15` | `ERR_AOC_005` Signature/checksum mismatch. |
| `16` | `ERR_AOC_006` Effective findings present (Policy-only data). |
| `17` | `ERR_AOC_007` Unknown top-level fields / schema violation. |
| `70` | Transport error (network, auth, malformed input). |
> Exit codes map directly to the `ERR_AOC_00x` table for scripting consistency. Multiple violations yield the highest-priority code (e.g., 11 takes precedence over 14).
### 2.6Examples
Dry-run a local CSAF file:
```bash
stella sources ingest --dry-run \
--source redhat \
--input ./fixtures/redhat/RHSA-2025-1234.json
```
Stream from HTTPS and emit JSON for CI:
```bash
stella sources ingest --dry-run \
--source osv \
--input https://osv.dev/vulnerability/GHSA-aaaa-bbbb \
--format json \
--output artifacts/osv-dry-run.json
cat artifacts/osv-dry-run.json | jq '.violations'
```
### 2.7Offline notes
When operating in sealed/offline mode:
- Use `--input` paths pointing to Offline Kit snapshots (`offline-kit/advisories/*.json`).
- Provide `--tenant` explicitly if the offline bundle contains multiple tenants.
- The command does not attempt network access when given a file path.
- Store reports with `--output` to include in transfer packages for policy review.
---
## 3·`stella aoc verify`
### 3.1Synopsis
```bash
stella aoc verify \
[--since <iso8601|duration>] \
[--limit <count>] \
[--sources <list>] \
[--codes <ERR_AOC_00x,...>] \
[--format table|json] \
[--export <file>] \
[--tenant <tenant-id>] \
[--no-color]
```
### 3.2Description
Replays the AOC guard against stored raw documents. By default it checks all advisories and VEX statements ingested in the last 24hours for the active tenant, reporting totals, top violation codes, and sample documents. Use it in CI pipelines, scheduled verifications, or during incident response.
### 3.3Options
| Option | Description |
|--------|-------------|
| `--since <value>` | Verification window. Accepts ISO8601 timestamp (`2025-10-25T12:00:00Z`) or duration (`48h`, `7d`). Defaults to `24h`. |
| `--limit <count>` | Maximum number of violations to display (per code). `0` means show all. Defaults to `20`. |
| `--sources <list>` | Comma-separated list of sources (`redhat,ubuntu,osv`). Filters both advisories and VEX entries. |
| `--codes <list>` | Restricts output to specific `ERR_AOC_00x` codes. Useful for regression tracking. |
| `--format table|json` | `table` (default) prints summary plus top violations; `json` outputs machine-readable report identical to the `/aoc/verify` API. |
| `--export <file>` | Writes the JSON report to disk (useful for audits/offline uploads). |
| `--tenant <tenant-id>` | Overrides tenant context. Required for cross-tenant verifications when run by platform operators. |
| `--no-color` | Disables ANSI colours. |
### 3.4Report structure (JSON)
```json
{
"tenant": "default",
"window": {
"from": "2025-10-25T12:00:00Z",
"to": "2025-10-26T12:00:00Z"
},
"checked": {
"advisories": 482,
"vex": 75
},
"violations": [
{
"code": "ERR_AOC_001",
"count": 2,
"examples": [
{
"source": "redhat",
"documentId": "advisory_raw:redhat:RHSA-2025:1",
"contentHash": "sha256:…",
"path": "/content/raw/cvss"
}
]
}
],
"metrics": {
"ingestion_write_total": 557,
"aoc_violation_total": 2
}
}
```
### 3.5Exit codes
| Exit code | Meaning |
|-----------|---------|
| `0` | Verification succeeded with zero violations. |
| `11…17` | Same mapping as §2.5 when violations are detected. Highest-priority code returned. |
| `18` | Verification ran but results truncated (limit reached) treat as warning; rerun with higher `--limit`. |
| `70` | Transport/authentication error. |
| `71` | CLI misconfiguration (missing tenant, invalid `--since`, etc.). |
### 3.6Examples
Daily verification across all sources:
```bash
stella aoc verify --since 24h --format table
```
CI pipeline focusing on errant sources and exporting evidence:
```bash
stella aoc verify \
--sources redhat,ubuntu \
--codes ERR_AOC_001,ERR_AOC_004 \
--format json \
--limit 100 \
--export artifacts/aoc-verify.json
jq '.violations[] | {code, count}' artifacts/aoc-verify.json
```
Air-gapped verification using Offline Kit snapshot (example script):
```bash
stella aoc verify \
--since 7d \
--format json \
--export /mnt/offline/aoc-verify-$(date +%F).json
sha256sum /mnt/offline/aoc-verify-*.json > /mnt/offline/checksums.txt
```
### 3.7Automation tips
- Schedule with `cron` or platform scheduler and fail the job when exit code ≥11.
- Pair with `stella sources ingest --dry-run` for pre-flight validation before re-enabling a paused source.
- Push JSON exports to observability pipelines for historical tracking of violation counts.
### 3.8Offline notes
- Works against Offline Kit Mongo snapshots when CLI is pointed at the local API gateway included in the bundle.
- When fully disconnected, run against exported `aoc verify` reports generated on production and replay them using `--format json --export` (automation recipe above).
- Include verification output in compliance packages alongside Offline Kit manifests.
---
## 4·Global exit-code reference
| Code | Summary |
|------|---------|
| `0` | Success / no violations. |
| `11` | `ERR_AOC_001` Forbidden field present. |
| `12` | `ERR_AOC_002` Merge attempt detected. |
| `13` | `ERR_AOC_003` Idempotency violation. |
| `14` | `ERR_AOC_004` Missing provenance/signature metadata. |
| `15` | `ERR_AOC_005` Signature/checksum mismatch. |
| `16` | `ERR_AOC_006` Effective findings in ingestion payload. |
| `17` | `ERR_AOC_007` Schema violation / unknown fields. |
| `18` | Partial verification (limit reached). |
| `70` | Transport or HTTP failure. |
| `71` | CLI usage error (invalid arguments, missing tenant). |
Use these codes in CI to map outcomes to build statuses or alert severities.
---
## 5·Related references
- [Aggregation-Only Contract reference](../ingestion/aggregation-only-contract.md)
- [Architecture overview](../architecture/overview.md)
- [Console AOC dashboard](../ui/console.md)
- [Authority scopes](../ARCHITECTURE_AUTHORITY.md)
---
## 6·Compliance checklist
- [ ] Usage documented for both table and JSON formats.
- [ ] Exit-code mapping matches `ERR_AOC_00x` definitions and automation guidance.
- [ ] Offline/air-gap workflow captured for both commands.
- [ ] References to AOC architecture and console docs included.
- [ ] Examples validated against current CLI syntax (update post-implementation).
- [ ] Docs guild screenshot/narrative placeholder logged for release notes (pending CLI team capture).
---
*Last updated: 2025-10-26 (Sprint19).*

284
docs/cli/policy.md Normal file
View File

@@ -0,0 +1,284 @@
# Stella CLI — Policy Commands
> **Audience:** Policy authors, reviewers, operators, and CI engineers using the `stella` CLI to interact with Policy Engine.
> **Supported from:** `stella` CLI ≥0.20.0 (Policy Engine v2 sprint line).
> **Prerequisites:** Authority-issued bearer token with the scopes noted per command (export `STELLA_TOKEN` or pass `--token`).
---
## 1·Global Options & Output Modes
All `stella policy *` commands honour the common CLI options:
| Flag | Default | Description |
|------|---------|-------------|
| `--server <url>` | `https://stella.local` | Policy Engine gateway root. |
| `--tenant <id>` | token default | Override tenant for multi-tenant installs. |
| `--format <table\|json\|yaml>` | `table` for TTY, `json` otherwise | Output format for listings/diffs. |
| `--output <file>` | stdout | Write full JSON payload to file. |
| `--sealed` | false | Force sealed-mode behaviour (no outbound fetch). |
| `--trace` | false | Emit verbose timing/log correlation info. |
> **Tip:** Set `STELLA_PROFILE=policy` in CI to load saved defaults from `~/.stella/profiles/policy.toml`.
---
## 2·Authoring & Drafting Commands
### 2.1 `stella policy new`
Create a draft policy from a template or scratch.
```
stella policy new --policy-id P-7 --name "Default Org Policy" \
--template baseline --output-path policies/P-7.stella
```
Options:
| Flag | Description |
|------|-------------|
| `--policy-id` *(required)* | Stable identifier (e.g., `P-7`). |
| `--name` | Friendly display name. |
| `--template` | `baseline`, `serverless`, `blank`. |
| `--from` | Start from existing version (`policyId@version`). |
| `--open` | Launches `$EDITOR` after creation. |
Writes DSL to local file and registers draft version (`status=draft`). Requires `policy:write`.
### 2.2 `stella policy edit`
Open an existing draft in the local editor.
```
stella policy edit P-7 --version 4
```
- Auto-checks out latest draft if `--version` omitted.
- Saves to temp file, uploads on editor exit (unless `--no-upload`).
- Use `--watch` to keep command alive and re-upload on every save.
### 2.3 `stella policy lint`
Static validation without submitting.
```
stella policy lint policies/P-7.stella --format json
```
Outputs diagnostics (line/column, code, message). Exit codes:
| Code | Meaning |
|------|---------|
| `0` | No lint errors. |
| `10` | Syntax/compile errors (`ERR_POL_001`). |
| `11` | Unsupported syntax version. |
### 2.4 `stella policy compile`
Emits IR digest and rule summary.
```
stella policy compile P-7 --version 4
```
Returns JSON with `digest`, `rules.count`, action counts. Exit `0` success, `10` on compile errors.
---
## 3·Lifecycle Workflow
### 3.1 Submit
```
stella policy submit P-7 --version 4 \
--reviewer user:kay --reviewer group:sec-reviewers \
--note "Simulated against golden SBOM set" \
--attach sims/P-7-v4-vs-v3.json
```
Requires `policy:submit`. CLI validates that lint/compile run within 24h and bundle attachments exist.
### 3.2 Review
```
stella policy review P-7 --version 4 --approve \
--note "Looks good; ensure incident playbook updated."
```
- `--approve`, `--request-changes`, or `--comment`.
- Provide `--blocking` to mark comment as blocking.
- Requires `policy:review`.
### 3.3 Approve
```
stella policy approve P-7 --version 4 \
--note "Determinism CI green; simulation diff attached." \
--attach sims/P-7-v4-vs-v3.json
```
Prompts for confirmation; refuses if approver == submitter. Requires `policy:approve`.
### 3.4 Activate
```
stella policy activate P-7 --version 4 --run-now --priority high
```
- Optional `--scheduled-at 2025-10-27T02:00:00Z`.
- Requires `policy:activate` and `policy:run`.
### 3.5 Archive / Rollback
```
stella policy archive P-7 --version 3 --reason "Superseded by v4"
stella policy activate P-7 --version 3 --rollback --incident INC-2025-104
```
---
## 4·Simulation & Runs
### 4.1 Simulate
```
stella policy simulate P-7 \
--base 3 --candidate 4 \
--sbom sbom:S-42 --sbom sbom:S-318 \
--env exposure=internet --env sealed=false \
--format json --output sims/P-7-v4-vs-v3.json
```
Output fields (JSON):
```json
{
"diff": {
"added": 12,
"removed": 8,
"unchanged": 657,
"bySeverity": {
"Critical": {"up": 1, "down": 0},
"High": {"up": 3, "down": 4}
}
},
"explainUri": "blob://policy/P-7/simulations/2025-10-26.json"
}
```
> Schema reminder: CLI commands surface objects defined in `src/StellaOps.Scheduler.Models/docs/SCHED-MODELS-20-001-POLICY-RUNS.md`; use the samples in `samples/api/scheduler/` for contract validation when extending output parsing.
Exit codes:
| Code | Meaning |
|------|---------|
| `0` | Simulation succeeded; diffs informational. |
| `20` | Blocking delta (`--fail-on-diff` triggered). |
| `21` | Simulation input missing (`ERR_POL_003`). |
| `22` | Determinism guard (`ERR_POL_004`). |
| `23` | API/permission error (`ERR_POL_002`, `ERR_POL_005`). |
### 4.2 Run
```
stella policy run P-7 --mode full \
--sbom sbom:S-42 --env exposure=internal-only \
--wait --watch
```
Options:
| Flag | Description |
|------|-------------|
| `--mode` | `full` or `incremental` (default incremental). |
| `--sbom` | Explicit SBOM IDs (optional). |
| `--priority` | `normal`, `high`, `emergency`. |
| `--wait` | Poll run status until completion. |
| `--watch` | Stream progress events (requires TTY). |
`stella policy run status <runId>` retrieves run metadata.
`stella policy run list --status failed --limit 20` returns recent runs.
### 4.3 Replay & Cancel
```
stella policy run replay run:P-7:2025-10-26:auto --output bundles/replay.tgz
stella policy run cancel run:P-7:2025-10-26:auto
```
Replay downloads sealed bundle for deterministic verification.
---
## 5·Findings & Explainability
### 5.1 List Findings
```
stella findings ls --policy P-7 \
--sbom sbom:S-42 \
--status affected --severity High,Critical \
--format table
```
Common flags:
| Flag | Description |
|------|-------------|
| `--page`, `--page-size` | Pagination (default page size 50). |
| `--cursor` | Use cursor token from previous call. |
| `--since` | ISO timestamp filter. |
### 5.2 Fetch Explain
```
stella findings explain --policy P-7 --finding P-7:S-42:pkg:npm/lodash@4.17.21:CVE-2021-23337 \
--format json --output explains/lodash.json
```
Outputs ordered rule hits, inputs, and sealed-mode hints.
---
## 6·Exit Codes Summary
| Exit code | Description | Typical ERR codes |
|-----------|-------------|-------------------|
| `0` | Success (command completed, warnings only). | — |
| `10` | DSL syntax/compile failure. | `ERR_POL_001` |
| `11` | Unsupported DSL version / schema mismatch. | `ERR_POL_001` |
| `12` | Approval/rbac failure. | `ERR_POL_002`, `ERR_POL_005` |
| `20` | Simulation diff exceeded thresholds (`--fail-on-diff`). | — |
| `21` | Required inputs missing (SBOM/advisory/VEX). | `ERR_POL_003` |
| `22` | Determinism guard triggered. | `ERR_POL_004` |
| `23` | Run canceled or timed out. | `ERR_POL_006` |
| `30` | Network/transport error (non-HTTP success). | — |
| `64` | CLI usage error (invalid flag/argument). | — |
All non-zero exits emit structured error envelope on stderr when `--format json` or `STELLA_JSON_ERRORS=1`.
---
## 7·Offline & Air-Gap Usage
- Use `--sealed` to ensure commands avoid outbound calls; required for sealed enclaves.
- `stella policy bundle export --policy P-7 --version 4 --output bundles/policy-P-7-v4.bundle` pairs with Offline Kit import.
- Replay bundles (`run replay`) are DSSE-signed; verify with `stella offline verify`.
- Store credentials in `~/.stella/offline.toml` for non-interactive air-gapped pipelines.
---
## 8·Compliance Checklist
- [ ] **Help text synced:** `stella policy --help` matches documented flags/examples (update during release pipeline).
- [ ] **Exit codes mapped:** Table above reflects CLI implementation and CI asserts mapping for `ERR_POL_*`.
- [ ] **JSON schemas verified:** Example payloads validated against OpenAPI/SDK contracts before publishing.
- [ ] **Scope guidance present:** Each command lists required Authority scopes.
- [ ] **Offline guidance included:** Sealed-mode steps and bundle workflows documented.
- [ ] **Cross-links tested:** Links to DSL, lifecycle, runs, and API docs resolve locally (`yarn docs:lint`).
- [ ] **Examples no-op safe:** Command examples either read-only or use placeholders (no destructive defaults).
---
*Last updated: 2025-10-26 (Sprint 20).*

229
docs/deploy/console.md Normal file
View File

@@ -0,0 +1,229 @@
# Deploying the StellaOps Console
> **Audience:** Deployment Guild, Console Guild, operators rolling out the web console.
> **Scope:** Helm and Docker Compose deployment steps, ingress/TLS configuration, required environment variables, health checks, offline/air-gap operation, and compliance checklist (Sprint 23).
The StellaOps Console ships as part of the `stellaops` stack Helm chart and Compose bundles maintained under `deploy/`. This guide describes the supported deployment paths, the configuration surface, and operational checks needed to run the console in connected or air-gapped environments.
---
## 1. Prerequisites
- Kubernetes cluster (v1.28+) with ingress controller (NGINX, Traefik, or equivalent) and Cert-Manager for automated TLS, or Docker host for Compose deployments.
- Container registry access to `registry.stella-ops.org` (or mirrored registry) for all images listed in `deploy/releases/*.yaml`.
- Authority service configured with console client (`aud=ui`, scopes `ui.read`, `ui.admin`).
- DNS entry pointing to the console hostname (for example, `console.acme.internal`).
- Cosign public key for manifest verification (`deploy/releases/manifest.json.sig`).
- Optional: Offline Kit bundle for air-gapped sites (`stella-ops-offline-kit-<ver>.tar.gz`).
---
## 2. Helm deployment (recommended)
### 2.1 Install chart repository
```bash
helm repo add stellaops https://downloads.stella-ops.org/helm
helm repo update stellaops
```
If operating offline, copy the chart archive from the Offline Kit (`deploy/helm/stellaops-<ver>.tgz`) and run:
```bash
helm install stellaops ./stellaops-<ver>.tgz --namespace stellaops --create-namespace
```
### 2.2 Base installation
```bash
helm install stellaops stellaops/stellaops \
--namespace stellaops \
--create-namespace \
--values deploy/helm/stellaops/values-prod.yaml
```
The chart deploys Authority, Console web/API gateway, Scanner API, Scheduler, and supporting services. The console frontend pod is labelled `app=stellaops-web-ui`.
### 2.3 Helm values highlights
Key sections in `deploy/helm/stellaops/values-prod.yaml`:
| Path | Description |
|------|-------------|
| `console.ingress.host` | Hostname served by the console (`console.example.com`). |
| `console.ingress.tls.secretName` | Kubernetes secret containing TLS certificate (generated by Cert-Manager or uploaded manually). |
| `console.config.apiGateway.baseUrl` | Internal base URL the UI uses to reach the gateway (defaults to `https://stellaops-web`). |
| `console.env.AUTHORITY_ISSUER` | Authority issuer URL (for example, `https://authority.example.com`). |
| `console.env.AUTHORITY_CLIENT_ID` | Authority client ID for the console UI. |
| `console.env.AUTHORITY_SCOPES` | Space-separated scopes required by UI (`ui.read ui.admin`). |
| `console.resources` | CPU/memory requests and limits (default 250m CPU / 512Mi memory). |
| `console.podAnnotations` | Optional annotations for service mesh or monitoring. |
Use `values-stage.yaml`, `values-dev.yaml`, or `values-airgap.yaml` as templates for other environments.
### 2.4 TLS and ingress
Example ingress override:
```yaml
console:
ingress:
enabled: true
className: nginx
host: console.acme.internal
tls:
enabled: true
secretName: console-tls
```
Generate certificates using Cert-Manager or provide an existing secret. For air-gapped deployments, pre-create the secret with the mirrored CA chain.
### 2.5 Health checks
Console pods expose:
| Path | Purpose | Notes |
|------|---------|-------|
| `/health/live` | Liveness probe | Confirms process responsive. |
| `/health/ready` | Readiness probe | Verifies configuration bootstrap and Authority reachability. |
| `/metrics` | Prometheus metrics | Enabled when `console.metrics.enabled=true`. |
Helm chart sets default probes (`initialDelaySeconds: 10`, `periodSeconds: 15`). Adjust via `console.livenessProbe` and `console.readinessProbe`.
---
## 3. Docker Compose deployment
Located in `deploy/compose/docker-compose.console.yaml`. Quick start:
```bash
cd deploy/compose
docker compose -f docker-compose.console.yaml --env-file console.env up -d
```
`console.env` should define:
```
CONSOLE_PUBLIC_BASE_URL=https://console.acme.internal
AUTHORITY_ISSUER=https://authority.acme.internal
AUTHORITY_CLIENT_ID=console-ui
AUTHORITY_CLIENT_SECRET=<if using confidential client>
AUTHORITY_SCOPES=ui.read ui.admin
CONSOLE_GATEWAY_BASE_URL=https://api.acme.internal
```
The compose bundle includes Traefik as reverse proxy with TLS termination. Update `traefik/dynamic/console.yml` for custom certificates or additional middlewares (CSP headers, rate limits).
---
## 4. Environment variables
| Variable | Description | Default |
|----------|-------------|---------|
| `CONSOLE_PUBLIC_BASE_URL` | External URL used for redirects, deep links, and telemetry. | None (required). |
| `CONSOLE_GATEWAY_BASE_URL` | URL of the web gateway that proxies API calls (`/console/*`). | Chart service name. |
| `AUTHORITY_ISSUER` | Authority issuer (`https://authority.example.com`). | None (required). |
| `AUTHORITY_CLIENT_ID` | OIDC client configured in Authority. | None (required). |
| `AUTHORITY_SCOPES` | Space-separated scopes assigned to the console client. | `ui.read ui.admin`. |
| `AUTHORITY_DPOP_ENABLED` | Enables DPoP challenge/response (recommended true). | `true`. |
| `CONSOLE_FEATURE_FLAGS` | Comma-separated feature flags (`runs`, `downloads.offline`, etc.). | `runs,downloads,policies`. |
| `CONSOLE_LOG_LEVEL` | Minimum log level (`Information`, `Debug`, etc.). | `Information`. |
| `CONSOLE_METRICS_ENABLED` | Expose `/metrics` endpoint. | `true`. |
| `CONSOLE_SENTRY_DSN` | Optional error reporting DSN. | Blank. |
When running behind additional proxies, set `ASPNETCORE_FORWARDEDHEADERS_ENABLED=true` to honour `X-Forwarded-*` headers.
---
## 5. Security headers and CSP
The console serves a strict Content Security Policy (CSP) by default:
```
default-src 'self';
connect-src 'self' https://*.stella-ops.local;
script-src 'self';
style-src 'self' 'unsafe-inline';
img-src 'self' data:;
font-src 'self';
frame-ancestors 'none';
```
Adjust via `console.config.cspOverrides` if additional domains are required. For integrations embedding the console, update OIDC redirect URIs and Authority scopes accordingly.
TLS recommendations:
- Use TLS 1.2+ with modern cipher suite policy.
- Enable HSTS (`Strict-Transport-Security: max-age=31536000; includeSubDomains`).
- Provide custom trust bundles via `console.config.trustBundleSecret` when using private CAs.
---
## 6. Logging and metrics
- Structured logs emitted to stdout with correlation IDs. Configure log shipping via Fluent Bit or similar.
- Metrics available at `/metrics` in Prometheus format. Key metrics include `ui_request_duration_seconds`, `ui_tenant_switch_total`, and `ui_download_manifest_refresh_seconds`.
- Enable OpenTelemetry exporter by setting `OTEL_EXPORTER_OTLP_ENDPOINT` and associated headers in environment variables.
---
## 7. Offline and air-gap deployment
- Mirror container images using the Downloads workspace or Offline Kit manifest. Example:
```bash
oras copy registry.stella-ops.org/stellaops/web-ui@sha256:<digest> \
registry.airgap.local/stellaops/web-ui:2025.10.0
```
- Import Offline Kit using `stella ouk import` before starting the console so manifest parity checks succeed.
- Use `values-airgap.yaml` to disable external telemetry endpoints and configure internal certificate chains.
- Run `helm upgrade --install` using the mirrored chart (`stellaops-<ver>.tgz`) and set `console.offlineMode=true` to surface offline banners.
---
## 8. Health checks and remediation
| Check | Command | Expected result |
|-------|---------|-----------------|
| Pod status | `kubectl get pods -n stellaops` | `Running` state with restarts = 0. |
| Liveness | `kubectl exec deploy/stellaops-web-ui -- curl -fsS http://localhost:8080/health/live` | Returns `{"status":"Healthy"}`. |
| Readiness | `kubectl exec deploy/stellaops-web-ui -- curl -fsS http://localhost:8080/health/ready` | Returns `{"status":"Ready"}`. |
| Gateway reachability | `curl -I https://console.example.com/api/console/status` | `200 OK` with CSP headers. |
| Static assets | `curl -I https://console.example.com/static/assets/app.js` | `200 OK` with long cache headers. |
Troubleshooting steps:
- **Authority unreachable:** readiness fails with `AUTHORITY_UNREACHABLE`. Check DNS, trust bundles, and Authority service health.
- **Manifest mismatch:** console logs `DOWNLOAD_MANIFEST_SIGNATURE_INVALID`. Verify cosign key and re-sync manifest.
- **Ingress 404:** ensure ingress controller routes host to `stellaops-web-ui` service; check TLS secret name.
- **SSE blocked:** confirm proxy allows HTTP/1.1 and disables buffering on `/console/runs/*`.
---
## 9. References
- `deploy/helm/stellaops/values-*.yaml` - environment-specific overrides.
- `deploy/compose/docker-compose.console.yaml` - Compose bundle.
- `/docs/ui/downloads.md` - manifest and offline bundle guidance.
- `/docs/security/console-security.md` (pending) - CSP and Authority scopes.
- `/docs/24_OFFLINE_KIT.md` - Offline kit packaging and verification.
- `/docs/ops/deployment-runbook.md` (pending) - wider platform deployment steps.
---
## 10. Compliance checklist
- [ ] Helm and Compose instructions verified against `deploy/` assets.
- [ ] Ingress/TLS guidance aligns with Security Guild recommendations.
- [ ] Environment variables documented with defaults and required values.
- [ ] Health/liveness/readiness endpoints tested and listed.
- [ ] Offline workflow (mirrors, manifest parity) captured.
- [ ] Logging and metrics surface documented metrics.
- [ ] CSP and security header defaults stated alongside override guidance.
- [ ] Troubleshooting section linked to relevant runbooks.
---
*Last updated: 2025-10-27 (Sprint 23).*

135
docs/deploy/containers.md Normal file
View File

@@ -0,0 +1,135 @@
# Container Deployment Guide — AOC Update
> **Audience:** DevOps Guild, platform operators deploying StellaOps services.
> **Scope:** Deployment configuration changes required by the Aggregation-Only Contract (AOC), including schema validators, guard environment flags, and verifier identities.
This guide supplements existing deployment manuals with AOC-specific configuration. It assumes familiarity with the base Compose/Helm manifests described in `ops/deployment/` and `docs/ARCHITECTURE_DEVOPS.md`.
---
## 1·Schema validator enablement
### 1.1MongoDB validators
- Apply JSON schema validators to `advisory_raw` and `vex_raw` collections before enabling AOC guards.
- Use the migration script provided in `ops/devops/scripts/apply-aoc-validators.js`:
```bash
kubectl exec -n concelier deploy/concelier-mongo -- \
mongo concelier ops/devops/scripts/apply-aoc-validators.js
kubectl exec -n excititor deploy/excititor-mongo -- \
mongo excititor ops/devops/scripts/apply-aoc-validators.js
```
- Validators enforce required fields (`tenant`, `source`, `upstream`, `linkset`) and reject forbidden keys at DB level.
- Rollback plan: validators are applied with `validationLevel: moderate`—downgrade via the same script with `--remove` if required.
### 1.2Migration order
1. Deploy validators in maintenance window.
2. Roll out Concelier/Excititor images with guard middleware enabled (`AOC_GUARD_ENABLED=true`).
3. Run smoke tests (`stella sources ingest --dry-run` fixtures) before resuming production ingestion.
---
## 2·Container environment flags
Add the following environment variables to Concelier/Excititor deployments:
| Variable | Default | Description |
|----------|---------|-------------|
| `AOC_GUARD_ENABLED` | `true` | Enables `AOCWriteGuard` interception. Set `false` only for controlled rollback. |
| `AOC_ALLOW_SUPERSEDES_RETROFIT` | `false` | Allows temporary supersedes backfill during migration. Remove after cutover. |
| `AOC_METRICS_ENABLED` | `true` | Emits `ingestion_write_total`, `aoc_violation_total`, etc. |
| `AOC_TENANT_HEADER` | `X-Stella-Tenant` | Header name expected from Gateway. |
| `AOC_VERIFIER_USER` | `stella-aoc-verify` | Read-only service user used by UI/CLI verification. |
Compose snippet:
```yaml
environment:
- AOC_GUARD_ENABLED=true
- AOC_ALLOW_SUPERSEDES_RETROFIT=false
- AOC_METRICS_ENABLED=true
- AOC_TENANT_HEADER=X-Stella-Tenant
- AOC_VERIFIER_USER=stella-aoc-verify
```
Ensure `AOC_VERIFIER_USER` exists in Authority with `aoc:verify` scope and no write permissions.
---
## 3·Verifier identity
- Create a dedicated client (`stella-aoc-verify`) via Authority bootstrap:
```yaml
clients:
- clientId: stella-aoc-verify
grantTypes: [client_credentials]
scopes: [aoc:verify, advisory:verify, vex:verify]
tenants: [default]
```
- Store credentials in secret store (`Kubernetes Secret`, `Docker swarm secret`).
- Bind credentials to `stella aoc verify` CI jobs and Console verification service.
- Rotate quarterly; document in `ops/authority-key-rotation.md`.
---
## 4·Deployment steps
1. **Pre-checks:** Confirm database backups, alerting in maintenance mode, and staging environment validated.
2. **Apply validators:** Run scripts per §1.1.
3. **Update manifests:** Inject environment variables (§2) and mount guard configuration configmaps.
4. **Redeploy services:** Rolling restart Concelier/Excititor pods. Monitor `ingestion_write_total` for steady throughput.
5. **Seed verifier:** Deploy read-only verifier user and store credentials.
6. **Run verification:** Execute `stella aoc verify --since 24h` and ensure exit code `0`.
7. **Update dashboards:** Point Grafana panels to new metrics (`aoc_violation_total`).
8. **Record handoff:** Capture console screenshots and verification logs for release notes.
---
## 5·Offline Kit updates
- Ship validator scripts with Offline Kit (`offline-kit/scripts/apply-aoc-validators.js`).
- Include pre-generated verification reports for air-gapped deployments.
- Document offline CLI workflow in bundle README referencing `docs/cli/cli-reference.md`.
- Ensure `stella-aoc-verify` credentials are scoped to offline tenant and rotated during bundle refresh.
---
## 6·Rollback plan
1. Disable guard via `AOC_GUARD_ENABLED=false` on Concelier/Excititor and rollout.
2. Remove validators with the migration script (`--remove`).
3. Pause verification jobs to prevent noise.
4. Investigate and remediate upstream issues before re-enabling guards.
---
## 7·References
- [Aggregation-Only Contract reference](../ingestion/aggregation-only-contract.md)
- [Authority scopes & tenancy](../security/authority-scopes.md)
- [Observability guide](../observability/observability.md)
- [CLI AOC commands](../cli/cli-reference.md)
- [Concelier architecture](../ARCHITECTURE_CONCELIER.md)
- [Excititor architecture](../ARCHITECTURE_EXCITITOR.md)
---
## 8·Compliance checklist
- [ ] Validators documented and scripts referenced for online/offline deployments.
- [ ] Environment variables cover guard enablement, metrics, and tenant header.
- [ ] Read-only verifier user installation steps included.
- [ ] Offline kit instructions align with validator/verification workflow.
- [ ] Rollback procedure captured.
- [ ] Cross-links to AOC docs, Authority scopes, and observability guides present.
- [ ] DevOps Guild sign-off tracked (owner: @devops-guild, due 2025-10-29).
---
*Last updated: 2025-10-26 (Sprint19).*

View File

@@ -0,0 +1,25 @@
# Policy Schema Export Automation
This utility generates JSON Schema documents for the Policy Engine run contracts.
## Command
```
scripts/export-policy-schemas.sh [output-directory]
```
When no output directory is supplied, schemas are written to `docs/schemas/`.
The exporter builds against `StellaOps.Scheduler.Models` and emits:
- `policy-run-request.schema.json`
- `policy-run-status.schema.json`
- `policy-diff-summary.schema.json`
- `policy-explain-trace.schema.json`
## CI integration checklist
- [ ] Invoke the script in the DevOps pipeline (see `DEVOPS-POLICY-20-004`).
- [ ] Publish the generated schemas as pipeline artifacts.
- [ ] Notify downstream consumers when schemas change (Slack `#policy-engine`, changelog snippet).
- [ ] Gate CLI validation once schema artifacts are available.

View File

@@ -2,28 +2,53 @@
Platform services publish strongly typed events; the JSON Schemas in this directory define those envelopes. File names follow `<event-name>@<version>.json` so producers and consumers can negotiate contracts explicitly.
## Catalog
- `scanner.report.ready@1.json` — emitted by Scanner.WebService once a signed report is persisted (payload embeds the canonical report plus DSSE envelope). Consumers: Notify, UI timeline.
- `scanner.scan.completed@1.json` — emitted alongside the signed report to capture scan outcomes/summary data for downstream automation. Consumers: Notify, Scheduler backfills, UI timelines.
- `scheduler.rescan.delta@1.json` — emitted by Scheduler when BOM-Index diffs require fresh scans. Consumers: Notify, Policy Engine.
- `attestor.logged@1.json` — emitted by Attestor after storing the Rekor inclusion proof. Consumers: UI attestation panel, Governance exports.
## Catalog
**Orchestrator envelopes (ORCH-SVC-38-101)**
- `scanner.event.report.ready@1.json` — orchestrator event emitted when a signed report is persisted. Supersedes the legacy `scanner.report.ready@1` schema and adds versioning, idempotency keys, and trace context. Consumers: Orchestrator bus, Notifications Studio, UI timeline.
- `scanner.event.scan.completed@1.json` — orchestrator event emitted when a scan run finishes. Supersedes the legacy `scanner.scan.completed@1` schema. Consumers: Orchestrator bus, Notifications Studio, Scheduler replay tooling.
**Legacy envelopes (Redis-backed)**
- `scanner.report.ready@1.json` — legacy Redis stream event emitted once a signed report is persisted (kept for transitional compatibility).
- `scanner.scan.completed@1.json` — legacy Redis stream event emitted alongside the signed report for automation.
- `scheduler.rescan.delta@1.json` — emitted by Scheduler when BOM-Index diffs require fresh scans. Consumers: Notify, Policy Engine.
- `scheduler.graph.job.completed@1.json` — emitted when a Cartographer graph build/overlay job finishes (`status = completed|failed|cancelled`). Consumers: Scheduler WebService (lag metrics/API), Cartographer cache warmers, UI overlay freshness indicators.
- `attestor.logged@1.json` — emitted by Attestor after storing the Rekor inclusion proof. Consumers: UI attestation panel, Governance exports.
Additive payload changes (new optional fields) can stay within the same version. Any breaking change (removing a field, tightening validation, altering semantics) must increment the `@<version>` suffix and update downstream consumers.
Additive payload changes (new optional fields) can stay within the same version. Any breaking change (removing a field, tightening validation, altering semantics) must increment the `@<version>` suffix and update downstream consumers. For full orchestrator guidance see [`orchestrator-scanner-events.md`](orchestrator-scanner-events.md).
## Envelope structure
All event envelopes share the same deterministic header. Use the following table as the quick reference when emitting or parsing events:
| Field | Type | Notes |
|-------|------|-------|
| `eventId` | `uuid` | Must be globally unique per occurrence; producers log duplicates as fatal. |
| `kind` | `string` | Fixed per schema (e.g., `scanner.report.ready`). Downstream services reject unknown kinds or versions. |
| `tenant` | `string` | Multitenant isolation key; mirror the value recorded in queue/Mongo metadata. |
| `ts` | `date-time` | RFC3339 UTC timestamp. Use monotonic clocks or atomic offsets so ordering survives retries. |
| `scope` | `object` | Optional block used when the event concerns a specific image or repository. See schema for required fields (e.g., `repo`, `digest`). |
| `payload` | `object` | Event-specific body. Schemas allow additional properties so producers can add optional hints (e.g., `reportId`, `quietedFindingCount`) without breaking consumers. For scanner events, payloads embed both the canonical report document and the DSSE envelope so consumers can reuse signatures without recomputing them. See `docs/runtime/SCANNER_RUNTIME_READINESS.md` for the runtime consumer checklist covering these hints. |
## Envelope structure
### Orchestrator envelope (version&nbsp;1)
| Field | Type | Notes |
|-------|------|-------|
| `eventId` | `uuid` | Globally unique per occurrence. |
| `kind` | `string` | e.g., `scanner.event.report.ready`. |
| `version` | `integer` | Schema version (`1` for the initial release). |
| `tenant` | `string` | Multitenant isolation key; mirror the value recorded in queue/Mongo metadata. |
| `occurredAt` | `date-time` | RFC3339 UTC timestamp describing when the state transition happened. |
| `recordedAt` | `date-time` | RFC3339 UTC timestamp for durable persistence (optional but recommended). |
| `source` | `string` | Producer identifier (`scanner.webservice`). |
| `idempotencyKey` | `string` | Deterministic dedupe key (`scanner.event.*:<tenant>:<report|scan>`). |
| `correlationId` | `string` | Ties the event to the originating scan/API request. |
| `traceId` / `spanId` | `string` | W3C trace context propagated into downstream telemetry. |
| `scope` | `object` | Optional block with at least `repo` and `digest`. |
| `payload` | `object` | Event-specific body; schemas embed the canonical report and DSSE envelope. |
| `attributes` | `object` | Optional metadata bag (`string` keys/values) for downstream correlation. |
For Scanner orchestrator events, `links` include console and API deep links (`ui`, `report`, and `policy`) plus an optional `attestation` URL when a DSSE envelope is present. See [`orchestrator-scanner-events.md`](orchestrator-scanner-events.md) for details.
### Legacy Redis envelope
| Field | Type | Notes |
|-------|------|-------|
| `eventId` | `uuid` | Must be globally unique per occurrence; producers log duplicates as fatal. |
| `kind` | `string` | Fixed per schema (e.g., `scanner.report.ready`). Downstream services reject unknown kinds or versions. |
| `tenant` | `string` | Multitenant isolation key; mirror the value recorded in queue/Mongo metadata. |
| `ts` | `date-time` | RFC3339 UTC timestamp. Use monotonic clocks or atomic offsets so ordering survives retries. |
| `scope` | `object` | Optional block used when the event concerns a specific image or repository. See schema for required fields (e.g., `repo`, `digest`). |
| `payload` | `object` | Event-specific body. Schemas allow additional properties so producers can add optional hints (e.g., `reportId`, `quietedFindingCount`) without breaking consumers. See `docs/runtime/SCANNER_RUNTIME_READINESS.md` for the runtime consumer checklist covering these hints. |
| `attributes` | `object` | Optional metadata bag (`string` keys/values) for downstream correlation (e.g., pipeline identifiers). Omit when unused to keep payloads concise. |
When adding new optional fields, document the behaviour in the schemas `description` block and update the consumer checklist in the next sprint sync.
When adding new optional fields, document the behaviour in the schemas `description` block and update the consumer checklist in the next sprint sync.
## Canonical samples & validation
Reference payloads live under `docs/events/samples/`, mirroring the schema version (`<event-name>@<version>.sample.json`). They illustrate common field combinations, including the optional attributes that downstream teams rely on for UI affordances and audit trails. Scanner samples reuse the exact DSSE envelope checked into `samples/api/reports/report-sample.dsse.json`, and unit tests (`ReportSamplesTests`, `PlatformEventSchemaValidationTests`) guard that payloads stay canonical and continue to satisfy the published schemas.

View File

@@ -0,0 +1,121 @@
# Scanner Orchestrator Events (ORCH-SVC-38-101)
Last updated: 2025-10-26
The Notifications Studio initiative (NOTIFY-SVC-38-001) and orchestrator backlog (ORCH-SVC-38-101) standardise how platform services emit lifecycle events. This document describes the Scanner WebService contract for the new **orchestrator envelopes** (`scanner.event.*`) and how they supersede the legacy Redis-backed `scanner.report.ready` / `scanner.scan.completed` events.
## 1. Envelope overview
Orchestrator events share a deterministic JSON envelope:
| Field | Type | Notes |
|-------|------|-------|
| `eventId` | `uuid` | Globally unique identifier generated per occurrence. |
| `kind` | `string` | Event identifier; Scanner emits `scanner.event.report.ready` and `scanner.event.scan.completed`. |
| `version` | `integer` | Schema version. Initial release uses `1`. |
| `tenant` | `string` | Tenant that owns the scan/report. Mirrors Authority claims. |
| `occurredAt` | `date-time` | UTC instant when the underlying state transition happened (e.g., report persisted). |
| `recordedAt` | `date-time` | UTC instant when the event was durably written. Optional but recommended. |
| `source` | `string` | Producer identifier (`scanner.webservice`). |
| `idempotencyKey` | `string` | Deterministic key for duplicate suppression (see §4). |
| `correlationId` | `string` | Maps back to the API request or scan identifier. |
| `traceId` / `spanId` | `string` | W3C trace context propagated into downstream telemetry. |
| `scope` | `object` | Describes the affected artefact. Requires `repo` and `digest`; optional `namespace`, `component`, `image`. |
| `attributes` | `object` | Flat string map for frequently queried metadata (e.g., policy revision). |
| `payload` | `object` | Event-specific body (see §2). |
Canonical schemas live under `docs/events/scanner.event.*@1.json`. Samples that round-trip through `NotifyCanonicalJsonSerializer` are stored in `docs/events/samples/`.
## 2. Event kinds and payloads
### 2.1 `scanner.event.report.ready`
Emitted once a signed report is persisted and attested. Payload highlights:
- `reportId` / `scanId` — identifiers for the persisted report and originating scan. Until Scan IDs are surfaced by the API, `scanId` mirrors `reportId` so downstream correlators can stabilise on a single key.
- **Attributes:** `reportId`, `policyRevisionId`, `policyDigest`, `verdict` — pre-sorted for deterministic routing.
- **Links:**
- `ui``/ui/reports/{reportId}` on the current host.
- `report``{apiBasePath}/{reportsSegment}/{reportId}` (defaults to `/api/v1/reports/{reportId}`).
- `policy``{apiBasePath}/{policySegment}/revisions/{revisionId}` when a revision is present.
- `attestation``/ui/attestations/{reportId}` when a DSSE envelope is included.
- `imageDigest` — OCI image digest associated with the analysis.
- `generatedAt` — report generation timestamp (ISO-8601 UTC).
- `verdict``pass`, `warn`, or `fail` after policy evaluation.
- `summary` — blocked/warned/ignored/quieted counters (all non-negative integers).
- `delta` — newly critical/high counts and optional `kev` array.
- `quietedFindingCount` — mirrors `summary.quieted`.
- `policy` — revision metadata (`digest`, `revisionId`) surfaced for routing.
- `links` — UI/report/policy URLs suitable for operators.
- `dsse` — embedded DSSE envelope (payload, type, signature list).
- `report` — canonical report document; identical to the DSSE payload.
Schema: `docs/events/scanner.event.report.ready@1.json`
Sample: `docs/events/samples/scanner.event.report.ready@1.sample.json`
### 2.2 `scanner.event.scan.completed`
Emitted after scan execution finishes (success or policy failure). Payload highlights:
- `reportId` / `scanId` / `imageDigest` — identifiers mirroring the report-ready event. As with the report-ready payload, `scanId` currently mirrors `reportId` as a temporary shim.
- **Attributes:** `reportId`, `policyRevisionId`, `policyDigest`, `verdict`.
- **Links:** same as above (`ui`, `report`, `policy`) with `attestation` populated when DSSE metadata exists.
- `verdict`, `summary`, `delta`, `policy` — same semantics as above.
- `findings` — array of surfaced findings with `id`, `severity`, optional `cve`, `purl`, and `reachability`.
- `links`, `dsse`, `report` — same structure as §2.1 (allows Notifier to reuse signatures).
Schema: `docs/events/scanner.event.scan.completed@1.json`
Sample: `docs/events/samples/scanner.event.scan.completed@1.sample.json`
### 2.3 Relationship to legacy events
| Legacy Redis event | Replacement orchestrator event | Notes |
|--------------------|-------------------------------|-------|
| `scanner.report.ready` | `scanner.event.report.ready` | Adds versioning, idempotency, trace context. Payload is a superset of the legacy fields. |
| `scanner.scan.completed` | `scanner.event.scan.completed` | Same data plus explicit scan identifiers and orchestrator metadata. |
Legacy schemas remain for backwards-compatibility during migration, but new integrations **must** target the orchestrator variants.
## 3. Deterministic serialization
- Producers must serialise events using `NotifyCanonicalJsonSerializer` to guarantee consistent key ordering and whitespace.
- Timestamps (`occurredAt`, `recordedAt`, `payload.generatedAt`) use `DateTimeOffset.UtcDateTime.ToString("O")`.
- Payload arrays (`delta.kev`, `findings`) should be pre-sorted (e.g., alphabetical CVE order) so hash-based consumers remain stable.
- Optional fields are omitted rather than emitted as `null`.
## 4. Idempotency and correlation
Idempotency keys dedupe repeated publishes and align with the orchestrators outbox pattern:
| Event kind | Idempotency key template |
|------------|-------------------------|
| `scanner.event.report.ready` | `scanner.event.report.ready:<tenant>:<reportId>` |
| `scanner.event.scan.completed` | `scanner.event.scan.completed:<tenant>:<scanId>` |
Keys are ASCII lowercase; components should be trimmed and validated before concatenation. Retries must reuse the same key.
`correlationId` should match the scan identifier that appears in REST responses (`scanId`). Re-using the same value across the pair of events allows Notifier and orchestrator analytics to stitch lifecycle data together.
## 5. Versioning and evolution
- Increment the `version` field and the `@<version>` suffix for **breaking** changes (field removals, type changes, semantic shifts).
- Additive optional fields may remain within version 1; update the JSON schema and samples accordingly.
- When introducing `@2`, keep the `@1` schema/docs in place until orchestrator subscribers confirm migration.
## 6. Consumer checklist
1. Validate incoming payloads against the schema for the targeted version.
2. Use `idempotencyKey` for dedupe, not `eventId`.
3. Map `traceId`/`spanId` into telemetry spans to preserve causality.
4. Prefer `payload.report``policy.revisionId` when populating templates; the top-level `attributes` are convenience duplicates for quick routing.
5. Reserve the legacy Redis events for transitional compatibility only; downstream systems should subscribe to the orchestrator bus exposed by ORCH-SVC-38-101.
## 7. Implementation status and next actions
- **Scanner WebService** — `SCANNER-EVENTS-16-301` (blocked) and `SCANNER-EVENTS-16-302` (doing) track the production of these envelopes. The remaining blocker is the .NET 10 preview OpenAPI/Auth dependency drift that currently breaks `dotnet test`. Once Gateway and Notifier owners land the replacement packages, rerun the full test suite and capture fresh fixtures under `docs/events/samples/`.
- **Gateway/Notifier consumers** — subscribe to the orchestrator stream documented in ORCH-SVC-38-101. When the Scanner tasks unblock, regenerate notifier contract tests against the sample events included here.
- **Docs cadence** — update this file and the matching JSON schemas whenever payload fields change. Use the rehearsal checklist in `docs/ops/launch-cutover.md` to confirm downstream validation before the production cutover. Record gaps or newly required fields in `docs/ops/launch-readiness.md` so they land in the launch checklist.
---
**Imposed rule reminder:** work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.

View File

@@ -0,0 +1,93 @@
{
"eventId": "6d2d1b77-f3c3-4f70-8a9d-6f2d0c8801ab",
"kind": "scanner.event.report.ready",
"version": 1,
"tenant": "tenant-alpha",
"occurredAt": "2025-10-19T12:34:56Z",
"recordedAt": "2025-10-19T12:34:57Z",
"source": "scanner.webservice",
"idempotencyKey": "scanner.event.report.ready:tenant-alpha:report-abc",
"correlationId": "report-abc",
"traceId": "0af7651916cd43dd8448eb211c80319c",
"spanId": "b7ad6b7169203331",
"scope": {
"namespace": "acme/edge",
"repo": "api",
"digest": "sha256:feedface"
},
"attributes": {
"reportId": "report-abc",
"policyRevisionId": "rev-42",
"policyDigest": "digest-123",
"verdict": "blocked"
},
"payload": {
"reportId": "report-abc",
"scanId": "report-abc",
"imageDigest": "sha256:feedface",
"generatedAt": "2025-10-19T12:34:56Z",
"verdict": "fail",
"summary": {
"total": 1,
"blocked": 1,
"warned": 0,
"ignored": 0,
"quieted": 0
},
"delta": {
"newCritical": 1,
"kev": [
"CVE-2024-9999"
]
},
"quietedFindingCount": 0,
"policy": {
"digest": "digest-123",
"revisionId": "rev-42"
},
"links": {
"ui": "https://scanner.example/ui/reports/report-abc",
"report": "https://scanner.example/api/v1/reports/report-abc",
"policy": "https://scanner.example/api/v1/policy/revisions/rev-42",
"attestation": "https://scanner.example/ui/attestations/report-abc"
},
"dsse": {
"payloadType": "application/vnd.stellaops.report+json",
"payload": "eyJyZXBvcnRJZCI6InJlcG9ydC1hYmMiLCJpbWFnZURpZ2VzdCI6InNoYTI1NjpmZWVkZmFjZSIsImdlbmVyYXRlZEF0IjoiMjAyNS0xMC0xOVQxMjozNDo1NiswMDowMCIsInZlcmRpY3QiOiJibG9ja2VkIiwicG9saWN5Ijp7InJldmlzaW9uSWQiOiJyZXYtNDIiLCJkaWdlc3QiOiJkaWdlc3QtMTIzIn0sInN1bW1hcnkiOnsidG90YWwiOjEsImJsb2NrZWQiOjEsIndhcm5lZCI6MCwiaWdub3JlZCI6MCwicXVpZXRlZCI6MH0sInZlcmRpY3RzIjpbeyJmaW5kaW5nSWQiOiJmaW5kaW5nLTEiLCJzdGF0dXMiOiJCbG9ja2VkIiwic2NvcmUiOjQ3LjUsInNvdXJjZVRydXN0IjoiTlZEIiwicmVhY2hhYmlsaXR5IjoicnVudGltZSJ9XSwiaXNzdWVzIjpbXX0=",
"signatures": [
{
"keyId": "test-key",
"algorithm": "hs256",
"signature": "signature-value"
}
]
},
"report": {
"reportId": "report-abc",
"generatedAt": "2025-10-19T12:34:56Z",
"imageDigest": "sha256:feedface",
"policy": {
"digest": "digest-123",
"revisionId": "rev-42"
},
"summary": {
"total": 1,
"blocked": 1,
"warned": 0,
"ignored": 0,
"quieted": 0
},
"verdict": "blocked",
"verdicts": [
{
"findingId": "finding-1",
"status": "Blocked",
"score": 47.5,
"sourceTrust": "NVD",
"reachability": "runtime"
}
],
"issues": []
}
}
}

View File

@@ -0,0 +1,99 @@
{
"eventId": "08a6de24-4a94-4d14-8432-9d14f36f6da3",
"kind": "scanner.event.scan.completed",
"version": 1,
"tenant": "tenant-alpha",
"occurredAt": "2025-10-19T12:34:56Z",
"recordedAt": "2025-10-19T12:34:57Z",
"source": "scanner.webservice",
"idempotencyKey": "scanner.event.scan.completed:tenant-alpha:report-abc",
"correlationId": "report-abc",
"traceId": "4bf92f3577b34da6a3ce929d0e0e4736",
"scope": {
"namespace": "acme/edge",
"repo": "api",
"digest": "sha256:feedface"
},
"attributes": {
"reportId": "report-abc",
"policyRevisionId": "rev-42",
"policyDigest": "digest-123",
"verdict": "blocked"
},
"payload": {
"reportId": "report-abc",
"scanId": "report-abc",
"imageDigest": "sha256:feedface",
"verdict": "fail",
"summary": {
"total": 1,
"blocked": 1,
"warned": 0,
"ignored": 0,
"quieted": 0
},
"delta": {
"newCritical": 1,
"kev": [
"CVE-2024-9999"
]
},
"policy": {
"digest": "digest-123",
"revisionId": "rev-42"
},
"findings": [
{
"id": "finding-1",
"severity": "Critical",
"cve": "CVE-2024-9999",
"purl": "pkg:docker/acme/edge-api@sha256-feedface",
"reachability": "runtime"
}
],
"links": {
"ui": "https://scanner.example/ui/reports/report-abc",
"report": "https://scanner.example/api/v1/reports/report-abc",
"policy": "https://scanner.example/api/v1/policy/revisions/rev-42",
"attestation": "https://scanner.example/ui/attestations/report-abc"
},
"dsse": {
"payloadType": "application/vnd.stellaops.report+json",
"payload": "eyJyZXBvcnRJZCI6InJlcG9ydC1hYmMiLCJpbWFnZURpZ2VzdCI6InNoYTI1NjpmZWVkZmFjZSIsImdlbmVyYXRlZEF0IjoiMjAyNS0xMC0xOVQxMjozNDo1NiswMDowMCIsInZlcmRpY3QiOiJibG9ja2VkIiwicG9saWN5Ijp7InJldmlzaW9uSWQiOiJyZXYtNDIiLCJkaWdlc3QiOiJkaWdlc3QtMTIzIn0sInN1bW1hcnkiOnsidG90YWwiOjEsImJsb2NrZWQiOjEsIndhcm5lZCI6MCwiaWdub3JlZCI6MCwicXVpZXRlZCI6MH0sInZlcmRpY3RzIjpbeyJmaW5kaW5nSWQiOiJmaW5kaW5nLTEiLCJzdGF0dXMiOiJCbG9ja2VkIiwic2NvcmUiOjQ3LjUsInNvdXJjZVRydXN0IjoiTlZEIiwicmVhY2hhYmlsaXR5IjoicnVudGltZSJ9XSwiaXNzdWVzIjpbXX0=",
"signatures": [
{
"keyId": "test-key",
"algorithm": "hs256",
"signature": "signature-value"
}
]
},
"report": {
"reportId": "report-abc",
"generatedAt": "2025-10-19T12:34:56Z",
"imageDigest": "sha256:feedface",
"policy": {
"digest": "digest-123",
"revisionId": "rev-42"
},
"summary": {
"total": 1,
"blocked": 1,
"warned": 0,
"ignored": 0,
"quieted": 0
},
"verdict": "blocked",
"verdicts": [
{
"findingId": "finding-1",
"status": "Blocked",
"score": 47.5,
"sourceTrust": "NVD",
"reachability": "runtime"
}
],
"issues": []
}
}
}

View File

@@ -0,0 +1,36 @@
{
"eventId": "4d33c19c-1c8a-44d1-9954-1d5e98b2af71",
"kind": "scheduler.graph.job.completed",
"tenant": "tenant-alpha",
"ts": "2025-10-26T12:00:45Z",
"payload": {
"jobType": "build",
"status": "completed",
"occurredAt": "2025-10-26T12:00:45Z",
"job": {
"schemaVersion": "scheduler.graph-build-job@1",
"id": "gbj_20251026a",
"tenantId": "tenant-alpha",
"sbomId": "sbom_20251026",
"sbomVersionId": "sbom_ver_20251026",
"sbomDigest": "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
"graphSnapshotId": "graph_snap_20251026",
"status": "completed",
"trigger": "sbom-version",
"attempts": 1,
"cartographerJobId": "carto_job_42",
"correlationId": "evt_svc_987",
"createdAt": "2025-10-26T12:00:00+00:00",
"startedAt": "2025-10-26T12:00:05+00:00",
"completedAt": "2025-10-26T12:00:45+00:00",
"metadata": {
"sbomEventId": "sbom_evt_20251026"
}
},
"resultUri": "oras://cartographer/offline/tenant-alpha/graph_snap_20251026"
},
"attributes": {
"cartographerCluster": "offline-kit",
"plannerShard": "graph-builders-01"
}
}

View File

@@ -0,0 +1,164 @@
{
"$id": "https://stella-ops.org/schemas/events/scanner.event.report.ready@1.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Scanner orchestrator event report ready (v1)",
"type": "object",
"additionalProperties": false,
"required": [
"eventId",
"kind",
"version",
"tenant",
"occurredAt",
"source",
"idempotencyKey",
"payload"
],
"properties": {
"eventId": {
"type": "string",
"format": "uuid",
"description": "Globally unique identifier for this occurrence."
},
"kind": {
"const": "scanner.event.report.ready",
"description": "Event kind identifier consumed by orchestrator subscribers."
},
"version": {
"const": 1,
"description": "Schema version for orchestrator envelopes."
},
"tenant": {
"type": "string",
"description": "Tenant that owns the scan/report."
},
"occurredAt": {
"type": "string",
"format": "date-time",
"description": "Timestamp (UTC) when the report transitioned to ready."
},
"recordedAt": {
"type": "string",
"format": "date-time",
"description": "Timestamp (UTC) when the event was persisted. Optional."
},
"source": {
"type": "string",
"description": "Producer identifier, e.g. `scanner.webservice`."
},
"idempotencyKey": {
"type": "string",
"minLength": 8,
"description": "Deterministic key used to deduplicate events downstream."
},
"correlationId": {
"type": "string",
"description": "Correlation identifier that ties this event to a request or workflow."
},
"traceId": {
"type": "string",
"description": "W3C trace ID (32 hex chars) for distributed tracing."
},
"spanId": {
"type": "string",
"description": "Optional span identifier associated with traceId."
},
"scope": {
"type": "object",
"additionalProperties": false,
"required": ["repo", "digest"],
"properties": {
"namespace": {"type": "string"},
"repo": {"type": "string"},
"digest": {"type": "string"},
"component": {"type": "string"},
"image": {"type": "string"}
}
},
"attributes": {
"type": "object",
"description": "String attributes for downstream correlation (policy revision, scan id, etc.).",
"additionalProperties": {"type": "string"}
},
"payload": {
"type": "object",
"additionalProperties": true,
"required": ["reportId", "verdict", "summary", "links", "report"],
"properties": {
"reportId": {"type": "string"},
"scanId": {"type": "string"},
"imageDigest": {"type": "string"},
"generatedAt": {"type": "string", "format": "date-time"},
"verdict": {"enum": ["pass", "warn", "fail"]},
"summary": {
"type": "object",
"additionalProperties": false,
"required": ["total", "blocked", "warned", "ignored", "quieted"],
"properties": {
"total": {"type": "integer", "minimum": 0},
"blocked": {"type": "integer", "minimum": 0},
"warned": {"type": "integer", "minimum": 0},
"ignored": {"type": "integer", "minimum": 0},
"quieted": {"type": "integer", "minimum": 0}
}
},
"delta": {
"type": "object",
"additionalProperties": false,
"properties": {
"newCritical": {"type": "integer", "minimum": 0},
"newHigh": {"type": "integer", "minimum": 0},
"kev": {
"type": "array",
"items": {"type": "string"}
}
}
},
"quietedFindingCount": {
"type": "integer",
"minimum": 0
},
"policy": {
"type": "object",
"description": "Policy revision metadata surfaced alongside the report."
},
"links": {
"type": "object",
"additionalProperties": false,
"properties": {
"ui": {"type": "string", "format": "uri"},
"report": {"type": "string", "format": "uri"},
"policy": {"type": "string", "format": "uri"},
"attestation": {"type": "string", "format": "uri"}
}
},
"dsse": {
"type": "object",
"additionalProperties": false,
"required": ["payloadType", "payload", "signatures"],
"properties": {
"payloadType": {"type": "string"},
"payload": {"type": "string"},
"signatures": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"required": ["keyId", "algorithm", "signature"],
"properties": {
"keyId": {"type": "string"},
"algorithm": {"type": "string"},
"signature": {"type": "string"}
}
}
}
}
},
"report": {
"type": "object",
"description": "Canonical scanner report document that aligns with the DSSE payload."
}
}
}
}
}

View File

@@ -0,0 +1,174 @@
{
"$id": "https://stella-ops.org/schemas/events/scanner.event.scan.completed@1.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Scanner orchestrator event scan completed (v1)",
"type": "object",
"additionalProperties": false,
"required": [
"eventId",
"kind",
"version",
"tenant",
"occurredAt",
"source",
"idempotencyKey",
"payload"
],
"properties": {
"eventId": {
"type": "string",
"format": "uuid",
"description": "Globally unique identifier for this occurrence."
},
"kind": {
"const": "scanner.event.scan.completed",
"description": "Event kind identifier consumed by orchestrator subscribers."
},
"version": {
"const": 1,
"description": "Schema version for orchestrator envelopes."
},
"tenant": {
"type": "string",
"description": "Tenant that owns the scan."
},
"occurredAt": {
"type": "string",
"format": "date-time",
"description": "Timestamp (UTC) when the scan completed."
},
"recordedAt": {
"type": "string",
"format": "date-time",
"description": "Timestamp (UTC) when the event was persisted. Optional."
},
"source": {
"type": "string",
"description": "Producer identifier, e.g. `scanner.webservice`."
},
"idempotencyKey": {
"type": "string",
"minLength": 8,
"description": "Deterministic key used to deduplicate events downstream."
},
"correlationId": {
"type": "string",
"description": "Correlation identifier tying this event to a request or workflow."
},
"traceId": {
"type": "string",
"description": "W3C trace ID (32 hex chars) for distributed tracing."
},
"spanId": {
"type": "string",
"description": "Optional span identifier associated with traceId."
},
"scope": {
"type": "object",
"additionalProperties": false,
"required": ["repo", "digest"],
"properties": {
"namespace": {"type": "string"},
"repo": {"type": "string"},
"digest": {"type": "string"},
"component": {"type": "string"},
"image": {"type": "string"}
}
},
"attributes": {
"type": "object",
"description": "String attributes for downstream correlation (policy revision, scan id, etc.).",
"additionalProperties": {"type": "string"}
},
"payload": {
"type": "object",
"additionalProperties": true,
"required": ["reportId", "scanId", "imageDigest", "verdict", "summary", "report"],
"properties": {
"reportId": {"type": "string"},
"scanId": {"type": "string"},
"imageDigest": {"type": "string"},
"verdict": {"enum": ["pass", "warn", "fail"]},
"summary": {
"type": "object",
"additionalProperties": false,
"required": ["total", "blocked", "warned", "ignored", "quieted"],
"properties": {
"total": {"type": "integer", "minimum": 0},
"blocked": {"type": "integer", "minimum": 0},
"warned": {"type": "integer", "minimum": 0},
"ignored": {"type": "integer", "minimum": 0},
"quieted": {"type": "integer", "minimum": 0}
}
},
"delta": {
"type": "object",
"additionalProperties": false,
"properties": {
"newCritical": {"type": "integer", "minimum": 0},
"newHigh": {"type": "integer", "minimum": 0},
"kev": {
"type": "array",
"items": {"type": "string"}
}
}
},
"policy": {
"type": "object",
"description": "Policy revision metadata surfaced alongside the report."
},
"findings": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"required": ["id"],
"properties": {
"id": {"type": "string"},
"severity": {"type": "string"},
"cve": {"type": "string"},
"purl": {"type": "string"},
"reachability": {"type": "string"}
}
}
},
"links": {
"type": "object",
"additionalProperties": false,
"properties": {
"ui": {"type": "string", "format": "uri"},
"report": {"type": "string", "format": "uri"},
"policy": {"type": "string", "format": "uri"},
"attestation": {"type": "string", "format": "uri"}
}
},
"dsse": {
"type": "object",
"additionalProperties": false,
"required": ["payloadType", "payload", "signatures"],
"properties": {
"payloadType": {"type": "string"},
"payload": {"type": "string"},
"signatures": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"required": ["keyId", "algorithm", "signature"],
"properties": {
"keyId": {"type": "string"},
"algorithm": {"type": "string"},
"signature": {"type": "string"}
}
}
}
}
},
"report": {
"type": "object",
"description": "Canonical scanner report document that aligns with the DSSE payload."
}
}
}
}
}

View File

@@ -0,0 +1,196 @@
{
"$id": "https://stella-ops.org/schemas/events/scheduler.graph.job.completed@1.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Scheduler Graph Job Completed Event",
"description": "Legacy scheduler event emitted when a graph build or overlay job reaches a terminal state. Consumers validate downstream caches and surface overlay freshness.",
"type": "object",
"additionalProperties": false,
"required": ["eventId", "kind", "tenant", "ts", "payload"],
"properties": {
"eventId": {
"type": "string",
"format": "uuid",
"description": "Globally unique identifier per event."
},
"kind": {
"const": "scheduler.graph.job.completed"
},
"tenant": {
"type": "string",
"description": "Tenant identifier scoped to the originating job."
},
"ts": {
"type": "string",
"format": "date-time",
"description": "UTC timestamp when the job reached a terminal state."
},
"payload": {
"type": "object",
"additionalProperties": false,
"required": ["jobType", "job", "status", "occurredAt"],
"properties": {
"jobType": {
"type": "string",
"enum": ["build", "overlay"],
"description": "Job flavour, matches the CLR type of the serialized job payload."
},
"status": {
"type": "string",
"enum": ["completed", "failed", "cancelled"],
"description": "Terminal status recorded for the job."
},
"occurredAt": {
"type": "string",
"format": "date-time",
"description": "UTC timestamp of the terminal transition, mirrors job.CompletedAt."
},
"job": {
"oneOf": [
{"$ref": "#/definitions/graphBuildJob"},
{"$ref": "#/definitions/graphOverlayJob"}
],
"description": "Canonical serialized representation of the finished job."
},
"resultUri": {
"type": "string",
"description": "Optional URI pointing to Cartographer snapshot or overlay bundle (if available)."
}
}
},
"attributes": {
"type": "object",
"description": "Optional correlation bag for downstream consumers.",
"additionalProperties": {
"type": "string"
}
}
},
"definitions": {
"graphBuildJob": {
"type": "object",
"additionalProperties": false,
"required": [
"schemaVersion",
"id",
"tenantId",
"sbomId",
"sbomVersionId",
"sbomDigest",
"status",
"trigger",
"attempts",
"createdAt"
],
"properties": {
"schemaVersion": {
"const": "scheduler.graph-build-job@1"
},
"id": {"type": "string"},
"tenantId": {"type": "string"},
"sbomId": {"type": "string"},
"sbomVersionId": {"type": "string"},
"sbomDigest": {
"type": "string",
"pattern": "^sha256:[a-f0-9]{64}$"
},
"graphSnapshotId": {"type": "string"},
"status": {
"type": "string",
"enum": ["pending", "queued", "running", "completed", "failed", "cancelled"]
},
"trigger": {
"type": "string",
"enum": ["sbom-version", "backfill", "manual"]
},
"attempts": {
"type": "integer",
"minimum": 0
},
"cartographerJobId": {"type": "string"},
"correlationId": {"type": "string"},
"createdAt": {
"type": "string",
"format": "date-time"
},
"startedAt": {
"type": "string",
"format": "date-time"
},
"completedAt": {
"type": "string",
"format": "date-time"
},
"error": {"type": "string"},
"metadata": {
"type": "object",
"additionalProperties": {"type": "string"}
}
}
},
"graphOverlayJob": {
"type": "object",
"additionalProperties": false,
"required": [
"schemaVersion",
"id",
"tenantId",
"graphSnapshotId",
"overlayKind",
"overlayKey",
"status",
"trigger",
"attempts",
"createdAt"
],
"properties": {
"schemaVersion": {
"const": "scheduler.graph-overlay-job@1"
},
"id": {"type": "string"},
"tenantId": {"type": "string"},
"graphSnapshotId": {"type": "string"},
"buildJobId": {"type": "string"},
"overlayKind": {
"type": "string",
"enum": ["policy", "advisory", "vex"]
},
"overlayKey": {"type": "string"},
"subjects": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": true
},
"status": {
"type": "string",
"enum": ["pending", "queued", "running", "completed", "failed", "cancelled"]
},
"trigger": {
"type": "string",
"enum": ["policy", "advisory", "vex", "sbom-version", "manual"]
},
"attempts": {
"type": "integer",
"minimum": 0
},
"correlationId": {"type": "string"},
"createdAt": {
"type": "string",
"format": "date-time"
},
"startedAt": {
"type": "string",
"format": "date-time"
},
"completedAt": {
"type": "string",
"format": "date-time"
},
"error": {"type": "string"},
"metadata": {
"type": "object",
"additionalProperties": {"type": "string"}
}
}
}
}
}

View File

@@ -0,0 +1,16 @@
# Policy Examples
Sample `stella-dsl@1` policies illustrating common deployment personas. Each example includes commentary, CLI usage hints, and a compliance checklist.
| Example | Description |
|---------|-------------|
| [Baseline](baseline.md) | Balanced production defaults (block critical, respect strong VEX). |
| [Serverless](serverless.md) | Aggressive blocking for serverless workloads (no High+, pinned base images). |
| [Internal Only](internal-only.md) | Lenient policy for internal/dev environments with KEV safeguards. |
Policy source files (`*.stella`) live alongside the documentation so you can copy/paste or use `stella policy new --from file://...`.
---
*Last updated: 2025-10-26.*

View File

@@ -0,0 +1,79 @@
# Baseline Policy Example (`baseline.stella`)
This sample policy provides a balanced default for production workloads: block critical findings, require strong VEX justifications to suppress advisories, and warn on deprecated runtimes. Use it as a starting point for tenants that want guardrails without excessive noise.
```dsl
policy "Baseline Production Policy" syntax "stella-dsl@1" {
metadata {
description = "Block critical, escalate high, enforce VEX justifications."
tags = ["baseline","production"]
}
profile severity {
map vendor_weight {
source "GHSA" => +0.5
source "OSV" => +0.0
source "VendorX" => -0.2
}
env exposure_adjustments {
if env.exposure == "internet" then +0.5
if env.runtime == "legacy" then +0.3
}
}
rule block_critical priority 5 {
when severity.normalized >= "Critical"
then status := "blocked"
because "Critical severity must be remediated before deploy."
}
rule escalate_high_internet {
when severity.normalized == "High"
and env.exposure == "internet"
then escalate to severity_band("Critical")
because "High severity on internet-exposed asset escalates to critical."
}
rule require_vex_justification {
when vex.any(status in ["not_affected","fixed"])
and vex.justification in ["component_not_present","vulnerable_code_not_present"]
then status := vex.status
annotate winning_statement := vex.latest().statementId
because "Respect strong vendor VEX claims."
}
rule alert_warn_eol_runtime priority 1 {
when severity.normalized <= "Medium"
and sbom.has_tag("runtime:eol")
then warn message "Runtime marked as EOL; upgrade recommended."
because "Deprecated runtime should be upgraded."
}
}
```
## Commentary
- **Severity profile** tightens vendor weights and applies exposure modifiers so internet-facing/high severity pairs escalate automatically.
- **VEX rule** only honours strong justifications, preventing weaker claims from hiding issues.
- **Warnings first** The `alert_warn_eol_runtime` rule name ensures it sorts before the require-VEX rule, keeping alerts visible without flipping to `RequiresVex`.
- Works well as shared `tenant-global` baseline; use tenant overrides for stricter tolerant environments.
## Try it out
```bash
stella policy new --policy-id P-baseline --template blank --open
stella policy lint examples/policies/baseline.stella
stella policy simulate P-baseline --candidate 1 --sbom sbom:sample-prod
```
## Compliance checklist
- [ ] Policy compiled via `stella policy lint` without diagnostics.
- [ ] Simulation diff reviewed against golden SBOM set.
- [ ] Approval note documents rationale before promoting to production.
- [ ] EOL runtime tags kept up to date in SBOM metadata.
- [ ] VEX vendor allow-list reviewed quarterly.
---
*Last updated: 2025-10-26.*

View File

@@ -0,0 +1,46 @@
policy "Baseline Production Policy" syntax "stella-dsl@1" {
metadata {
description = "Block critical, escalate high, enforce VEX justifications."
tags = ["baseline","production"]
}
profile severity {
map vendor_weight {
source "GHSA" => +0.5
source "OSV" => +0.0
source "VendorX" => -0.2
}
env exposure_adjustments {
if env.exposure == "internet" then +0.5
if env.runtime == "legacy" then +0.3
}
}
rule block_critical priority 5 {
when severity.normalized >= "Critical"
then status := "blocked"
because "Critical severity must be remediated before deploy."
}
rule escalate_high_internet {
when severity.normalized == "High"
and env.exposure == "internet"
then escalate to severity_band("Critical")
because "High severity on internet-exposed asset escalates to critical."
}
rule require_vex_justification {
when vex.any(status in ["not_affected","fixed"])
and vex.justification in ["component_not_present","vulnerable_code_not_present"]
then status := vex.status
annotate winning_statement := vex.latest().statementId
because "Respect strong vendor VEX claims."
}
rule alert_warn_eol_runtime priority 1 {
when severity.normalized <= "Medium"
and sbom.has_tag("runtime:eol")
then warn message "Runtime marked as EOL; upgrade recommended."
because "Deprecated runtime should be upgraded."
}
}

View File

@@ -0,0 +1,34 @@
version: "1.0"
metadata:
description: Baseline production policy
tags:
- baseline
- production
rules:
- name: Block Critical
severity: [Critical]
action: block
- name: Escalate High Internet
severity: [High]
environments: [internet]
action:
type: escalate
escalate:
minimumSeverity: Critical
- name: Require VEX justification
sources: [NVD, GHSA]
action:
type: requireVex
requireVex:
vendors: [VendorX, VendorY]
justifications:
- component_not_present
- vulnerable_code_not_present
- name: Alert warn EOL runtime
priority: 1
severity: [Low, Medium]
tags: [runtime:eol]
action: warn

Some files were not shown because too many files have changed in this diff Show More