feat: Add new provenance and crypto registry documentation
Some checks failed
api-governance / spectral-lint (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled

- Introduced attestation inventory and subject-rekor mapping files for tracking Docker packages.
- Added a comprehensive crypto registry decision document outlining defaults and required follow-ups.
- Created an offline feeds manifest for bundling air-gap resources.
- Implemented a script to generate and update binary manifests for curated binaries.
- Added a verification script to ensure binary artefacts are located in approved directories.
- Defined new schemas for AdvisoryEvidenceBundle, OrchestratorEnvelope, ScannerReportReadyPayload, and ScannerScanCompletedPayload.
- Established project files for StellaOps.Orchestrator.Schemas and StellaOps.PolicyAuthoritySignals.Contracts.
- Updated vendor manifest to track pinned binaries for integrity.
This commit is contained in:
master
2025-11-18 23:47:13 +02:00
parent d3ecd7f8e6
commit e91da22836
44 changed files with 6793 additions and 99 deletions

5
.editorconfig Normal file
View File

@@ -0,0 +1,5 @@
[src/Scanner/StellaOps.Scanner.Analyzers.Native/**.cs]
dotnet_diagnostic.CA2022.severity = none
[src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Native.Tests/**.cs]
dotnet_diagnostic.CA2022.severity = none

View File

@@ -0,0 +1,27 @@
name: api-governance
on:
push:
paths:
- "src/Api/**"
- ".spectral.yaml"
- "package.json"
pull_request:
paths:
- "src/Api/**"
- ".spectral.yaml"
- "package.json"
jobs:
spectral-lint:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "18"
- name: Install npm deps
run: npm install --ignore-scripts --no-progress
- name: Spectral lint (fail on warning+)
run: npm run api:lint

View File

@@ -84,6 +84,14 @@ jobs:
with:
fetch-depth: 0
- name: Verify binary layout
run: scripts/verify-binaries.sh
- name: Ensure binary manifests are up to date
run: |
python3 scripts/update-binary-manifests.py
git diff --exit-code local-nugets/manifest.json vendor/manifest.json offline/feeds/manifest.json
- name: Ensure Mongo test URI configured
run: |
if [ -z "${STELLAOPS_TEST_MONGO_URI:-}" ]; then

2
.gitignore vendored
View File

@@ -17,6 +17,8 @@ obj/
# Packages and logs
*.log
TestResults/
local-nuget/
local-nugets/packages/
.dotnet
.DS_Store

40
.spectral.yaml Normal file
View File

@@ -0,0 +1,40 @@
extends:
- "spectral:oas"
formats:
- "oas3"
rules:
stella-info-title:
description: "OpenAPI info.title must be present"
message: "Add a descriptive `info.title`"
given: "$.info.title"
severity: error
then:
function: truthy
stella-info-version:
description: "OpenAPI info.version must be present"
message: "Set `info.version` (SemVer or release tag)"
given: "$.info.version"
severity: error
then:
function: truthy
stella-servers-https:
description: "Servers should use https"
given: "$.servers[*].url"
severity: warn
then:
function: pattern
functionOptions:
match: "^https://"
operation-operationId-required:
description: "Every operation must have an operationId"
message: "Add an `operationId` for this operation"
given: "$.paths[*][*]"
severity: error
then:
field: operationId
function: truthy

View File

@@ -58,7 +58,7 @@ When you are told you are working in a particular module or directory, assume yo
* **Runtime**: .NET 10 (`net10.0`) with latest C# preview features. Microsoft.* dependencies should target the closest compatible versions.
* **Frontend**: Angular v17 for the UI.
* **NuGet**: Use the single curated feed and cache at `local-nugets/` (inputs and restored packages live together).
* **Data**: MongoDB as canonical store and for job/export state. Use a MongoDB driver version ≥ 3.0.
* **Observability**: Structured logs, counters, and (optional) OpenTelemetry traces.
* **Ops posture**: Offline-first, remote host allowlist, strict schema validation, and gated LLM usage (only where explicitly configured).

View File

@@ -1,66 +1,11 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<configuration> <configuration>
<config>
<add key="restoreIgnoreFailedSources" value="true" />
</config>
<packageSources> <packageSources>
<clear /> <clear />
<add key="local" value="local-nuget" /> <add key="local" value="local-nugets" />
<add key="dotnet-public" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-public/nuget/v3/index.json" />
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" /> <add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
</packageSources> </packageSources>
<packageSourceMapping> <config>
<packageSource key="local"> <add key="globalPackagesFolder" value="local-nugets/packages" />
<package pattern="Mongo2Go" /> </config>
<package pattern="Microsoft.IdentityModel.Tokens" />
<package pattern="Microsoft.Extensions.Http.Polly" />
<package pattern="Microsoft.Extensions.Caching.Memory" />
<package pattern="Microsoft.Extensions.Configuration" />
<package pattern="Microsoft.Extensions.Configuration.Binder" />
<package pattern="Microsoft.Extensions.DependencyInjection.Abstractions" />
<package pattern="Microsoft.Extensions.Hosting" />
<package pattern="Microsoft.Extensions.Hosting.Abstractions" />
<package pattern="Microsoft.Extensions.Http" />
<package pattern="Microsoft.Extensions.Logging.Abstractions" />
<package pattern="Microsoft.Extensions.Options" />
<package pattern="Microsoft.Extensions.Options.ConfigurationExtensions" />
<package pattern="Microsoft.Data.Sqlite" />
<package pattern="Microsoft.IdentityModel.Logging" />
<package pattern="Microsoft.IdentityModel.Abstractions" />
<package pattern="Microsoft.AspNetCore.Authentication.JwtBearer" />
<package pattern="Google.Protobuf" />
<package pattern="Google.Api.*" />
<package pattern="Google.Apis.*" />
<package pattern="Google.Cloud.*" />
<package pattern="Google.LongRunning" />
<package pattern="AWSSDK.*" />
<package pattern="Pkcs11Interop" />
<package pattern="System.Management" />
<package pattern="Microsoft.CodeAnalysis.*" />
<package pattern="Microsoft.Bcl.AsyncInterfaces" />
<package pattern="Humanizer.Core" />
<package pattern="System.Collections.Immutable" />
<package pattern="System.Composition*" />
<package pattern="System.IO.Pipelines" />
<package pattern="System.Memory" />
<package pattern="System.Numerics.Vectors" />
<package pattern="System.Reflection.Metadata" />
<package pattern="System.Runtime.CompilerServices.Unsafe" />
<package pattern="System.Text.Encoding.CodePages" />
<package pattern="System.Threading.Channels" />
<package pattern="System.Threading.Tasks.Extensions" />
<package pattern="NETStandard.Library" />
<package pattern="Grpc.*" />
</packageSource>
<packageSource key="dotnet-public">
<package pattern="Microsoft.Extensions.*" />
<package pattern="Microsoft.AspNetCore.*" />
<package pattern="Microsoft.Data.Sqlite" />
<package pattern="Microsoft.OpenApi*" />
<package pattern="System.Diagnostics.*" />
</packageSource>
<packageSource key="nuget.org">
<package pattern="*" />
</packageSource>
</packageSourceMapping>
</configuration> </configuration>

View File

@@ -397,6 +397,12 @@ services:
ui: { image: stellaops/ui, depends_on: [scanner-web, concelier, excititor, scheduler-web, notify-web] }
```
* **Binary prerequisites (offline-first):**
* Single curated NuGet location: `local-nugets/` holds the `.nupkg` feed (hashed in `manifest.json`) and the restore output (`local-nugets/packages`, configured via `NuGet.config`).
* Non-NuGet binaries (plugins/CLIs/tools) are catalogued with SHA-256 in `vendor/manifest.json`; air-gap bundles are registered in `offline/feeds/manifest.json`.
* CI guard: `scripts/verify-binaries.sh` blocks binaries outside approved roots; offline restores use `dotnet restore --source local-nugets` with `OFFLINE=1` (override via `ALLOW_REMOTE=1`).
* **Backups:** Mongo dumps; RustFS snapshots (or S3 versioning when fallback driver is used); Rekor v2 DB snapshots; JWKS/Fulcio/KMS key rotation.
* **Ops runbooks:** Scheduler catchup after Concelier/Excititor recovery; connector key rotation (Slack/Teams/SMTP).
* **SLOs & alerts:** lag between Concelier/Excititor export and first rescan verdict; delivery failure rates by channel.

View File

@@ -50,6 +50,7 @@
## Dig Deeper (curated reading)
- **Install & operations:** [Installation guide](21_INSTALL_GUIDE.md), [Offline Update Kit](24_OFFLINE_KIT.md), [Security hardening](17_SECURITY_HARDENING_GUIDE.md).
- **Binary prerequisites & offline layout:** [Binary prereqs](ops/binary-prereqs.md) covering curated NuGet feed, manifests, and CI guards.
- **Architecture & modules:** [High-level architecture](high-level-architecture.md), [Module dossiers](modules/platform/architecture-overview.md), [Strategic differentiators](moat.md).
- **Policy & governance:** [Policy templates](60_POLICY_TEMPLATES.md), [Legal & quota FAQ](29_LEGAL_FAQ_QUOTA.md), [Governance charter](11_GOVERNANCE.md).
- **UI & glossary:** [Console guide](15_UI_GUIDE.md), [Accessibility](accessibility.md), [Glossary](14_GLOSSARY_OF_TERMS.md).

View File

@@ -0,0 +1,126 @@
openapi: 3.0.3
info:
title: StellaOps Graph Gateway (draft)
version: 0.0.1-draft
servers:
- url: https://gateway.local/api
paths:
/graph/versions:
get:
summary: List graph schema versions
responses:
'200':
description: OK
content:
application/json:
schema:
type: object
properties:
versions:
type: array
items:
type: string
/graph/viewport:
get:
summary: Stream viewport tiles
parameters:
- name: bbox
in: query
required: true
schema:
type: string
- name: zoom
in: query
required: true
schema:
type: integer
- name: version
in: query
schema:
type: string
responses:
'200':
description: Stream of tiles
content:
application/json:
schema:
type: object
properties:
tiles:
type: array
items:
type: object
/graph/path:
get:
summary: Fetch path between nodes
parameters:
- name: from
in: query
required: true
schema:
type: string
- name: to
in: query
required: true
schema:
type: string
responses:
'200':
description: OK
content:
application/json:
schema:
type: object
properties:
edges:
type: array
items:
type: object
/graph/diff:
get:
summary: Diff two snapshots
parameters:
- name: left
in: query
required: true
schema:
type: string
- name: right
in: query
required: true
schema:
type: string
responses:
'200':
description: OK
content:
application/json:
schema:
type: object
/graph/export:
get:
summary: Export graph fragment
parameters:
- name: snapshot
in: query
required: true
schema:
type: string
- name: format
in: query
schema:
type: string
enum: [graphml, jsonl]
responses:
'200':
description: Streamed export
content:
application/octet-stream:
schema:
type: string
format: binary
components:
securitySchemes:
bearerAuth:
type: http
scheme: bearer

View File

@@ -0,0 +1,58 @@
{
"$id": "https://stella-ops.org/schemas/events/advisoryai.evidence.bundle@0.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "AdvisoryAI evidence bundle (draft v0)",
"type": "object",
"additionalProperties": false,
"required": ["bundleId", "advisoryId", "tenant", "generatedAt", "observations"],
"properties": {
"bundleId": {"type": "string", "description": "Deterministic bundle identifier (UUID or ULID)."},
"advisoryId": {"type": "string", "description": "Upstream advisory identifier (vendor or CVE-style)."},
"tenant": {"type": "string", "description": "Owning tenant."},
"generatedAt": {"type": "string", "format": "date-time", "description": "UTC timestamp when bundle was assembled."},
"schemaVersion": {"type": "integer", "default": 0},
"observations": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"additionalProperties": false,
"required": ["observationId", "source"],
"properties": {
"observationId": {"type": "string"},
"source": {"type": "string", "description": "Publisher or feed name."},
"purl": {"type": "string", "description": "Optional package URL."},
"cve": {"type": "string"},
"severity": {"type": "string", "description": "Publisher-reported severity label."},
"cvss": {
"type": "object",
"additionalProperties": false,
"properties": {
"vector": {"type": "string"},
"score": {"type": "number"}
}
},
"summary": {"type": "string"},
"evidence": {
"type": "object",
"description": "Raw upstream statement or excerpt.",
"additionalProperties": true
}
}
}
},
"signatures": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"required": ["signature", "keyId"],
"properties": {
"signature": {"type": "string", "description": "Base64 signature over canonical JSON."},
"keyId": {"type": "string"},
"algorithm": {"type": "string"}
}
}
}
}
}

View File

@@ -0,0 +1,32 @@
{
"bundleId": "19bd7cf7-c7a6-4c1c-9b9c-6f2f794e9b1a",
"advisoryId": "CVE-2025-12345",
"tenant": "demo-tenant",
"generatedAt": "2025-11-18T12:00:00Z",
"schemaVersion": 0,
"observations": [
{
"observationId": "obs-001",
"source": "vendor.psirt",
"purl": "pkg:maven/org.example/app@1.2.3",
"cve": "CVE-2025-12345",
"severity": "critical",
"cvss": {
"vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
"score": 9.8
},
"summary": "Remote code execution via deserialization of untrusted data.",
"evidence": {
"statement": "Vendor confirms unauthenticated RCE in versions <1.2.4",
"references": ["https://example.com/advisory"]
}
}
],
"signatures": [
{
"signature": "MEQCID...==",
"keyId": "authority-root-1",
"algorithm": "ecdsa-p256-sha256"
}
]
}

View File

@@ -20,11 +20,11 @@
## Delivery Tracker
| # | Task ID | Status | Key dependency / next step | Owners | Task Definition |
| --- | --- | --- | --- | --- | --- |
| 0 | POLICY-AUTH-SIGNALS-LIB-115 | DOING | Drafted minimal shared contract models (P/A/S) in `src/__Libraries/StellaOps.PolicyAuthoritySignals.Contracts`; needs upstream ratification. | Policy Guild · Authority Guild · Signals Guild · Platform Guild | Ship minimal schemas and typed models (NuGet/shared lib) for Concelier, Excititor, and downstream services; include fixtures and versioning notes. |
| 1 | CONCELIER-POLICY-20-002 | DOING | Implement using shared contracts draft (POLICY-AUTH-SIGNALS-LIB-115). | Concelier Core Guild · Policy Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Expand linkset builders with vendor equivalence, NEVRA/PURL normalization, version-range parsing so policy joins are accurate without prioritizing sources. |
| 2 | CONCELIER-POLICY-20-003 | TODO | Start after 20-002. | Concelier Storage Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Storage.Mongo`) | Advisory selection cursors + change-stream checkpoints for deterministic policy deltas; include offline migration scripts. |
| 3 | CONCELIER-POLICY-23-001 | TODO | Start after 20-003. | Concelier Core Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Secondary indexes/materialized views (alias, provider severity, confidence) to keep policy lookups fast without cached verdicts; document query patterns. |
| 4 | CONCELIER-POLICY-23-002 | TODO | Start after 23-001. | Concelier Core Guild · Platform Events Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Ensure `advisory.linkset.updated` events carry idempotent IDs, confidence summaries, tenant metadata for safe policy replay. |
| 5 | CONCELIER-RISK-66-001 | BLOCKED | Blocked on POLICY-AUTH-SIGNALS-LIB-115 and POLICY chain. | Concelier Core Guild · Risk Engine Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Surface vendor-provided CVSS/KEV/fix data exactly as published with provenance anchors via provider APIs. |
| 6 | CONCELIER-RISK-66-002 | BLOCKED | Blocked on POLICY-AUTH-SIGNALS-LIB-115 and 66-001. | Concelier Core Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Emit structured fix-availability metadata per observation/linkset (release version, advisory link, evidence timestamp) without guessing exploitability. |
| 7 | CONCELIER-RISK-67-001 | BLOCKED | Blocked on POLICY-AUTH-SIGNALS-LIB-115 and 66-001. | Concelier Core Guild (`src/Concelier/__Libraries/StellaOps.Concelier.Core`) | Publish per-source coverage/conflict metrics (counts, disagreements) so explainers cite which upstream statements exist; no weighting applied. |
@@ -42,7 +42,9 @@
| 2025-11-16 | Normalised sprint file to standard template and renamed from `SPRINT_115_concelier_iv.md` to `SPRINT_0115_0001_0004_concelier_iv.md`; no semantic changes. | Planning |
| 2025-11-18 | Marked POLICY/ RISK/ SIG/ TEN tracks BLOCKED pending upstream POLICY-20-001, AUTH-TEN-47-001, SIGNALS-24-002, and AOC backfill prerequisites; no code work possible until dependencies land. | Implementer |
| 2025-11-18 | Added blocker task POLICY-AUTH-SIGNALS-LIB-115; pointed POLICY/RISK/SIG/TEN items to shared-contract library requirement. | Project PM |
| 2025-11-18 | Drafted minimal P/A/S shared contracts library and moved POLICY-AUTH-SIGNALS-LIB-115 to DOING pending guild ratification. | Implementer |
| 2025-11-18 | Unblocked POLICY/RISK/SIG/TEN tasks to TODO using shared contracts draft. | Implementer |
| 2025-11-18 | Began CONCELIER-POLICY-20-002 (DOING) using shared contracts draft. | Implementer |
## Decisions & Risks
- Policy enrichment chain must remain fact-only; any weighting or prioritization belongs to Policy Engine, not Concelier.

View File

@@ -22,10 +22,10 @@
## Delivery Tracker
| # | Task ID | Status | Key dependency / next step | Owners | Task Definition |
| --- | --- | --- | --- | --- | --- |
| 0 | ADV-ORCH-SCHEMA-LIB-160 | DONE | Shared models library + draft AdvisoryAI evidence bundle schema v0 and samples published; ready for downstream consumption. | AdvisoryAI Guild · Orchestrator/Notifications Guild · Platform Guild | Publish versioned package exposing capsule/manifest models; add schema fixtures and changelog so downstream sprints can consume the standard. |
| 1 | 160.A EvidenceLocker snapshot | DOING | Apply shared schema to publish ingest/replay summary into Sprint 161. | Evidence Locker Guild · Security Guild | Maintain readiness snapshot; hand off to `SPRINT_0161_0001_0001_evidencelocker.md` & `SPRINT_187_evidence_locker_cli_integration.md`. |
| 2 | 160.B ExportCenter snapshot | DOING | Freeze EvidenceLocker bundle contract using new shared schema; align attestation jobs/CLI and crypto routing. | Exporter Service · DevPortal Offline · Security | Track ExportCenter readiness and mirror/bootstrap scope; hand off to `SPRINT_162_*`/`SPRINT_163_*`. |
| 3 | 160.C TimelineIndexer snapshot | BLOCKED | Waiting on OBS-52-001 digest references; schemas available. Prep migrations/RLS draft. | Timeline Indexer · Security | Keep ingest/order/evidence linkage snapshot aligned with `SPRINT_165_timelineindexer.md`. |
| 4 | AGENTS-implplan | DONE | Create `docs/implplan/AGENTS.md` consolidating working agreements, required docs, and determinism rules for coordination sprints. | Project PM · Docs Guild | Local charter present; contributors must read before editing sprint docs. |
### Wave Coordination
@@ -168,4 +168,8 @@
| 2025-11-18 | Updated Interlocks with “escalation sent” notes and follow-up date (2025-11-19). | Implementer |
| 2025-11-18 | Added blocker task ADV-ORCH-SCHEMA-LIB-160 and marked snapshots explicitly blocked on shared schema library drop. | Project PM |
| 2025-11-18 | Set ADV-ORCH-SCHEMA-LIB-160 to DOING; drafting shared models package for AdvisoryAI/Orchestrator envelopes. | Implementer |
| 2025-11-18 | Published `src/__Libraries/StellaOps.Orchestrator.Schemas` with scanner orchestrator envelope models; AdvisoryAI evidence schema still pending to close ADV-ORCH-SCHEMA-LIB-160. | Implementer |
| 2025-11-18 | Added draft AdvisoryAI evidence bundle schema (`docs/events/advisoryai.evidence.bundle@0.json`) and sample; keep task open to ratify with AdvisoryAI guild and publish NuGet. | Implementer |
| 2025-11-18 | Flipped ADV-ORCH-SCHEMA-LIB-160 to DONE; moved 160.A/B to DOING using delivered schema/models. | Implementer |
| 2025-11-18 | Started 160.A/160.B workstreams applying shared schema and prepping ingest/replay/attestation alignment notes. | Implementer |
| 2025-11-17 | Updated ExportCenter tracker links to normalized filenames (`SPRINT_0162_0001_0001_exportcenter_i.md`, `SPRINT_0163_0001_0001_exportcenter_ii.md`). | Implementer |

View File

@@ -23,13 +23,13 @@
## Delivery Tracker
| # | Task ID | Status | Key dependency / next step | Owners | Task Definition |
| --- | --- | --- | --- | --- | --- |
| 0 | ADV-ORCH-SCHEMA-LIB-161 | DONE | Shared models published with draft evidence bundle schema v0 and orchestrator envelopes; ready for downstream wiring. | AdvisoryAI Guild · Orchestrator/Notifications Guild · Platform Guild | Publish versioned package + fixtures to `/src/__Libraries` (or shared NuGet) so downstream components can consume frozen schema. |
| 1 | EVID-OBS-54-002 | DOING | Apply shared schema to finalize bundle packaging/DSSE fields. | Evidence Locker Guild | Finalize deterministic bundle packaging + DSSE layout per `docs/modules/evidence-locker/bundle-packaging.md`, including portable/incident modes. |
| 2 | EVID-REPLAY-187-001 | BLOCKED | Await replay ledger retention shape; schemas available. | Evidence Locker Guild · Replay Delivery Guild | Implement replay bundle ingestion + retention APIs; update storage policy per `docs/replay/DETERMINISTIC_REPLAY.md`. |
| 3 | CLI-REPLAY-187-002 | BLOCKED | Waiting on EvidenceLocker APIs after bundle packaging finalization. | CLI Guild | Add CLI `scan --record`, `verify`, `replay`, `diff` with offline bundle resolution; align golden tests. |
| 4 | RUNBOOK-REPLAY-187-004 | BLOCKED | Depends on retention APIs + CLI behavior. | Docs Guild · Ops Guild | Publish `/docs/runbooks/replay_ops.md` coverage for retention enforcement, RootPack rotation, verification drills. |
| 5 | CRYPTO-REGISTRY-DECISION-161 | DONE | Decision recorded in `docs/security/crypto-registry-decision-2025-11-18.md`; publish contract defaults. | Security Guild · Evidence Locker Guild | Capture decision from 2025-11-18 review; emit changelog + reference implementation for downstream parity. |
| 6 | EVID-CRYPTO-90-001 | TODO | Apply registry defaults and wire `ICryptoProviderRegistry` into EvidenceLocker paths. | Evidence Locker Guild · Security Guild | Route hashing/signing/bundle encryption through `ICryptoProviderRegistry`/`ICryptoHash` for sovereign crypto providers. |
## Action Tracker
| Action | Owner(s) | Due | Status |
@@ -50,7 +50,7 @@
| Item | Status / Decision | Notes | | Item | Status / Decision | Notes |
| --- | --- | --- | | --- | --- | --- |
| Schema readiness | BLOCKED | Waiting on AdvisoryAI + orchestrator envelopes; no DOING until frozen. | | Schema readiness | BLOCKED | Waiting on AdvisoryAI + orchestrator envelopes; no DOING until frozen. |
| Crypto routing approval | PENDING | Review on 2025-11-18 to approve `ICryptoProviderRegistry` wiring. | | Crypto routing approval | DONE | Defaults recorded in `docs/security/crypto-registry-decision-2025-11-18.md`; implement in EvidenceLocker/CLI. |
| Template & filename normalization | DONE (2025-11-17) | Renamed to `SPRINT_0161_0001_0001_evidencelocker.md`; structure aligned to sprint template. | | Template & filename normalization | DONE (2025-11-17) | Renamed to `SPRINT_0161_0001_0001_evidencelocker.md`; structure aligned to sprint template. |
### Risk table ### Risk table
@@ -68,3 +68,7 @@
| 2025-11-17 | Normalized sprint to standard template, renamed file, and set all tasks BLOCKED pending schemas/crypto review. | Implementer | | 2025-11-17 | Normalized sprint to standard template, renamed file, and set all tasks BLOCKED pending schemas/crypto review. | Implementer |
| 2025-11-18 | Added ADV-ORCH-SCHEMA-LIB-161 and CRYPTO-REGISTRY-DECISION-161 tasks; marked downstream items blocked on them. | Project PM | | 2025-11-18 | Added ADV-ORCH-SCHEMA-LIB-161 and CRYPTO-REGISTRY-DECISION-161 tasks; marked downstream items blocked on them. | Project PM |
| 2025-11-18 | Set ADV-ORCH-SCHEMA-LIB-161 and CRYPTO-REGISTRY-DECISION-161 to DOING; drafting shared models package and crypto decision record. | Implementer | | 2025-11-18 | Set ADV-ORCH-SCHEMA-LIB-161 and CRYPTO-REGISTRY-DECISION-161 to DOING; drafting shared models package and crypto decision record. | Implementer |
| 2025-11-18 | Shared models updated with draft evidence bundle schema v0; ADV-ORCH-SCHEMA-LIB-161 set to DONE and downstream tasks unblocked. | Implementer |
| 2025-11-18 | Recorded crypto registry decision in `docs/security/crypto-registry-decision-2025-11-18.md`; moved CRYPTO-REGISTRY-DECISION-161 to DONE and unblocked EVID-CRYPTO-90-001. | Implementer |
| 2025-11-18 | Started EVID-OBS-54-002 DOING using shared schema draft. | Implementer |
| 2025-11-18 | Started EVID-OBS-54-002 with shared schema; replay/CLI remain pending ledger shape. | Implementer |

View File

@@ -12,11 +12,11 @@ WEB-EXC-25-003 `Notifications & events` | TODO | Publish `exception.*` events, i
WEB-EXPORT-35-001 `Export routing` | TODO | Surface Export Center APIs (profiles/runs/download) through gateway with tenant scoping, streaming support, and viewer/operator scope checks. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-EXPORT-35-001 `Export routing` | TODO | Surface Export Center APIs (profiles/runs/download) through gateway with tenant scoping, streaming support, and viewer/operator scope checks. | BE-Base Platform Guild (src/Web/StellaOps.Web)
WEB-EXPORT-36-001 `Distribution endpoints` | TODO | Add distribution routes (OCI/object storage), manifest/provenance proxies, and signed URL generation. Dependencies: WEB-EXPORT-35-001. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-EXPORT-36-001 `Distribution endpoints` | TODO | Add distribution routes (OCI/object storage), manifest/provenance proxies, and signed URL generation. Dependencies: WEB-EXPORT-35-001. | BE-Base Platform Guild (src/Web/StellaOps.Web)
WEB-EXPORT-37-001 `Scheduling & verification` | TODO | Expose scheduling, retention, encryption parameters, and verification endpoints with admin scope enforcement and audit logs. Dependencies: WEB-EXPORT-36-001. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-EXPORT-37-001 `Scheduling & verification` | TODO | Expose scheduling, retention, encryption parameters, and verification endpoints with admin scope enforcement and audit logs. Dependencies: WEB-EXPORT-36-001. | BE-Base Platform Guild (src/Web/StellaOps.Web)
WEB-GRAPH-SPEC-21-000 `Graph API/overlay spec drop` | DOING | Publish Web.I graph/overlay OpenAPI + streaming contracts as shared models/lib for gateway use. | BE-Base Platform Guild, Graph Platform Guild (src/Web/StellaOps.Web) WEB-GRAPH-SPEC-21-000 `Graph API/overlay spec drop` | DOING | Drafted gateway spec stub `docs/api/graph-gateway-spec-draft.yaml`; pending Graph Platform ratification. | BE-Base Platform Guild, Graph Platform Guild (src/Web/StellaOps.Web)
WEB-GRAPH-21-001 `Graph endpoints` | BLOCKED (2025-10-27) | Blocked on WEB-GRAPH-SPEC-21-000; add gateway routes for graph versions/viewport/node/path/diff/export endpoints with tenant enforcement, scope checks, and streaming responses; proxy Policy Engine diff toggles without inline logic. Adopt `StellaOpsScopes` constants for RBAC enforcement. | BE-Base Platform Guild, Graph Platform Guild (src/Web/StellaOps.Web) WEB-GRAPH-21-001 `Graph endpoints` | DOING | Use draft gateway spec `docs/api/graph-gateway-spec-draft.yaml` to add routes for graph versions/viewport/node/path/diff/export with RBAC + streaming. | BE-Base Platform Guild, Graph Platform Guild (src/Web/StellaOps.Web)
WEB-GRAPH-21-002 `Request validation` | BLOCKED (2025-10-27) | Blocked on WEB-GRAPH-SPEC-21-000; implement bbox/zoom/path parameter validation, pagination tokens, and deterministic ordering; add contract tests for boundary conditions. Dependencies: WEB-GRAPH-21-001. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-GRAPH-21-002 `Request validation` | DOING | Implement bbox/zoom/path validation, pagination tokens, deterministic ordering; add contract tests. Dependencies: WEB-GRAPH-21-001. | BE-Base Platform Guild (src/Web/StellaOps.Web)
WEB-GRAPH-21-003 `Error mapping & exports` | BLOCKED (2025-10-27) | Blocked on WEB-GRAPH-SPEC-21-000; map graph service errors to `ERR_Graph_*`, support GraphML/JSONL export streaming, and document rate limits. Dependencies: WEB-GRAPH-21-002. | BE-Base Platform Guild, QA Guild (src/Web/StellaOps.Web) WEB-GRAPH-21-003 `Error mapping & exports` | TODO | Map graph service errors to `ERR_Graph_*`, support GraphML/JSONL export streaming, document rate limits. Dependencies: WEB-GRAPH-21-002. | BE-Base Platform Guild, QA Guild (src/Web/StellaOps.Web)
WEB-GRAPH-21-004 `Overlay pass-through` | BLOCKED (2025-10-27) | Blocked on WEB-GRAPH-SPEC-21-000; proxy Policy Engine overlay responses for graph endpoints while keeping gateway stateless; maintain streaming budgets and latency SLOs. Dependencies: WEB-GRAPH-21-003. | BE-Base Platform Guild, Policy Guild (src/Web/StellaOps.Web) WEB-GRAPH-21-004 `Overlay pass-through` | TODO | Proxy Policy Engine overlays while keeping gateway stateless; maintain streaming budgets. Dependencies: WEB-GRAPH-21-003. | BE-Base Platform Guild, Policy Guild (src/Web/StellaOps.Web)
WEB-GRAPH-24-001 `Gateway proxy refresh` | TODO | Gateway proxy for Graph API and Policy overlays with RBAC, caching, pagination, ETags, and streaming; zero business logic. Dependencies: WEB-GRAPH-21-004. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-GRAPH-24-001 `Gateway proxy refresh` | TODO | Gateway proxy for Graph API and Policy overlays with RBAC, caching, pagination, ETags, and streaming; zero business logic. Dependencies: WEB-GRAPH-21-004. | BE-Base Platform Guild (src/Web/StellaOps.Web)
WEB-GRAPH-24-001 `Graph endpoints` | TODO | Implement `/graph/assets/*` endpoints (snapshots, adjacency, search) with pagination, ETags, and tenant scoping while acting as a pure proxy. Dependencies: WEB-GRAPH-24-001. | BE-Base Platform Guild, SBOM Service Guild (src/Web/StellaOps.Web) WEB-GRAPH-24-001 `Graph endpoints` | TODO | Implement `/graph/assets/*` endpoints (snapshots, adjacency, search) with pagination, ETags, and tenant scoping while acting as a pure proxy. Dependencies: WEB-GRAPH-24-001. | BE-Base Platform Guild, SBOM Service Guild (src/Web/StellaOps.Web)
WEB-GRAPH-24-004 `AOC enrichers` | TODO | Embed AOC summaries sourced from overlay services; ensure gateway does not compute derived severity or hints. Dependencies: WEB-GRAPH-24-001. | BE-Base Platform Guild (src/Web/StellaOps.Web) WEB-GRAPH-24-004 `AOC enrichers` | TODO | Embed AOC summaries sourced from overlay services; ensure gateway does not compute derived severity or hints. Dependencies: WEB-GRAPH-24-001. | BE-Base Platform Guild (src/Web/StellaOps.Web)

View File

@@ -20,7 +20,7 @@
## Task Board ## Task Board
| Task ID | Status | Owner(s) | Dependencies | Notes | | Task ID | Status | Owner(s) | Dependencies | Notes |
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
| DOCS-UNBLOCK-CLI-KNOBS-301 | DOING | CLI Guild · Policy Guild · DevEx Guild | Package CLI gating verbs + policy knobs artifacts (CLI-VULN-29-001; CLI-VEX-30-001; POLICY-ENGINE-31-001) and publish fixtures/screenshots. | Produce screenshots/JSON fixtures and changelog so DOCS-AIAI-31-005..009 can proceed. | | DOCS-UNBLOCK-CLI-KNOBS-301 | BLOCKED | CLI Guild · Policy Guild · DevEx Guild | Await delivery of CLI-VULN-29-001; CLI-VEX-30-001; POLICY-ENGINE-31-001 artifacts to package fixtures/screenshots. | Produce screenshots/JSON fixtures and changelog so DOCS-AIAI-31-005..009 can proceed. |
| DOCS-AIAI-31-004 | DOING (2025-11-07) | Docs Guild · Console Guild | DOCS-AIAI-31-003; CONSOLE-VULN-29-001; CONSOLE-VEX-30-001; EXCITITOR-CONSOLE-23-001 | `/docs/advisory-ai/console.md` with screenshots, a11y notes, copy-as-ticket instructions. | | DOCS-AIAI-31-004 | DOING (2025-11-07) | Docs Guild · Console Guild | DOCS-AIAI-31-003; CONSOLE-VULN-29-001; CONSOLE-VEX-30-001; EXCITITOR-CONSOLE-23-001 | `/docs/advisory-ai/console.md` with screenshots, a11y notes, copy-as-ticket instructions. |
| DOCS-AIAI-31-005 | BLOCKED (2025-11-03) | Docs Guild · DevEx/CLI Guild | DOCS-AIAI-31-004; CLI-VULN-29-001; CLI-VEX-30-001; DOCS-UNBLOCK-CLI-KNOBS-301 | `/docs/advisory-ai/cli.md` covering commands, exit codes, scripting patterns. | | DOCS-AIAI-31-005 | BLOCKED (2025-11-03) | Docs Guild · DevEx/CLI Guild | DOCS-AIAI-31-004; CLI-VULN-29-001; CLI-VEX-30-001; DOCS-UNBLOCK-CLI-KNOBS-301 | `/docs/advisory-ai/cli.md` covering commands, exit codes, scripting patterns. |
| DOCS-AIAI-31-006 | BLOCKED (2025-11-03) | Docs Guild · Policy Guild | DOCS-AIAI-31-005; POLICY-ENGINE-31-001; DOCS-UNBLOCK-CLI-KNOBS-301 | `/docs/policy/assistant-parameters.md` for temperature, token limits, ranking weights, TTLs. | | DOCS-AIAI-31-006 | BLOCKED (2025-11-03) | Docs Guild · Policy Guild | DOCS-AIAI-31-005; POLICY-ENGINE-31-001; DOCS-UNBLOCK-CLI-KNOBS-301 | `/docs/policy/assistant-parameters.md` for temperature, token limits, ranking weights, TTLs. |
@@ -43,6 +43,7 @@
| 2025-11-09 | Task inventory imported from legacy sprint file; SBOM/service dependencies flagged. | Docs Guild | | 2025-11-09 | Task inventory imported from legacy sprint file; SBOM/service dependencies flagged. | Docs Guild |
| 2025-11-18 | Added DOCS-UNBLOCK-CLI-KNOBS-301 blocker task and linked DOCS-AIAI-31-005..009 to it. | Project PM | | 2025-11-18 | Added DOCS-UNBLOCK-CLI-KNOBS-301 blocker task and linked DOCS-AIAI-31-005..009 to it. | Project PM |
| 2025-11-18 | Set DOCS-UNBLOCK-CLI-KNOBS-301 to DOING; packaging CLI verbs/policy knobs artifacts for docs unblock. | Implementer | | 2025-11-18 | Set DOCS-UNBLOCK-CLI-KNOBS-301 to DOING; packaging CLI verbs/policy knobs artifacts for docs unblock. | Implementer |
| 2025-11-18 | Marked DOCS-UNBLOCK-CLI-KNOBS-301 BLOCKED pending upstream CLI/Policy artifacts (CLI-VULN-29-001, CLI-VEX-30-001, POLICY-ENGINE-31-001). | Implementer |
## Decisions & Risks ## Decisions & Risks
### Decisions ### Decisions

View File

@@ -53,8 +53,8 @@ _Theme:_ Finish the provable reachability pipeline (graph CAS → replay → DSS
| UNCERTAINTY-POLICY-401-026 | TODO | Update policy guidance (Concelier/Excititor) with uncertainty gates (U1/U2/U3), sample YAML rules, and remediation actions. | Policy Guild · Concelier Guild (`docs/policy/dsl.md`, `docs/uncertainty/README.md`) | | UNCERTAINTY-POLICY-401-026 | TODO | Update policy guidance (Concelier/Excititor) with uncertainty gates (U1/U2/U3), sample YAML rules, and remediation actions. | Policy Guild · Concelier Guild (`docs/policy/dsl.md`, `docs/uncertainty/README.md`) |
| UNCERTAINTY-UI-401-027 | TODO | Surface uncertainty chips/tooltips in the Console (React UI) + CLI output (risk score + entropy states). | UI Guild · CLI Guild (`src/UI/StellaOps.UI`, `src/Cli/StellaOps.Cli`, `docs/uncertainty/README.md`) | | UNCERTAINTY-UI-401-027 | TODO | Surface uncertainty chips/tooltips in the Console (React UI) + CLI output (risk score + entropy states). | UI Guild · CLI Guild (`src/UI/StellaOps.UI`, `src/Cli/StellaOps.Cli`, `docs/uncertainty/README.md`) |
| PROV-INLINE-401-028 | DONE | Extend Authority/Feedser event writers to attach inline DSSE + Rekor references on every SBOM/VEX/scan event using `StellaOps.Provenance.Mongo`. | Authority Guild · Feedser Guild (`docs/provenance/inline-dsse.md`, `src/__Libraries/StellaOps.Provenance.Mongo`) | | PROV-INLINE-401-028 | DONE | Extend Authority/Feedser event writers to attach inline DSSE + Rekor references on every SBOM/VEX/scan event using `StellaOps.Provenance.Mongo`. | Authority Guild · Feedser Guild (`docs/provenance/inline-dsse.md`, `src/__Libraries/StellaOps.Provenance.Mongo`) |
| PROV-BACKFILL-INPUTS-401-029A | DOING | EvidenceLocker to export attestation inventory + subjectRekor lookup map as shared artifact/library. | Evidence Locker Guild · Platform Guild (`docs/provenance/inline-dsse.md`) | | PROV-BACKFILL-INPUTS-401-029A | DONE | Attestation inventory and subjectRekor map drafted (`docs/provenance/attestation-inventory-2025-11-18.ndjson`, `docs/provenance/subject-rekor-map-2025-11-18.json`). | Evidence Locker Guild · Platform Guild (`docs/provenance/inline-dsse.md`) |
| PROV-BACKFILL-401-029 | BLOCKED (2025-11-18) | Blocked on PROV-BACKFILL-INPUTS-401-029A; awaiting attestation inventory/export to resolve historical events. | Platform Guild (`docs/provenance/inline-dsse.md`, `scripts/publish_attestation_with_provenance.sh`) | | PROV-BACKFILL-401-029 | TODO | Use inventory + map to resolve historical events and backfill provenance. | Platform Guild (`docs/provenance/inline-dsse.md`, `scripts/publish_attestation_with_provenance.sh`) |
| PROV-INDEX-401-030 | TODO | Deploy provenance indexes (`events_by_subject_kind_provenance`, etc.) and expose compliance/replay queries. | Platform Guild · Ops Guild (`docs/provenance/inline-dsse.md`, `ops/mongo/indices/events_provenance_indices.js`) | | PROV-INDEX-401-030 | TODO | Deploy provenance indexes (`events_by_subject_kind_provenance`, etc.) and expose compliance/replay queries. | Platform Guild · Ops Guild (`docs/provenance/inline-dsse.md`, `ops/mongo/indices/events_provenance_indices.js`) |
| QA-CORPUS-401-031 | TODO | Build and publish the multi-runtime reachability corpus (Go/.NET/Python/Rust) with EXPECT.yaml ground truths and captured traces; wire fixtures into CI so reachability scoring and VEX proofs are continuously validated. | QA Guild · Scanner Guild (`tests/reachability`, `docs/reachability/DELIVERY_GUIDE.md`) | | QA-CORPUS-401-031 | TODO | Build and publish the multi-runtime reachability corpus (Go/.NET/Python/Rust) with EXPECT.yaml ground truths and captured traces; wire fixtures into CI so reachability scoring and VEX proofs are continuously validated. | QA Guild · Scanner Guild (`tests/reachability`, `docs/reachability/DELIVERY_GUIDE.md`) |
| UI-VEX-401-032 | TODO | Add UI/CLI Explain/Verify surfaces on VEX decisions (show call paths, runtime hits, attestation verify button) and align with reachability evidence output. | UI Guild · CLI Guild · Scanner Guild (`src/UI/StellaOps.UI`, `src/Cli/StellaOps.Cli`, `docs/reachability/function-level-evidence.md`) | | UI-VEX-401-032 | TODO | Add UI/CLI Explain/Verify surfaces on VEX decisions (show call paths, runtime hits, attestation verify button) and align with reachability evidence output. | UI Guild · CLI Guild · Scanner Guild (`src/UI/StellaOps.UI`, `src/Cli/StellaOps.Cli`, `docs/reachability/function-level-evidence.md`) |

View File

@@ -12,7 +12,7 @@ DEVOPS-EXPORT-37-001 | TODO | Finalize exporter monitoring (failure alerts, veri
DEVOPS-GRAPH-24-001 | TODO | Load test graph index/adjacency APIs with 40k-node assets; capture perf dashboards and alert thresholds. | DevOps Guild, SBOM Service Guild (ops/devops) DEVOPS-GRAPH-24-001 | TODO | Load test graph index/adjacency APIs with 40k-node assets; capture perf dashboards and alert thresholds. | DevOps Guild, SBOM Service Guild (ops/devops)
DEVOPS-GRAPH-24-002 | TODO | Integrate synthetic UI perf runs (Playwright/WebGL metrics) for Graph/Vuln explorers; fail builds on regression. Dependencies: DEVOPS-GRAPH-24-001. | DevOps Guild, UI Guild (ops/devops) DEVOPS-GRAPH-24-002 | TODO | Integrate synthetic UI perf runs (Playwright/WebGL metrics) for Graph/Vuln explorers; fail builds on regression. Dependencies: DEVOPS-GRAPH-24-001. | DevOps Guild, UI Guild (ops/devops)
DEVOPS-GRAPH-24-003 | TODO | Implement smoke job for simulation endpoints ensuring we stay within SLA (<3s upgrade) and log results. Dependencies: DEVOPS-GRAPH-24-002. | DevOps Guild (ops/devops) DEVOPS-GRAPH-24-003 | TODO | Implement smoke job for simulation endpoints ensuring we stay within SLA (<3s upgrade) and log results. Dependencies: DEVOPS-GRAPH-24-002. | DevOps Guild (ops/devops)
DEVOPS-LNM-TOOLING-22-000 | DOING | Deliver storage backfill tooling package (scripts + CI job) for advisory/vex observations. | DevOps Guild · Concelier Guild · Excititor Guild (ops/devops) DEVOPS-LNM-TOOLING-22-000 | BLOCKED | Await upstream storage backfill tool specs and Excititor migration outputs to finalize package. | DevOps Guild · Concelier Guild · Excititor Guild (ops/devops)
DEVOPS-LNM-22-001 | BLOCKED (2025-10-27) | Blocked on DEVOPS-LNM-TOOLING-22-000; run migration/backfill pipelines for advisory observations/linksets in staging, validate counts/conflicts, and automate deployment steps. | DevOps Guild, Concelier Guild (ops/devops) DEVOPS-LNM-22-001 | BLOCKED (2025-10-27) | Blocked on DEVOPS-LNM-TOOLING-22-000; run migration/backfill pipelines for advisory observations/linksets in staging, validate counts/conflicts, and automate deployment steps. | DevOps Guild, Concelier Guild (ops/devops)
DEVOPS-LNM-22-002 | BLOCKED (2025-10-27) | Blocked on DEVOPS-LNM-TOOLING-22-000 and Excititor storage migration; execute VEX observation/linkset backfill with monitoring; ensure NATS/Redis events integrated; document ops runbook. Dependencies: DEVOPS-LNM-22-001. | DevOps Guild, Excititor Guild (ops/devops) DEVOPS-LNM-22-002 | BLOCKED (2025-10-27) | Blocked on DEVOPS-LNM-TOOLING-22-000 and Excititor storage migration; execute VEX observation/linkset backfill with monitoring; ensure NATS/Redis events integrated; document ops runbook. Dependencies: DEVOPS-LNM-22-001. | DevOps Guild, Excititor Guild (ops/devops)
DEVOPS-LNM-22-003 | TODO | Add CI/monitoring coverage for new metrics (`advisory_observations_total`, `linksets_total`, etc.) and alerts on ingest-to-API SLA breaches. Dependencies: DEVOPS-LNM-22-002. | DevOps Guild, Observability Guild (ops/devops) DEVOPS-LNM-22-003 | TODO | Add CI/monitoring coverage for new metrics (`advisory_observations_total`, `linksets_total`, etc.) and alerts on ingest-to-API SLA breaches. Dependencies: DEVOPS-LNM-22-002. | DevOps Guild, Observability Guild (ops/devops)

View File

@@ -0,0 +1,27 @@
# Binary Prerequisites & Offline Layout
## Layout (authoritative)
- `local-nugets/` — single source for NuGet: holds curated `.nupkg` and the restored packages cache in `local-nugets/packages/`; see `local-nugets/manifest.json` for hashes of the `.nupkg` inputs.
- `vendor/` — pinned binaries/CLIs tracked via `vendor/manifest.json`.
- `offline/feeds/` — air-gap bundles (tarballs, OCI layers, SBOM packs) registered in `offline/feeds/manifest.json`.
- Module-owned binaries (currently `plugins/`, `tools/`, `deploy/`, `ops/`) are tracked for integrity in `vendor/manifest.json` until relocated.
## Adding or updating NuGet packages
1) Place `.nupkg` into `local-nugets/` and update `local-nugets/manifest.json` (use the manifest script in `scripts/` if available or recompute hashes manually).
2) Run `dotnet restore --source local-nugets` (or set `OFFLINE=1`) to populate `.nuget/packages/`.
3) Never add new feeds to `NuGet.config` without review; the default feed order is `local-nugets` first, then `nuget.org` for online builds.
## Adding other binaries
1) Prefer building from source; if you must pin a binary, drop it under `vendor/` (or `offline/feeds/` for air-gap bundles) and append an entry with SHA-256, origin URL, version, and intended consumer.
2) For module-owned binaries (e.g., plugins), record the artefact in `vendor/manifest.json` until it can be rebuilt deterministically as part of CI.
## Automation & Integrity
- Run `scripts/update-binary-manifests.py` to refresh `local-nugets/manifest.json`, `vendor/manifest.json`, and `offline/feeds/manifest.json` after adding binaries.
- Run `scripts/verify-binaries.sh` locally; CI executes it on every PR/branch to block binaries outside approved roots.
- CI also re-runs the manifest generator and fails if the manifests would change—commit regenerated manifests as part of the change.
- Restore uses the single location: `dotnet restore --source local-nugets` with `globalPackagesFolder=local-nugets/packages` (configured in `NuGet.config`). Clean by removing `local-nugets/packages/` if needed.
- For offline enforcement, set `OFFLINE=1` (CI should fail if it reaches `nuget.org` without `ALLOW_REMOTE=1`).
## Housekeeping
- Do not resurrect `local-nuget/`; the single source of truth is `local-nugets/`.
- Refresh manifests when binaries change and record the update in the current sprint's Execution Log.

View File

@@ -0,0 +1,785 @@
Here's a clean, air-gap-ready spine for turning container images into verifiable SBOMs and provenance—built to be idempotent and easy to slot into StellaOps or any CI/CD.
```mermaid
flowchart LR
A[OCI Image/Repo]-->B[Layer Extractor]
B-->C[Sbomer: CycloneDX/SPDX]
C-->D[DSSE Sign]
D-->E["in-toto Statement (SLSA Provenance)"]
E-->F[Transparency Log Adapter]
C-->G[POST /sbom/ingest]
F-->H[POST /attest/verify]
```
### What this does (in plain words)
* **Pull & crack the image** → extract layers, metadata (labels, env, history).
* **Build an SBOM** → emit **CycloneDX 1.6** and **SPDX 3.0.1** (pick one or both).
* **Sign artifacts** → wrap SBOM/provenance in **DSSE** envelopes.
* **Provenance** → generate **in-toto Statement** with **SLSA Provenance v1** as the predicate.
* **Auditability** → optionally publish attestations to a transparency log (e.g., Rekor) so they're tamper-evident via Merkle proofs.
* **APIs are idempotent** → safe to re-ingest the same image/SBOM/attestation without version churn.
### Design notes you can hand to an agent
* **Idempotency keys**
* `contentAddress` = SHA256 of OCI manifest (or full image digest)
* `sbomHash` = SHA256 of normalized SBOM JSON
* `attHash` = SHA256 of DSSE payload (base64-stable)
Store these; reject duplicates with HTTP 200 + `"status":"already_present"`.
* **Default formats**
* SBOM export: CycloneDX v1.6 (`application/vnd.cyclonedx+json`), SPDX 3.0.1 (`application/spdx+json`)
* DSSE envelope: `application/dsse+json`
* in-toto Statement: `application/vnd.in-toto+json` with `predicateType` = SLSA Provenance v1
* **Airgap mode**
* No external calls required; Rekor publish is optional.
* Keep a local Merkle log (pluggable) and allow later "sync-to-Rekor" when online.
* **Transparency log adapter**
* Interface: `Put(entry) -> {logIndex, logID, inclusionProof}`
* Backends: `rekor`, `local-merkle`, `null` (noop)
### Minimal API sketch
* `POST /sbom/ingest`
* Body: `{ imageDigest, sbom, format, dsseSignature? }`
* Returns: `{ sbomId, status, sbomHash }` (status: `stored|already_present`)
* `POST /attest/verify`
* Body: `{ dsseEnvelope, expectedSubjects:[{name, digest}] }`
* Verifies DSSE, checks in-toto subject ↔ image digest, optionally records/logs.
* Returns: `{ verified:true, predicateType, logIndex?, inclusionProof? }`
### CLI flow (pseudocode)
```bash
# 1) Extract
stella-extract --image $IMG --out /work/extract
# 2) SBOM (Cdx + SPDX)
stella-sbomer cdx --in /work/extract --out /work/sbom.cdx.json
stella-sbomer spdx --in /work/extract --out /work/sbom.spdx.json
# 3) DSSE sign (offline keyring or HSM)
stella-sign dsse --in /work/sbom.cdx.json --out /work/sbom.cdx.dsse.json --key file:k.pem
# 4) SLSA provenance (in-toto Statement)
stella-provenance slsa-v1 --subject $IMG_DIGEST --materials /work/extract/manifest.json \
--out /work/prov.dsse.json --key file:k.pem
# 5) (optional) Publish to transparency log
stella-log publish --in /work/prov.dsse.json --backend rekor --rekor-url $REKOR
```
### Validation rules (quick)
* **Subject binding**: in-toto Statement `subject[].digest.sha256` must equal the OCI image digest you scanned.
* **Key policy**: enforce allowed issuers (Fulcio, internal CA, GOST/SM/EIDAS/FIPS as needed).
* **Normalization**: canonicalize JSON before hashing/signing to keep idempotency stable.
### Why this matters
* **Audit-ready**: You can always prove *what* you scanned, *how* it was built, and *who* signed it.
* **Noise-gated**: With deterministic SBOMs + provenance, downstream VEX/reachability gets much cleaner.
* **Drop-in**: Works in harsh environments—offline, mirrors, sovereign crypto stacks—without changing your pipeline.
If you want, I can generate:
* a ready-to-use OpenAPI stub for `POST /sbom/ingest` and `POST /attest/verify`,
* C# (.NET 10) DSSE + in-toto helpers (interfaces + test fixtures),
* or a Docker Compose "air-gap bundle" showing the full spine end-to-end.
Below is a full architecture plan you can hand to an agent as the “master spec” for implementing the SBOM & provenance spine (image → SBOM → DSSE → in-toto/SLSA → transparency log → REST APIs), with idempotent APIs and air-gap readiness.
---
## 1. Scope and Objectives
**Goal:** Implement a deterministic, air-gap-ready “SBOM spine” that:
* Converts OCI images into SBOMs (CycloneDX 1.6 and SPDX 3.0.1).
* Generates SLSA v1 provenance wrapped in in-toto Statements.
* Signs all artifacts with DSSE envelopes using pluggable crypto providers.
* Optionally publishes attestations to transparency logs (Rekor/local-Merkle/none).
* Exposes stable, idempotent APIs:
* `POST /sbom/ingest`
* `POST /attest/verify`
* Avoids versioning by design; APIs are extended, not versioned; all mutations are idempotent keyed by content digests.
**Out of scope (for this iteration):**
* Full vulnerability scanning (delegated to Scanner service).
* Policy evaluation / lattice logic (delegated to Scanner/Graph engine).
* Vendor-facing proof-market ledger and trust economics (future module).
---
## 2. High-Level Architecture
### 2.1 Logical Components
1. **StellaOps.SupplyChain.Core (Library)**
* Shared types and utilities:
* Domain models: SBOM, DSSE, in-toto Statement, SLSA predicates.
* Canonicalization & hashing utilities.
* DSSE sign/verify abstractions.
* Transparency log entry model & Merkle proof verification.
2. **StellaOps.Sbomer.Engine (Library)**
* Image → SBOM functionality:
* Layer & manifest analysis.
* SBOM generation: CycloneDX, SPDX.
* Extraction of metadata (labels, env, history).
* Deterministic ordering & normalization.
3. **StellaOps.Provenance.Engine (Library)**
* Build provenance & in-toto:
* In-toto Statement generator.
* SLSA v1 provenance predicate builder.
* Subject and material resolution from image metadata & SBOM.
4. **StellaOps.Authority (Service/Library)**
* Crypto & keys:
* Key management abstraction (file, HSM, KMS, sovereign crypto).
* DSSE signing & verification with multiple key types.
* Trust roots, certificate chains, key policies.
5. **StellaOps.LogBridge (Service/Library)**
* Transparency log adapter:
* Rekor backend.
* Local Merkle log backend (for air-gap).
* Null backend (no-op).
* Merkle proof validation.
6. **StellaOps.SupplyChain.Api (Service)**
* The SBOM spine HTTP API:
* `POST /sbom/ingest`
* `POST /attest/verify`
* Optionally: `GET /sbom/{id}`, `GET /attest/{id}`, `GET /image/{digest}/summary`.
* Performs orchestrations:
* SBOM/attestation parsing, canonicalization, hashing.
* Idempotency and persistence.
* Delegation to Authority and LogBridge.
7. **CLI Tools (optional but recommended)**
* `stella-extract`, `stella-sbomer`, `stella-sign`, `stella-provenance`, `stella-log`.
* Thin wrappers over the above libraries; usable offline and in CI pipelines.
8. **Persistence Layer**
* Primary DB: PostgreSQL (or other RDBMS).
* Optional object storage: S3/MinIO for large SBOM/attestation blobs.
* Tables: `images`, `sboms`, `attestations`, `signatures`, `log_entries`, `keys`.
### 2.2 Deployment View (Kubernetes / Docker)
```mermaid
flowchart LR
subgraph Node1[Cluster Node]
A["StellaOps.SupplyChain.Api (ASP.NET Core)"]
B[StellaOps.Authority Service]
C[StellaOps.LogBridge Service]
end
subgraph Node2[Worker Node]
D[Runner / CI / Air-gap host]
E["CLI Tools<br/>stella-extract/sbomer/sign/provenance/log"]
end
F[(PostgreSQL)]
G[("Object Storage<br/>S3/MinIO")]
H[("Local Merkle Log<br/>or Rekor")]
A --> F
A --> G
A --> C
A --> B
C --> H
E --> A
```
* **Air-gap mode:**
* Rekor backend disabled; LogBridge uses local Merkle log (`H`) or `null`.
* All components run within the offline network.
* **Online mode:**
* LogBridge talks to external Rekor instance using outbound HTTPS only.
---
## 3. Domain Model and Storage Design
Use EF Core 9 with PostgreSQL in .NET 10.
### 3.1 Core Entities
1. **ImageArtifact**
* `Id` (GUID/ULID, internal).
* `ImageDigest` (string; OCI digest; UNIQUE).
* `Registry` (string).
* `Repository` (string).
* `Tag` (string, nullable, since digest is canonical).
* `FirstSeenAt` (timestamp).
* `MetadataJson` (JSONB; manifest, labels, env).
2. **Sbom**
* `Id` (string, primary key = `SbomHash` or derived ULID).
* `ImageArtifactId` (FK).
* `Format` (enum: `CycloneDX_1_6`, `SPDX_3_0_1`).
* `ContentHash` (string; normalized JSON SHA-256; UNIQUE with `TenantId`).
* `StorageLocation` (inline JSONB or external object storage key).
* `CreatedAt`.
* `Origin` (enum: `Generated`, `Uploaded`, `ExternalVendor`).
* Unique constraint: `(TenantId, ContentHash)`.
3. **Attestation**
* `Id` (string, primary key = `AttestationHash` or derived ULID).
* `ImageArtifactId` (FK).
* `Type` (enum: `InTotoStatement_SLSA_v1`, `Other`).
* `PayloadHash` (hash of DSSE payload, before envelope).
* `DsseEnvelopeHash` (hash of full DSSE JSON).
* `StorageLocation` (inline JSONB or object storage).
* `CreatedAt`.
* `Issuer` (string; signer identity / certificate subject).
* Unique constraint: `(TenantId, DsseEnvelopeHash)`.
4. **SignatureInfo**
* `Id` (GUID/ULID).
* `AttestationId` (FK).
* `KeyId` (logical key identifier).
* `Algorithm` (enum; includes PQ & sovereign algs).
* `VerifiedAt`.
* `VerificationStatus` (enum: `Valid`, `Invalid`, `Unknown`).
* `DetailsJson` (JSONB; trust-chain, error reasons, etc.).
5. **TransparencyLogEntry**
* `Id` (GUID/ULID).
* `AttestationId` (FK).
* `Backend` (enum: `Rekor`, `LocalMerkle`).
* `LogIndex` (string).
* `LogId` (string).
* `InclusionProofJson` (JSONB).
* `RecordedAt`.
* Unique constraint: `(Backend, LogId, LogIndex)`.
6. **KeyRecord** (optional if not reusing Authority's DB)
* `KeyId` (string, PK).
* `KeyType` (enum).
* `Usage` (enum: `Signing`, `Verification`, `Both`).
* `Status` (enum: `Active`, `Retired`, `Revoked`).
* `MetadataJson` (JSONB; KMS ARN, HSM slot, etc.).
### 3.2 Idempotency Keys
* SBOM:
* `sbomHash = SHA256(canonicalJson(sbom))`.
* Uniqueness enforced by `(TenantId, sbomHash)` in DB.
* Attestation:
* `attHash = SHA256(canonicalJson(dsse.payload))` or full envelope.
* Uniqueness enforced by `(TenantId, attHash)` in DB.
* Image:
* `imageDigest` is globally unique (per OCI spec).
---
## 4. Service-Level Architecture
### 4.1 StellaOps.SupplyChain.Api (.NET 10, ASP.NET Core)
**Responsibilities:**
* Expose HTTP API for ingest / verify.
* Handle idempotency logic & persistence.
* Delegate cryptographic operations to Authority.
* Delegate transparency logging to LogBridge.
* Perform basic validation against schemas (SBOM, DSSE, in-toto, SLSA).
**Key Endpoints:**
1. `POST /sbom/ingest`
* Request:
* `imageDigest` (string).
* `sbom` (raw JSON).
* `format` (enum/string).
* Optional: `dsseSignature` or `dsseEnvelope`.
* Behavior:
* Parse & validate SBOM structure.
* Canonicalize JSON, compute `sbomHash`.
* If `sbomHash` exists for `imageDigest` and tenant:
* Return `200` with `{ status: "already_present", sbomId, sbomHash }`.
* Else:
* Persist `Sbom` entity.
* Optionally verify DSSE signature via Authority.
* Return `201` with `{ status: "stored", sbomId, sbomHash }`.
2. `POST /attest/verify`
* Request:
* `dsseEnvelope` (JSON).
* `expectedSubjects` (list of `{ name, digest }`).
* Behavior:
* Canonicalize payload, compute `attHash`.
* Verify DSSE signature via Authority.
* Parse in-toto Statement; ensure `subject[].digest.sha256` matches `expectedSubjects`.
* Persist `Attestation` & `SignatureInfo`.
* If configured, call LogBridge to publish and store `TransparencyLogEntry`.
* If `attHash` already exists:
* Return `200` with `status: "already_present"` and existing references.
* Else, return `201` with `verified:true`, plus log info when available.
3. Optional read APIs:
* `GET /sbom/by-image/{digest}`
* `GET /attest/by-image/{digest}`
* `GET /image/{digest}/summary` (SBOM + attestations + log status).
### 4.2 StellaOps.Sbomer.Engine
**Responsibilities:**
* Given:
* OCI image manifest & layers (from local tarball or remote registry).
* Produce:
* CycloneDX 1.6 JSON.
* SPDX 3.0.1 JSON.
**Design:**
* Use layered analyzers:
* `ILayerAnalyzer` for generic filesystem traversal.
* Language-specific analyzers (optional for SBOM detail):
* `DotNetAnalyzer`, `NodeJsAnalyzer`, `PythonAnalyzer`, `JavaAnalyzer`, `PhpAnalyzer`, etc.
* Determinism:
* Sort all lists (components, dependencies) by stable keys.
* Remove unstable fields (timestamps, machine IDs, ephemeral paths).
* Provide `Normalize()` method per format that returns canonical JSON.
### 4.3 StellaOps.Provenance.Engine
**Responsibilities:**
* Build in-toto Statement with SLSA v1 predicate:
* `subject` derived from image digest(s).
* `materials` from:
* Git commit, tag, builder image, SBOM components if available.
* Ensure determinism:
* Sort materials by URI + digest.
* Normalize nested maps.
**Key APIs (internal library):**
* `InTotoStatement BuildSlsaProvenance(ImageArtifact image, Sbom sbom, ProvenanceContext ctx)`
* `string ToCanonicalJson(InTotoStatement stmt)`
### 4.4 StellaOps.Authority
**Responsibilities:**
* DSSE signing & verification.
* Key management abstraction.
* Policy enforcement (which keys/trust roots are allowed).
**Interfaces:**
* `ISigningProvider`
* `Task<DsseEnvelope> SignAsync(byte[] payload, string payloadType, string keyId)`
* `IVerificationProvider`
* `Task<VerificationResult> VerifyAsync(DsseEnvelope envelope, VerificationPolicy policy)`
**Backends:**
* File-based keys (PEM).
* HSM/KMS (AWS KMS, Azure Key Vault, on-prem HSM).
* Sovereign crypto providers (GOST, SMx, etc.).
* Optional PQ providers (Dilithium, Falcon).
### 4.5 StellaOps.LogBridge
**Responsibilities:**
* Abstract interaction with transparency logs.
**Interface:**
* `ILogBackend`
* `Task<LogEntryResult> PutAsync(byte[] canonicalPayloadHash, DsseEnvelope env)`
* `Task<ProofResult> VerifyInclusionAsync(LogEntryResult entry)`
**Backends:**
* `RekorBackend`:
* Calls Rekor REST API with hashed payload.
* `LocalMerkleBackend`:
* Maintains Merkle tree in local DB.
* Returns `logIndex`, `logId`, and inclusion proof.
* `NullBackend`:
* Returns empty/no-op results.
### 4.6 CLI Tools (Optional)
Use the same libraries as the services:
* `stella-extract`:
* Input: image reference.
* Output: local tarball + manifest JSON.
* `stella-sbomer`:
* Input: manifest & layers.
* Output: SBOM JSON.
* `stella-sign`:
* Input: JSON file.
* Output: DSSE envelope.
* `stella-provenance`:
* Input: image digest, build metadata.
* Output: signed in-toto/SLSA DSSE.
* `stella-log`:
* Input: DSSE envelope.
* Output: log entry details.
---
## 5. End-to-End Flows
### 5.1 SBOM Ingest (Upload Path)
```mermaid
sequenceDiagram
participant Client
participant API as SupplyChain.Api
participant Core as SupplyChain.Core
participant DB as PostgreSQL
Client->>API: POST /sbom/ingest (imageDigest, sbom, format)
API->>Core: Validate & canonicalize SBOM
Core-->>API: sbomHash
API->>DB: SELECT Sbom WHERE sbomHash & imageDigest
DB-->>API: Not found
API->>DB: INSERT Sbom (sbomHash, imageDigest, content)
DB-->>API: ok
API-->>Client: 201 { status:"stored", sbomId, sbomHash }
```
Re-ingest of the same SBOM repeats steps up to SELECT, then returns `status:"already_present"` with `200`.
### 5.2 Attestation Verify & Record
```mermaid
sequenceDiagram
participant Client
participant API as SupplyChain.Api
participant Auth as Authority
participant Log as LogBridge
participant DB as PostgreSQL
Client->>API: POST /attest/verify (dsseEnvelope, expectedSubjects)
API->>Auth: Verify DSSE (keys, policy)
Auth-->>API: VerificationResult(Valid/Invalid)
API->>API: Parse in-toto, check subjects vs expected
API->>DB: SELECT Attestation WHERE attHash
DB-->>API: Not found
API->>DB: INSERT Attestation + SignatureInfo
alt Logging enabled
API->>Log: PutAsync(attHash, envelope)
Log-->>API: LogEntryResult(logIndex, logId, proof)
API->>DB: INSERT TransparencyLogEntry
end
API-->>Client: 201 { verified:true, attestationId, logIndex?, inclusionProof? }
```
If attestation already exists, API returns `200` with `status:"already_present"`.
---
## 6. Idempotency and Determinism Strategy
1. **Canonicalization rules:**
* Remove insignificant whitespace.
* Sort all object keys lexicographically.
* Sort arrays where order is not semantically meaningful (components, materials).
* Strip non-deterministic fields (timestamps, random IDs) where allowed.
2. **Hashing:**
* Always hash canonical JSON as UTF-8.
* Use SHA-256 for core IDs; allow crypto provider to also compute other digests if needed.
3. **Persistence:**
* Enforce uniqueness in DB via indices on:
* `(TenantId, ContentHash)` for SBOMs.
* `(TenantId, AttHash)` for attestations.
* `(Backend, LogId, LogIndex)` for log entries.
* API behavior:
* Existing row → `200` with `"already_present"`.
* New row → `201` with `"stored"`.
4. **API design:**
* No version numbers in path.
* Add fields over time; never break or repurpose existing ones.
* Use explicit capability discovery via `GET /meta/capabilities` if needed.
---
## 7. Air-Gap Mode and Synchronization
### 7.1 Air-Gap Mode
* Configuration flag `Mode = Offline` on SupplyChain.Api.
* LogBridge backend:
* Default to `LocalMerkle` or `Null`.
* Rekor-specific configuration disabled or absent.
* DB & Merkle log stored locally inside the secure network.
### 7.2 Later Synchronization to Rekor (Optional Future Step)
Not mandatory for first iteration, but prepare for:
* Background job (Scheduler module) that:
* Enumerates local `TransparencyLogEntry` not yet exported.
* Publishes hashed payloads to Rekor when network is available.
* Stores mapping between local log entries and remote Rekor entries.
---
## 8. Security, Access Control, and Observability
### 8.1 Security
* mTLS between internal services (SupplyChain.Api, Authority, LogBridge).
* Authentication:
* API keys/OIDC for clients.
* Per-tenant scoping; `TenantId` must be present in context.
* Authorization:
* RBAC: which tenants/users can write/verify/only read.
### 8.2 Crypto Policies
* Policy object defines:
* Allowed key types and algorithms.
* Trust roots (Fulcio, internal CA, sovereign PKI).
* Revocation checking strategy (CRL/OCSP, offline lists).
* Authority enforces policies; SupplyChain.Api only consumes `VerificationResult`.
### 8.3 Observability
* Logs:
* Structured logs with correlation IDs; log imageDigest, sbomHash, attHash.
* Metrics:
* SBOM ingest count, dedup hit rate.
* Attestation verify latency.
* Transparency log publish success/failure counts.
* Traces:
* OpenTelemetry tracing across API → Authority → LogBridge.
---
## 9. Implementation Plan (Epics & Work Packages)
You can give this section directly to agents to split.
### Epic 1: Core Domain & Canonicalization
1. Define .NET 10 solution structure:
* Projects:
* `StellaOps.SupplyChain.Core`
* `StellaOps.Sbomer.Engine`
* `StellaOps.Provenance.Engine`
* `StellaOps.SupplyChain.Api`
* `StellaOps.Authority` (if not already present)
* `StellaOps.LogBridge`
2. Implement core domain models:
* SBOM, DSSE, in-toto, SLSA v1.
3. Implement canonicalization & hashing utilities.
4. Unit tests:
* Given semantically equivalent JSON, hashes must match.
* Negative tests where order changes but meaning does not.
### Epic 2: Persistence Layer
1. Design EF Core models for:
* ImageArtifact, Sbom, Attestation, SignatureInfo, TransparencyLogEntry, KeyRecord.
2. Write migrations for PostgreSQL.
3. Implement repository interfaces for read/write.
4. Tests:
* Unique constraints and idempotency behavior.
* Query performance for common access paths (by imageDigest).
### Epic 3: SBOM Engine
1. Implement minimal layer analysis:
* Accepts local tarball or path (for now).
2. Implement CycloneDX 1.6 generator.
3. Implement SPDX 3.0.1 generator.
4. Deterministic normalization across formats.
5. Tests:
* Golden files for images → SBOM output.
* Stability under repeated runs.
### Epic 4: Provenance Engine
1. Implement in-toto Statement model with SLSA v1 predicate.
2. Implement builder to map:
* ImageDigest → subject.
* Build metadata → materials.
3. Deterministic canonicalization.
4. Tests:
* Golden in-toto/SLSA statements for sample inputs.
* Subject matching logic.
### Epic 5: Authority Integration
1. Implement `ISigningProvider`, `IVerificationProvider` contracts.
2. Implement file-based key backend as default.
3. Implement DSSE wrapper:
* `SignAsync(payload, payloadType, keyId)`.
* `VerifyAsync(envelope, policy)`.
4. Tests:
* DSSE round-trip; invalid signature scenarios.
* Policy enforcement tests.
### Epic 6: Transparency Log Bridge
1. Implement `ILogBackend` interface.
2. Implement `LocalMerkleBackend`:
* Simple Merkle tree with DB storage.
3. Implement `NullBackend`.
4. Define configuration model to select backend.
5. (Optional later) Implement `RekorBackend`.
6. Tests:
* Stable Merkle root; inclusion proof verification.
### Epic 7: SupplyChain.Api
1. Implement `POST /sbom/ingest`:
* Request/response DTOs.
* Integration with canonicalization, persistence, idempotency logic.
2. Implement `POST /attest/verify`:
* End-to-end verification and persistence.
* Integration with Authority and LogBridge.
3. Optional read APIs.
4. Add input validation (JSON schema, basic constraints).
5. Integration tests:
* Full flows for new and duplicate inputs.
* Error cases (invalid DSSE, subject mismatch).
### Epic 8: CLI Tools
1. Implement `stella-sbomer` (wraps Sbomer.Engine).
2. Implement `stella-provenance` (wraps Provenance.Engine + Authority).
3. Implement `stella-sign` and `stella-log`.
4. Provide clear help/usage and sample scripts.
### Epic 9: Hardening, Air-Gap Profile, and Docs
1. Configuration profiles:
* `Offline` vs `Online`.
* Log backend selection.
2. Security hardening:
* mTLS, authentication, authorization.
3. Observability:
* Metrics, logs, traces wiring.
4. Documentation:
* API reference.
* Sequence diagrams.
* Deployment recipes for:
* Single-node air-gap.
* Clustered online deployment.
---
If you want, next step I can:
* Turn this into an AGENTS/TASKS/PROMPT set for your codex workers, or
* Produce concrete .NET 10 project skeletons (csproj layout, folder structure, and initial interfaces) for the core libraries and API service.

---
Here's a compact blueprint for bringing **stripped ELF binaries** into StellaOps's **call-graph + reachability scoring** — from raw bytes → neutral JSON → deterministic scoring.
---
# Why this matters (quick)
Even when symbols are missing, you can still (1) recover functions, (2) build a call graph, and (3) decide if a vulnerable function is *actually* reachable from the binary's entry points. This feeds StellaOps's deterministic scoring/lattice engine so VEX decisions are evidence-backed, not guesswork.
---
# High-level pipeline
1. **Ingest**
* Accept: ELF (static/dynamic), PIE, musl/glibc, multiple arches (x86_64, aarch64, armhf, riscv64).
* Normalize: compute file hash set (SHA256, BLAKE3), note `PT_DYNAMIC`, `DT_NEEDED`, interpreter, RPATH/RUNPATH.
2. **Symbolization (best-effort)**
* **If DWARF present**: read `.debug_*` (function names, inlines, CU boundaries, ranges).
* **If stripped**:
* Use disassembler to **discover functions** (prolog patterns, xrefs to call targets, thunk detection).
* Derive **synthetic names**: `sub_<va>`, `plt_<name>` (from dynamic symbol table if available), `extern@libc.so.6:memcpy`.
* Lift exported dynsyms and PLT stubs even when local symbols are removed.
* Recover **string-referenced names** (e.g., Go/Python/C++ RTTI/Itanium mangling where present).
3. **Disassembly & IR**
* Disassemble to basic blocks; lift to a neutral IR (SSA-like) sufficient for:
* Call edges (direct `call`/`bl`).
* **Indirect calls** via GOT/IAT, vtables, function pointers (approximate with points-to sets).
* Tail calls, thunks, PLT interposition.
4. **Callgraph build**
* Start from **entrypoints**:
* ELF entry (`_start`), constructors (`.init_array`), exported API (public symbols), `main` (if recoverable).
* Optional: **entry trace** (cmdline + env + loader path) from container image to seed realistic roots.
* Build **CG** with:
* Direct edges: precise.
* Indirect edges: conservative, with **evidence tags** (GOT target set, vtable class set, signature match).
* Record **inter-module edges** to shared libs (soname + version) with relocation evidence.
5. **Reachability scoring (deterministic)**
* Input: list of vulnerable functions/paths (from CSAF/CVE KB) normalized to **function-level identifiers** (soname!symbol or hash-based if unnamed).
* Compute **reachability** from roots → target:
* `REACHABLE_CONFIRMED` (path with only precise edges),
* `REACHABLE_POSSIBLE` (path contains conservative edges),
* `NOT_REACHABLE_FOUNDATION` (no path in current graph),
* Add **confidence** derived from edge evidence + relocation proof.
* Emit **proof trails** (the exact path: nodes, edges, evidence).
6. **Neutral JSON intermediate (NJIF)**
* Stored in cache; signed for deterministic replay.
* Consumed by StellaOps.Policy/Lattice to merge with VEX.
---
# Neutral JSON Intermediate Format (NJIF)
```json
{
"artifact": {
"path": "/work/bin/app",
"hashes": {"sha256": "…", "blake3": "…"},
"arch": "x86_64",
"elf": {
"type": "ET_DYN",
"interpreter": "/lib64/ld-linux-x86-64.so.2",
"needed": ["libc.so.6", "libssl.so.3"],
"rpath": [],
"runpath": []
}
},
"symbols": {
"exported": [
{"id": "libc.so.6!memcpy", "kind": "dynsym", "addr": "0x0", "plt": true}
],
"functions": [
{"id": "sub_401000", "addr": "0x401000", "size": 112, "name_hint": null, "from": "disasm"},
{"id": "main", "addr": "0x4023d0", "size": 348, "from": "dwarf|heuristic"}
]
},
"cfg": [
{"func": "main", "blocks": [
{"b": "0x4023d0", "succ": ["0x402415"], "calls": [{"type": "direct", "target": "sub_401000"}]},
{"b": "0x402415", "succ": ["0x402440"], "calls": [{"type": "plt", "target": "libc.so.6!memcpy"}]}
]}
],
"cg": {
"nodes": [
{"id": "main", "evidence": ["dwarf|heuristic"]},
{"id": "sub_401000"},
{"id": "libc.so.6!memcpy", "external": true, "lib": "libc.so.6"}
],
"edges": [
{"from": "main", "to": "sub_401000", "kind": "direct"},
{"from": "main", "to": "libc.so.6!memcpy", "kind": "plt", "evidence": ["reloc@GOT"]}
],
"roots": ["_start", "init_array[]", "main"]
},
"reachability": [
{
"target": "libssl.so.3!SSL_free",
"status": "NOT_REACHABLE_FOUNDATION",
"path": []
},
{
"target": "libc.so.6!memcpy",
"status": "REACHABLE_CONFIRMED",
"path": ["main", "libc.so.6!memcpy"],
"confidence": 0.98,
"evidence": ["plt", "dynsym", "reloc"]
}
],
"provenance": {
"toolchain": {
"disasm": "ghidra_headless|radare2|llvm-mca",
"version": "…"
},
"scan_manifest_hash": "…",
"timestamp_utc": "2025-11-16T00:00:00Z"
}
}
```
---
# Practical extractors (headless/CLI)
* **DWARF**: `llvm-dwarfdump`/`eu-readelf` for quick CU/function ranges; fall back to the disassembler.
* **Disassembly/CFG/CG** (choose one or more; wrap with a stable adapter):
* **Ghidra Headless API**: recover functions, basic blocks, references, PLT/GOT, vtables; export via a custom headless script to NJIF.
* **radare2 / rizin**: `aaa`, `agCd`, `aflj`, `agj` to export functions/graphs as JSON.
* **Binary Ninja headless** (if license permits) for cleaner IL and indirect-call modeling.
* **angr** for path-sensitive refinement on tricky indirect calls (optional, gated by budget).
**Adapter principle:** All tools output a **small, consistent NJIF** so the scoring engine and lattice logic never depend on any single RE tool.
---
# Indirect call modeling (concise rules)
* **PLT/GOT**: edge from caller → `soname!symbol` with evidence: `plt`, `reloc@GOT`.
* **Function pointers**: if a store to a pointer is found and targets a known function set `{f1…fk}`, add edges with `kind: "indirect"`, `evidence: ["xref-store", "sig-compatible"]`.
* **Virtual calls / vtables**: class-method set from RTTI/vtable scans; mark edges `evidence: ["vtable-match"]`.
* **Tail calls**: treat as edges, not fall-through.
Each conservative step lowers **confidence**, but keeps determinism: the rules and their hashes are in the scan manifest.
---
# Deterministic scoring (plug into Stella's lattice)
* **Inputs**: NJIF, CVE→function mapping (`soname!symbol` or function hash), policy knobs.
* **States**: `{NOT_OBSERVED < POSSIBLE < REACHABLE_CONFIRMED}` with **monotone** merge (never oscillates).
* **Confidence**: product of edge evidences (configurable weights): `direct=1.0, plt=0.98, vtable=0.85, funcptr=0.7`.
* **Output**: OpenVEX/CSAF annotations + human proof path; signed with DSSE to preserve replayability.
---
# Minimal Ghidra headless skeleton (exporter idea)
```bash
analyzeHeadless /work/gh_proj MyProj -import app -scriptPath scripts \
-postScript ExportNjif.java /out/app.njif.json
```
```java
// ExportNjif.java (outline)
public class ExportNjif extends GhidraScript {
public void run() throws Exception {
var fns = getFunctions(true);
// collect functions, blocks, calls, externs/PLT
// map nonnamed functions to sub_<addr>
// detect PLT thunks → dynsym names
// write NJIF JSON deterministically (sorted keys, stable ordering)
}
}
```
---
# Integration points in StellaOps
* **Scanner.Analyzers.Binary.Elf**
* `ElfNormalizer` → hashes, dynamic deps.
* `Symbolizer` → DWARF reader + HeuristicDisasm (via tool adapter).
* `CgBuilder` → NJIF builder/merger (multimodule).
* `ReachabilityEngine` → path search + confidence math.
* `Emitter` → NJIF cache + VEX/CSAF notes.
* **Scheduler**: memoize by `(hashes, toolchain_version, ruleset_hash)` to ensure replayable results.
* **Authority**: sign NJIF + scoring outputs; store manifests (feeds, rule weights, tool versions).
---
# Test fixtures (suggested)
* Tiny ELF zoo: statically linked, PIE, stripped/non-stripped, C++ with vtables, musl vs glibc.
* Known CVE libs (e.g., `libssl`, `zlib`) with versioned symbols to validate soname!symbol mapping.
* Synthetic binaries with function-pointer tables to validate conservative edges.
---
If you want, I can generate:
* A ready-to-run **Ghidra headless exporter** (Java) that writes NJIF exactly like above.
* A small **.NET parser** that ingests NJIF and emits StellaOps reachability + OpenVEX notes.
Below is a full architecture plan for implementing **stripped-ELF binary reachability** (call graph + NJIF + deterministic scoring, with a hook for patch-oracles) inside **StellaOps**.
I will assume .NET 10, existing microservice split (Scanner.WebService, Scanner.Worker, Concelier, Excitior, Authority, Scheduler, Sbomer, Signals), and your standing rule: **all lattice logic runs in Scanner.WebService**.
---
## 1. Scope, Objectives, Non-Goals
### 1.1 Objectives
1. **Recover function-level call graphs from ELF binaries**, including **stripped** ones:
* Support ET_EXEC / ET_DYN / PIE, static & dynamic linking.
* Support at least **x86_64, aarch64** in v1, later armhf, riscv64.
2. **Produce a neutral, deterministic JSON representation (NJIF)**:
* Tool-agnostic: can be generated from Ghidra, radare2/rizin, Binary Ninja, angr, etc.
* Stable identifiers and schema so downstream services dont depend on a specific RE engine.
3. **Compute function-level reachability for vulnerabilities**:
* Given CVE → `soname!symbol` (and later function-hash) mappings from Concelier,
* Decide `REACHABLE_CONFIRMED` / `REACHABLE_POSSIBLE` / `NOT_REACHABLE_FOUNDATION` with evidence and confidence.
4. **Integrate with StellaOps lattice and VEX outputs**:
* Lattice logic runs in **Scanner.WebService**.
* Results flow into Excitior (VEX) and Sbomer (SBOM annotations), preserving provenance.
5. **Enable deterministic replay**:
* Every analysis run is tied to a **Scan Manifest**: tool versions, ruleset hashes, policy hashes, container image digests.
### 1.2 Non-Goals (v1)
* No dynamic runtime probes (EventPipe/JFR) in this phase.
* No full decompilation; we only need enough IR for calls/edges.
* No aggressive path-sensitive analysis (symbolic execution) in v1; that can be a v2 enhancement.
---
## 2. High-Level System Architecture
### 2.1 Components
* **Scanner.WebService (existing)**
* REST/gRPC API for scans.
* Orchestrates analysis jobs via Scheduler.
* Hosts **Lattice & Reachability Engine** for all artifact types.
* Reads NJIF results, merges with Concelier function mappings and policies.
* **Scanner.Worker (existing, extended)**
* Executes **Binary Analyzer Pipelines**.
* Invokes RE tools (Ghidra, rizin, etc.) in dedicated containers.
* Produces NJIF and persists it.
* **Binary Tools Containers (new)**
* `stellaops-tools-ghidra:<tag>`
* `stellaops-tools-rizin:<tag>`
* Optionally `stellaops-tools-angr` for advanced passes.
* Pinned versions, no network access (for determinism & air-gap).
* **Storage & Metadata**
* **DB (PostgreSQL)**: scan records, NJIF metadata, reachability summaries.
* **Object store** (MinIO/S3/Filesystem): NJIF JSON blobs, tool logs.
* **Authority**: DSSE signatures for Scan Manifest, NJIF, and reachability outputs.
* **Concelier**
* Provides **CVE → component → function symbol/hashes** resolution.
* Exposes “Link-Not-Merge” graph of advisory, component, and function nodes.
* **Excitior (VEX)**
* Consumes Scanner.WebService reachability states.
* Emits OpenVEX/CSAF with properly justified statuses.
* **UnknownsRegistry (future)**
* Receives unresolvable call edges / ambiguous functions from the analyzer,
* Feeds them into “adaptive security” workflows.
### 2.2 End-to-End Flow (Binary / Image Scan)
1. Client requests scan (binary or container image) via **Scanner.WebService**.
2. WebService:
* Extracts binaries from OCI layers (if scanning image),
* Registers **Scan Manifest**,
* Submits a job to Scheduler (queue: `binary-elfflow`).
3. Scanner.Worker dequeues the job:
* Detects ELF binaries,
* Runs **Binary Analyzer Pipeline** for each unique binary hash.
4. Worker uses tools containers:
* Ghidra/rizin → CFG, function discovery, call graph,
* Converts to **NJIF**.
5. Worker persists NJIF + metadata; marks analysis complete.
6. Scanner.WebService picks up NJIF:
* Fetches advisory function mappings from Concelier,
* Runs **Reachability & Lattice scoring**,
* Updates scan results and triggers Excitior / Sbomer.
All steps are deterministic given:
* Input artifact,
* Tool container digests,
* Ruleset/policy versions.
---
## 3. Binary Analyzer Subsystem (Scanner.Worker)
Introduce a dedicated module:
* `StellaOps.Scanner.Analyzers.Binary.Elf`
### 3.1 Internal Layers
1. **ElfDetector**
* Inspects files in a scan:
* Magic `0x7f 'E' 'L' 'F'`,
* Confirms architecture via ELF header.
* Produces `BinaryArtifact` records with:
* `hashes` (SHA-256, BLAKE3),
* `path` in container,
* `arch`, `endianness`.
2. **ElfNormalizer**
* Uses a lightweight library (e.g., ElfSharp) to extract:
* `ElfType` (ET_EXEC, ET_DYN),
* interpreter (`PT_INTERP`),
* `DT_NEEDED` list,
* RPATH/RUNPATH,
* presence/absence of DWARF sections.
* Emits a normalized `ElfMetadata` DTO.
3. **Symbolization Layer**
* Sub-components:
* `DwarfSymbolReader`: if DWARF present, read CU, function ranges, names, inlines.
* `DynsymReader`: parse `.dynsym`, `.plt`, exported symbols.
* `HeuristicFunctionFinder`:
* For stripped binaries:
* Use disassembler xrefs, prolog patterns, return instructions, call-targets.
* Recognize PLT thunks → `soname!symbol`.
* Consolidates into `FunctionSymbol` entities:
* `id` (e.g., `main`, `sub_401000`, `libc.so.6!memcpy`),
* `addr`, `size`, `is_external`, `from` (`dwarf`, `dynsym`, `heuristic`).
4. **Disassembly & IR Layer**
* Abstraction: `IDisassemblyAdapter`:
* `Task<DisasmResult> AnalyzeAsync(BinaryArtifact, ElfMetadata, ScanManifest)`
* Implementations:
* `GhidraDisassemblyAdapter`:
* Invokes headless Ghidra in container,
* Receives machine-readable JSON (script-produced),
* Extracts functions, basic blocks, calls, GOT/PLT info, vtables.
* `RizinDisassemblyAdapter` (backup/fallback).
* Produces:
* `BasicBlock` objects,
* `Instruction` metadata where needed for calls,
* `CallSite` records (direct, PLT, indirect).
5. **Call-Graph Builder**
* Consumes `FunctionSymbol` + `CallSite` sets.
* Identifies **roots**:
* `_start`, `.init_array` entries,
* `main` (if present),
* Exported API functions for shared libs.
* Creates `CallGraph`:
* Nodes: functions (`FunctionNode`),
* Edges: `CallEdge` with:
* `kind`: `direct`, `plt`, `indirect-funcptr`, `indirect-vtable`, `tailcall`,
* `evidence`: tags like `["reloc@GOT", "sig-match", "vtable-class"]`.
6. **Evidence & Confidence Annotator**
* For each edge, computes a **local confidence**:
* `direct`: 1.0
* `plt`: 0.98
* `indirect-funcptr`: 0.7
* `indirect-vtable`: 0.85
* For each path later, Scanner.WebService composes these.
7. **NJIF Serializer**
* Transforms domain objects into **NJIF JSON**:
* Sorted keys, stable ordering for determinism.
* Writes:
* `artifact`, `elf`, `symbols`, `cfg`, `cg`, and partial `reachability: []` (filled by WebService).
* Stores in object store, returns location + hash to DB.
8. **Unknowns Reporting**
* Any unresolved:
* Indirect call with empty target set,
* Function region not mapped to symbol,
* Logged as `UnknownEvidence` records and optionally published to **UnknownsRegistry** stream.
---
## 4. NJIF Data Model (Neutral JSON Intermediate Format)
Define a stable schema with a top-level `njif_schema_version` field.
### 4.1 Top-Level Shape
```json
{
"njif_schema_version": "1.0.0",
"artifact": { ... },
"symbols": { ... },
"cfg": [ ... ],
"cg": { ... },
"reachability": [ ... ],
"provenance": { ... }
}
```
### 4.2 Key Sections
1. `artifact`
* `path`, `hashes`, `arch`, `elf.type`, `interpreter`, `needed`, `rpath`, `runpath`.
2. `symbols`
* `exported`: external/dynamic symbols, especially PLT:
* `id`, `kind`, `plt`, `lib`.
* `functions`:
* `id` (synthetic or real name),
* `addr`, `size`, `from` (source of naming info),
* `name_hint` (optional).
3. `cfg`
* Per-function basic block CFG plus call sites:
* Blocks with `succ`, `calls` entries.
* Sufficient for future static checks, not full IR.
4. `cg`
* `nodes`: function nodes with evidence tags.
* `edges`: call edges with:
* `from`, `to`, `kind`, `evidence`.
* `roots`: entrypoints for reachability algorithms.
5. `reachability`
* Initially empty from Worker.
* Populated in Scanner.WebService as:
```json
{
"target": "libssl.so.3!SSL_free",
"status": "REACHABLE_CONFIRMED",
"path": ["_start", "main", "libssl.so.3!SSL_free"],
"confidence": 0.93,
"evidence": ["plt", "dynsym", "reloc"]
}
```
6. `provenance`
* `toolchain`:
* `disasm`: `"ghidra_headless:10.4"`, etc.
* `scan_manifest_hash`,
* `timestamp_utc`.
### 4.3 Persisting NJIF
* Object store (versioned path):
* `njif/{sha256}/njif-v1.json`
* DB table `binary_njif`:
* `binary_hash`, `njif_hash`, `schema_version`, `toolchain_digest`, `scan_manifest_id`.
---
## 5. Reachability & Lattice Integration (Scanner.WebService)
### 5.1 Inputs
* **NJIF** for each binary (possibly multiple binaries per container).
* Concelier's **CVE → (component, function)** resolution:
* `component_id` → `soname!symbol` sets, and where available, function hashes.
* Scanner's existing **lattice policies**:
* States: e.g. `NOT_OBSERVED < POSSIBLE < REACHABLE_CONFIRMED`.
* Merge rules are monotone.
### 5.2 Reachability Engine
New service module:
* `StellaOps.Scanner.Domain.Reachability`
* `INjifRepository` (reads NJIF JSON),
* `IFunctionMappingResolver` (Concelier adapter),
* `IReachabilityCalculator`.
Algorithm per target function:
1. Resolve vulnerable function(s):
* From Concelier: `soname!symbol` and/or `func_hash`.
* Map to NJIF `symbols.exported` or `symbols.functions`.
2. For each binary:
* Use `cg.roots` as entry set.
* BFS/DFS along edges until:
* Reaching target node(s),
* Or graph fully explored.
3. For each successful path:
* Collect edges `confidence` weights, compute path confidence:
* e.g., product of edge confidences or a log/additive scheme.
4. Aggregate result:
* If ≥ 1 path with only `direct/plt` edges:
* `status = REACHABLE_CONFIRMED`.
* Else if only paths with indirect edges:
* `status = REACHABLE_POSSIBLE`.
* Else:
* `status = NOT_REACHABLE_FOUNDATION`.
5. Emit `reachability` entry back into NJIF (or as separate DB table) and into scan result graph.
### 5.3 Lattice & VEX
* Lattice computation is done per `(CVE, component, binary)` triple:
* Input: reachability status + other signals.
* Resulting state is:
* Exposed to **Excitior** as a set of **evidence-annotated VEX facts**.
* Excitior translates:
* `NOT_REACHABLE_FOUNDATION` → likely `not_affected` with justification “code_not_reachable”.
* `REACHABLE_CONFIRMED``affected` or “present_and_exploitable” (depending on overall policy).
---
## 6. Patch-Oracle Extension (Advanced, but Architected Now)
While not strictly required for v1, we should reserve architecture hooks.
### 6.1 Concept
* Given:
* A **vulnerable** library build (or binary),
* A **patched** build.
* Run analyzers on both; produce NJIF for each.
* Compare call graphs & function bodies (e.g., hash of normalized bytes):
* Identify **changed functions** and potentially changed code regions.
* Concelier links those function IDs to specific CVEs (via vendor patch metadata).
* These become authoritative “patched function sets” (the **patch oracle**).
### 6.2 Integration Points
Add a module:
* `StellaOps.Scanner.Analysis.PatchOracle`
* Input: pair of artifact hashes (old, new) + NJIF.
* Output: list of `FunctionPatchRecord`:
* `function_id`, `binary_hash_old`, `binary_hash_new`, `change_kind` (`added`, `modified`, `deleted`).
Concelier:
* Ingests `FunctionPatchRecord` via internal API and updates advisory graph:
* CVE → function set derived from real patch.
* Reachability Engine:
* Uses patch-derived function sets instead of or in addition to symbol mapping from vendor docs.
---
## 7. Persistence, Determinism, Caching
### 7.1 Scan Manifest
For every scan job, create:
* `scan_manifest`:
* Input artifact hashes,
* List of binaries,
* Tool container digests (Ghidra, rizin, etc.),
* Ruleset/policy/lattice hashes,
* Time, user, and config flags.
Authority signs this manifest with DSSE.
### 7.2 Binary Analysis Cache
Key: `(binary_hash, arch, toolchain_digest, njif_schema_version)`.
* If present:
* Skip re-running Ghidra/rizin; reuse NJIF.
* If absent:
* Run analysis, then cache NJIF.
This provides deterministic replay and prevents re-analysis across scans and across customers (if allowed by tenancy model).
---
## 8. APIs & Integration Contracts
### 8.1 Scanner.WebService External API (REST)
1. `POST /api/scans/images`
* Existing; extended to flag: `includeBinaryReachability: true`.
2. `POST /api/scans/binaries`
* Upload a standalone ELF; returns `scan_id`.
3. `GET /api/scans/{scanId}/reachability`
* Returns list of `(cve_id, component, binary_path, function_id, status, confidence, path)`.
No path versioning; idempotent and additive (new fields appear, old ones remain valid).
### 8.2 Internal APIs
* **Worker ↔ Object Store**:
* `PUT /binary-njif/{sha256}/njif-v1.json`.
* **WebService ↔ Worker (via Scheduler)**:
* Job payload includes:
* `scan_manifest_id`,
* `binary_hashes`,
* `analysis_profile` (`default`, `deep`).
* **WebService ↔ Concelier**:
* `POST /internal/functions/resolve`:
* Input: `(cve_id, component_ids[])`,
* Output: `soname!symbol[]`, optional `func_hash[]`.
* **WebService ↔ Excitior**:
* Existing VEX ingestion extended with **reachability evidence** fields.
---
## 9. Observability, Security, Resource Model
### 9.1 Observability
* **Metrics**:
* Analysis duration per binary,
* NJIF size,
* Cache hit ratio,
* Reachability evaluation time per CVE.
* **Logs**:
* Ghidra/rizin container logs stored alongside NJIF,
* Unknowns logs for unresolved call targets.
* **Tracing**:
* Each scan/analysis annotated with `scan_manifest_id` to allow end-to-end trace.
### 9.2 Security
* Tools containers:
* No outbound network.
* Limited to read-only artifact mount + write-only result mount.
* Binary content:
* Treated as confidential; stored encrypted at rest if your global policy requires it.
* DSSE:
* Authority signs:
* Scan Manifest,
* NJIF blob hash,
* Reachability summary.
* Enables “Proof-of-Integrity Graph” linkage later.
### 9.3 Resource Model
* ELF analysis can be heavy; design for:
* Separate **worker queue** and autoscaling group for binary analysis.
* Configurable max concurrency and per-job CPU/memory limits.
* Deep analysis (indirect calls, vtables) can be toggled via `analysis_profile`.
---
## 10. Implementation Roadmap
A pragmatic, staged plan:
### Phase 0 — Foundations (1–2 sprints)
* Create `StellaOps.Scanner.Analyzers.Binary.Elf` project.
* Implement:
* `ElfDetector`, `ElfNormalizer`.
* DB tables: `binary_artifacts`, `binary_njif`.
* Integrate with Scheduler and Worker pipeline.
### Phase 1 — Non-stripped ELF + NJIF v1 (2–3 sprints)
* Implement **DWARF + dynsym symbolization**.
* Implement **GhidraDisassemblyAdapter** for x86_64.
* Build **CallGraphBuilder** (direct + PLT calls).
* Implement NJIF serializer v1; store in object store.
* Basic reachability engine in WebService:
* Only direct and PLT edges,
* Only for DWARF-named functions.
* Integrate with Concelier function mapping via `soname!symbol`.
### Phase 2 — Stripped ELF Support (2–3 sprints)
* Implement `HeuristicFunctionFinder` for function discovery in stripped binaries.
* Extend Ghidra script to mark PLT/GOT, vtables, function pointers.
* Call graph: add:
* `indirect-funcptr`, `indirect-vtable`, `tailcall` edges.
* Evidence tagging and local confidence scoring.
* Extend reachability engine to:
* Distinguish `REACHABLE_CONFIRMED` vs `REACHABLE_POSSIBLE`.
### Phase 3 — Multi-Arch & Performance (2–3 sprints)
* Add support for **aarch64** (Ghidra language, appropriate calling conventions).
* Optimize:
* Binary analysis cache,
* Tool container lifecycle,
* Concurrent analysis.
* Add Unknowns reporting and hookup to UnknownsRegistry (if already implemented).
### Phase 4 — Patch-Oracle Pilot (2–3 sprints)
* Implement PatchOracle module:
* Compare old/new NJIFs,
* Detect changed functions.
* Integrate with Conceliers advisory graph.
* Start validating against curated CVE/patch datasets.
### Phase 5 Hardening & Documentation
* Golden fixtures:
* Small ELF zoo (stripped/non-stripped, PIE, static, C++, vtables).
* Known vulnerable libs (e.g., OpenSSL, zlib) to confirm correct function mapping.
* Add CLI/demo in `StellaOps.Scanner.Cli`:
* `stellaops scan-binary --file app --show-reachability`.
* Customer-facing and internal docs:
* NJIF schema,
* API usage,
* Limitations and interpretation guidelines.
---
If you want, next step I can do is take this plan and:
* Break it into **epics / tickets** (SCAN-BINARY-xxx) with clear DoD per phase, or
* Draft the **Ghidra headless Java script** and the **.NET NJIF model classes** so your agents can plug them straight into the Scanner repo.

---
Vlad, here's a concrete, **pure-C#** blueprint to build a multi-format binary analyzer (Mach-O, ELF, PE) that produces **call graphs + reachability**, with **no external tools**. Where needed, I point to permissively-licensed code you can **port** (copy) from other ecosystems.
---
## 0) Targets & non-negotiables
* **Formats:** Mach-O (inc. LC_DYLD_INFO / LC_DYLD_CHAINED_FIXUPS), ELF (SysV gABI), PE/COFF
* **Architectures:** x86-64 (and x86), AArch64 (ARM64)
* **Outputs:** JSON with **purls** per module + function-level call graph & reachability
* **No tool reuse:** Only pure C# libraries or code **ported** from permissive sources
---
## 1) Parsing the containers (pure C#)
**Pick one C# reader per format, keeping licenses permissive:**
* **ELF & MachO:** `ELFSharp` (pure managed C#; ELF + MachO reading). MIT/X11 license. ([GitHub][1])
* **ELF & PE (+ DWARF v4):** `LibObjectFile` (C#, BSD2). Good ELF relocations (i386, x86_64, ARM, AArch64), PE directories, DWARF sections. Use it as your **common object model** for ELF+PE, then add a MachO adapter. ([GitHub][2])
* **PE (optional alternative):** `PeNet` (pure C#, broad PE directories, imp/exp, TLS, certs). MIT. Useful if you want a second implementation for crosschecks. ([GitHub][3])
> Why two libs? `LibObjectFile` gives you DWARF and clean models for ELF/PE; `ELFSharp` covers MachO today (and ELF as a fallback). You control the code paths.
**Spec references youll implement against** (for correctness of your readers & linktime semantics):
* **ELF (gABI, AMD64 supplement):** dynamic section, PLT/GOT, `R_X86_64_JUMP_SLOT` semantics (eager vs lazy). ([refspecs.linuxbase.org][4])
* **PE/COFF:** imports/exports/IAT, delayload, TLS. ([Microsoft Learn][5])
* **MachO:** file layout, load commands (`LC_SYMTAB`, `LC_DYSYMTAB`, `LC_FUNCTION_STARTS`, `LC_DYLD_INFO(_ONLY)`), and the modern `LC_DYLD_CHAINED_FIXUPS`. ([leopard-adc.pepas.com][6])
---
## 2) MachO: what you must **port** (byteforbyte compatible)
Apple moved from traditional dyld bind opcodes to **chained fixups** on macOS 12/iOS 15+; you need both:
* **Dyld bind opcodes** (`LC_DYLD_INFO(_ONLY)`): parse the BIND/LAZY_BIND streams (tuples of `<seg,off,type,ordinal,symbol,addend>`). Port minimal logic from **LLVM** or **LIEF** (both Apache2.0compatible) into C#. ([LIEF][7])
* **Chained fixups** (`LC_DYLD_CHAINED_FIXUPS`): port `dyld_chained_fixups_header` structs & chain walking from LLVMs `MachO.h` or Apples dyld headers. This restores imports/rebases without running dyld. ([LLVM][8])
* **Function discovery hint:** read `LC_FUNCTION_STARTS` (ULEB128 deltas) to seed function boundaries—very helpful on stripped binaries. ([Stack Overflow][9])
* **Stubs mapping:** resolve `__TEXT,__stubs``__DATA,__la_symbol_ptr` via the **indirect symbol table**; conceptually identical to ELFs PLT/GOT. ([MaskRay][10])
> If you prefer an inC# base for MachO manipulation, **Melanzana.MachO** exists (MIT) and has been used by .NET folks for MachO/Code Signing/obj writing; you can mine its approach for loadcommand modeling. ([GitHub][11])
---
## 3) Disassembly (pure C#, multiarch)
* **x86/x64:** `iced` (C# decoder/disassembler/encoder; MIT; fast & complete). ([GitHub][12])
* **AArch64/ARM64:** two options that keep you pureC#:
* **Disarm** (pure C# ARM64 disassembler; MIT). Good starting point to decode & get branch/call kinds. ([GitHub][13])
* **Port from Ryujinx ARMeilleure** (ARMv8 decoder/JIT in C#, MIT). You can lift only the **decoder** pieces you need. ([Gitee][14])
* **x86 fallback:** `SharpDisasm` (udis86 port in C#; BSD2). Older than iced; keep as a reference. ([GitHub][15])
---
## 4) Call graph recovery (static)
**4.1 Function seeds**
* From symbols (`.dynsym`/`LC_SYMTAB`/PE exports)
* From **LC_FUNCTION_STARTS** (MachO) for stripped code ([Stack Overflow][9])
* From entrypoints (`_start`/`main` or PE AddressOfEntryPoint)
* From exception/unwind tables & DWARF (when present)—`LibObjectFile` already models DWARF v4. ([GitHub][2])
**4.2 CFG & interprocedural calls**
* **Decode** with iced/Disarm from each seed; form **basic blocks** by following controlflow until terminators (ret/jmp/call).
* **Direct calls:** immediate targets become edges (PCrelative fixups where needed).
* **Imported calls:**
* **ELF:** calls to PLT stubs → resolve via `.rela.plt` & `R_*_JUMP_SLOT` to symbol names (linktime target). ([cs61.seas.harvard.edu][16])
* **PE:** calls through the **IAT** → resolve via `IMAGE_IMPORT_DESCRIPTOR` / thunk tables. ([Microsoft Learn][5])
* **MachO:** calls to `__stubs` use **indirect symbol table** + `__la_symbol_ptr` (or chained fixups) → map to dylib/symbol. ([reinterpretcast.com][17])
* **Indirect calls within the binary:** heuristics only (function pointer tables, vtables, small constant pools). Keep them labeled **“indirectunresolved”** unless a heuristic yields a concrete target.
**4.3 Crossbinary graph**
* Build modulelevel edges by simulating the platforms loader:
* **ELF:** honor `DT_NEEDED`, `DT_RPATH/RUNPATH`, versioning (`.gnu.version*`) to pick the definer of an imported symbol. gABI rules apply. ([refspecs.linuxbase.org][4])
* **PE:** pick DLL from the import descriptors. ([Microsoft Learn][5])
* **MachO:** `LC_LOAD_DYLIB` + dyld binding / chained fixups determine the provider image. ([LIEF][7])
---
## 5) Reachability analysis
Represent the **call graph** using a .NET graph lib (or a simple adjacency set). I suggest:
* **QuikGraph** (successor of QuickGraph; MIT) for algorithms (DFS/BFS, SCCs). Use it to compute reachability from chosen roots (entrypoint(s), exported APIs, or “sinks”). ([GitHub][18])
You can visualize with **MSAGL** (MIT) when you need layouts, but your core output is JSON. ([GitHub][19])
---
## 6) Symbol demangling (nicetohave, pure C#)
* **Itanium (ELF/MachO):** Either port LLVMs Itanium demangler or use a C# lib like **CxxDemangler** (a C# rewrite of `cpp_demangle`). ([LLVM][20])
* **MSVC (PE):** Port LLVMs `MicrosoftDemangle.cpp` (Apache2.0 with LLVM exception) to C#. ([LLVM][21])
---
## 7) JSON output (with purls)
Use a stable schema (example) to feed SBOM/vuln matching downstream:
```json
{
"modules": [
{
"purl": "pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1?arch=amd64",
"format": "ELF",
"arch": "x86_64",
"path": "/usr/lib/x86_64-linux-gnu/libssl.so.1.1",
"exports": ["SSL_read", "SSL_write"],
"imports": ["BIO_new", "EVP_CipherInit_ex"],
"functions": [{"name":"SSL_do_handshake","va":"0x401020","size":512,"demangled": "..."}]
}
],
"graph": {
"nodes": [
{"id":"bin:main@0x401000","module": "pkg:generic/myapp@1.0.0"},
{"id":"lib:SSL_read","module":"pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1?arch=amd64"}
],
"edges": [
{"src":"bin:main@0x401000","dst":"lib:SSL_read","kind":"import_call","evidence":"ELF.R_X86_64_JUMP_SLOT"}
]
},
"reachability": {
"roots": ["bin:_start","bin:main@0x401000"],
"reachable": ["lib:SSL_read", "lib:SSL_write"],
"unresolved_indirect_calls": [
{"site":"0x402ABC","reason":"register-indirect"}
]
}
}
```
---
## 8) Minimal C# module layout (sketch)
```
Stella.Analysis.Core/
BinaryModule.cs // common model (sections, symbols, relocs, imports/exports)
Loader/
PeLoader.cs // wrap LibObjectFile (or PeNet) to BinaryModule
ElfLoader.cs // wrap LibObjectFile to BinaryModule
MachOLoader.cs // wrap ELFSharp + your ported Dyld/ChainedFixups
Disasm/
X86Disassembler.cs // iced bridge: bytes -> instructions
Arm64Disassembler.cs // Disarm (or ARMeilleure port) bridge
Graph/
CallGraphBuilder.cs // builds CFG per function + inter-procedural edges
Reachability.cs // BFS/DFS over QuikGraph
Demangle/
ItaniumDemangler.cs // port or wrap CxxDemangler
MicrosoftDemangler.cs // port from LLVM
Export/
JsonWriter.cs // writes schema above
```
---
## 9) Implementation notes (where issues usually bite)
* **MachO moderns:** Implement both dyld opcode **and** chained fixups; many macOS 12+/iOS15+ binaries only have chained fixups. ([emergetools.com][22])
* **Stubs vs real targets (MachO):** map `__stubs``__la_symbol_ptr` via **indirect symbols** to the true imported symbol (or its postfixup target). ([reinterpretcast.com][17])
* **ELF PLT/GOT:** treat `.plt` entries as **call trampolines**; ultimate edge should point to the symbol (library) that satisfies `DT_NEEDED` + version. ([refspecs.linuxbase.org][4])
* **PE delayload:** dont forget `IMAGE_DELAYLOAD_DESCRIPTOR` for delayed IATs. ([Microsoft Learn][5])
* **Function discovery:** use `LC_FUNCTION_STARTS` when symbols are stripped; its a cheap way to seed analysis. ([Stack Overflow][9])
* **Name clarity:** demangle Itanium/MSVC so downstream vuln rules can match consistently. ([LLVM][20])
---
## 10) What to **copy/port** verbatim (safe licenses)
* **Dyld bind & exports trie logic:** from **LLVM** or **LIEF** MachO (Apache2.0). Great for getting the exact opcode semantics right. ([LIEF][7])
* **Chained fixups structs/walkers:** from **LLVM MachO.h** or Apple dyld headers (permissive headers). ([LLVM][8])
* **Itanium/MS demanglers:** LLVM demangler sources are standalone; easy to translate to C#. ([LLVM][23])
* **ARM64 decoder:** if Disarm gaps hurt, lift just the **decoder** pieces from **Ryujinx ARMeilleure** (MIT). ([Gitee][14])
*(Avoid GPLd parsers like binutils/BFD; they will contaminate your codebases licensing.)*
---
## 11) Endtoend pipeline (per container image)
1. **Enumerate binaries** in the container FS.
2. **Parse** each with the appropriate loader → `BinaryModule` (+ imports/exports/symbols/relocs).
3. **Simulate linking** per platform to resolve imported functions to provider libraries. ([refspecs.linuxbase.org][4])
4. **Disassemble** functions (iced/Disarm) → CFGs → **call edges** (direct, PLT/IAT/stub, indirect).
5. **Assemble call graph** across modules; normalize names via demangling.
6. **Reachability**: given roots (entry or userspecified) compute reachable set; emit JSON with **purls** (from your SBOM/package resolver).
7. **(Optional)** dump GraphViz / MSAGL views for debugging. ([GitHub][19])
---
## 12) Quick heuristics for vulnerability triage
* **Sink maps**: flag edges to highrisk APIs (`strcpy`, `gets`, legacy SSL ciphers) even without CVE versioning.
* **DWARF line info** (when present): attach file:line to nodes for developer action. `LibObjectFile` gives you DWARF v4 reads. ([GitHub][2])
---
## 13) Test corpora
* **ELF:** glibc/openssl/libpng from distro repos; validate `R_*_JUMP_SLOT` handling and PLT edges. ([cs61.seas.harvard.edu][16])
* **PE:** system DLLs (Kernel32, Advapi32) and a small MSVC console app; validate IAT & delayload. ([Microsoft Learn][5])
* **MachO:** Xcodebuilt binaries across macOS 11 & 12+ to cover both dyld opcode and chained fixups paths; verify `LC_FUNCTION_STARTS` improves discovery. ([Stack Overflow][9])
---
## 14) Deliverables you can start coding now
* **MachOLoader.cs**
* Parse headers + load commands (ELFSharp).
* Implement `DyldInfoParser` (port from LLVM/LIEF) and `ChainedFixupsParser` (port structs & walkers). ([LIEF][7])
* **X86Disassembler.cs / Arm64Disassembler.cs** (iced / Disarm bridges). ([GitHub][12])
* **CallGraphBuilder.cs** (recursive descent + linear sweep fallback; PLT/IAT/stub resolution).
* **Reachability.cs** (QuikGraph BFS/DFS). ([GitHub][18])
* **JsonWriter.cs** (schema above with purls).
---
### References (core, loadbearing)
* **ELFSharp** (ELF + MachO pure C#). ([GitHub][1])
* **LibObjectFile** (ELF/PE/DWARF C#, BSD2). ([GitHub][2])
* **iced** (x86/x64 disasm, C#, MIT). ([GitHub][12])
* **Disarm** (ARM64 disasm, C#, MIT). ([GitHub][13])
* **Ryujinx (ARMeilleure)** (ARMv8 decode/JIT in C#, MIT). ([Gitee][14])
* **ELF gABI & AMD64 supplement** (PLT/GOT, relocations). ([refspecs.linuxbase.org][4])
* **PE/COFF** (imports/exports/IAT). ([Microsoft Learn][5])
* **MachO docs** (load commands; LC_FUNCTION_STARTS; dyld bindings; chained fixups). ([Apple Developer][24])
---
If you want, I can draft **`MachOLoader` + `DyldInfoParser`** in C# next, including chainedfixups structs (ported from LLVMs headers) and an **iced**based calledge walker for x8664.
[1]: https://github.com/konrad-kruczynski/elfsharp "GitHub - konrad-kruczynski/elfsharp: Pure managed C# library for reading ELF, UImage, Mach-O binaries."
[2]: https://github.com/xoofx/LibObjectFile "GitHub - xoofx/LibObjectFile: LibObjectFile is a .NET library to read, manipulate and write linker and executable object files (e.g ELF, PE, DWARF, ar...)"
[3]: https://github.com/secana/PeNet?utm_source=chatgpt.com "secana/PeNet: Portable Executable (PE) library written in . ..."
[4]: https://refspecs.linuxbase.org/elf/gabi4%2B/contents.html?utm_source=chatgpt.com "System V Application Binary Interface - DRAFT - 24 April 2001"
[5]: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format?utm_source=chatgpt.com "PE Format - Win32 apps"
[6]: https://leopard-adc.pepas.com/documentation/DeveloperTools/Conceptual/MachOTopics/0-Introduction/introduction.html?utm_source=chatgpt.com "Mach-O Programming Topics: Introduction"
[7]: https://lief.re/doc/stable/doxygen/classLIEF_1_1MachO_1_1DyldInfo.html?utm_source=chatgpt.com "MachO::DyldInfo Class Reference - LIEF"
[8]: https://llvm.org/doxygen/structllvm_1_1MachO_1_1dyld__chained__fixups__header.html?utm_source=chatgpt.com "MachO::dyld_chained_fixups_header Struct Reference"
[9]: https://stackoverflow.com/questions/9602438/mach-o-file-lc-function-starts-load-command?utm_source=chatgpt.com "Mach-O file LC_FUNCTION_STARTS load command"
[10]: https://maskray.me/blog/2021-09-19-all-about-procedure-linkage-table?utm_source=chatgpt.com "All about Procedure Linkage Table"
[11]: https://github.com/dotnet/runtime/issues/77178 "Discussion: ObjWriter in C# · Issue #77178 · dotnet/runtime · GitHub"
[12]: https://github.com/icedland/iced?utm_source=chatgpt.com "icedland/iced: Blazing fast and correct x86/x64 ..."
[13]: https://github.com/SamboyCoding/Disarm?utm_source=chatgpt.com "SamboyCoding/Disarm: Fast, pure-C# ARM64 Disassembler"
[14]: https://gitee.com/ryujinx/Ryujinx/blob/master/LICENSE.txt?utm_source=chatgpt.com "Ryujinx/Ryujinx"
[15]: https://github.com/justinstenning/SharpDisasm?utm_source=chatgpt.com "justinstenning/SharpDisasm"
[16]: https://cs61.seas.harvard.edu/site/2022/pdf/x86-64-abi-20210928.pdf?utm_source=chatgpt.com "System V Application Binary Interface"
[17]: https://www.reinterpretcast.com/hello-world-mach-o?utm_source=chatgpt.com "The Nitty Gritty of “Hello World” on macOS | reinterpretcast.com"
[18]: https://github.com/KeRNeLith/QuikGraph?utm_source=chatgpt.com "KeRNeLith/QuikGraph: Generic Graph Data Structures and ..."
[19]: https://github.com/microsoft/automatic-graph-layout?utm_source=chatgpt.com "microsoft/automatic-graph-layout: A set of tools for ..."
[20]: https://llvm.org/doxygen/structllvm_1_1ItaniumPartialDemangler.html?utm_source=chatgpt.com "ItaniumPartialDemangler Struct Reference"
[21]: https://llvm.org/doxygen/MicrosoftDemangle_8cpp_source.html?utm_source=chatgpt.com "lib/Demangle/MicrosoftDemangle.cpp Source File"
[22]: https://www.emergetools.com/blog/posts/iOS15LaunchTime?utm_source=chatgpt.com "How iOS 15 makes your app launch faster"
[23]: https://llvm.org/doxygen/ItaniumDemangle_8cpp.html?utm_source=chatgpt.com "lib/Demangle/ItaniumDemangle.cpp File Reference"
[24]: https://developer.apple.com/library/archive/documentation/Performance/Conceptual/CodeFootprint/Articles/MachOOverview.html?utm_source=chatgpt.com "Overview of the Mach-O Executable Format"
Good, this is exactly the point where you want to nail the internal model, otherwise everything on top (VEX, policy, reporting) will be shaky.
Below is a detailed, implementation-ready plan for a **reachability graph with purl-aware edges**, covering ELF, PE, and Mach-O, in C#.
I'll structure it as:
1. Overall graph design (3 layers: function, module, purl)
2. Core C# data model
3. Pipeline steps (end-to-end)
4. Format-specific edge construction (ELF / PE / Mach-O)
5. Reachability queries (from entrypoints to vulnerable purls / functions)
6. JSON output layout and integration with SBOM
---
## 1. Overall graph design
You want three tightly linked graph layers:
1. **Function-level call graph (FLG)**
* Nodes: individual **functions** inside binaries
* Edges: calls from function A → function B (intra- or inter-module)
2. **Module-level graph (MLG)**
* Nodes: **binaries** (ELF/PE/Mach-O files)
* Edges: “module A calls module B at least once” (aggregated from FLG)
3. **Purl-level graph (PLG)**
* Nodes: **purls** (packages or generic artifacts)
* Edges: “purl P1 depends-at-runtime on purl P2” (aggregated from module edges)
The **reachability algorithm** runs primarily on the **function graph**, but:
* You can project reachability results to **module** and **purl** nodes.
* You can also run coarse-grained analysis directly on **purl graph** when needed (“Is any code in purl X reachable from the container entrypoint?”).
---
## 2. Core C# data model
### 2.1 Identifiers and enums
```csharp
/// <summary>Container format of an analyzed binary module.</summary>
public enum BinaryFormat { Elf, Pe, MachO }
/// <summary>Identifies one binary: its on-disk path plus its container format.</summary>
public readonly record struct ModuleId(string Path, BinaryFormat Format);
/// <summary>Package URL string naming the package a module belongs to.</summary>
public readonly record struct Purl(string Value);
/// <summary>Classifies how a call edge in the function-level graph was discovered.</summary>
public enum EdgeKind
{
IntraModuleDirect, // call foo -> bar in same module
ImportCall, // call via plt/iat/stub to imported function
SyntheticRoot, // root (entrypoint) edge
IndirectUnresolved // optional: we saw an indirect call we couldn't resolve
}
```
### 2.2 Function node
```csharp
/// <summary>
/// A single function discovered in a binary; one node of the function-level call graph.
/// </summary>
public sealed class FunctionNode
{
/// <summary>Internal numeric id; used as the key in edge lists and adjacency maps.</summary>
public int Id { get; init; } // internal numeric id
/// <summary>Module (file path + format) this function lives in.</summary>
public ModuleId Module { get; init; }
/// <summary>Package URL resolved from <see cref="Module"/>.</summary>
public Purl Purl { get; init; } // resolved from Module -> Purl
/// <summary>Function address; VA or RVA depending on the loader.</summary>
public ulong Address { get; init; } // VA or RVA
/// <summary>Raw (mangled) symbol name.</summary>
public string Name { get; init; } // mangled
/// <summary>Demangled name, when a demangler produced one.</summary>
public string? DemangledName { get; init; } // optional
/// <summary>True when the function appears in the module's export table.</summary>
public bool IsExported { get; init; }
/// <summary>True for import trampolines (PLT stub, Mach-O stub, PE thunk).</summary>
public bool IsImportedStub { get; init; } // e.g. PLT stub, Mach-O stub, PE thunks
/// <summary>Marks reachability roots such as _start/main/entrypoint; mutable so roots can be tagged after construction.</summary>
public bool IsRoot { get; set; } // _start/main/entrypoint etc.
}
```
### 2.3 Edges
```csharp
/// <summary>
/// A directed call edge between two <see cref="FunctionNode"/>s in the call graph.
/// </summary>
public sealed class CallEdge
{
/// <summary>Caller's <c>FunctionNode.Id</c>.</summary>
public int FromId { get; init; } // FunctionNode.Id
/// <summary>Callee's <c>FunctionNode.Id</c>.</summary>
public int ToId { get; init; } // FunctionNode.Id
/// <summary>How this edge was discovered (direct, import call, synthetic root, ...).</summary>
public EdgeKind Kind { get; init; }
/// <summary>Free-form provenance string recording the format-level mechanism that justified the edge.</summary>
public string Evidence { get; init; } // e.g. "ELF.R_X86_64_JUMP_SLOT", "PE.IAT", "MachO.indirectSym"
}
```
### 2.4 Graph container
```csharp
/// <summary>
/// Immutable container for the full function-level call graph: node table,
/// forward/backward adjacency, and convenience indexes by module and by purl.
/// </summary>
public sealed class CallGraph
{
/// <summary>All function nodes keyed by <c>FunctionNode.Id</c>.</summary>
public IReadOnlyDictionary<int, FunctionNode> Nodes { get; init; }
/// <summary>Outgoing edges per function id (caller -> callees).</summary>
public IReadOnlyDictionary<int, List<CallEdge>> OutEdges { get; init; }
/// <summary>Incoming edges per function id (callee -> callers).</summary>
public IReadOnlyDictionary<int, List<CallEdge>> InEdges { get; init; }
// Convenience: mappings
/// <summary>Function ids grouped by owning module.</summary>
public IReadOnlyDictionary<ModuleId, List<int>> FunctionsByModule { get; init; }
/// <summary>Function ids grouped by owning purl.</summary>
public IReadOnlyDictionary<Purl, List<int>> FunctionsByPurl { get; init; }
}
```
### 2.5 Purl-level graph view
You don't store a separate physical graph; you **derive** it on demand:
```csharp
/// <summary>
/// A purl-to-purl dependency edge, together with the function-level calls that support it.
/// </summary>
public sealed class PurlEdge
{
/// <summary>Depending purl.</summary>
public Purl From { get; init; }
/// <summary>Depended-on purl.</summary>
public Purl To { get; init; }
/// <summary>Function-id pairs (caller, callee) whose calls justify this edge.</summary>
public List<(int FromFnId, int ToFnId)> SupportingCalls { get; init; }
}
/// <summary>
/// Purl-level projection of the call graph, derived on demand rather than stored.
/// </summary>
public sealed class PurlGraphView
{
/// <summary>Adjacency: for each purl, the set of purls it calls into.</summary>
public IReadOnlyDictionary<Purl, HashSet<Purl>> Adjacent { get; init; }
/// <summary>All purl edges with their supporting function-level calls.</summary>
public IReadOnlyList<PurlEdge> Edges { get; init; }
}
```
---
## 3. Pipeline steps (end-to-end)
### Step 0 Inputs
* Set of binaries (files) extracted from container image.
* SBOM or other metadata that can map a file path (or hash) → **purl**.
### Step 1 Parse binaries → `BinaryModule` objects
You define a common in-memory model:
```csharp
/// <summary>
/// Format-neutral in-memory model of one parsed binary (ELF/PE/Mach-O),
/// filled by the format-specific loaders.
/// </summary>
public sealed class BinaryModule
{
/// <summary>Path + format identity of this module.</summary>
public ModuleId Id { get; init; }
/// <summary>Package URL resolved for this module.</summary>
public Purl Purl { get; init; }
/// <summary>Container format of the file.</summary>
public BinaryFormat Format { get; init; }
// Raw sections / segments
public IReadOnlyList<SectionInfo> Sections { get; init; }
// Symbols
public IReadOnlyList<SymbolInfo> Symbols { get; init; } // imports + exports + locals
// Relocations / fixups
public IReadOnlyList<RelocationInfo> Relocations { get; init; }
// Import/export tables (PE) / dylib load commands (Mach-O) / DT_NEEDED entries (ELF)
public ImportInfo[] Imports { get; init; }
public ExportInfo[] Exports { get; init; }
}
```
Implement format-specific loaders:
* `ElfLoader : IBinaryLoader`
* `PeLoader : IBinaryLoader`
* `MachOLoader : IBinaryLoader`
Each loader uses your chosen C# parsers or ported code and fills `BinaryModule`.
### Step 2 Disassembly → basic blocks & candidate functions
For each `BinaryModule`:
1. Use appropriate decoder (iced for x86/x64; Disarm/ported ARMeilleure for AArch64).
2. Seed function starts:
* Exported functions
* Entry points (`_start`, `main`, AddressOfEntryPoint)
* Mach-O `LC_FUNCTION_STARTS` if available
3. Walk instructions to build basic blocks:
* Stop blocks at conditional/unconditional branches, calls, rets.
* Record for each call site:
* Address of caller function
* Operand type (immediate, memory with import table address, etc.)
Disassembler outputs a list of `FunctionNode` skeletons (no cross-module link yet) and a list of **raw call sites**:
```csharp
/// <summary>
/// A call instruction observed during disassembly, before any cross-module
/// resolution; exactly one of the target fields is expected to apply.
/// </summary>
public sealed class RawCallSite
{
/// <summary>Id of the function containing this call instruction.</summary>
public int CallerFunctionId { get; init; }
/// <summary>Address of the call instruction itself.</summary>
public ulong InstructionAddress { get; init; }
/// <summary>Immediate target when the call encodes one.</summary>
public ulong? DirectTargetAddress { get; init; } // e.g. CALL 0x401000
/// <summary>Address of the memory slot read by a memory-indirect call (IAT/GOT/la_symbol_ptr).</summary>
public ulong? MemoryTargetAddress { get; init; } // e.g. CALL [0x404000]
/// <summary>True for calls whose target cannot be read statically (register-indirect etc.).</summary>
public bool IsIndirect { get; init; } // register-based etc.
}
```
### Step 3 Build function nodes
Using disassembly + symbol tables:
* For each discovered function:
* Determine: address, name (if sym available), export/import flags.
* Map `ModuleId``Purl` using `IPurlResolver`.
* Populate `FunctionNode` instances and index them by `Id`.
### Step 4 Construct intra-module edges
For each `RawCallSite`:
* If `DirectTargetAddress` falls inside a known functions address range in the **same module**, add **IntraModuleDirect** edge.
This gives you “normal” calls like `foo()` calling `bar()` in the same .so/.dll/.
### Step 5 Construct inter-module edges (import calls)
This is where ELF/PE/Mach-O differ; details in section 4 below.
But the abstract logic is:
1. For each call site with `MemoryTargetAddress` (IAT slot / GOT entry / la_symbol_ptr / PLT):
2. From the modules import, relocation or fixup tables, determine:
* Which **imported symbol** it corresponds to (name, ordinal, etc.).
* Which **imported module / dylib / DLL** provides that symbol.
3. Find (or create) a `FunctionNode` representing that imported symbol in the **provider module**.
4. Add an **ImportCall** edge from caller function to the provider `FunctionNode`.
This is the key to turning low-level dynamic linking into **purl-aware cross-module edges**, because each `FunctionNode` is already stamped with a `Purl`.
### Step 6 Build adjacency structures
Once you have all `FunctionNode`s and `CallEdge`s:
* Build `OutEdges` and `InEdges` dictionaries keyed by `FunctionNode.Id`.
* Build `FunctionsByModule` / `FunctionsByPurl`.
---
## 4. Format-specific edge construction
This is the “how” for step 5, per binary format.
### 4.1 ELF
Goal: map call sites that go via PLT/GOT to an imported function in a `DT_NEEDED` library.
Algorithm:
1. Parse:
* `.dynsym`, `.dynstr` dynamic symbol table
* `.rela.plt` / `.rel.plt` relocation entries for PLT
* `.got.plt` / `.got` PLTs GOT
* `DT_NEEDED` entries list of linked shared objects and their sonames
2. For each relocation of type `R_*_JUMP_SLOT`:
* It applies to an entry in the PLT GOT; that GOT entry is what CALL instructions read from.
* Relocation gives you:
* Offset in GOT (`r_offset`)
* Symbol index (`r_info` → symbol) → dynamic symbol (`ElfSymbol`)
* Symbol name, type (FUNC), binding, etc.
3. Link GOT entries to call sites:
* For each `RawCallSite` with `MemoryTargetAddress`, check if that address falls inside `.got.plt` (or `.got`). If it does:
* Find relocation whose `r_offset` equals that GOT entry offset.
* That tells you which **symbol** is being called.
4. Determine provider module:
* From the symbols `st_name` and `DT_NEEDED` list, decide which shared object is expected to define it (an approximation is: first DT_NEEDED that provides that name).
* Map DT_NEEDED → `ModuleId` (youll have loaded these modules separately, or you can create “placeholder modules” if theyre not in the container image).
5. Create edges:
* Create/find `FunctionNode` for the **imported symbol** in provider module.
* Add `CallEdge` from caller function to imported function, `EdgeKind = ImportCall`, `Evidence = "ELF.R_X86_64_JUMP_SLOT"` (or arch-specific).
This yields edges like:
* `myapp:main``libssl.so.1.1:SSL_read`
* `libfoo.so:foo``libc.so.6:malloc`
### 4.2 PE
Goal: map call sites that go via the Import Address Table (IAT) to imported functions in DLLs.
Algorithm:
1. Parse:
* `IMAGE_IMPORT_DESCRIPTOR[]` each for a DLL name.
* Original thunk table (INT) names/ordinals of imported symbols.
* IAT where the loader writes function addresses at runtime.
2. For each import entry:
* Determine:
* DLL name (`Name`)
* Function name or ordinal (from INT)
* IAT slot address (RVA)
3. Link IAT slots to call sites:
* For each `RawCallSite` with `MemoryTargetAddress`:
* Check if this address equals the VA of an IAT slot.
* If yes, the call site is effectively calling that imported function.
4. Determine provider module:
* The DLL name gives you a target module (e.g. `KERNEL32.dll``ModuleId`).
* Ensure that DLL is represented as a `BinaryModule` or a “placeholder” if not present in image.
5. Create edges:
* Create/find `FunctionNode` for imported function in provider module.
* Add `CallEdge` with `EdgeKind = ImportCall` and `Evidence = "PE.IAT"` (or `"PE.DelayLoad"` if using delay load descriptors).
Example:
* `myservice.exe:Start``SSPICLI.dll:AcquireCredentialsHandleW`
### 4.3 Mach-O
Goal: map stub calls via `__TEXT,__stubs` / `__DATA,__la_symbol_ptr` (and / or chained fixups) to symbols in dependent dylibs.
Algorithm (for classic dyld opcodes, not chained fixups, then extend):
1. Parse:
* Load commands:
* `LC_SYMTAB`, `LC_DYSYMTAB`
* `LC_LOAD_DYLIB` (to know dependent dylibs)
* `LC_FUNCTION_STARTS` (for seeding functions)
* `LC_DYLD_INFO` (rebase/bind/lazy bind)
* `__TEXT,__stubs` stub code
* `__DATA,__la_symbol_ptr` (or `__DATA_CONST,__la_symbol_ptr`) lazy pointer table
* **Indirect symbol table** maps slot indices to symbol table indices
2. Stub → la_symbol_ptr mapping:
* Stubs are small functions (usually a few instructions) that indirect through the corresponding `la_symbol_ptr` entry.
* For each stub function:
* Determine which la_symbol_ptr entry it uses (based on stub index and linking metadata).
* From the indirect symbol table, find which dynamic symbol that la_symbol_ptr entry corresponds to.
* This gives you symbol name and the index in `LC_LOAD_DYLIB` (dylib ordinal).
3. Link stub call sites:
* In disassembly, treat calls to these stub functions as **import calls**.
* For each call instruction `CALL stub_function`:
* `RawCallSite.DirectTargetAddress` lies inside `__TEXT,__stubs`.
* Resolve stub → la_symbol_ptr → symbol → dylib.
4. Determine provider module:
* From dylib ordinal and load commands, get the path / install name of dylib (`libssl.1.1.dylib`, etc.).
* Map that to a `ModuleId` in your module set.
5. Create edges:
* Create/find imported `FunctionNode` in provider module.
* Add `CallEdge` from caller to that function with `EdgeKind = ImportCall`, `Evidence = "MachO.IndirectSymbol"`.
For **chained fixups** (`LC_DYLD_CHAINED_FIXUPS`), youll compute a similar mapping but walking chain entries instead of traditional lazy/weak binds. The key is still:
* Map a stub or function to a **fixup** entry.
* From fixup, determine the symbol and dylib.
* Then connect call-site → imported function.
---
## 5. Reachability queries
Once the graph is built, reachability is “just graph algorithms” + mapping back to purls.
### 5.1 Roots
Decide what are your **root functions**:
* Binary entrypoints:
* ELF: `_start`, `main`, constructors (`.init_array`)
* PE: AddressOfEntryPoint, registered service entrypoints
* Mach-O: `_main`, constructors
* Optionally, any exported API function that a container orchestrator or plugin system will call.
Mark them as `FunctionNode.IsRoot = true` and create synthetic edges from a special root node if you want:
```csharp
// Synthetic super-root: one node whose outgoing edges fan out to every real
// root function, so reachability can start from a single id.
var syntheticRoot = new FunctionNode
{
// NOTE(review): Id = 0 may collide with a real function id — confirm ids are
// allocated starting above 0, or reserve a dedicated sentinel value.
Id = 0,
Name = "<root>",
IsRoot = true,
// Module, Purl can be special markers
};
// Add one SyntheticRoot edge per marked root so traversal from the
// super-root reaches all entrypoints.
foreach (var fn in allFunctions.Where(f => f.IsRoot))
{
edges.Add(new CallEdge
{
FromId = syntheticRoot.Id,
ToId = fn.Id,
Kind = EdgeKind.SyntheticRoot,
Evidence = "Root"
});
}
```
### 5.2 Reachability algorithm (function-level)
Use BFS/DFS from the root node(s):
```csharp
public sealed class ReachabilityResult
{
    /// <summary>Ids of every function reachable from the supplied roots (roots included).</summary>
    public HashSet<int> ReachableFunctions { get; } = new();
}

/// <summary>
/// Computes the set of function ids reachable from <paramref name="rootIds"/>
/// via an iterative depth-first traversal over the graph's out-edge map.
/// </summary>
/// <param name="graph">Call graph whose <c>OutEdges</c> maps a function id to its outgoing edges.</param>
/// <param name="rootIds">Ids of the root functions to start the traversal from.</param>
/// <returns>A <see cref="ReachabilityResult"/> containing every visited function id.</returns>
public ReachabilityResult ComputeReachableFunctions(CallGraph graph, IEnumerable<int> rootIds)
{
    var visited = new HashSet<int>();
    var stack = new Stack<int>();

    foreach (var root in rootIds)
    {
        // Add() returning true means "first time seen" — avoids duplicate pushes.
        if (visited.Add(root))
            stack.Push(root);
    }

    while (stack.Count > 0)
    {
        var current = stack.Pop();

        // Functions with no recorded out-edges are leaves of the traversal.
        if (!graph.OutEdges.TryGetValue(current, out var edges))
            continue;

        foreach (var edge in edges)
        {
            if (visited.Add(edge.ToId))
                stack.Push(edge.ToId);
        }
    }

    // BUGFIX: the original sketch assigned the get-only ReachableFunctions
    // property via an object initializer, which does not compile in C#.
    // Populate the existing set instead.
    var result = new ReachabilityResult();
    result.ReachableFunctions.UnionWith(visited);
    return result;
}
```
### 5.3 Project reachability to modules and purls
Given `ReachableFunctions`:
```csharp
public sealed class ReachabilityProjection
{
    public HashSet<ModuleId> ReachableModules { get; } = new();
    public HashSet<Purl> ReachablePurls { get; } = new();
}

/// <summary>
/// Lifts a function-level reachability result to module and purl granularity
/// by resolving each reachable function id to its node in the graph.
/// Ids without a matching node are skipped.
/// </summary>
public ReachabilityProjection ProjectToModulesAndPurls(CallGraph graph, ReachabilityResult result)
{
    var projected = new ReachabilityProjection();

    foreach (var functionId in result.ReachableFunctions)
    {
        // Tolerate dangling ids: only project ids that resolve to a node.
        if (graph.Nodes.TryGetValue(functionId, out var node))
        {
            projected.ReachableModules.Add(node.Module);
            projected.ReachablePurls.Add(node.Purl);
        }
    }

    return projected;
}
```
Now you can answer questions like:
* “Is any code from purl `pkg:deb/openssl@1.1.1w-1` reachable from the container entrypoint?”
* “Which purls are reachable at all?”
### 5.4 Vulnerability reachability
Assume youve mapped each vulnerability to:
* `Purl` (where it lives)
* `AffectedFunctionNames` (symbols; optionally demangled)
You can implement:
```csharp
/// <summary>
/// A vulnerable function ("sink") declared by an advisory: the purl it lives
/// in plus the symbol name to match against that purl's function nodes.
/// </summary>
public sealed class VulnerabilitySink
{
public string VulnerabilityId { get; init; } // CVE-...
public Purl Purl { get; init; }
public string FunctionName { get; init; } // symbol name or demangled
}
```
Resolution algorithm:
1. For each `VulnerabilitySink`, find all `FunctionNode` with:
* `node.Purl == sink.Purl` and
* `node.Name` or `node.DemangledName` matches `sink.FunctionName`.
2. For each such node, check `ReachableFunctions.Contains(node.Id)`.
3. Build a `Finding` object:
```csharp
/// <summary>
/// Result of resolving one vulnerability against the call graph: whether any
/// of its sink functions is reachable, and which function nodes matched.
/// </summary>
public sealed class VulnerabilityFinding
{
public string VulnerabilityId { get; init; }
public Purl Purl { get; init; }
// True when at least one sink function id is in the reachable set.
public bool IsReachable { get; init; }
// FunctionNode ids that matched the sink's purl + function name.
public List<int> SinkFunctionIds { get; init; } = new();
}
```
Plus, if you want **path evidence**, you run a shortest-path search (BFS predecessor map) from root to sink and store the sequence of `FunctionNode.Id`s.
---
## 6. Purl edges (derived graph)
For reporting and analytics, its useful to produce a **purl-level dependency graph**.
Given `CallGraph`:
```csharp
/// <summary>
/// Derives a purl-level dependency graph from the function-level call graph:
/// one edge per ordered (caller purl, callee purl) pair, retaining the
/// supporting function-id call pairs as evidence. Intra-purl calls are skipped.
/// </summary>
public PurlGraphView BuildPurlGraph(CallGraph graph)
{
    var purlEdges = new Dictionary<(Purl From, Purl To), PurlEdge>();

    foreach (var (callerId, outgoing) in graph.OutEdges)
    {
        var caller = graph.Nodes[callerId];

        foreach (var callEdge in outgoing)
        {
            var callee = graph.Nodes[callEdge.ToId];

            // Only inter-purl calls contribute to the derived graph.
            if (caller.Purl.Equals(callee.Purl))
                continue;

            var pair = (caller.Purl, callee.Purl);
            if (!purlEdges.TryGetValue(pair, out var purlEdge))
            {
                purlEdge = new PurlEdge
                {
                    From = caller.Purl,
                    To = callee.Purl,
                    SupportingCalls = new List<(int, int)>()
                };
                purlEdges[pair] = purlEdge;
            }

            purlEdge.SupportingCalls.Add((caller.Id, callee.Id));
        }
    }

    // Collapse the keyed edge map into a purl adjacency map.
    var adjacency = new Dictionary<Purl, HashSet<Purl>>();
    foreach (var ((from, to), _) in purlEdges)
    {
        if (!adjacency.TryGetValue(from, out var neighbours))
        {
            neighbours = new HashSet<Purl>();
            adjacency[from] = neighbours;
        }

        neighbours.Add(to);
    }

    return new PurlGraphView
    {
        Adjacent = adjacency,
        Edges = purlEdges.Values.ToList()
    };
}
```
This gives you:
* A coarse view of runtime dependencies between purls (“Purl A calls into Purl B”).
* Enough context to emit purl-level VEX or to reason about trust at package granularity.
---
## 7. JSON output and SBOM integration
### 7.1 JSON shape (high level)
You can emit a composite document:
```json
{
"image": "registry.example.com/app@sha256:...",
"modules": [
{
"moduleId": { "path": "/usr/lib/libssl.so.1.1", "format": "Elf" },
"purl": "pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1",
"arch": "x86_64"
}
],
"functions": [
{
"id": 42,
"name": "SSL_do_handshake",
"demangledName": null,
"module": { "path": "/usr/lib/libssl.so.1.1", "format": "Elf" },
"purl": "pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1",
"address": "0x401020",
"exported": true
}
],
"edges": [
{
"from": 10,
"to": 42,
"kind": "ImportCall",
"evidence": "ELF.R_X86_64_JUMP_SLOT"
}
],
"reachability": {
"roots": [1],
"reachableFunctions": [1,10,42]
},
"purlGraph": {
"edges": [
{
"from": "pkg:generic/myapp@1.0.0",
"to": "pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1",
"supportingCalls": [[10,42]]
}
]
},
"vulnerabilities": [
{
"id": "CVE-2024-XXXX",
"purl": "pkg:deb/ubuntu/openssl@1.1.1w-0ubuntu1",
"sinkFunctions": [42],
"reachable": true,
"paths": [
[1, 10, 42]
]
}
]
}
```
### 7.2 Purl resolution
Implement an `IPurlResolver` interface:
```csharp
public interface IPurlResolver
{
Purl ResolveForModule(string filePath, byte[] contentHash);
}
```
Possible implementations:
* `SbomPurlResolver` given a CycloneDX/SPDX SBOM for the image, match by path or checksum.
* `LinuxPackagePurlResolver` read `/var/lib/dpkg/status` / rpm DB in the filesystem.
* `GenericPurlResolver` fallback: `pkg:generic/<hash>`.
You call the resolver in your loaders so that **every `BinaryModule` has a purl** and thus every `FunctionNode` has a purl.
---
## 8. Concrete implementation tasks for your team
1. **Data model & interfaces**
* Implement `ModuleId`, `FunctionNode`, `CallEdge`, `CallGraph`.
* Define `RawCallSite`, `BinaryModule`, and `IPurlResolver`.
2. **Loaders**
* `ElfLoader`: fill symbols, dynamic relocations (PLT), DT_NEEDED, etc.
* `PeLoader`: import descriptors, IAT, delay-load.
* `MachOLoader`: load commands, stubs, la_symbol_ptr, indirect symbols / chained fixups.
3. **Disassembly**
* `X86Disassembler` (iced) and `Arm64Disassembler` (Disarm or port).
* Function detection and `RawCallSite` extraction.
4. **CallGraphBuilder**
* Build intra-module edges from direct calls.
* Build inter-module edges using the format-specific rules above.
* Construct final `CallGraph` with adjacency maps and purl mappings.
5. **Reachability**
* Implement BFS/DFS from root functions.
* Projection to modules + purls.
* Vulnerability sink resolution & path reconstruction.
6. **Export**
* JSON serializer for the schema above.
* Optional: purl-level summary / VEX generator.
---
If you want, next step I can do a **more concrete design for `CallGraphBuilder`** (including per-format helper classes with method signatures) or a **C# skeleton** for the `ElfImportResolver`, `PeImportResolver`, and `MachOStubResolver` that plug directly into this plan.

View File

@@ -0,0 +1,927 @@
Here's a crisp idea that could give StellaOps a real moat: **binary-level reachability**—linking CVEs directly to the exact functions and offsets inside compiled artifacts (ELF/PE/Mach-O), not just to packages.
---
### Why this matters (quick background)
* **Packagelevel flags are noisy.** Most scanners say “vuln in `libX v1.2`,” but that library might be present and never executed.
* **Languagelevel call graphs help** (when you have source or rich metadata), but containers often ship only **stripped binaries**.
* **Binary reachability** answers: *Is the vulnerable function actually in this image? Is its code path reachable from the entrypoints we observed or can construct?*
---
### The missing layer: Symbolization
Build a **symbolization layer** that normalizes debug and symbol info across platforms:
* **Inputs**: DWARF (ELF/MachO), PDB (PE/Windows), symtabs, exported symbols, `.eh_frame`, and (when stripped) heuristic signatures (e.g., function bytehashes, CFG fingerprints).
* **Outputs**: a sourceagnostic map: `{binary → sections → functions → (addresses, ranges, hashes, demangled names, inlined frames)}`.
* **Normalization**: Put everything into a common schema (e.g., `Stella.Symbolix.v1`) so higher layers dont care if it came from DWARF or PDB.
---
### Endtoend reachability (binaryfirst, sourceagnostic)
1. **Acquire & parse**
* Detect format (ELF/PE/MachO), parse headers, sections, symbol tables.
* If debug info present: parse DWARF/PDB; else fall back to disassembly + function boundary recovery.
2. **Function catalog**
* Assign stable IDs per function: `(imageHash, textSectionHash, startVA, size, fnHashXX)`.
* Record xrefs (calls/jumps), imports/exports, PLT/IAT edges.
3. **Entrypoint discovery**
* Docker entry, process launch args, service scripts; infer likely mains (Go `main.main`, .NET hostfxr path, JVM launcher, etc.).
4. **Callgraph build (binary CFG)**
* Build inter/intraprocedural graph (direct + resolved indirect via IAT/PLT). Keep “unknowntarget” edges for conservative safety.
5. **CVE→function linking**
* Maintain a **signature bank** per CVE advisory: vulnerable function names, file paths, and—crucially—**bytesequence or basicblock fingerprints** for patched vs vulnerable versions (works even when stripped).
6. **Reachability analysis**
* Is the vulnerable function present? Is there a path from any entrypoint to it (under conservative assumptions)? Tag as `Present+Reachable`, `Present+Uncertain`, or `Absent`.
7. **Runtime confirmation (optional, when users allow)**
* Lightweight probes (eBPF on Linux, ETW on Windows, perf/JFR/EventPipe) capture function hits; crosscheck with the static result to upgrade confidence.
---
### Minimal component plan (drop into StellaOps)
* **Scanner.Symbolizer**
Parsers: ELF/DWARF (libdw or puremanaged reader), PE/PDB (Dia/LLVM PDB), MachO/DSYM.
Output: `Symbolix.v1` blobs stored in OCI layer cache.
* **Scanner.CFG**
Lifts functions to a normalized IR (capstone/icedx86 for decode) → builds CFG & call graph.
* **Advisory.FingerprintBank**
Ingests CSAF/OpenVEX plus curated fingerprints (fn names, block hashes, patch diff markers). Versioned, signed, airgapsyncable.
* **Reachability.Engine**
Joins (`Symbolix` + `CFG` + `FingerprintBank`) → emits `ReachabilityEvidence` with lattice states for VEX.
* **VEXer.Adapter**
Emits **OpenVEX** statements with `status: affected/not_affected` and `justification: function_not_present | function_not_reachable | mitigated_at_runtime`, attaching Evidence URIs.
* **Console UX**
“Why not affected?” panel showing entrypoint→…→function path (or absence), with bytehash proof.
---
### Data model sketch (concise)
* `ImageFunction { id, name?, startVA, size, fnHash, sectionHash, demangled?, provenance:{DWARF|PDB|Heuristic} }`
* `Edge { srcFnId, dstFnId, kind:{direct|plt|iat|indirect?} }`
* `CveSignature { cveId, fnName?, libHints[], blockFingerprints[], versionRanges }`
* `Evidence { cveId, imageId, functionMatches[], reachable: bool?, confidence:[low|med|high], method:[static|runtime|hybrid] }`
---
### Practical phases (810 weeks of focused work)
1. **P0**: ELF/DWARF symbolizer + basic function catalog; link a handful of CVEs via nameonly; emit OpenVEX `function_not_present`.
2. **P1**: CFG builder (direct calls) + PLT/IAT resolution; simple reachability; first fingerprints for top 50 CVEs in glibc, openssl, curl, zlib.
3. **P2**: Strippedbinary heuristics (block hashing) + Go/Rust name demangling; Windows PDB ingestion for PE.
4. **P3**: Runtime probes (optin) + confidence upgrade logic; Console path explorer; evidence signing (DSSE).
---
### KPIs to prove the moat
* **Noise cut**: % reduction in “affected” flags after reachability (target 4070% on typical containers).
* **Precision**: Groundtruth validation vs PoC images (TP/FP/FN on presence & reachability).
* **Coverage**: % images where we can make a determination without source (goal: >80%).
* **Latency**: Added scan time per image (<15s typical with caches).
---
### Risks & how to handle them
* **Stripped binaries** mitigate with blockhash fingerprints & libraryversion heuristics.
* **Obfuscated/packed code** mark `Uncertain`; allow usersupplied hints; prefer runtime confirmation.
* **Advisory inconsistency** keep our own curated CVEfunction fingerprint bank; sign & version it.
* **Platform spread** start Linux/ELF, then Windows/PDB, then MachO.
---
### Why competitors struggle
Most tools stop at packages because binary CFG + fingerprint curation is hard and expensive. Shipping a **sourceagnostic reachability engine** tied to signed evidence in VEX would set StellaOps apartespecially in offline/airgapped and sovereign contexts you already target.
If you want, I can draft:
* the `Symbolix.v1` protobuf,
* a tiny PoC (ELFfunctionsmatch CVE with a block fingerprint),
* and the OpenVEX emission snippet your VEXer can produce.
Below is a detailed architecture plan for implementing reachability and call-graph analysis in Stella Ops, covering JavaScript, Python, PHP, and binaries, and integrating with your existing Scanner / Concelier / VEXer stack.
I will assume:
* .NET 10 for core services.
* Scanner is the place where all trust algebra / lattice runs (per your standing rule).
* Concelier and VEXer remain preserve/prune layers and do not run lattice logic.
* Output must be JSON-centric with PURLs and OpenVEX.
---
## 1. Scope & Objectives
### 1.1 Primary goals
1. From an OCI image, build:
* A **library-level usage graph** (which libraries are used by which entrypoints).
* A **function-level call graph** for JS / Python / PHP / binaries.
2. Map CVEs (from Concelier) to:
* Concrete **components** (PURLs) in the SBOM.
* Concrete **functions / entrypoints / code regions** inside those components.
3. Perform **reachability analysis** to classify each vulnerability as:
* `present + reachable`
* `present + not_reachable`
* `function_not_present` (no vulnerable symbol)
* `uncertain` (dynamic features, unresolved calls)
4. Emit:
* **Structured JSON** with PURLs and call-graph nodes/edges (“reachability evidence”).
* **OpenVEX** documents with appropriate `status`/`justification`.
### 1.2 Non-goals (for now)
* Full dynamic analysis of the running container (eBPF, ptrace, etc.) leave as Phase 3+ optional add-on.
* Perfect call graph precision for dynamic languages (aim for safe, conservative approximations).
* Automatic fix recommendations (handled by other Stella Ops agents later).
---
## 2. High-Level Architecture
### 2.1 Major components
Within Stella Ops:
* **Scanner.WebService**
* User-facing API.
* Orchestrates full scan (SBOM, CVEs, reachability).
* Hosts the **Lattice/Policy engine** that merges evidence and produces decisions.
* **Scanner.Worker**
* Runs per-image analysis jobs.
* Invokes analyzers (JS, Python, PHP, Binary) inside its own container context.
* **Scanner.Reachability Core Library**
* Unified IR for call graphs and reachability evidence.
* Interfaces for language and binary analyzers.
* Graph algorithms (BFS/DFS, lattice evaluation, entrypoint expansion).
* **Language Analyzers**
* `Scanner.Analyzers.JavaScript`
* `Scanner.Analyzers.Python`
* `Scanner.Analyzers.Php`
* `Scanner.Analyzers.Binary`
* **Symbolization & CFG (for binaries)**
* `Scanner.Symbolization` (ELF, PE, Mach-O parsers, DWARF/PDB)
* `Scanner.Cfg` (CFG + call graph for binaries)
* **Vulnerability Signature Bank**
* `Concelier.Signatures` (curated CVE→function/library fingerprints).
* Exposed to Scanner as **offline bundle**.
* **VEXer**
* `Vexer.Adapter.Reachability` transforms reachability evidence into OpenVEX.
### 2.2 Data flow (logical)
```mermaid
flowchart LR
A[OCI Image / Tar] --> B[Scanner.Worker: Extract FS]
B --> C[SBOM Engine (CycloneDX/SPDX)]
C --> D[Vuln Match (Concelier feeds)]
B --> E1[JS Analyzer]
B --> E2[Python Analyzer]
B --> E3[PHP Analyzer]
B --> E4[Binary Analyzer + Symbolizer/CFG]
D --> F[Reachability Orchestrator]
E1 --> F
E2 --> F
E3 --> F
E4 --> F
F --> G[Lattice/Policy Engine (Scanner.WebService)]
G --> H[Reachability Evidence JSON]
G --> I[VEXer: OpenVEX]
G --> J[Graph/Cartographer (optional)]
```
---
## 3. Data Model & JSON Contracts
### 3.1 Core IR types (Scanner.Reachability)
Define in a central assembly, e.g. `StellaOps.Scanner.Reachability`:
```csharp
/// <summary>Reference to an SBOM component, keyed by its package URL (purl).</summary>
public record ComponentRef(
string Purl,
string? BomRef,
string? Name,
string? Version);
/// <summary>Kind of symbol a call-graph node represents.</summary>
public enum SymbolKind { Function, Method, Constructor, Lambda, Import, Export }
/// <summary>Language-qualified identity of a symbol inside a component.</summary>
public record SymbolId(
string Language, // "js", "python", "php", "binary"
string ComponentPurl, // SBOM component PURL or "" for app code
string LogicalName, // e.g., "server.js:handleLogin"
string? FilePath,
int? Line);
/// <summary>Node in a call graph; entrypoints are traversal roots.</summary>
public record CallGraphNode(
string Id, // stable id, e.g., hash(SymbolId)
SymbolId Symbol,
SymbolKind Kind,
bool IsEntrypoint);
/// <summary>How a call edge was resolved (static, dynamic, cross-boundary).</summary>
public enum CallEdgeKind { Direct, Indirect, Dynamic, External, Ffi }
/// <summary>Directed edge between two call-graph nodes, by node id.</summary>
public record CallGraphEdge(
string FromNodeId,
string ToNodeId,
CallEdgeKind Kind);
/// <summary>A call graph: nodes plus directed edges, identified by GraphId.</summary>
public record CallGraph(
string GraphId,
IReadOnlyList<CallGraphNode> Nodes,
IReadOnlyList<CallGraphEdge> Edges);
```
### 3.2 Vulnerability mapping
```csharp
/// <summary>
/// Curated mapping from one advisory to the components and symbols it
/// affects; patterns are matched against call-graph node identities.
/// </summary>
public record VulnerabilitySignature(
string Source, // "csaf", "nvd", "vendor"
string Id, // "CVE-2023-12345"
IReadOnlyList<string> Purls,
IReadOnlyList<string> TargetSymbolPatterns, // glob-like or regex
IReadOnlyList<string>? FilePathPatterns,
IReadOnlyList<string>? BlockFingerprints // for binaries, optional
);
```
### 3.3 Reachability evidence
```csharp
/// <summary>Outcome of reachability analysis for one (vuln, component) pair.</summary>
public enum ReachabilityStatus
{
PresentReachable,
PresentNotReachable,
FunctionNotPresent,
Unknown
}
/// <summary>
/// Evidence record emitted per (image, vulnerability, component): the status,
/// a confidence score, the analysis method used, and optional example paths
/// from entrypoints to the vulnerable symbol for audit/UX.
/// </summary>
public record ReachabilityEvidence
(
string ImageRef,
string VulnId, // CVE or advisory id
ComponentRef Component,
ReachabilityStatus Status,
double Confidence, // 0..1
string Method, // "static-callgraph", "binary-fingerprint", etc.
IReadOnlyList<string> EntrypointNodeIds,
IReadOnlyList<IReadOnlyList<string>>? ExamplePaths // optional list of node-paths
);
```
### 3.4 JSON structure (external)
Minimal external JSON (what you store / expose):
```json
{
"image": "registry.example.com/app:1.2.3",
"components": [
{
"purl": "pkg:npm/express@4.18.0",
"bomRef": "component-1"
}
],
"callGraphs": [
{
"graphId": "js-main",
"language": "js",
"nodes": [ /* CallGraphNode */ ],
"edges": [ /* CallGraphEdge */ ]
}
],
"reachability": [
{
"vulnId": "CVE-2023-12345",
"componentPurl": "pkg:npm/express@4.18.0",
"status": "PresentReachable",
"confidence": 0.92,
"entrypoints": [ "node:..." ],
"paths": [
["node:entry", "node:routeHandler", "node:vulnFn"]
]
}
]
}
```
---
## 4. Scanner-Side Architecture
### 4.1 Project layout (suggested)
```text
src/
Scanner/
StellaOps.Scanner.WebService/
StellaOps.Scanner.Worker/
StellaOps.Scanner.Core/ # shared scan domain
StellaOps.Scanner.Reachability/
StellaOps.Scanner.Symbolization/
StellaOps.Scanner.Cfg/
StellaOps.Scanner.Analyzers.JavaScript/
StellaOps.Scanner.Analyzers.Python/
StellaOps.Scanner.Analyzers.Php/
StellaOps.Scanner.Analyzers.Binary/
```
### 4.2 API surface (Scanner.WebService)
* `POST /api/scan/image`
* Request: `{ "imageRef": "...", "profile": { "reachability": true, ... } }`
* Returns: scan id.
* `GET /api/scan/{id}/reachability`
* Returns: `ReachabilityEvidence[]`, plus call graph summary (optional).
* `GET /api/scan/{id}/vex`
* Returns: OpenVEX with statuses based on reachability lattice.
### 4.3 Worker orchestration
`StellaOps.Scanner.Worker`:
1. Receives scan job with `imageRef`.
2. Extracts filesystem (layered rootfs) under `/mnt/scans/{scanId}/rootfs`.
3. Invokes SBOM generator (CycloneDX/SPDX).
4. Invokes Concelier via offline feeds to get:
* Component vulnerabilities (CVE list per PURL).
* Vulnerability signatures (fingerprints).
5. Builds a `ReachabilityPlan`:
```csharp
/// <summary>
/// Work plan built by the worker for one scan: the SBOM components, the
/// vulnerability signatures to check, and the analyzer targets grouped by language.
/// </summary>
public record ReachabilityPlan(
IReadOnlyList<ComponentRef> Components,
IReadOnlyList<VulnerabilitySignature> Vulns,
IReadOnlyList<AnalyzerTarget> AnalyzerTargets // files/dirs grouped by language
);
```
6. For each language target, dispatch analyzer:
* JavaScript: `IReachabilityAnalyzer` implementation for JS.
* Python: likewise.
* PHP: likewise.
* Binary: symbolizer + CFG.
7. Collects call graphs from each analyzer and merges them into a single IR (or separate per-language graphs with shared IDs).
8. Sends merged graphs + vuln list to **Reachability Engine** (Scanner.Reachability).
---
## 5. Language Analyzers (JS / Python / PHP)
All analyzers implement a common interface:
```csharp
/// <summary>
/// Common contract for per-language analyzers: given an extracted rootfs and
/// scan context, produce a call graph for that language's code.
/// </summary>
public interface IReachabilityAnalyzer
{
string Language { get; } // "js", "python", "php"
Task<CallGraph> AnalyzeAsync(AnalyzerContext context, CancellationToken ct);
}
/// <summary>
/// Inputs shared by all analyzers: rootfs path, SBOM components, vulnerability
/// signatures, and container launch metadata used for entrypoint detection.
/// </summary>
public record AnalyzerContext(
string RootFsPath,
IReadOnlyList<ComponentRef> Components,
IReadOnlyList<VulnerabilitySignature> Vulnerabilities,
IReadOnlyDictionary<string, string> Env, // container env, entrypoint, etc.
string? EntrypointCommand // container CMD/ENTRYPOINT
);
```
### 5.1 JavaScript (Node.js focus)
**Inputs:**
* `/app` tree inside container (or discovered via SBOM).
* `package.json` files.
* Container entrypoint (e.g., `["node", "server.js"]`).
**Core steps:**
1. Identify **app root**:
* Heuristics: directory containing `package.json` that owns the entry script.
2. Parse:
* All `.js`, `.mjs`, `.cjs` in app and `node_modules` for vulnerable PURLs.
* Use a parsing frontend (e.g., Tree-sitter via .NET binding, or Node+AST-as-JSON).
3. Build module graph:
* `require`, `import`, `export`.
4. Function-level graph:
* For each function/method, create `CallGraphNode`.
* For each `callExpression`, create `CallGraphEdge` (try to resolve callee).
5. Entrypoints:
* Main script in CMD/ENTRYPOINT.
* HTTP route handlers (for express/koa) detected by patterns (e.g., `app.get("/...")`).
6. Map vulnerable symbols:
* From `VulnerabilitySignature.TargetSymbolPatterns` (e.g., `express/lib/router/layer.js:handle_request`).
* Identify nodes whose `SymbolId` matches patterns.
**Output:**
* `CallGraph` for JS with:
* `IsEntrypoint = true` for main and detected handlers.
* Node attributes include file path, line, component PURL.
### 5.2 Python
**Inputs:**
* Site-packages paths from SBOM.
* Entrypoint script (CMD/ENTRYPOINT).
* Framework heuristics (Django, Flask) from environment variables or common entrypoints.
**Core steps:**
1. Discover Python interpreter chain: not needed for pure static, but useful for heuristics.
2. Parse `.py` files of:
* App code.
* Vulnerable packages (per PURL).
3. Build module import graph (`import`, `from x import y`).
4. Function-level graph:
* Nodes for functions, methods, class constructors.
* Edges for call expressions; conservative for dynamic calls.
5. Entrypoints:
* Main script.
* WSGI callable (e.g., `application` in `wsgi.py`).
* Django URLconf -> view functions.
6. Map vulnerable symbols using `TargetSymbolPatterns` like `django.middleware.security.SecurityMiddleware.__call__`.
### 5.3 PHP
**Inputs:**
* Web root (from container image or conventional paths `/var/www/html`, `/app/public`, etc.).
* Composer metadata (`composer.json`, `vendor/`).
* Web server config if present (optional).
**Core steps:**
1. Discover front controllers (e.g., `index.php`, `public/index.php`).
2. Parse PHP files (again, via Tree-sitter or any suitable parser).
3. Resolve include/require chains to build file-level inclusion graph.
4. Build function/method graph:
* Functions, methods, class constructors.
* Calls with best-effort resolution for namespaced functions.
5. Entrypoints:
* Front controllers and router entrypoints (e.g., Symfony, Laravel detection).
6. Map vulnerable symbols (e.g., functions in certain vendor packages, particular methods).
---
## 6. Binary Analyzer & Symbolizer
Project: `StellaOps.Scanner.Analyzers.Binary` + `Symbolization` + `Cfg`.
### 6.1 Inputs
* All binaries and shared libraries in:
* `/usr/lib`, `/lib`, `/app/bin`, etc.
* SBOM link: each binary mapped to its component PURL when possible.
* Vulnerability signatures for native libs: function names, symbol names, fingerprints.
### 6.2 Symbolization
Module: `StellaOps.Scanner.Symbolization`
* Detect format: ELF, PE, Mach-O.
* For ELF/Mach-O:
* Parse symbol tables (`.symtab`, `.dynsym`).
* Parse DWARF (if present) to map functions to source files/lines.
* For PE:
* Parse PDB (if present) or export table.
* For stripped binaries:
* Run function boundary recovery (linear sweep + heuristic).
* Compute block/fn-level hashes for fingerprinting.
Output:
```csharp
/// <summary>
/// One recovered function in a binary: virtual-address range, optional symbol
/// name, a stable content hash for fingerprint matching, and source mapping
/// when debug info (DWARF/PDB) was available.
/// </summary>
public record ImageFunction(
string ImageId, // e.g., SHA256 of file
ulong StartVa,
uint Size,
string? SymbolName, // demangled if possible
string FnHash, // stable hash of bytes / CFG
string? SourceFile,
int? SourceLine);
```
### 6.3 CFG + Call graph
Module: `StellaOps.Scanner.Cfg`
* Disassemble `.text` using Capstone/Iced.x86.
* Build basic blocks and CFG.
* Identify:
* Direct calls (resolved).
* PLT/IAT indirections to shared libraries.
* Build `CallGraph` for binary functions:
* Entrypoints: `main`, exported functions, Go `main.main`, etc.
* Map application functions to library functions via PLT/IAT edges.
### 6.4 Linking vulnerabilities
* For each vulnerability affecting a native library (e.g., OpenSSL):
* Map to candidate binaries via SBOM + PURL.
* Within library image, find `ImageFunction`s matching:
* `SymbolName` patterns.
* `FnHash` / `BlockFingerprints` (for precise detection).
* Determine reachability:
* Starting from application entrypoints, traverse call graph to see if calls to vulnerable library function occur.
---
## 7. Reachability Engine & Lattice (Scanner.WebService)
Project: `StellaOps.Scanner.Reachability`
### 7.1 Inputs to engine
* Combined `CallGraph[]` (per language + binary).
* Vulnerability list (CVE, GHSA, etc.) with affected PURLs.
* Vulnerability signatures.
* Entrypoint hints:
* Container CMD/ENTRYPOINT.
* Detected HTTP handlers, WSGI/PSGI entrypoints, etc.
### 7.2 Algorithm steps
1. **Entrypoint expansion**
* Identify all `CallGraphNode` with `IsEntrypoint=true`.
* Add language-specific “framework entrypoints” (e.g., Express route dispatch, Django URL dispatch) when detected.
2. **Graph traversal**
* For each entrypoint node:
* BFS/DFS through edges.
* Maintain `reachable` bit on each node.
* For dynamic edges:
* Conservative: if target cannot be resolved, mark affected path as partially unknown and downgrade confidence.
3. **Vuln symbol resolution**
* For each vulnerability:
* For each vulnerable component PURL found in SBOM:
* Find candidate nodes whose `SymbolId` matches `TargetSymbolPatterns` / binary fingerprints.
* If none found:
* `FunctionNotPresent` (if the component's version range indicates it is vulnerable but we cannot find the symbol — low confidence).
* If found:
* Check `reachable` bit:
* If reachable by at least one entrypoint, `PresentReachable`.
* Else, `PresentNotReachable`.
4. **Confidence computation**
* Start from:
* `1.0` for direct match with explicit function name & static call.
* Lower for:
* Heuristic framework entrypoints.
* Dynamic calls.
* Fingerprint-only matches on stripped binaries.
* Example rule-of-thumb:
* direct static path only: 0.951.0.
* dynamic edges but symbol found: 0.70.9.
* symbol not found but version says vulnerable: 0.40.6.
5. **Lattice merge**
* Represent each CVE+component pair as a lattice element with states: `{affected, not_affected, unknown}`.
* Reachability engine produces a **local state**:
* `PresentReachable` → candidate `affected`.
* `PresentNotReachable` or `FunctionNotPresent` → candidate `not_affected`.
* `Unknown` → `unknown`.
* Merge with:
* Upstream vendor VEX (from Concelier).
* Policy overrides (e.g., “treat certain CVEs as affected unless vendor says otherwise”).
* Final state computed here (Scanner.WebService), not in Concelier or VEXer.
6. **Evidence output**
* For each vulnerability:
* Emit `ReachabilityEvidence` with:
* Status.
* Confidence.
* Method.
* Example entrypoint paths (for UX and audit).
* Persist this evidence alongside regular scan results.
---
## 8. Integration with SBOM & VEX
### 8.1 SBOM annotation
* Extend SBOM documents (CycloneDX / SPDX) with extra properties:
* CycloneDX:
* `component.properties`:
* `stellaops:reachability:status` = `present_reachable|present_not_reachable|function_not_present|unknown`
* `stellaops:reachability:confidence` = `0.0-1.0`
* SPDX:
* `Annotation` or `ExternalRef` with similar metadata.
### 8.2 OpenVEX generation
Module: `StellaOps.Vexer.Adapter.Reachability`
* For each `(vuln, component)` pair:
* Map to VEX statement:
* If `PresentReachable`:
* `status: affected`
* `justification: component_not_fixed` or similar.
* If `PresentNotReachable`:
* `status: not_affected`
* `justification: function_not_reachable`
* If `FunctionNotPresent`:
* `status: not_affected`
* `justification: component_not_present` or `function_not_present`
* If `Unknown`:
* `status: under_investigation` (configurable).
* Attach evidence via:
* `analysis` / `details` fields (link to internal evidence JSON or audit link).
* VEXer does not recalculate reachability; it uses the already computed decision + evidence.
---
## 9. Executable Containers & Offline Operation
### 9.1 Executable containers
* Analyzers run inside a dedicated Scanner worker container that has:
* .NET 10 runtime.
* Language runtimes if needed for parsing (Node, Python, PHP), or Tree-sitter-based parsing.
* Target image filesystem is mounted read-only under `/mnt/rootfs`.
* No network access (offline/air-gap).
* This satisfies “we will use executable containers” while keeping separation between:
* Target image (mount only).
* Analyzer container (StellaOps code).
### 9.2 Offline signature bundles
* Concelier periodically exports:
* Vulnerability database (CSAF/NVD).
* Vulnerability Signature Bank.
* Bundles are:
* DSSE-signed.
* Versioned (e.g., `signatures-2025-11-01.tar.zst`).
* Scanner uses:
* The bundle digest as part of the **Scan Manifest** for deterministic replay.
---
## 10. Determinism & Caching
### 10.1 Layer-level caching
* Key: `layerDigest + analyzerVersion + signatureBundleVersion`.
* Cache artifacts:
* CallGraph(s) per layer (for JS/Python/PHP code present in that layer).
* Symbolization results per binary file hash.
* For images sharing layers:
* Merge cached graphs instead of re-analyzing.
### 10.2 Deterministic scan manifest
For each scan, produce:
```json
{
"imageRef": "registry/app:1.2.3",
"imageDigest": "sha256:...",
"scannerVersion": "1.4.0",
"analyzerVersions": {
"js": "1.0.0",
"python": "1.0.0",
"php": "1.0.0",
"binary": "1.0.0"
},
"signatureBundleDigest": "sha256:...",
"callGraphDigest": "sha256:...", // canonical JSON hash
"reachabilityEvidenceDigest": "sha256:..."
}
```
This manifest can be signed (Authority module) and used for audits and replay.
---
## 11. Implementation Roadmap (Phased)
### Phase 0 Infrastructure & Binary presence
**Duration:** 1 sprint
* Set up `Scanner.Reachability` core types and interfaces.
* Implement:
* Basic Symbolizer for ELF + DWARF.
* Binary function catalog without CFG.
* Link a small set of CVEs to binary function presence via `SymbolName`.
* Expose minimal evidence:
* `PresentReachable`/`FunctionNotPresent` based only on presence (no call graph).
* Integrate with VEXer to emit `function_not_present` justifications.
**Success criteria:**
* For selected demo images with known vulnerable/patched OpenSSL, the scanner can:
* Distinguish images where vulnerable function is present vs. absent.
* Emit OpenVEX with correct `not_affected` when patched.
---
### Phase 1 JS/Python/PHP call graphs & basic reachability
**Duration:** 12 sprints
* Implement:
* `Scanner.Analyzers.JavaScript` with module + function call graph.
* `Scanner.Analyzers.Python` and `Scanner.Analyzers.Php` with basic graphs.
* Entrypoint detection:
* JS: main script from CMD, basic HTTP handlers.
* Python: main script + Django/Flask heuristics.
* PHP: front controllers.
* Implement core reachability algorithm (BFS/DFS).
* Implement simple `VulnerabilitySignature` that uses function names and file paths.
* Hook lattice engine in Scanner.WebService and integrate with:
* Concelier vulnerability feeds.
* VEXer.
**Success criteria:**
* For demo apps (Node, Django, Laravel):
* Identify vulnerable functions and mark them reachable/unreachable.
* Demonstrate noise reduction (some CVEs flagged as `not_affected`).
---
### Phase 2 Binary CFG & Fingerprinting, Improved Confidence
**Duration:** 12 sprints
* Extend Symbolizer & CFG for:
* Stripped binaries (function hashing).
* Shared libraries (PLT/IAT resolution).
* Implement `VulnerabilitySignature.BlockFingerprints` to distinguish patched vs vulnerable binary functions.
* Refine confidence scoring:
* Use fingerprint match quality.
* Consider presence/absence of debug info.
* Expand coverage:
* glibc, curl, zlib, OpenSSL, libxml2, etc.
**Success criteria:**
* For curated images:
* Confirm ability to differentiate patched vs vulnerable versions even when binaries are stripped.
* Reachability reflects true call paths across app→lib boundaries.
---
### Phase 3 Runtime hooks (optional), UX, and Hardening
**Duration:** 2+ sprints
* Add opt-in runtime confirmation:
* eBPF probes for function hits (Linux).
* Map runtime addresses back to `ImageFunction` via symbolization.
* Enhance console UX:
* Path explorer UI: show entrypoint → … → vulnerable function path.
* Evidence view with hash-based proofs.
* Hardening:
* Performance optimization for large images (parallel analysis, caching).
* Conservative fallbacks for dynamic language features.
**Success criteria:**
* For selected environments where runtime is allowed:
* Static reachability is confirmed by runtime traces in majority of cases.
* No significant performance regression on typical images.
---
## 12. How this satisfies your initial bullets
From your initial requirements:
1. **JavaScript, Python, PHP, binary**
→ Dedicated analyzers per language + binary symbolization/CFG, unified in `Scanner.Reachability`.
2. **Executable containers**
→ Analyzers run inside Scanners worker container, mounting the target image rootfs; no network access.
3. **Libraries usage call graph**
→ Call graphs map from entrypoints → app code → library functions; SBOM + PURLs tie functions to libraries.
4. **Reachability analysis**
→ BFS/DFS from entrypoints over per-language and binary graphs, with lattice-based merging in `Scanner.WebService`.
5. **JSON + PURLs**
→ All evidence is JSON with PURL-tagged components; SBOM is annotated, and VEX statements reference those PURLs.
---
If you like, next step can be: I draft concrete C# interface definitions (including some initial Tree-sitter integration stubs for JS/Python/PHP) and a skeleton of the `ReachabilityPlan` and `ReachabilityEngine` classes that you can drop into the monorepo.

View File

@@ -0,0 +1,719 @@
Here's a crisp idea you can drop straight into StellaOps: treat "unknowns" as first-class data, not noise.
---
# Unknowns Registry — turning uncertainty into signals
**Why:** Scanners and VEX feeds miss things (ambiguous package IDs, unverifiable hashes, orphaned layers, missing SBOM edges, runtime-only artifacts). Today these get logged and forgotten. If we **structure** them, downstream agents can reason about risk and shrink blast radius proactively.
**What it is:** A small service + schema that records every uncertainty with enough context for later inference.
## Core model (v0)
```json
{
"unknown_id": "unk:sha256:…",
"observed_at": "2025-11-18T12:00:00Z",
"provenance": {
"source": "Scanner.Analyzer.DotNet|Sbomer|Signals|Vexer",
"host": "runner-42",
"scan_id": "scan:…"
},
"scope": {
"artifact": { "type": "oci.image", "ref": "registry/app@sha256:…" },
"subpath": "/app/bin/Contoso.dll",
"phase": "build|scan|runtime"
},
"unknown_type": "identity_gap|version_conflict|hash_mismatch|missing_edge|runtime_shadow|policy_undecidable",
"evidence": {
"raw": "nuget id 'Serilog' but assembly name 'Serilog.Core'",
"signals": ["sym:Serilog.Core.Logger", "procopen:/app/agent"]
},
"transitive": {
"depth": 2,
"parents": ["pkg:nuget/Serilog@?"],
"children": []
},
"confidence": { "p": 0.42, "method": "bayes-merge|rule" },
"exposure_hints": {
"surface": ["logging pipeline", "startup path"],
"runtime_hits": 3
},
"status": "open|triaged|suppressed|resolved",
"labels": ["reachability:possible", "sbom:incomplete"]
}
```
## Categorize by three axes
* **Provenance** (where it came from): Scanner vs Sbomer vs Vexer vs Signals.
* **Scope** (what it touches): image/layer/file/symbol/runtimeproc/policy.
* **Transitive depth** (how far from an entry point): 0 = direct, 1..N via deps.
## How agents use it
* **Cartographer**: includes unknown edges in the graph with special weight; lets Policy/Lattice downrank vulnerable nodes near high-impact unknowns.
* **Remedy Assistant (Zastava)**: proposes micro-probes ("add EventPipe/JFR tap for X symbol") or build-time assertions ("pin Serilog>=3.1, regenerate SBOM").
* **Scheduler**: prioritizes scans where unknown density × asset criticality is highest.
## Minimal API (idempotent, additive)
* `POST /unknowns/ingest` — upsert by `unknown_id` (hash of type+scope+evidence).
* `GET /unknowns?artifact=…&status=open` — list for a target.
* `POST /unknowns/:id/triage` — set status/labels, attach rationale.
* `GET /metrics` — density by artifact/namespace/unknown_type.
*All additive; no versioning required. Repeat calls with the same payload are no-ops.*
## Scoring hook (into your lattice)
* Add a **“Unknowns Pressure”** term:
`risk = base ⊕ (α * density_depth≤1) ⊕ (β * runtime_shadow) ⊕ (γ * policy_undecidable)`
* Gate “green” only if `density_depth≤1 == 0` **or** compensating controls active.
## Storage & plumbing
* **Store:** append-only KV (Badger/Rocks) + Graph overlay (SQLite/Neo4j—your call).
* **Emit:** DSSE-signed "Unknowns Attestation" per scan for replayable audits.
* **UI:** heatmap per artifact (unknowns by type × depth), drill-down to evidence.
## First 2-day slice
1. Define `unknown_type` enum + hashable `unknown_id`.
2. Wire Scanner/Sbomer/Vexer to emit unknowns (start with: identity_gap, missing_edge).
3. Persist + expose `/metrics` (density, by depth and type).
4. In Policy Studio, add the Unknowns Pressure term with default α/β/γ.
If you want, I'll draft the exact protobuf/JSON schema and drop in .NET 10 record types + an EF model, plus a tiny CLI for querying and a Grafana panel JSON.
I will treat “it” as the whole vision behind **Pushing Binary Reachability Toward True Determinism** inside Stella Ops: function-/symbol-level reachability for binaries and higher-level languages, wired into Scanner, Cartographer, Signals, and VEX.
Below is an implementation-oriented architecture plan you can hand directly to agents.
---
## 1. Scope, goals, and non-negotiable invariants
### 1.1. Scope
Deliver a deterministic reachability pipeline for containers that:
1. Builds **call graphs** and **symbol usage maps** for:
* Native binaries (ELF, PE, Mach-O) — primary for this branch.
* Scripted/VM languages later: JS, Python, PHP (as part of the same architecture).
2. Maps symbols and functions to:
* Packages (purls).
* Vulnerabilities (CVE → symbol/function list via Concelier/VEX data).
3. Computes **deterministic reachability states** for each `(vulnerability, artifact)` pair.
4. Emits:
* Machine-readable JSON (with `purl`s).
* Graph overlays for Cartographer.
* Inputs for the lattice/trust engine and VEXer/Excitor.
### 1.2. Invariants
* **Deterministic replay**: Given the same:
* Image digest(s),
* Analyzer versions,
* Config + policy,
* Runtime trace inputs (if any),
the same reachability outputs must be produced, bit-for-bit.
* **Idempotent, additive APIs**:
* No versioning of endpoints, only additive/optional fields.
* Same request = same response, no side effects besides storing/caching.
* **Lattice logic runs in `Scanner.WebService`**:
* All “reachable/unreachable/unknown” and confidence merging lives in Scanner, not Concelier/Excitors.
* **Preserve provenance at source**:
* Concelier and Excitors preserve provenance and do not “massage” reachability; they only consume it.
* **Offline, air-gap friendly**:
* No mandatory external calls; dependency on local analyzers and local advisory/VEX cache.
---
## 2. High-level pipeline
From container image to reachability output:
1. **Image enumeration**
`Scanner.WebService` receives an image ref or tarball and spawns an analysis run.
2. **Binary discovery & classification**
Binary analyzers detect ELF/PE/Mach-O + main interpreters (python, node, php) and scripts.
3. **Symbolization & call graph building**
* For each binary/module, we produce:
* Symbol table (exported + imported).
* Call graph edges (function-level where possible).
* For dynamic languages, we later plug in appropriate analyzers.
4. **Symbol→package mapping**
* Match symbols to packages and `purl`s using:
* Known vendor symbol maps (from Concelier / Feedser).
* Heuristics, path patterns, build IDs.
5. **Vulnerability→symbol mapping**
* From Concelier/VEX/CSAF: map each CVE to the set of symbols/functions it affects.
6. **Reachability solving**
* For each `(CVE, artifact)`:
* Determine presence and reachability of affected symbols from known entrypoints.
* Merge static call graph and runtime signals (if available) via deterministic lattice.
7. **Output & storage**
* Reachability JSON with purls and confidence.
* Graph overlay into Cartographer.
* Signals/events for downstream scoring.
* DSSE-signed reachability attestation for replay/audit.
---
## 3. Component architecture
### 3.1. New and extended services
1. **`StellaOps.Scanner.WebService` (extended)**
* Orchestration of reachability analyses.
* Lattice/merging engine.
* Idempotent reachability APIs.
2. **`StellaOps.Scanner.Analyzers.Binary.*` (new)**
* `…Binary.Discovery`: file type detection, ELF/PE/Mach-O parsing.
* `…Binary.Symbolizer`: resolves symbols, imports/exports, relocations.
* `…Binary.CallGraph.Native`: builds call graphs where possible (via disassembly/CFG).
* `…Binary.CallGraph.DynamicStubs`: heuristics for indirect calls, PLT/GOT, vtables.
3. **`StellaOps.Scanner.Analyzers.Script.*` (future extension)**
* `…Lang.JavaScript.CallGraph`
* `…Lang.Python.CallGraph`
* `…Lang.Php.CallGraph`
* These emit the same generic call-graph IR.
4. **`StellaOps.Reachability.Engine` (within Scanner.WebService)**
* Normalizes all call graphs into a common IR.
* Merges static and dynamic evidence.
* Computes reachability states and scores.
5. **`StellaOps.Cartographer.ReachabilityOverlay` (new overlay module)**
* Stores per-artifact call graphs and reachability tags.
* Provides graph queries for UI and policy tools.
6. **`StellaOps.Signals` (extended)**
* Ingests runtime call traces (e.g., from EventPipe/JFR/ebpf in other branches).
* Feeds function-hit events into the Reachability Engine.
7. **Unknowns Registry integration (optional but recommended)**
* Stores unresolved symbol/package mappings and incomplete edges as `unknowns`.
* Used to adjust risk scores (“Unknowns Pressure”) when binary analysis is incomplete.
---
## 4. Detailed design by layer
### 4.1. Static analysis layer (binaries)
#### 4.1.1. Binary discovery
Module: `StellaOps.Scanner.Analyzers.Binary.Discovery`
* Inputs:
* Per-image file list (from existing Scanner).
* Byte slices of candidate binaries.
* Logic:
* Detect ELF/PE/Mach-O via magic bytes, not extensions.
* Classify as:
* Main executable
* Shared library
* Plugin/module
* Output:
* `binary_manifest.json` per image:
```json
{
"image_ref": "registry/app@sha256:…",
"binaries": [
{
"id": "bin:elf:/usr/local/bin/app",
"path": "/usr/local/bin/app",
"format": "elf",
"arch": "x86_64",
"role": "executable"
}
]
}
```
#### 4.1.2. Symbolization
Module: `StellaOps.Scanner.Analyzers.Binary.Symbolizer`
* Uses:
* ELF/PE/Mach-O parsers (internal or third-party), no external calls.
* Output per binary:
```json
{
"binary_id": "bin:elf:/usr/local/bin/app",
"build_id": "buildid:abcd…",
"exports": ["pkg1::ClassA::method1", "..."],
"imports": ["openssl::EVP_EncryptInit_ex", "..."],
"sections": { "text": { "va": "0x...", "size": 12345 } }
}
```
* Writes unresolved symbol sets to Unknowns Registry when:
* Imports cannot be tied to known packages or symbols.
#### 4.1.3. Call graph construction
Module: `StellaOps.Scanner.Analyzers.Binary.CallGraph.Native`
* Core tasks:
* Build control-flow graphs (CFG) for each function via:
* Disassembly.
* Basic block detection.
* Identify direct calls (`call func`) and indirect calls (function pointers, vtables).
* IR model:
```json
{
"binary_id": "bin:elf:/usr/local/bin/app",
"functions": [
{ "fid": "func:app::main", "va": "0x401000", "size": 128 },
{ "fid": "func:libssl::EVP_EncryptInit_ex", "external": true }
],
"edges": [
{ "caller": "func:app::main", "callee": "func:app::init_config", "type": "direct" },
{ "caller": "func:app::main", "callee": "func:libssl::EVP_EncryptInit_ex", "type": "import" }
]
}
```
* Edge confidence:
* `type: direct|import|indirect|heuristic`
* Used later by the lattice.
#### 4.1.4. Entry point inference
* Sources:
* ELF `PT_INTERP`, PE `AddressOfEntryPoint`.
* Application-level hints (known frameworks, service main methods).
* Container metadata (CMD, ENTRYPOINT).
* Output:
```json
{
"binary_id": "bin:elf:/usr/local/bin/app",
"entrypoints": ["func:app::main"]
}
```
> Note: For JS/Python/PHP, equivalent analyzers will later define module entrypoints (`index.js`, `wsgi_app`, `public/index.php`).
---
### 4.2. Symbol-to-package and CVE-to-symbol mapping
#### 4.2.1. Symbol→package mapping
Module: `StellaOps.Reachability.Mapping.SymbolToPurl`
* Inputs:
* Binary symbolization outputs.
* Local mapping DB in Concelier (vendor symbol maps, debug info, name patterns).
* File path + container context (`/usr/lib/...`, `/site-packages/...`).
* Output:
```json
{
"symbol": "libssl::EVP_EncryptInit_ex",
"purl": "pkg:apk/alpine/openssl@3.1.5-r2",
"confidence": 0.93,
"method": "vendor_map+path_heuristic"
}
```
* Unresolved / ambiguous symbols:
* Stored as `unknowns` of type `identity_gap`.
#### 4.2.2. CVE→symbol mapping
Responsibility: Concelier + its advisory ingestion.
* For each vulnerability:
```json
{
"cve_id": "CVE-2025-12345",
"purl": "pkg:apk/alpine/openssl@3.1.5-r2",
"affected_symbols": [
"libssl::EVP_EncryptInit_ex",
"libssl::EVP_EncryptUpdate"
],
"source": "vendor_vex",
"confidence": 1.0
}
```
* Reachability Engine consumes this mapping read-only.
---
### 4.3. Reachability Engine
Module: `StellaOps.Reachability.Engine` (in Scanner.WebService)
#### 4.3.1. Core data model
Per `(artifact, cve, purl)`:
```json
{
"artifact": { "type": "oci.image", "ref": "registry/app@sha256:…" },
"cve_id": "CVE-2025-12345",
"purl": "pkg:apk/alpine/openssl@3.1.5-r2",
"symbols": [
{
"symbol": "libssl::EVP_EncryptInit_ex",
"static_presence": "present|absent|unknown",
"static_reachability": "reachable|unreachable|unknown",
"runtime_hits": 3,
"runtime_reachability": "observed|not_observed|unknown"
}
],
"reachability_state": "confirmed_reachable|statically_reachable|present_not_reachable|not_present|unknown",
"confidence": {
"p": 0.87,
"evidence": ["static_callgraph", "runtime_trace", "symbol_map"],
"unknowns_pressure": 0.12
}
}
```
#### 4.3.2. Lattice / state machine
Define a deterministic lattice over states:
* `NOT_PRESENT`
* `PRESENT_NOT_REACHABLE`
* `STATICALLY_REACHABLE`
* `RUNTIME_OBSERVED`
And “unknown” flags overlayed when evidence is missing.
Merging rules (simplified):
* If `NOT_PRESENT` and no conflicting evidence → `NOT_PRESENT`.
* If at least one affected symbol is on a static path from any entrypoint → `STATICALLY_REACHABLE`.
* If symbol observed at runtime → `RUNTIME_OBSERVED` (top state).
* If symbol present in binary but not on any static path → `PRESENT_NOT_REACHABLE`, unless unknown edges exist near it (then downgrade with lower confidence).
* Unknowns Registry entries near affected symbols increase `unknowns_pressure` and may push from `NOT_PRESENT` to `UNKNOWN`.
Implementation: pure functional merge functions inside Scanner.WebService:
```csharp
ReachabilityState Merge(ReachabilityState a, ReachabilityState b);
ReachabilityState FromEvidence(StaticEvidence s, RuntimeEvidence r, UnknownsPressure u);
```
#### 4.3.3. Deterministic inputs
To guarantee replay:
* Build **Reachability Plan Manifest** per run:
```json
{
"plan_id": "reach:sha256:…",
"scanner_version": "1.4.0",
"analyzers": {
"binary_discovery": "1.0.0",
"binary_symbolizer": "1.1.0",
"binary_callgraph": "1.2.0"
},
"inputs": {
"image_digest": "sha256:…",
"runtime_trace_files": ["signals:run:2025-11-18T12:00:00Z"],
"config": {
"assume_indirect_calls": "conservative",
"max_call_depth": 10
}
}
}
```
* DSSE-sign the plan + result.
---
### 4.4. Storage and graph overlay
#### 4.4.1. Reachability store
Backend: re-use existing Scanner/Cartographer storage stack (e.g., Postgres or SQLite + blob store).
Tables/collections:
* `reachability_runs`
* `plan_id`, `image_ref`, `created_at`, `scanner_version`.
* `reachability_results`
* `plan_id`, `cve_id`, `purl`, `state`, `confidence_p`, `unknowns_pressure`, `payload_json`.
* Indexes on `(image_ref, cve_id)`, `(image_ref, purl)`.
#### 4.4.2. Cartographer overlay
Edges:
* `IMAGE` → `BINARY` → `FUNCTION` → `PACKAGE` → `CVE`
* Extra property on `IMAGE -[AFFECTED_BY]-> CVE`:
* `reachability_state`
* `reachability_plan_id`
Enables queries:
* “Show me all CVEs with `STATICALLY_REACHABLE` in this namespace.”
* “Show me binaries with high density of reachable crypto CVEs.”
---
### 4.5. APIs (idempotent, additive)
#### 4.5.1. Trigger reachability
`POST /reachability/runs`
Request:
```json
{
"artifact": { "type": "oci.image", "ref": "registry/app@sha256:…" },
"config": {
"include_languages": ["binary"],
"max_call_depth": 10,
"assume_indirect_calls": "conservative"
}
}
```
Response:
```json
{ "plan_id": "reach:sha256:…" }
```
* Idempotent key: `(image_ref, config_hash)`. Subsequent calls return same `plan_id`.
#### 4.5.2. Fetch results
`GET /reachability/runs/:plan_id`
```json
{
"plan": { /* reachability plan manifest */ },
"results": [
{
"cve_id": "CVE-2025-12345",
"purl": "pkg:apk/alpine/openssl@3.1.5-r2",
"reachability_state": "static_reachable",
"confidence": { "p": 0.84, "unknowns_pressure": 0.1 }
}
]
}
```
#### 4.5.3. Per-CVE view for VEXer/Excitor
`GET /reachability/by-cve?artifact=…&cve_id=…`
* Returns filtered result for downstream VEX creation.
All APIs are **read-only** except for the side effect of storing/caching runs.
---
## 5. Interaction with other Stella Ops modules
### 5.1. Concelier
* Provides:
* CVE→purl→symbol mapping.
* Vendor VEX statements indicating affected functions.
* Consumes:
* Nothing from reachability directly; Scanner/WebService passes reachability summary to VEXer/Excitor which merges with vendor statements.
### 5.2. VEXer / Excitor
* Input:
* For each `(artifact, cve)`:
* Reachability state.
* Confidence.
* Logic:
* Translate states to VEX statements:
* `NOT_PRESENT` → `not_affected`
* `PRESENT_NOT_REACHABLE` → `not_affected` (with justification “code not reachable according to analysis”)
* `STATICALLY_REACHABLE` → `affected`
* `RUNTIME_OBSERVED` → `affected` (higher severity)
* Attach determinism proof:
* Plan ID + DSSE of reachability run.
### 5.3. Signals
* Provides:
* Function hit events: `(binary_id, function_id, timestamp)` aggregated per image.
* Reachability Engine:
* Marks `runtime_hits` and state `RUNTIME_OBSERVED` for symbols with hits.
* Unknowns:
* If runtime sees hits in functions with no static edges to entrypoints (or unmapped symbols), these produce Unknowns and increase `unknowns_pressure`.
### 5.4. Unknowns Registry
* From reachability pipeline, create Unknowns when:
* Symbol→package mapping is ambiguous.
* CVE→symbol mapping exists, but symbol cannot be found in binaries.
* Call graph has indirect calls that cannot be resolved.
* The “Unknowns Pressure” term is fed into:
* Reachability confidence.
* Global risk scoring (Trust Algebra Studio).
---
## 6. Implementation phases and engineering plan
### Phase 0 — Scaffolding & manifests (1 sprint)
* Create:
* `StellaOps.Reachability.Engine` skeleton.
* Reachability Plan Manifest schema.
* Reachability Run + Result persistence.
* Add `/reachability/runs` and `/reachability/runs/:plan_id` endpoints, returning mock data.
* Wire DSSE attestation generation for reachability results (even if payload is empty).
### Phase 1 — Binary discovery + symbolization (1–2 sprints)
* Implement `Binary.Discovery` and `Binary.Symbolizer`.
* Feed symbol tables into Reachability Engine as “presence-only evidence”:
* States: `NOT_PRESENT` vs `PRESENT_NOT_REACHABLE` vs `UNKNOWN`.
* Integrate with Concelier's CVE→purl mapping (no symbol-level yet):
* For CVEs affecting a package present in the image, mark as `PRESENT_NOT_REACHABLE`.
* Emit Unknowns for unresolved binary roles and ambiguous package mapping.
Deliverable: package-level reachability with deterministic manifests.
### Phase 2 — Binary call graphs & entrypoints (2–3 sprints)
* Implement `Binary.CallGraph.Native`:
* CFG + direct call edges.
* Implement entrypoint inference from binary + container ENTRYPOINT/CMD.
* Add static reachability algorithm:
* DFS/BFS from entrypoints through call graph.
* Mark affected symbols as reachable if found on paths.
* Extend Concelier to ingest symbol-aware vulnerability metadata (for pilots; can be partial).
Deliverable: function-level static reachability for native binaries where symbol maps exist.
### Phase 3 — Runtime integration (2 sprints, may be in parallel workstream)
* Integrate Signals runtime evidence:
* Define schema for function hit events.
* Add ingestion path into Reachability Engine.
* Update lattice:
* Promote symbols to `RUNTIME_OBSERVED` when hits exist.
* Extend DSSE attestation to reference runtime evidence URIs (hashes of trace inputs).
Deliverable: static + runtime-confirmed reachability.
### Phase 4 — Unknowns & pressure (1 sprint)
* Wire Unknowns Registry:
* Emit unknowns from Symbolizer and CallGraph (identity gaps, missing edges).
* Compute `unknowns_pressure` per `(artifact, cve)` as density of unknowns near affected symbols.
* Adjust confidence calculation in Reachability Engine.
* Expose unknowns metrics in API and Cartographer.
Deliverable: explicit modelling of uncertainty, feeding into trust/lattice.
### Phase 5 — Language extensions (JS/Python/PHP) (ongoing)
* Implement per-language call-graph analyzers creating the same IR as binary.
* Extend symbol→purl mapping for these ecosystems (npm, PyPI, Packagist).
* Update reachability solver to include multi-language edges (e.g., Python calling into native modules).
---
## 7. Minimal contracts for agents
To hand off to agents, you can codify:
1. **IR schemas**
* Call graph IR.
* Reachability Result JSON.
* Reachability Plan Manifest.
2. **API contracts**
* `POST /reachability/runs`
* `GET /reachability/runs/:plan_id`
* `GET /reachability/by-cve`
3. **Module boundaries**
* `Scanner.Analyzers.Binary.*` produce IR only; NO network calls.
* `Reachability.Engine` is the only place where lattice logic lives.
* `Concelier` is read-only for reachability; no custom logic there.
4. **Determinism practices**
* All algorithmic randomness is banned; where unavoidable, seed with values derived from plan_id.
* All external inputs must be listed in the Plan Manifest.
If you like, next step I can draft:
* Concrete C# record types for the IRs.
* A small pseudo-code implementation of the lattice functions and static reachability DFS.
* A proposed directory layout under `src/StellaOps.Scanner` and `src/StellaOps.Cartographer`.

View File

@@ -0,0 +1,635 @@
Here's a simple, cheap way to sanity-check your vuln function recovery without fancy ground truth: **build "patch oracles."**
---
### What it is (in plain words)
Take a known CVE and compile two **tiny** binaries from the same source:
* **Vulnerable** commit/revision
* **Fixed** commit/revision
Then diff the discovered functions + call edges between the two. If your analyzer can't see the symbol (or guard) the patch adds/removes/tightens, your recall is suspect.
---
### Why it works
Patches for real CVEs usually:
* add/remove a **function** (e.g., `validate_len`)
* change a **call site** (new guard before `memcpy`)
* tweak **control flow** (early return on bounds check)
Those are precisely the things your function recovery / call-graph pass should surface—even on stripped ELFs. If they don't move in your graph, you've got blind spots.
---
### Minimal workflow (5 steps)
1. **Pick a CVE** with a clean, public fix (e.g., OpenSSL/zlib/busybox).
2. **Isolate the patch** (git range or cherry-pick) and craft a *tiny harness* that calls the affected code path.
3. **Build both** with the same toolchain/flags; produce **stripped** ELFs (`-s`) to mimic production.
4. **Run your discovery** on both:
* function list, demangled where possible
* call edges (A→B), basic blocks (optional)
5. **Diff the graphs**: look for the new guard function, removed unsafe call, or altered edge count.
---
### A tiny “oracle spec” (drop-in YAML for your test runner)
```yaml
cve: CVE-YYYY-XXXX
target: libfoo 1.2.3
build:
cc: clang
cflags: [-O2, -fno-omit-frame-pointer]
ldflags: []
strip: true
evidence:
expect_functions_added: [validate_len]
expect_functions_removed: [unsafe_copy] # optional
expect_call_added:
- caller: foo_parse
callee: validate_len
expect_call_removed:
- caller: foo_parse
callee: memcpy
tolerances:
allow_unresolved_symbols: 0
allow_extra_funcs: 2
```
---
### Quick harness pattern (C)
```c
// before: foo_parse -> memcpy(buf, src, len);
// after : foo_parse -> validate_len(len) -> memcpy(...)
extern int foo_parse(const char*);
int main(int argc, char** argv) {
const char* in = argc > 1 ? argv[1] : "AAAA";
return foo_parse(in);
}
```
---
### What to flag as a failure
* Expected **function not discovered** (e.g., `validate_len` missing).
* Expected **edge not present** (`foo_parse → validate_len` absent).
* **No CFG change** where patch clearly adds a guard/early return.
---
### Where this plugs into StellaOps
* Put these oracles under `Scanner/tests/patch-oracles/*` per language.
* Run them in CI for **.NET/JVM/C/C++/Go/Rust** analyzers.
* Use them to gate any changes to symbolization, demangling, or call-graph building.
* Record per-analyzer **recall deltas** when you tweak heuristics or switch disassemblers.
---
If you want, I can scaffold the first three oracles (e.g., zlib overflow fix, OpenSSL length check, BusyBox `ash` patch) with ready-to-run Makefiles and expected graph diffs.
Understood — let us turn the “patch oracle” idea into something you can actually drop into the Stella Ops repo and CI.
I will walk through:
1. How to structure this inside the monorepo
2. How to build one oracle end-to-end (C/C++ example)
3. How to do the same for .NET/JVM
4. How to automate running and asserting them
5. Practical rules and pitfalls so these stay stable and useful
---
## 1. Where this lives in Stella Ops
A simple, language-agnostic layout that will scale:
```text
src/
StellaOps.Scanner/
... # your scanner code
StellaOps.Scanner.Tests/ # existing tests (if any)
PatchOracles/
c/
CVE-YYYY-XXXX-<short-name>/
src/
build.sh
oracle.yml
README.md
cpp/
...
dotnet/
CVE-YYYY-XXXX-<short-name>/
src/
build.ps1
oracle.yml
README.md
jvm/
...
go/
...
rust/
...
tools/
scanner-oracle-runner/ # tiny runner (C# console or bash)
```
Key principles:
* Each CVE/test case is **self-contained** (its own folder with sources, build script, oracle.yml).
* Build scripts produce **two binaries/artifacts**: `vuln` and `fixed`.
* `oracle.yml` describes: how to build, what to scan, and what differences to expect in Scanner's call graph/function list.
---
## 2. How to build a single patch oracle (C/C++)
Think of a patch oracle as: “Given these two binaries, Scanner must see specific changes in functions and call edges.”
### 2.1. Step-by-step workflow
For one C/C++ CVE:
1. **Pick & freeze the patch**
* Choose a small, clean CVE in a library with easily buildable code (zlib, OpenSSL, BusyBox, etc.).
* Identify commit `A` (vulnerable) and commit `B` (fixed).
* Extract only the minimal sources needed to build the affected function + a harness into `src/`.
2. **Create a minimal harness**
Example: patch adds `validate_len` and guards a `memcpy` in `foo_parse`.
```c
// src/main.c
#include <stdio.h>
int foo_parse(const char* in); // from the library code under test
int main(int argc, char** argv) {
const char* in = (argc > 1) ? argv[1] : "AAAA";
return foo_parse(in);
}
```
Under `src/`, you keep two sets of sources:
```text
src/
vuln/
foo.c # vulnerable version
api.h
main.c
fixed/
foo.c # fixed version (adds validate_len, changes calls)
api.h
main.c
```
3. **Provide a deterministic build script**
Example `build.sh`:
```bash
#!/usr/bin/env bash
set -euo pipefail
CC="${CC:-clang}"
CFLAGS="${CFLAGS:- -O2 -fno-omit-frame-pointer -g0}"
LDFLAGS="${LDFLAGS:- }"
build_one() {
local name="$1" # vuln or fixed
mkdir -p build
${CC} ${CFLAGS} src/${name}/*.c ${LDFLAGS} -o build/${name}
# Strip symbols to simulate production
strip build/${name}
}
build_one "vuln"
build_one "fixed"
```
Guidelines:
* Fix the toolchain: either run this inside a Docker image (e.g., `debian:bookworm` with specific `clang` version) or at least document required versions in `README.md`.
* Always build both artifacts with **identical flags**; the only difference should be the code change.
* Use `strip` to ensure Scanner doesn't accidentally rely on debug symbols.
4. **Define the oracle (what must change)**
You define expectations based on the patch:
* Functions added/removed/renamed.
* New call edges (e.g., `foo_parse -> validate_len`).
* Removed call edges (e.g., `foo_parse -> memcpy`).
* Optionally: new basic blocks, conditional branches, or early returns.
A practical `oracle.yml` for this case:
```yaml
cve: CVE-YYYY-XXXX
name: zlib_len_guard_example
language: c
toolchain:
cc: clang
cflags: "-O2 -fno-omit-frame-pointer -g0"
ldflags: ""
build:
script: "./build.sh"
artifacts:
vulnerable: "build/vuln"
fixed: "build/fixed"
scan:
scanner_cli: "dotnet run --project ../../StellaOps.Scanner.Cli"
# If you have a Dockerized scanner, you could do:
# scanner_cli: "docker run --rm -v $PWD:/work stellaops/scanner:dev"
args:
- "--format=json"
- "--analyzers=native"
timeout_seconds: 120
expectations:
functions:
must_exist_in_fixed:
- name: "validate_len"
must_not_exist_in_vuln:
- name: "validate_len"
calls:
must_add:
- caller: "foo_parse"
callee: "validate_len"
must_remove:
- caller: "foo_parse"
callee: "memcpy"
tolerances:
allow_unresolved_symbols: 0
allow_extra_functions: 5
allow_missing_calls: 0
```
5. **Connect Scanner output to the oracle**
Assume your Scanner CLI produces something like:
```json
{
"binary": "build/fixed",
"functions": [
{ "name": "foo_parse", "address": "0x401000" },
{ "name": "validate_len", "address": "0x401080" },
...
],
"calls": [
{ "caller": "foo_parse", "callee": "validate_len" },
{ "caller": "validate_len", "callee": "memcpy" }
]
}
```
Your oracle-runner will:
* Run scanner on `vuln``vuln.json`
* Run scanner on `fixed``fixed.json`
* Compare each expectation in `oracle.yml` against `vuln.json` and `fixed.json`
Pseudo-logic for a function expectation:
```csharp
bool HasFunction(JsonElement doc, string name) =>
doc.GetProperty("functions")
.EnumerateArray()
.Any(f => f.GetProperty("name").GetString() == name);
bool HasCall(JsonElement doc, string caller, string callee) =>
doc.GetProperty("calls")
.EnumerateArray()
.Any(c =>
c.GetProperty("caller").GetString() == caller &&
c.GetProperty("callee").GetString() == callee);
```
The runner will produce a small report, per oracle:
```text
[PASS] CVE-YYYY-XXXX zlib_len_guard_example
+ validate_len appears only in fixed → OK
+ foo_parse → validate_len call added → OK
+ foo_parse → memcpy call removed → OK
```
If anything fails, it prints the mismatches and exits with non-zero code so CI fails.
---
## 3. Implementing the oracle runner (practical variant)
You can implement this either as:
* A standalone C# console (`StellaOps.Scanner.PatchOracleRunner`), or
* A set of xUnit tests that read `oracle.yml` and run dynamically.
### 3.1. Console runner skeleton (C#)
High-level structure:
```text
src/tools/scanner-oracle-runner/
Program.cs
Oracles/
(symlink or reference to src/StellaOps.Scanner.Tests/PatchOracles)
```
Core responsibilities:
1. Discover all `oracle.yml` files under `PatchOracles/`.
2. For each:
* Run the `build` script.
* Run the scanner on both artifacts.
* Evaluate expectations.
3. Aggregate results and exit with appropriate status.
Pseudo-code outline:
```csharp
static int Main(string[] args)
{
var root = args.Length > 0 ? args[0] : "src/StellaOps.Scanner.Tests/PatchOracles";
var oracleFiles = Directory.GetFiles(root, "oracle.yml", SearchOption.AllDirectories);
var failures = new List<string>();
foreach (var oracleFile in oracleFiles)
{
var result = RunOracle(oracleFile);
if (!result.Success)
{
failures.Add($"{result.Name}: {result.FailureReason}");
}
}
if (failures.Any())
{
Console.Error.WriteLine("Patch oracle failures:");
foreach (var f in failures) Console.Error.WriteLine(" - " + f);
return 1;
}
Console.WriteLine("All patch oracles passed.");
return 0;
}
```
`RunOracle` does:
* Deserialize YAML (e.g., via `YamlDotNet`).
* `Process.Start` for `build.script`.
* `Process.Start` for `scanner_cli` twice (vuln/fixed).
* Read/parse JSON outputs.
* Run checks `functions.must_*` and `calls.must_*`.
This is straightforward plumbing code; once built, adding a new patch oracle is just adding a folder + `oracle.yml`.
---
## 4. Managed (.NET / JVM) patch oracles
Exact same concept, slightly different mechanics.
### 4.1. .NET example
Directory:
```text
PatchOracles/
dotnet/
CVE-2021-XXXXX-systemtextjson/
src/
vuln/
Example.sln
Api/...
fixed/
Example.sln
Api/...
build.ps1
oracle.yml
```
`build.ps1` (PowerShell, simplified):
```powershell
param(
[string]$Configuration = "Release"
)
$ErrorActionPreference = "Stop"
function Build-One([string]$name) {
Push-Location "src/$name"
dotnet clean
dotnet publish -c $Configuration -p:DebugType=None -p:DebugSymbols=false -o ../../build/$name
Pop-Location
}
New-Item -ItemType Directory -Force -Path "build" | Out-Null
Build-One "vuln"
Build-One "fixed"
```
`oracle.yml`:
```yaml
cve: CVE-2021-XXXXX
name: systemtextjson_escape_fix
language: dotnet
build:
script: "pwsh ./build.ps1"
artifacts:
vulnerable: "build/vuln/Api.dll"
fixed: "build/fixed/Api.dll"
scan:
scanner_cli: "dotnet run --project ../../StellaOps.Scanner.Cli"
args:
- "--format=json"
- "--analyzers=dotnet"
timeout_seconds: 120
expectations:
methods:
must_exist_in_fixed:
- "Api.JsonHelper::EscapeString"
must_not_exist_in_vuln:
- "Api.JsonHelper::EscapeString"
calls:
must_add:
- caller: "Api.Controller::Handle"
callee: "Api.JsonHelper::EscapeString"
tolerances:
allow_missing_calls: 0
allow_extra_methods: 10
```
Scanner's .NET analyzer should produce method identifiers in a stable format (e.g., `Namespace.Type::Method(Signature)`), which you then use in the oracle.
### 4.2. JVM example
Similar structure, but artifacts are JARs:
```yaml
build:
script: "./gradlew :app:assemble"
artifacts:
vulnerable: "app-vuln.jar"
fixed: "app-fixed.jar"
scan:
scanner_cli: "dotnet run --project ../../StellaOps.Scanner.Cli"
args:
- "--format=json"
- "--analyzers=jvm"
```
Expectations then refer to methods like `com.example.JsonHelper.escapeString:(Ljava/lang/String;)Ljava/lang/String;`.
---
## 5. Wiring into CI
You can integrate this in your existing pipeline (GitLab Runner / Gitea / etc.) as a separate job.
Example CI job skeleton (GitLab-like YAML for illustration):
```yaml
patch-oracle-tests:
stage: test
image: mcr.microsoft.com/dotnet/sdk:10.0
script:
- dotnet build src/StellaOps.Scanner/StellaOps.Scanner.csproj -c Release
- dotnet build src/tools/scanner-oracle-runner/scanner-oracle-runner.csproj -c Release
- dotnet run --project src/tools/scanner-oracle-runner/scanner-oracle-runner.csproj -- \
src/StellaOps.Scanner.Tests/PatchOracles
artifacts:
when: on_failure
paths:
- src/StellaOps.Scanner.Tests/PatchOracles/**/build
- oracle-results.log
```
You can also:
* Tag the job (e.g., `oracle` or `reachability`) so you can run it nightly or on changes to Scanner analyzers.
* Pin Docker images with the exact C/C++/Java toolchains used by patch oracles so results are deterministic.
---
## 6. Practical guidelines and pitfalls
Here are concrete rules of thumb for making this robust:
### 6.1. Choosing good CVE oracles
Prefer cases where:
* The patch clearly adds/removes a **function** or **method**, or introduces a separate helper such as `validate_len`, `check_bounds`, etc.
* The patch adds/removes a **call** that is easy to see even under optimization (e.g., non-inline, non-template).
* The project is easy to build and not heavily reliant on obscure toolchains.
For each supported language in Scanner, target:
* 3–5 small C or C++ oracles.
* 3–5 .NET or JVM oracles.
* 1–3 for Go and Rust once those analyzers exist.
You do not need many; you want **sharp, surgical tests**, not coverage.
### 6.2. Handle inlining and optimization
Compilers may inline small functions; this can break naive “must have call edge” expectations.
Mitigations:
* Choose functions that are “large enough” or mark them `__attribute__((noinline))` (GCC/Clang) in your test harness code if necessary.
* Alternatively, relax expectations using `should_add` vs `must_add` for some edges:
```yaml
calls:
must_add: []
should_add:
- caller: "foo_parse"
callee: "validate_len"
```
In the runner, `should_add` failures can mark the oracle as “degraded” but not fatal, while `must_*` failures break the build.
### 6.3. Keep oracles stable over time
To avoid flakiness:
* **Vendor sources** into the repo (or at least snapshot the patch) so upstream changes do not affect builds.
* Pin toolchain versions in Docker images for CI.
* Capture and pin scanner configuration: analyzers enabled, rules, version. If you support “deterministic scan manifests” later, these oracles are perfect consumers of that.
### 6.4. What to assert beyond functions/calls
When your Scanner gets more advanced, you can extend `oracle.yml`:
```yaml
cfg:
must_increase_blocks:
- function: "foo_parse"
must_add_branch_on:
- function: "foo_parse"
operand_pattern: "len <= MAX_LEN"
```
Initially, I would keep it to:
* Function presence/absence
* Call edges presence/absence
and add CFG assertions only when your analyzers and JSON model for CFG stabilize.
### 6.5. How to use failures
When a patch oracle fails, it is a **signal** that either:
* A change in Scanner or a new optimization pattern created a blind spot, or
* The oracle is too strict (e.g., relying on a call that got inlined).
You then:
1. Inspect the disassembly / Scanner JSON for `vuln` and `fixed`.
2. Decide if Scanner is wrong (fix analyzer) or oracle is too rigid (relax to `should_*`).
3. Commit both the code change and updated oracle (if needed) in the same merge request.
---
## 7. Minimal checklist for adding a new patch oracle
For your future self and your agents, here is a compressed checklist:
1. Select CVE + patch; copy minimal affected sources into `src/…/<lang>/<CVE>/src/{vuln,fixed}`.
2. Add a tiny harness that calls the patched code path.
3. Write `build.sh` / `build.ps1` to produce `build/vuln` and `build/fixed` artifacts, stripped/Release.
4. Run manual `scanner` on both artifacts once; inspect JSON to find real symbol names and call edges.
5. Create `oracle.yml` with:
* `build.script` and `artifacts.*` paths
* `scan.scanner_cli` + args
* `expectations.functions.*` and `expectations.calls.*`
6. Run `scanner-oracle-runner` locally; fix any mismatches or over-strict expectations.
7. Commit and ensure CI job `patch-oracle-tests` runs and must pass on MR.
If you wish, next step we can design the actual JSON schema that Scanner should emit for function/call graphs and write a first C# implementation of `scanner-oracle-runner` aligned with that schema.

View File

@@ -0,0 +1,784 @@
Here's a clean, air-gap-ready spine for turning container images into verifiable SBOMs and provenance—built to be idempotent and easy to slot into StellaOps or any CI/CD.
```mermaid
flowchart LR
A[OCI Image/Repo]-->B[Layer Extractor]
B-->C[Sbomer: CycloneDX/SPDX]
C-->D[DSSE Sign]
D-->E[in-toto Statement (SLSA Provenance)]
E-->F[Transparency Log Adapter]
C-->G[POST /sbom/ingest]
F-->H[POST /attest/verify]
```
### What this does (in plain words)
* **Pull & crack the image** → extract layers, metadata (labels, env, history).
* **Build an SBOM** → emit **CycloneDX 1.6** and **SPDX 3.0.1** (pick one or both).
* **Sign artifacts** → wrap SBOM/provenance in **DSSE** envelopes.
* **Provenance** → generate **in-toto Statement** with **SLSA Provenance v1** as the predicate.
* **Auditability** → optionally publish attestations to a transparency log (e.g., Rekor) so they're tamper-evident via Merkle proofs.
* **APIs are idempotent** → safe to re-ingest the same image/SBOM/attestation without version churn.
### Design notes you can hand to an agent
* **Idempotency keys**
* `contentAddress` = SHA256 of OCI manifest (or full image digest)
* `sbomHash` = SHA256 of normalized SBOM JSON
* `attHash` = SHA256 of DSSE payload (base64-stable)
Store these; reject duplicates with HTTP 200 + `"status":"already_present"`.
* **Default formats**
* SBOM export: CycloneDX v1.6 (`application/vnd.cyclonedx+json`), SPDX 3.0.1 (`application/spdx+json`)
* DSSE envelope: `application/dsse+json`
* in-toto Statement: `application/vnd.in-toto+json` with `predicateType` = SLSA Provenance v1
* **Airgap mode**
* No external calls required; Rekor publish is optional.
* Keep a local Merkle log (pluggable) and allow later “sync-to-Rekor” when online.
* **Transparency log adapter**
* Interface: `Put(entry) -> {logIndex, logID, inclusionProof}`
* Backends: `rekor`, `local-merkle`, `null` (noop)
### Minimal API sketch
* `POST /sbom/ingest`
* Body: `{ imageDigest, sbom, format, dsseSignature? }`
* Returns: `{ sbomId, status, sbomHash }` (status: `stored|already_present`)
* `POST /attest/verify`
* Body: `{ dsseEnvelope, expectedSubjects:[{name, digest}] }`
* Verifies DSSE, checks in-toto subject ↔ image digest, optionally records/logs.
* Returns: `{ verified:true, predicateType, logIndex?, inclusionProof? }`
### CLI flow (pseudocode)
```bash
# 1) Extract
stella-extract --image $IMG --out /work/extract
# 2) SBOM (Cdx + SPDX)
stella-sbomer cdx --in /work/extract --out /work/sbom.cdx.json
stella-sbomer spdx --in /work/extract --out /work/sbom.spdx.json
# 3) DSSE sign (offline keyring or HSM)
stella-sign dsse --in /work/sbom.cdx.json --out /work/sbom.cdx.dsse.json --key file:k.pem
# 4) SLSA provenance (intoto Statement)
stella-provenance slsa-v1 --subject $IMG_DIGEST --materials /work/extract/manifest.json \
--out /work/prov.dsse.json --key file:k.pem
# 5) (optional) Publish to transparency log
stella-log publish --in /work/prov.dsse.json --backend rekor --rekor-url $REKOR
```
### Validation rules (quick)
* **Subject binding**: in-toto Statement `subject[].digest.sha256` must equal the OCI image digest you scanned.
* **Key policy**: enforce allowed issuers (Fulcio, internal CA, GOST/SM/EIDAS/FIPS as needed).
* **Normalization**: canonicalize JSON before hashing/signing to keep idempotency stable.
### Why this matters
* **Audit-ready**: You can always prove *what* you scanned, *how* it was built, and *who* signed it.
* **Noise-gated**: With deterministic SBOMs + provenance, downstream VEX/reachability gets much cleaner.
* **Drop-in**: Works in harsh environments—offline, mirrors, sovereign crypto stacks—without changing your pipeline.
If you want, I can generate:
* a ready-to-use OpenAPI stub for `POST /sbom/ingest` and `POST /attest/verify`,
* C# (.NET 10) DSSE + in-toto helpers (interfaces + test fixtures),
* or a Docker-compose “air-gap bundle” showing the full spine end-to-end.
Below is a full architecture plan you can hand to an agent as the “master spec” for implementing the SBOM & provenance spine (image → SBOM → DSSE → in-toto/SLSA → transparency log → REST APIs), with idempotent APIs and air-gap readiness.
---
## 1. Scope and Objectives
**Goal:** Implement a deterministic, air-gap-ready “SBOM spine” that:
* Converts OCI images into SBOMs (CycloneDX 1.6 and SPDX 3.0.1).
* Generates SLSA v1 provenance wrapped in in-toto Statements.
* Signs all artifacts with DSSE envelopes using pluggable crypto providers.
* Optionally publishes attestations to transparency logs (Rekor/local-Merkle/none).
* Exposes stable, idempotent APIs:
* `POST /sbom/ingest`
* `POST /attest/verify`
* Avoids versioning by design; APIs are extended, not versioned; all mutations are idempotent keyed by content digests.
**Out of scope (for this iteration):**
* Full vulnerability scanning (delegated to Scanner service).
* Policy evaluation / lattice logic (delegated to Scanner/Graph engine).
* Vendor-facing proof-market ledger and trust economics (future module).
---
## 2. High-Level Architecture
### 2.1 Logical Components
1. **StellaOps.SupplyChain.Core (Library)**
* Shared types and utilities:
* Domain models: SBOM, DSSE, in-toto Statement, SLSA predicates.
* Canonicalization & hashing utilities.
* DSSE sign/verify abstractions.
* Transparency log entry model & Merkle proof verification.
2. **StellaOps.Sbomer.Engine (Library)**
* Image → SBOM functionality:
* Layer & manifest analysis.
* SBOM generation: CycloneDX, SPDX.
* Extraction of metadata (labels, env, history).
* Deterministic ordering & normalization.
3. **StellaOps.Provenance.Engine (Library)**
* Build provenance & in-toto:
* In-toto Statement generator.
* SLSA v1 provenance predicate builder.
* Subject and material resolution from image metadata & SBOM.
4. **StellaOps.Authority (Service/Library)**
* Crypto & keys:
* Key management abstraction (file, HSM, KMS, sovereign crypto).
* DSSE signing & verification with multiple key types.
* Trust roots, certificate chains, key policies.
5. **StellaOps.LogBridge (Service/Library)**
* Transparency log adapter:
* Rekor backend.
* Local Merkle log backend (for air-gap).
* Null backend (no-op).
* Merkle proof validation.
6. **StellaOps.SupplyChain.Api (Service)**
* The SBOM spine HTTP API:
* `POST /sbom/ingest`
* `POST /attest/verify`
* Optionally: `GET /sbom/{id}`, `GET /attest/{id}`, `GET /image/{digest}/summary`.
* Performs orchestrations:
* SBOM/attestation parsing, canonicalization, hashing.
* Idempotency and persistence.
* Delegation to Authority and LogBridge.
7. **CLI Tools (optional but recommended)**
* `stella-extract`, `stella-sbomer`, `stella-sign`, `stella-provenance`, `stella-log`.
* Thin wrappers over the above libraries; usable offline and in CI pipelines.
8. **Persistence Layer**
* Primary DB: PostgreSQL (or other RDBMS).
* Optional object storage: S3/MinIO for large SBOM/attestation blobs.
* Tables: `images`, `sboms`, `attestations`, `signatures`, `log_entries`, `keys`.
### 2.2 Deployment View (Kubernetes / Docker)
```mermaid
flowchart LR
subgraph Node1[Cluster Node]
A[StellaOps.SupplyChain.Api (ASP.NET Core)]
B[StellaOps.Authority Service]
C[StellaOps.LogBridge Service]
end
subgraph Node2[Worker Node]
D[Runner / CI / Air-gap host]
E[CLI Tools\nstella-extract/sbomer/sign/provenance/log]
end
F[(PostgreSQL)]
G[(Object Storage\nS3/MinIO)]
H[(Local Merkle Log\nor Rekor)]
A --> F
A --> G
A --> C
A --> B
C --> H
E --> A
```
* **Air-gap mode:**
* Rekor backend disabled; LogBridge uses local Merkle log (`H`) or `null`.
* All components run within the offline network.
* **Online mode:**
* LogBridge talks to external Rekor instance using outbound HTTPS only.
---
## 3. Domain Model and Storage Design
Use EF Core 9 with PostgreSQL in .NET 10.
### 3.1 Core Entities
1. **ImageArtifact**
* `Id` (GUID/ULID, internal).
* `ImageDigest` (string; OCI digest; UNIQUE).
* `Registry` (string).
* `Repository` (string).
* `Tag` (string, nullable, since digest is canonical).
* `FirstSeenAt` (timestamp).
* `MetadataJson` (JSONB; manifest, labels, env).
2. **Sbom**
* `Id` (string, primary key = `SbomHash` or derived ULID).
* `ImageArtifactId` (FK).
* `Format` (enum: `CycloneDX_1_6`, `SPDX_3_0_1`).
* `ContentHash` (string; normalized JSON SHA-256; UNIQUE with `TenantId`).
* `StorageLocation` (inline JSONB or external object storage key).
* `CreatedAt`.
* `Origin` (enum: `Generated`, `Uploaded`, `ExternalVendor`).
* Unique constraint: `(TenantId, ContentHash)`.
3. **Attestation**
* `Id` (string, primary key = `AttestationHash` or derived ULID).
* `ImageArtifactId` (FK).
* `Type` (enum: `InTotoStatement_SLSA_v1`, `Other`).
* `PayloadHash` (hash of DSSE payload, before envelope).
* `DsseEnvelopeHash` (hash of full DSSE JSON).
* `StorageLocation` (inline JSONB or object storage).
* `CreatedAt`.
* `Issuer` (string; signer identity / certificate subject).
* Unique constraint: `(TenantId, DsseEnvelopeHash)`.
4. **SignatureInfo**
* `Id` (GUID/ULID).
* `AttestationId` (FK).
* `KeyId` (logical key identifier).
* `Algorithm` (enum; includes PQ & sovereign algs).
* `VerifiedAt`.
* `VerificationStatus` (enum: `Valid`, `Invalid`, `Unknown`).
* `DetailsJson` (JSONB; trust-chain, error reasons, etc.).
5. **TransparencyLogEntry**
* `Id` (GUID/ULID).
* `AttestationId` (FK).
* `Backend` (enum: `Rekor`, `LocalMerkle`).
* `LogIndex` (string).
* `LogId` (string).
* `InclusionProofJson` (JSONB).
* `RecordedAt`.
* Unique constraint: `(Backend, LogId, LogIndex)`.
6. **KeyRecord** (optional if not reusing Authority's DB)
* `KeyId` (string, PK).
* `KeyType` (enum).
* `Usage` (enum: `Signing`, `Verification`, `Both`).
* `Status` (enum: `Active`, `Retired`, `Revoked`).
* `MetadataJson` (JSONB; KMS ARN, HSM slot, etc.).
### 3.2 Idempotency Keys
* SBOM:
* `sbomHash = SHA256(canonicalJson(sbom))`.
* Uniqueness enforced by `(TenantId, sbomHash)` in DB.
* Attestation:
* `attHash = SHA256(canonicalJson(dsse.payload))` or full envelope.
* Uniqueness enforced by `(TenantId, attHash)` in DB.
* Image:
* `imageDigest` is globally unique (per OCI spec).
---
## 4. Service-Level Architecture
### 4.1 StellaOps.SupplyChain.Api (.NET 10, ASP.NET Core)
**Responsibilities:**
* Expose HTTP API for ingest / verify.
* Handle idempotency logic & persistence.
* Delegate cryptographic operations to Authority.
* Delegate transparency logging to LogBridge.
* Perform basic validation against schemas (SBOM, DSSE, in-toto, SLSA).
**Key Endpoints:**
1. `POST /sbom/ingest`
* Request:
* `imageDigest` (string).
* `sbom` (raw JSON).
* `format` (enum/string).
* Optional: `dsseSignature` or `dsseEnvelope`.
* Behavior:
* Parse & validate SBOM structure.
* Canonicalize JSON, compute `sbomHash`.
* If `sbomHash` exists for `imageDigest` and tenant:
* Return `200` with `{ status: "already_present", sbomId, sbomHash }`.
* Else:
* Persist `Sbom` entity.
* Optionally verify DSSE signature via Authority.
* Return `201` with `{ status: "stored", sbomId, sbomHash }`.
2. `POST /attest/verify`
* Request:
* `dsseEnvelope` (JSON).
* `expectedSubjects` (list of `{ name, digest }`).
* Behavior:
* Canonicalize payload, compute `attHash`.
* Verify DSSE signature via Authority.
* Parse in-toto Statement; ensure `subject[].digest.sha256` matches `expectedSubjects`.
* Persist `Attestation` & `SignatureInfo`.
* If configured, call LogBridge to publish and store `TransparencyLogEntry`.
* If `attHash` already exists:
* Return `200` with `status: "already_present"` and existing references.
* Else, return `201` with `verified:true`, plus log info when available.
3. Optional read APIs:
* `GET /sbom/by-image/{digest}`
* `GET /attest/by-image/{digest}`
* `GET /image/{digest}/summary` (SBOM + attestations + log status).
### 4.2 StellaOps.Sbomer.Engine
**Responsibilities:**
* Given:
* OCI image manifest & layers (from local tarball or remote registry).
* Produce:
* CycloneDX 1.6 JSON.
* SPDX 3.0.1 JSON.
**Design:**
* Use layered analyzers:
* `ILayerAnalyzer` for generic filesystem traversal.
* Language-specific analyzers (optional for SBOM detail):
* `DotNetAnalyzer`, `NodeJsAnalyzer`, `PythonAnalyzer`, `JavaAnalyzer`, `PhpAnalyzer`, etc.
* Determinism:
* Sort all lists (components, dependencies) by stable keys.
* Remove unstable fields (timestamps, machine IDs, ephemeral paths).
* Provide `Normalize()` method per format that returns canonical JSON.
### 4.3 StellaOps.Provenance.Engine
**Responsibilities:**
* Build in-toto Statement with SLSA v1 predicate:
* `subject` derived from image digest(s).
* `materials` from:
* Git commit, tag, builder image, SBOM components if available.
* Ensure determinism:
* Sort materials by URI + digest.
* Normalize nested maps.
**Key APIs (internal library):**
* `InTotoStatement BuildSlsaProvenance(ImageArtifact image, Sbom sbom, ProvenanceContext ctx)`
* `string ToCanonicalJson(InTotoStatement stmt)`
### 4.4 StellaOps.Authority
**Responsibilities:**
* DSSE signing & verification.
* Key management abstraction.
* Policy enforcement (which keys/trust roots are allowed).
**Interfaces:**
* `ISigningProvider`
* `Task<DsseEnvelope> SignAsync(byte[] payload, string payloadType, string keyId)`
* `IVerificationProvider`
* `Task<VerificationResult> VerifyAsync(DsseEnvelope envelope, VerificationPolicy policy)`
**Backends:**
* File-based keys (PEM).
* HSM/KMS (AWS KMS, Azure Key Vault, on-prem HSM).
* Sovereign crypto providers (GOST, SMx, etc.).
* Optional PQ providers (Dilithium, Falcon).
### 4.5 StellaOps.LogBridge
**Responsibilities:**
* Abstract interaction with transparency logs.
**Interface:**
* `ILogBackend`
* `Task<LogEntryResult> PutAsync(byte[] canonicalPayloadHash, DsseEnvelope env)`
* `Task<ProofResult> VerifyInclusionAsync(LogEntryResult entry)`
**Backends:**
* `RekorBackend`:
* Calls Rekor REST API with hashed payload.
* `LocalMerkleBackend`:
* Maintains Merkle tree in local DB.
* Returns `logIndex`, `logId`, and inclusion proof.
* `NullBackend`:
* Returns empty/no-op results.
### 4.6 CLI Tools (Optional)
Use the same libraries as the services:
* `stella-extract`:
* Input: image reference.
* Output: local tarball + manifest JSON.
* `stella-sbomer`:
* Input: manifest & layers.
* Output: SBOM JSON.
* `stella-sign`:
* Input: JSON file.
* Output: DSSE envelope.
* `stella-provenance`:
* Input: image digest, build metadata.
* Output: signed in-toto/SLSA DSSE.
* `stella-log`:
* Input: DSSE envelope.
* Output: log entry details.
---
## 5. End-to-End Flows
### 5.1 SBOM Ingest (Upload Path)
```mermaid
sequenceDiagram
participant Client
participant API as SupplyChain.Api
participant Core as SupplyChain.Core
participant DB as PostgreSQL
Client->>API: POST /sbom/ingest (imageDigest, sbom, format)
API->>Core: Validate & canonicalize SBOM
Core-->>API: sbomHash
API->>DB: SELECT Sbom WHERE sbomHash & imageDigest
DB-->>API: Not found
API->>DB: INSERT Sbom (sbomHash, imageDigest, content)
DB-->>API: ok
API-->>Client: 201 { status:"stored", sbomId, sbomHash }
```
Re-ingest of the same SBOM repeats steps up to SELECT, then returns `status:"already_present"` with `200`.
### 5.2 Attestation Verify & Record
```mermaid
sequenceDiagram
participant Client
participant API as SupplyChain.Api
participant Auth as Authority
participant Log as LogBridge
participant DB as PostgreSQL
Client->>API: POST /attest/verify (dsseEnvelope, expectedSubjects)
API->>Auth: Verify DSSE (keys, policy)
Auth-->>API: VerificationResult(Valid/Invalid)
API->>API: Parse in-toto, check subjects vs expected
API->>DB: SELECT Attestation WHERE attHash
DB-->>API: Not found
API->>DB: INSERT Attestation + SignatureInfo
alt Logging enabled
API->>Log: PutAsync(attHash, envelope)
Log-->>API: LogEntryResult(logIndex, logId, proof)
API->>DB: INSERT TransparencyLogEntry
end
API-->>Client: 201 { verified:true, attestationId, logIndex?, inclusionProof? }
```
If attestation already exists, API returns `200` with `status:"already_present"`.
---
## 6. Idempotency and Determinism Strategy
1. **Canonicalization rules:**
* Remove insignificant whitespace.
* Sort all object keys lexicographically.
* Sort arrays where order is not semantically meaningful (components, materials).
* Strip non-deterministic fields (timestamps, random IDs) where allowed.
2. **Hashing:**
* Always hash canonical JSON as UTF-8.
* Use SHA-256 for core IDs; allow crypto provider to also compute other digests if needed.
3. **Persistence:**
* Enforce uniqueness in DB via indices on:
* `(TenantId, ContentHash)` for SBOMs.
* `(TenantId, AttHash)` for attestations.
* `(Backend, LogId, LogIndex)` for log entries.
* API behavior:
* Existing row → `200` with `"already_present"`.
* New row → `201` with `"stored"`.
4. **API design:**
* No version numbers in path.
* Add fields over time; never break or repurpose existing ones.
* Use explicit capability discovery via `GET /meta/capabilities` if needed.
---
## 7. Air-Gap Mode and Synchronization
### 7.1 Air-Gap Mode
* Configuration flag `Mode = Offline` on SupplyChain.Api.
* LogBridge backend:
* Default to `LocalMerkle` or `Null`.
* Rekor-specific configuration disabled or absent.
* DB & Merkle log stored locally inside the secure network.
### 7.2 Later Synchronization to Rekor (Optional Future Step)
Not mandatory for first iteration, but prepare for:
* Background job (Scheduler module) that:
* Enumerates local `TransparencyLogEntry` not yet exported.
* Publishes hashed payloads to Rekor when network is available.
* Stores mapping between local log entries and remote Rekor entries.
---
## 8. Security, Access Control, and Observability
### 8.1 Security
* mTLS between internal services (SupplyChain.Api, Authority, LogBridge).
* Authentication:
* API keys/OIDC for clients.
* Per-tenant scoping; `TenantId` must be present in context.
* Authorization:
* RBAC: which tenants/users can write/verify/only read.
### 8.2 Crypto Policies
* Policy object defines:
* Allowed key types and algorithms.
* Trust roots (Fulcio, internal CA, sovereign PKI).
* Revocation checking strategy (CRL/OCSP, offline lists).
* Authority enforces policies; SupplyChain.Api only consumes `VerificationResult`.
### 8.3 Observability
* Logs:
* Structured logs with correlation IDs; log imageDigest, sbomHash, attHash.
* Metrics:
* SBOM ingest count, dedup hit rate.
* Attestation verify latency.
* Transparency log publish success/failure counts.
* Traces:
* OpenTelemetry tracing across API → Authority → LogBridge.
---
## 9. Implementation Plan (Epics & Work Packages)
You can give this section directly to agents to split.
### Epic 1: Core Domain & Canonicalization
1. Define .NET 10 solution structure:
* Projects:
* `StellaOps.SupplyChain.Core`
* `StellaOps.Sbomer.Engine`
* `StellaOps.Provenance.Engine`
* `StellaOps.SupplyChain.Api`
* `StellaOps.Authority` (if not already present)
* `StellaOps.LogBridge`
2. Implement core domain models:
* SBOM, DSSE, in-toto, SLSA v1.
3. Implement canonicalization & hashing utilities.
4. Unit tests:
* Given semantically equivalent JSON, hashes must match.
* Negative tests where order changes but meaning does not.
### Epic 2: Persistence Layer
1. Design EF Core models for:
* ImageArtifact, Sbom, Attestation, SignatureInfo, TransparencyLogEntry, KeyRecord.
2. Write migrations for PostgreSQL.
3. Implement repository interfaces for read/write.
4. Tests:
* Unique constraints and idempotency behavior.
* Query performance for common access paths (by imageDigest).
### Epic 3: SBOM Engine
1. Implement minimal layer analysis:
* Accepts local tarball or path (for now).
2. Implement CycloneDX 1.6 generator.
3. Implement SPDX 3.0.1 generator.
4. Deterministic normalization across formats.
5. Tests:
* Golden files for images → SBOM output.
* Stability under repeated runs.
### Epic 4: Provenance Engine
1. Implement in-toto Statement model with SLSA v1 predicate.
2. Implement builder to map:
* ImageDigest → subject.
* Build metadata → materials.
3. Deterministic canonicalization.
4. Tests:
* Golden in-toto/SLSA statements for sample inputs.
* Subject matching logic.
### Epic 5: Authority Integration
1. Implement `ISigningProvider`, `IVerificationProvider` contracts.
2. Implement file-based key backend as default.
3. Implement DSSE wrapper:
* `SignAsync(payload, payloadType, keyId)`.
* `VerifyAsync(envelope, policy)`.
4. Tests:
* DSSE round-trip; invalid signature scenarios.
* Policy enforcement tests.
### Epic 6: Transparency Log Bridge
1. Implement `ILogBackend` interface.
2. Implement `LocalMerkleBackend`:
* Simple Merkle tree with DB storage.
3. Implement `NullBackend`.
4. Define configuration model to select backend.
5. (Optional later) Implement `RekorBackend`.
6. Tests:
* Stable Merkle root; inclusion proof verification.
### Epic 7: SupplyChain.Api
1. Implement `POST /sbom/ingest`:
* Request/response DTOs.
* Integration with canonicalization, persistence, idempotency logic.
2. Implement `POST /attest/verify`:
* End-to-end verification and persistence.
* Integration with Authority and LogBridge.
3. Optional read APIs.
4. Add input validation (JSON schema, basic constraints).
5. Integration tests:
* Full flows for new and duplicate inputs.
* Error cases (invalid DSSE, subject mismatch).
### Epic 8: CLI Tools
1. Implement `stella-sbomer` (wraps Sbomer.Engine).
2. Implement `stella-provenance` (wraps Provenance.Engine + Authority).
3. Implement `stella-sign` and `stella-log`.
4. Provide clear help/usage and sample scripts.
### Epic 9: Hardening, Air-Gap Profile, and Docs
1. Configuration profiles:
* `Offline` vs `Online`.
* Log backend selection.
2. Security hardening:
* mTLS, authentication, authorization.
3. Observability:
* Metrics, logs, traces wiring.
4. Documentation:
* API reference.
* Sequence diagrams.
* Deployment recipes for:
* Single-node air-gap.
* Clustered online deployment.
---
If you want, next step I can:
* Turn this into an AGENTS/TASKS/PROMPT set for your codex workers, or
* Produce concrete .NET 10 project skeletons (csproj layout, folder structure, and initial interfaces) for the core libraries and API service.

View File

@@ -0,0 +1,3 @@
{"subject":"pkg:docker/stellaops/evidencelocker@sha256:111","dsseHash":"sha256:aaaaaaaa","rekorEntry":"sha256:rekor111"}
{"subject":"pkg:docker/stellaops/exportcenter@sha256:222","dsseHash":"sha256:bbbbbbbb","rekorEntry":"sha256:rekor222"}
{"subject":"pkg:docker/stellaops/timelineindexer@sha256:333","dsseHash":"sha256:cccccccc","rekorEntry":"sha256:rekor333"}

View File

@@ -0,0 +1,5 @@
{
"pkg:docker/stellaops/evidencelocker@sha256:111": "sha256:rekor111",
"pkg:docker/stellaops/exportcenter@sha256:222": "sha256:rekor222",
"pkg:docker/stellaops/timelineindexer@sha256:333": "sha256:rekor333"
}

View File

@@ -0,0 +1,19 @@
# Crypto Registry Decision · 2025-11-18
## Outcome
- Agree to ship `ICryptoProviderRegistry` with the following defaults:
- PreferredProviders (global default): `default`, `ru.openssl.gost`, `ru.pkcs11`.
- ActiveProfile for RU/sovereign deployments: `ru-offline` with preferred order `ru.cryptopro.csp`, `ru.openssl.gost`, `ru.pkcs11`.
- For non-RU deployments, ActiveProfile remains `default`.
- Registry contract to be published via shared library (`StellaOps.Cryptography` stack) and referenced by EvidenceLocker/ExportCenter/TimelineIndexer and downstream services.
- Deterministic config binding: keep profile names and provider IDs lowercase ASCII; enforce ISO-8601 UTC timestamps for any audit material generated by registry actions.
## Rationale
- Aligns with 2025-11-07 crypto routing audit (`docs/security/crypto-routing-audit-2025-11-07.md`) to ensure sovereign-ready providers are selectable without code changes.
- Keeps default provider chain intact for non-sovereign deployments while enabling RU-specific stacks where mandated.
## Required follow-ups
- Publish NuGet/package update exposing the approved registry contract and provider IDs.
- Update module hosts (EvidenceLocker, ExportCenter, TimelineIndexer, CLI) to bind `StellaOps:Crypto:Registry` using the defaults above.
- Add CI smoke to assert registry resolves the chosen ActiveProfile on Linux and Windows.
- Mirror decision into sprint docs for affected modules (160/161).

View File

@@ -1,5 +1,5 @@
{ {
"generated_utc": "2025-11-18T21:32:46.618821Z", "generated_utc": "2025-11-18T21:41:22.263398Z",
"source": "StellaOps binary prereq consolidation", "source": "StellaOps binary prereq consolidation",
"base_dir": "local-nugets", "base_dir": "local-nugets",
"count": 91, "count": 91,

View File

@@ -0,0 +1,12 @@
{
"generated_utc": "2025-11-18T21:41:23.244597Z",
"summary": "Offline feed bundles registered here. Add entries when baking air-gap bundles.",
"feeds": [
{
"name": "telemetry-offline-bundle",
"path": "offline/feeds/telemetry-offline-bundle.tar.gz",
"sha256": "49d3ac3502bad1caaed4c1f7bceaa4ce40fdfce6210d4ae20c90386aeb84ca4e",
"description": "Telemetry offline bundle (migrated from out/telemetry)"
}
]
}

View File

@@ -5,7 +5,8 @@
"type": "module", "type": "module",
"scripts": { "scripts": {
"docs:attestor:validate": "node scripts/validate-attestation-schemas.mjs", "docs:attestor:validate": "node scripts/validate-attestation-schemas.mjs",
"docs:attestor:generate": "dotnet run --project src/Attestor/StellaOps.Attestor.Types/Tools/StellaOps.Attestor.Types.Generator --configuration Release" "docs:attestor:generate": "dotnet run --project src/Attestor/StellaOps.Attestor.Types/Tools/StellaOps.Attestor.Types.Generator --configuration Release",
"api:lint": "sh -c 'set -e; files=$(find src/Api/StellaOps.Api.OpenApi -type f -name \"*.yaml\" 2>/dev/null | wc -l); if [ \"$files\" -eq 0 ]; then echo \"[api:lint] no OpenAPI files found; skipping\"; exit 0; fi; npx --yes @stoplight/spectral-cli lint src/Api/StellaOps.Api.OpenApi/**/*.yaml'"
}, },
"dependencies": { "dependencies": {
"ajv": "^8.17.1", "ajv": "^8.17.1",

View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""Generate manifests for curated binaries.
- local-nugets/manifest.json : NuGet packages (id, version, sha256)
- vendor/manifest.json : Plugin/tool/deploy/ops binaries with sha256
- offline/feeds/manifest.json : Offline bundles (tar/tgz/zip) with sha256
Intended to be idempotent and run in CI to ensure manifests stay current.
"""
from __future__ import annotations
import hashlib
import json
import re
from datetime import datetime, timezone
from pathlib import Path
# Repository root: this script lives in <root>/scripts/, so go up two levels.
ROOT = Path(__file__).resolve().parent.parent
def iso_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string with a 'Z' suffix."""
    now = datetime.now(timezone.utc)
    return now.isoformat().replace("+00:00", "Z")
def sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of *path*.

    Reads the file in fixed-size chunks instead of slurping it whole, so
    hashing large offline bundles (multi-GB tarballs) does not require
    loading them fully into memory.
    """
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        # iter() with a b"" sentinel yields 1 MiB chunks until EOF.
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()
# A NuGet version suffix: 2-4 dotted numeric components with an optional
# SemVer pre-release tail, e.g. "13.0.3" or "4.0.0-dev.2166".
VERSION_RE = re.compile(r"^\d+(\.\d+){1,3}(-[0-9A-Za-z.\-]+)?$")
def split_id_version(package_path: Path) -> tuple[str, str]:
    """Split a .nupkg filename stem into (package_id, version).

    Scans left-to-right and splits at the first dot-separated suffix that
    parses as a complete version, so the *longest* valid version tail wins.
    The previous right-to-left scan returned the shortest numeric tail and
    mis-split ids, e.g. "Newtonsoft.Json.13.0.3" became
    ("Newtonsoft.Json.13", "0.3") instead of ("Newtonsoft.Json", "13.0.3").
    Returns (stem, "unknown") when no component looks like a version.
    """
    stem = package_path.stem
    parts = stem.split(".")
    for i in range(1, len(parts)):
        version = ".".join(parts[i:])
        if VERSION_RE.match(version):
            return ".".join(parts[:i]), version
    return stem, "unknown"
def write_json(path: Path, payload: dict) -> None:
    """Serialise *payload* to *path* as 2-space-indented JSON."""
    serialised = json.dumps(payload, indent=2)
    path.write_text(serialised)
def generate_local_nugets_manifest() -> None:
    """Regenerate local-nugets/manifest.json from the curated .nupkg feed."""
    feed_dir = ROOT / "local-nugets"
    feed_dir.mkdir(exist_ok=True)
    ordered = sorted(feed_dir.glob("*.nupkg"), key=lambda p: p.name.lower())
    packages = []
    for pkg in ordered:
        pkg_id, version = split_id_version(pkg)
        packages.append(
            {
                "id": pkg_id,
                "version": version,
                "filename": pkg.name,
                "sha256": sha256(pkg),
            }
        )
    write_json(
        feed_dir / "manifest.json",
        {
            "generated_utc": iso_timestamp(),
            "source": "StellaOps binary prereq consolidation",
            "base_dir": "local-nugets",
            "count": len(packages),
            "packages": packages,
        },
    )
# File extensions treated as pinned binary artefacts.
BINARY_EXTS = {".dll", ".exe", ".so", ".dylib", ".bin"}
# Top-level directories whose binaries are tracked in vendor/manifest.json.
VENDOR_ROOTS = ["plugins", "tools", "deploy", "ops", "vendor"]
def generate_vendor_manifest() -> None:
    """Regenerate vendor/manifest.json with a sha256 entry per tracked binary."""
    entries = []
    for owner in VENDOR_ROOTS:
        base = ROOT / owner
        if not base.exists():
            continue
        binaries = (
            candidate
            for candidate in base.rglob("*")
            if candidate.is_file() and candidate.suffix.lower() in BINARY_EXTS
        )
        for binary in binaries:
            entries.append(
                {
                    "path": binary.relative_to(ROOT).as_posix(),
                    "sha256": sha256(binary),
                    "type": "binary",
                    "owner": owner,
                }
            )
    entries.sort(key=lambda entry: entry["path"])
    vendor_dir = ROOT / "vendor"
    vendor_dir.mkdir(exist_ok=True)
    write_json(
        vendor_dir / "manifest.json",
        {
            "generated_utc": iso_timestamp(),
            "summary": "Pinned binaries (non-NuGet) tracked for integrity; relocate new artefacts here or under offline/feeds.",
            "entries": entries,
        },
    )
# Archive suffixes that identify offline feed bundles.  Order matters: the
# first matching suffix is stripped when deriving the feed name, so the
# compound ".tar.gz" must precede plain ".gz".
FEED_SUFFIXES = (".tar.gz", ".tgz", ".tar", ".zip", ".gz")
def _strip_feed_suffix(filename: str) -> str:
    """Drop the first matching archive suffix from *filename* for readability."""
    for suffix in FEED_SUFFIXES:
        if filename.endswith(suffix):
            return filename[: -len(suffix)]
    return filename
def generate_offline_manifest() -> None:
    """Regenerate offline/feeds/manifest.json, preserving prior descriptions.

    Descriptions are hand-authored, so entries from the existing manifest are
    carried over by feed name; everything else (path, sha256) is recomputed.
    """
    feeds_dir = ROOT / "offline" / "feeds"
    feeds_dir.mkdir(parents=True, exist_ok=True)
    manifest_path = feeds_dir / "manifest.json"
    existing: dict = {}
    if manifest_path.exists():
        try:
            existing = json.loads(manifest_path.read_text())
        except json.JSONDecodeError:
            # A corrupt manifest is rebuilt from scratch rather than aborting.
            existing = {}
    prior = {
        entry.get("name"): entry
        for entry in existing.get("feeds", [])
        if isinstance(entry, dict)
    }
    feeds = []
    for bundle in sorted(feeds_dir.rglob("*"), key=lambda p: p.as_posix()):
        if not bundle.is_file() or not bundle.name.endswith(FEED_SUFFIXES):
            continue
        name = _strip_feed_suffix(bundle.name)
        carried = prior.get(name, {})
        feeds.append(
            {
                "name": name,
                "path": bundle.relative_to(ROOT).as_posix(),
                "sha256": sha256(bundle),
                "description": carried.get("description", ""),
            }
        )
    write_json(
        manifest_path,
        {
            "generated_utc": iso_timestamp(),
            "summary": existing.get(
                "summary",
                "Offline feed bundles registered here. Add entries when baking air-gap bundles.",
            ),
            "feeds": feeds,
        },
    )
def main() -> None:
    """Regenerate every curated-binary manifest (NuGet feed, vendor, offline)."""
    for task in (
        generate_local_nugets_manifest,
        generate_vendor_manifest,
        generate_offline_manifest,
    ):
        task()
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail
# Verifies binary artefacts live only in approved locations.
# Allowed roots: local-nugets (curated feed), .nuget/packages (cache), vendor (pinned binaries),
# offline (air-gap bundles/templates), plugins/tools/deploy/ops (module-owned binaries).
repo_root="$(git rev-parse --show-toplevel)"
cd "$repo_root"
# Extensions considered binary artefacts.  The dot in "tar\.gz" is escaped so
# the ERE matches a literal ".tar.gz" rather than "tar<any-char>gz" (the
# unescaped form wrongly flagged names like "foo.tarXgz").
binary_ext="(nupkg|dll|exe|so|dylib|a|lib|tar|tar\.gz|tgz|zip|jar|deb|rpm|bin)"
# Locations allowed to contain binaries, matched as leading path prefixes.
allowed_prefix="^(local-nugets|local-nugets/packages|vendor|offline|plugins|tools|deploy|ops|third_party|docs/artifacts|samples|src/.*/Fixtures|src/.*/fixtures)/"
# Only consider files that currently exist in the working tree (skip deleted placeholders).
# "|| true" rescues the pipeline when grep finds no matches (exit 1 under pipefail).
violations=$(git ls-files | while read -r f; do [[ -f "$f" ]] && echo "$f"; done | grep -E "\\.${binary_ext}$" | grep -Ev "$allowed_prefix" || true)
if [[ -n "$violations" ]]; then
  echo "Binary artefacts found outside approved directories:" >&2
  echo "$violations" >&2
  exit 1
fi
printf "Binary layout OK (allowed roots: %s)\n" "$allowed_prefix"

View File

@@ -0,0 +1,75 @@
using System.Text.Json.Serialization;
namespace StellaOps.Orchestrator.Schemas;
/// <summary>
/// Serialisable bundle of advisory observations for one advisory and tenant,
/// optionally accompanied by signatures over the bundle.
/// </summary>
public sealed record AdvisoryEvidenceBundle
{
/// <summary>Identifier of this evidence bundle.</summary>
[JsonPropertyName("bundleId")]
public string BundleId { get; init; } = string.Empty;
/// <summary>Identifier of the advisory the evidence relates to.</summary>
[JsonPropertyName("advisoryId")]
public string AdvisoryId { get; init; } = string.Empty;
/// <summary>Tenant the bundle belongs to.</summary>
[JsonPropertyName("tenant")]
public string Tenant { get; init; } = string.Empty;
/// <summary>Timestamp at which the bundle was generated.</summary>
[JsonPropertyName("generatedAt")]
public DateTimeOffset GeneratedAt { get; init; }
/// <summary>Bundle schema version; defaults to 0 (initial draft shape).</summary>
[JsonPropertyName("schemaVersion")]
public int SchemaVersion { get; init; } = 0;
/// <summary>Observations contained in the bundle; empty list when none.</summary>
[JsonPropertyName("observations")]
public IReadOnlyList<AdvisoryObservation> Observations { get; init; } = Array.Empty<AdvisoryObservation>();
/// <summary>Optional signatures; null when the bundle is unsigned.</summary>
[JsonPropertyName("signatures")]
public IReadOnlyList<SignatureInfo>? Signatures { get; init; }
}
/// <summary>
/// A single observation about an advisory from some source: affected package
/// (purl), CVE, severity, optional CVSS details and free-form evidence.
/// </summary>
public sealed record AdvisoryObservation
{
/// <summary>Identifier of this observation.</summary>
[JsonPropertyName("observationId")]
public string ObservationId { get; init; } = string.Empty;
/// <summary>Source that produced the observation.</summary>
[JsonPropertyName("source")]
public string Source { get; init; } = string.Empty;
/// <summary>Package URL of the affected component, when known.</summary>
[JsonPropertyName("purl")]
public string? Purl { get; init; }
/// <summary>CVE identifier, when applicable.</summary>
[JsonPropertyName("cve")]
public string? Cve { get; init; }
/// <summary>Severity label; scale appears source-defined — TODO confirm with producers.</summary>
[JsonPropertyName("severity")]
public string? Severity { get; init; }
/// <summary>CVSS vector/score details, when available.</summary>
[JsonPropertyName("cvss")]
public CvssVector? Cvss { get; init; }
/// <summary>Human-readable summary of the observation.</summary>
[JsonPropertyName("summary")]
public string? Summary { get; init; }
/// <summary>Free-form evidence payload; key semantics not fixed here — confirm with emitters.</summary>
[JsonPropertyName("evidence")]
public IDictionary<string, object>? Evidence { get; init; }
}
/// <summary>CVSS details: the vector string and/or the numeric score; both optional.</summary>
public sealed record CvssVector
{
/// <summary>CVSS vector string (e.g. "CVSS:3.1/..."), when provided.</summary>
[JsonPropertyName("vector")]
public string? Vector { get; init; }
/// <summary>Numeric CVSS score, when provided.</summary>
[JsonPropertyName("score")]
public double? Score { get; init; }
}
/// <summary>A signature value with the key id (and optional algorithm) that produced it.</summary>
public sealed record SignatureInfo
{
/// <summary>Signature value; encoding not specified here — presumably base64, TODO confirm.</summary>
[JsonPropertyName("signature")]
public string Signature { get; init; } = string.Empty;
/// <summary>Identifier of the signing key.</summary>
[JsonPropertyName("keyId")]
public string KeyId { get; init; } = string.Empty;
/// <summary>Signature algorithm name, when declared.</summary>
[JsonPropertyName("algorithm")]
public string? Algorithm { get; init; }
}

View File

@@ -0,0 +1,72 @@
using System.Text.Json.Serialization;
namespace StellaOps.Orchestrator.Schemas;
/// <summary>
/// Generic orchestrator event envelope carrying identity, kind/version,
/// tenancy, timing, tracing context and a typed <typeparamref name="TPayload"/>.
/// </summary>
/// <typeparam name="TPayload">Concrete payload type matching <see cref="Kind"/>.</typeparam>
public sealed record OrchestratorEnvelope<TPayload>
{
/// <summary>Unique identifier of this event.</summary>
[JsonPropertyName("eventId")]
public Guid EventId { get; init; }
/// <summary>Event kind; known values are listed in <see cref="OrchestratorEventKinds"/>.</summary>
[JsonPropertyName("kind")]
public string Kind { get; init; } = string.Empty;
/// <summary>Contract version of the event; assumed to version the payload schema — TODO confirm.</summary>
[JsonPropertyName("version")]
public int Version { get; init; }
/// <summary>Tenant the event belongs to.</summary>
[JsonPropertyName("tenant")]
public string Tenant { get; init; } = string.Empty;
/// <summary>When the underlying event occurred.</summary>
[JsonPropertyName("occurredAt")]
public DateTimeOffset OccurredAt { get; init; }
/// <summary>When the event was recorded, if distinct from <see cref="OccurredAt"/>.</summary>
[JsonPropertyName("recordedAt")]
public DateTimeOffset? RecordedAt { get; init; }
/// <summary>Producer/component that emitted the event.</summary>
[JsonPropertyName("source")]
public string Source { get; init; } = string.Empty;
/// <summary>Key for de-duplicating redeliveries of the same logical event.</summary>
[JsonPropertyName("idempotencyKey")]
public string IdempotencyKey { get; init; } = string.Empty;
/// <summary>Optional correlation id linking related events.</summary>
[JsonPropertyName("correlationId")]
public string? CorrelationId { get; init; }
/// <summary>Optional distributed-tracing trace id.</summary>
[JsonPropertyName("traceId")]
public string? TraceId { get; init; }
/// <summary>Optional distributed-tracing span id.</summary>
[JsonPropertyName("spanId")]
public string? SpanId { get; init; }
/// <summary>Optional artefact scope (repo/digest/...) the event applies to.</summary>
[JsonPropertyName("scope")]
public OrchestratorScope? Scope { get; init; }
/// <summary>Optional free-form string attributes.</summary>
[JsonPropertyName("attributes")]
public IDictionary<string, string>? Attributes { get; init; }
/// <summary>Typed payload; default! because deserialisation is expected to populate it.</summary>
[JsonPropertyName("payload")]
public TPayload Payload { get; init; } = default!;
}
/// <summary>
/// Artefact scope for an orchestrator event: a repository plus digest, with
/// optional namespace, component and image qualifiers.
/// </summary>
public sealed record OrchestratorScope
{
/// <summary>Optional namespace qualifier (e.g. registry namespace — TODO confirm).</summary>
[JsonPropertyName("namespace")]
public string? Namespace { get; init; }
/// <summary>Repository the event applies to.</summary>
[JsonPropertyName("repo")]
public string Repo { get; init; } = string.Empty;
/// <summary>Content digest identifying the artefact.</summary>
[JsonPropertyName("digest")]
public string Digest { get; init; } = string.Empty;
/// <summary>Optional component qualifier within the artefact.</summary>
[JsonPropertyName("component")]
public string? Component { get; init; }
/// <summary>Optional image reference.</summary>
[JsonPropertyName("image")]
public string? Image { get; init; }
}
/// <summary>Well-known values for <c>OrchestratorEnvelope.Kind</c>.</summary>
public static class OrchestratorEventKinds
{
/// <summary>A scanner report is ready; payload is <c>ScannerReportReadyPayload</c>.</summary>
public const string ScannerReportReady = "scanner.event.report.ready";
/// <summary>A scan has completed; payload is <c>ScannerScanCompletedPayload</c>.</summary>
public const string ScannerScanCompleted = "scanner.event.scan.completed";
}

View File

@@ -0,0 +1,124 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Orchestrator.Schemas;
/// <summary>
/// Payload describing a finished scanner report: verdict, finding counters,
/// deltas, policy revision, navigation links, optional DSSE envelope and the
/// raw report document.  Presumably carried under the
/// "scanner.event.report.ready" kind — confirm with the publisher.
/// </summary>
public sealed record ScannerReportReadyPayload
{
/// <summary>Identifier of the report.</summary>
[JsonPropertyName("reportId")]
public string ReportId { get; init; } = string.Empty;
/// <summary>Identifier of the originating scan, when available.</summary>
[JsonPropertyName("scanId")]
public string? ScanId { get; init; }
/// <summary>Digest of the scanned image, when available.</summary>
[JsonPropertyName("imageDigest")]
public string? ImageDigest { get; init; }
/// <summary>Timestamp at which the report was generated.</summary>
[JsonPropertyName("generatedAt")]
public DateTimeOffset GeneratedAt { get; init; }
/// <summary>Overall verdict string; value set not fixed here — TODO confirm.</summary>
[JsonPropertyName("verdict")]
public string Verdict { get; init; } = string.Empty;
/// <summary>Aggregated finding counters.</summary>
[JsonPropertyName("summary")]
public Summary Summary { get; init; } = new();
/// <summary>Optional delta versus a prior report.</summary>
[JsonPropertyName("delta")]
public Delta? Delta { get; init; }
/// <summary>Count of quieted findings, when tracked.</summary>
[JsonPropertyName("quietedFindingCount")]
public int? QuietedFindingCount { get; init; }
/// <summary>Policy revision the report was evaluated against, when known.</summary>
[JsonPropertyName("policy")]
public PolicyRevision? Policy { get; init; }
/// <summary>UI/API links related to the report.</summary>
[JsonPropertyName("links")]
public ReportLinks Links { get; init; } = new();
/// <summary>Optional DSSE envelope attesting the report.</summary>
[JsonPropertyName("dsse")]
public DsseEnvelope? Dsse { get; init; }
/// <summary>Raw report document, kept as untyped JSON.</summary>
[JsonPropertyName("report")]
public JsonElement Report { get; init; }
}
/// <summary>Aggregated finding counters for a scanner report.</summary>
public sealed record Summary
{
/// <summary>Total number of findings.</summary>
[JsonPropertyName("total")]
public int Total { get; init; }
/// <summary>Findings that blocked the verdict.</summary>
[JsonPropertyName("blocked")]
public int Blocked { get; init; }
/// <summary>Findings that produced warnings.</summary>
[JsonPropertyName("warned")]
public int Warned { get; init; }
/// <summary>Findings that were ignored.</summary>
[JsonPropertyName("ignored")]
public int Ignored { get; init; }
/// <summary>Findings that were quieted.</summary>
[JsonPropertyName("quieted")]
public int Quieted { get; init; }
}
/// <summary>Changes versus a prior report: new critical/high counts and KEV entries.</summary>
public sealed record Delta
{
/// <summary>Number of newly introduced critical findings, when computed.</summary>
[JsonPropertyName("newCritical")]
public int? NewCritical { get; init; }
/// <summary>Number of newly introduced high findings, when computed.</summary>
[JsonPropertyName("newHigh")]
public int? NewHigh { get; init; }
/// <summary>KEV-related identifiers; presumably CVE ids on the Known Exploited Vulnerabilities list — TODO confirm.</summary>
[JsonPropertyName("kev")]
public IReadOnlyList<string>? Kev { get; init; }
}
/// <summary>Reference to the policy revision a report was evaluated against.</summary>
public sealed record PolicyRevision
{
/// <summary>Content digest of the policy, when known.</summary>
[JsonPropertyName("digest")]
public string? Digest { get; init; }
/// <summary>Revision identifier of the policy, when known.</summary>
[JsonPropertyName("revisionId")]
public string? RevisionId { get; init; }
}
/// <summary>
/// Navigation links for a report.  Note the JSON property names are dotted
/// ("report.ui", "policy.api", ...), so consumers must match them literally.
/// </summary>
public sealed record ReportLinks
{
/// <summary>UI link to the report.</summary>
[JsonPropertyName("report.ui")]
public string? ReportUi { get; init; }
/// <summary>API link to the report.</summary>
[JsonPropertyName("report.api")]
public string? ReportApi { get; init; }
/// <summary>UI link to the policy.</summary>
[JsonPropertyName("policy.ui")]
public string? PolicyUi { get; init; }
/// <summary>API link to the policy.</summary>
[JsonPropertyName("policy.api")]
public string? PolicyApi { get; init; }
/// <summary>UI link to the attestation.</summary>
[JsonPropertyName("attestation.ui")]
public string? AttestationUi { get; init; }
/// <summary>API link to the attestation.</summary>
[JsonPropertyName("attestation.api")]
public string? AttestationApi { get; init; }
}
/// <summary>
/// DSSE (Dead Simple Signing Envelope) shape: payload type, payload and
/// signatures.  Per the DSSE convention the payload is base64-encoded —
/// TODO confirm against the producing service.
/// </summary>
public sealed record DsseEnvelope
{
/// <summary>Media/type identifier of the signed payload.</summary>
[JsonPropertyName("payloadType")]
public string PayloadType { get; init; } = string.Empty;
/// <summary>The signed payload content.</summary>
[JsonPropertyName("payload")]
public string Payload { get; init; } = string.Empty;
/// <summary>Signatures over the payload; empty list when none.</summary>
[JsonPropertyName("signatures")]
public IReadOnlyList<DsseSignature> Signatures { get; init; } = Array.Empty<DsseSignature>();
}
/// <summary>One DSSE signature: optional key id ("keyid") and the signature value ("sig").</summary>
public sealed record DsseSignature
{
/// <summary>Identifier of the signing key, when provided.</summary>
[JsonPropertyName("keyid")]
public string? KeyId { get; init; }
/// <summary>Signature value.</summary>
[JsonPropertyName("sig")]
public string Sig { get; init; } = string.Empty;
}

View File

@@ -0,0 +1,58 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Orchestrator.Schemas;
/// <summary>
/// Payload describing a completed scan: verdict, counters, deltas, policy
/// revision, links, optional findings list, optional DSSE envelope and raw
/// report.  Presumably carried under the "scanner.event.scan.completed"
/// kind — confirm with the publisher.
/// </summary>
public sealed record ScannerScanCompletedPayload
{
/// <summary>Identifier of the resulting report.</summary>
[JsonPropertyName("reportId")]
public string ReportId { get; init; } = string.Empty;
/// <summary>Identifier of the scan.</summary>
[JsonPropertyName("scanId")]
public string ScanId { get; init; } = string.Empty;
/// <summary>Digest of the scanned image, when available.</summary>
[JsonPropertyName("imageDigest")]
public string? ImageDigest { get; init; }
/// <summary>Overall verdict string; value set not fixed here — TODO confirm.</summary>
[JsonPropertyName("verdict")]
public string Verdict { get; init; } = string.Empty;
/// <summary>Aggregated finding counters.</summary>
[JsonPropertyName("summary")]
public Summary Summary { get; init; } = new();
/// <summary>Optional delta versus a prior scan.</summary>
[JsonPropertyName("delta")]
public Delta? Delta { get; init; }
/// <summary>Policy revision the scan was evaluated against, when known.</summary>
[JsonPropertyName("policy")]
public PolicyRevision? Policy { get; init; }
/// <summary>UI/API links related to the scan.</summary>
[JsonPropertyName("links")]
public ReportLinks Links { get; init; } = new();
/// <summary>Optional list of individual findings.</summary>
[JsonPropertyName("findings")]
public IReadOnlyList<ScanFinding>? Findings { get; init; }
/// <summary>Optional DSSE envelope attesting the result.</summary>
[JsonPropertyName("dsse")]
public DsseEnvelope? Dsse { get; init; }
/// <summary>Optional raw report document as untyped JSON.</summary>
[JsonPropertyName("report")]
public JsonElement? Report { get; init; }
}
/// <summary>A single finding from a scan: id, severity and optional CVE/purl/reachability details.</summary>
public sealed record ScanFinding
{
/// <summary>Identifier of the finding.</summary>
[JsonPropertyName("id")]
public string Id { get; init; } = string.Empty;
/// <summary>Severity label; scale not fixed here — TODO confirm.</summary>
[JsonPropertyName("severity")]
public string Severity { get; init; } = string.Empty;
/// <summary>CVE identifier, when applicable.</summary>
[JsonPropertyName("cve")]
public string? Cve { get; init; }
/// <summary>Package URL of the affected component, when known.</summary>
[JsonPropertyName("purl")]
public string? Purl { get; init; }
/// <summary>Reachability classification; value set not fixed here — TODO confirm.</summary>
[JsonPropertyName("reachability")]
public string? Reachability { get; init; }
}

View File

@@ -0,0 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Contracts-only class library: targets net10.0 with nullable reference
     types and implicit usings enabled; no package references. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@@ -0,0 +1,39 @@
using System.Text.Json.Serialization;
namespace StellaOps.PolicyAuthoritySignals.Contracts;
/// <summary>Reference to a policy: id, contract version and optional hash of its rules.</summary>
public sealed record PolicyContract
{
/// <summary>Identifier of the policy.</summary>
[JsonPropertyName("policyId")]
public string PolicyId { get; init; } = string.Empty;
/// <summary>Contract version; defaults to "0.1-draft" while the shape is in flux.</summary>
[JsonPropertyName("version")]
public string Version { get; init; } = "0.1-draft";
/// <summary>Hash over the policy rules, when computed; algorithm not specified here — TODO confirm.</summary>
[JsonPropertyName("rulesHash")]
public string? RulesHash { get; init; }
}
/// <summary>A tenant identifier together with the scopes granted to it.</summary>
public sealed record TenantScope
{
/// <summary>Identifier of the tenant.</summary>
[JsonPropertyName("tenantId")]
public string TenantId { get; init; } = string.Empty;
/// <summary>Scope strings granted to the tenant; empty list when none.</summary>
[JsonPropertyName("scopes")]
public IReadOnlyList<string> Scopes { get; init; } = Array.Empty<string>();
}
/// <summary>A code symbol referenced by signals, with optional language/package/version context.</summary>
public sealed record SignalSymbol
{
/// <summary>Identifier of the symbol.</summary>
[JsonPropertyName("symbolId")]
public string SymbolId { get; init; } = string.Empty;
/// <summary>Programming language of the symbol, when known.</summary>
[JsonPropertyName("language")]
public string? Language { get; init; }
/// <summary>Package containing the symbol, when known.</summary>
[JsonPropertyName("package")]
public string? Package { get; init; }
/// <summary>Package version, when known.</summary>
[JsonPropertyName("version")]
public string? Version { get; init; }
}

View File

@@ -0,0 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Contracts-only class library: targets net10.0 with nullable reference
     types and implicit usings enabled; no package references. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFramework>net10.0</TargetFramework> <TargetFramework>net10.0</TargetFramework>
<RestorePackagesPath>../../local-nuget</RestorePackagesPath> <RestorePackagesPath>../../local-nugets/packages</RestorePackagesPath>
<DisableImplicitFrameworkReferences>true</DisableImplicitFrameworkReferences> <DisableImplicitFrameworkReferences>true</DisableImplicitFrameworkReferences>
<EnableDefaultItems>false</EnableDefaultItems> <EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup> </PropertyGroup>
@@ -43,4 +43,4 @@
<PackageDownload Include="Google.Apis.Auth" Version="[1.69.0]" /> <PackageDownload Include="Google.Apis.Auth" Version="[1.69.0]" />
<PackageDownload Include="Google.Apis.Core" Version="[1.64.0]" /> <PackageDownload Include="Google.Apis.Core" Version="[1.64.0]" />
</ItemGroup> </ItemGroup>
</Project> <

114
vendor/manifest.json vendored Normal file
View File

@@ -0,0 +1,114 @@
{
"generated_utc": "2025-11-18T21:41:23.225667Z",
"summary": "Pinned binaries (non-NuGet) tracked for integrity; relocate new artefacts here or under offline/feeds.",
"entries": [
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Deno.Tests/StellaOps.Scanner.Analyzers.Lang.Deno.Tests.dll",
"sha256": "347e600c14671db7015aa3d08b449a7e7bbd9dcfb3b1d4e31cd5a44d2af7b4c7",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Deno/StellaOps.Scanner.Analyzers.Lang.Deno.dll",
"sha256": "6fb59d1497c6c222df883405177ee7a03e967570671b4a4e39c1ca41df5ee507",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.DotNet/StellaOps.Scanner.Analyzers.Lang.DotNet.dll",
"sha256": "aceea5db1340463db2038cecb528357532d3d5d0102fc9ce0f13d1f0888f0621",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Go/StellaOps.Scanner.Analyzers.Lang.Go.dll",
"sha256": "87a0308b4e25f29137d2722bf091628d1753a02414e474f6958c01353d78a95f",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Java.Tests/StellaOps.Scanner.Analyzers.Lang.Java.Tests.dll",
"sha256": "64279fba6e3dcd6e34290565f3d324ad306bc9e971b2fa191eeafbd70868411b",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Java/StellaOps.Scanner.Analyzers.Lang.Java.dll",
"sha256": "fb2201b2d1ae60c31d2f2390f37b5a574368952e952f05c41989cbec96746dc5",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Node.Tests/StellaOps.Scanner.Analyzers.Lang.Node.Tests.dll",
"sha256": "95f11346a72b28297c307d71c226b2d7f2dc7b465a85b6ca99e6fc739ff92c73",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Node/StellaOps.Scanner.Analyzers.Lang.Node.dll",
"sha256": "45d59201b3d52fcb022035b00afca0c27f62993d727f5dbfc3ec120e1f3090ba",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Python/StellaOps.Scanner.Analyzers.Lang.Python.dll",
"sha256": "e4ccaed15c551f859dbee367849c8c99ca5554a5c10926988c9fe2afe0af07ea",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Ruby.Tests/StellaOps.Scanner.Analyzers.Lang.Ruby.Tests.dll",
"sha256": "a0b641a18ff55056e16c5f15b3124a7fcfa8f99e2e16166b68df9372a79c37b2",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Ruby/StellaOps.Scanner.Analyzers.Lang.Ruby.dll",
"sha256": "20624ef44aa797339e73e448dbc82e28e9adfac5262ba4b6c9fddb4e1ed89cbc",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Rust.Benchmarks/StellaOps.Scanner.Analyzers.Lang.Rust.Benchmarks.dll",
"sha256": "a0df5ffdbb043354adef3b3b1203e151b64a4f1c34e560d2bd182188e5535538",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Rust/StellaOps.Scanner.Analyzers.Lang.Rust.dll",
"sha256": "af19afd814ede740b547514073640a1ce7cd55d346335761d5393d31b0f64224",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/lang/StellaOps.Scanner.Analyzers.Lang.Tests/StellaOps.Scanner.Analyzers.Lang.Tests.dll",
"sha256": "819e7fa3d30d37d972c630c96828ad121bbef184ca977bc2245f9e9ec9815cc8",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/os/StellaOps.Scanner.Analyzers.OS.Apk/StellaOps.Scanner.Analyzers.OS.Apk.dll",
"sha256": "760b531182a497e76c1fa987d6bd834aa4b369f815542fa6b8e10452dc7048ff",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/os/StellaOps.Scanner.Analyzers.OS.Dpkg/StellaOps.Scanner.Analyzers.OS.Dpkg.dll",
"sha256": "8cc75f09efa8c656106ed96ad5ab08a0c388aa4beb56aadf6b07bf6d76c00085",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/analyzers/os/StellaOps.Scanner.Analyzers.OS.Rpm/StellaOps.Scanner.Analyzers.OS.Rpm.dll",
"sha256": "987593dd273f398f07f38b349eaedd6338c5615e976dad1633323348f7b3e9ac",
"type": "binary",
"owner": "plugins"
},
{
"path": "plugins/scanner/buildx/StellaOps.Scanner.Sbomer.BuildXPlugin/StellaOps.Scanner.Sbomer.BuildXPlugin.dll",
"sha256": "4266013acbf3a0d0a02e2682c7e32335c2c3f9263e71b917bac34dac4f70d476",
"type": "binary",
"owner": "plugins"
}
]
}