feat: Add VEX Status Chip component and integration tests for reachability drift detection
- Introduced `VexStatusChipComponent` to display VEX status with color coding and tooltips. - Implemented integration tests for reachability drift detection, covering various scenarios including drift detection, determinism, and error handling. - Enhanced `ScannerToSignalsReachabilityTests` with a null implementation of `ICallGraphSyncService` for better test isolation. - Updated project references to include the new Reachability Drift library.
This commit is contained in:
305
docs/modules/excititor/schemas/issuer_directory_contract.md
Normal file
305
docs/modules/excititor/schemas/issuer_directory_contract.md
Normal file
@@ -0,0 +1,305 @@
|
||||
# Issuer Directory Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** VEX Lens Guild + Issuer Directory Guild
|
||||
**Sprint:** SPRINT_0129_0001_0001 (unblocks VEXLENS-30-003)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
The Issuer Directory provides a registry of known VEX statement issuers with trust metadata, signing key information, and provenance tracking.
|
||||
|
||||
## 2. Data Model
|
||||
|
||||
### 2.1 Issuer Entity
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Registry entry describing a known VEX statement issuer.
/// </summary>
public sealed record Issuer
{
    /// <summary>Unique issuer identifier (e.g., "vendor:redhat", "cert:cisa").</summary>
    public required string IssuerId { get; init; }

    /// <summary>Issuer category.</summary>
    public required IssuerCategory Category { get; init; }

    /// <summary>Display name.</summary>
    public required string DisplayName { get; init; }

    /// <summary>Trust tier assignment.</summary>
    public required IssuerTrustTier TrustTier { get; init; }

    /// <summary>Official website URL.</summary>
    public string? WebsiteUrl { get; init; }

    /// <summary>Security advisory feed URL.</summary>
    public string? AdvisoryFeedUrl { get; init; }

    /// <summary>
    /// Registered signing keys. Defaults to an empty array: an uninitialized
    /// <c>ImmutableArray</c> (<c>IsDefault == true</c>) throws when accessed.
    /// </summary>
    public ImmutableArray<SigningKeyInfo> SigningKeys { get; init; } = ImmutableArray<SigningKeyInfo>.Empty;

    /// <summary>
    /// Products/ecosystems this issuer is authoritative for. Defaults to an empty
    /// array for the same reason as <see cref="SigningKeys"/>.
    /// </summary>
    public ImmutableArray<string> AuthoritativeFor { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>When this issuer record was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>When this issuer record was last updated.</summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>Whether issuer is active. Defaults to <c>true</c>.</summary>
    public bool IsActive { get; init; } = true;
}
|
||||
```
|
||||
|
||||
### 2.2 Issuer Category
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Classifies the kind of organization behind an issuer.
/// Numeric values are spelled out explicitly.
/// </summary>
public enum IssuerCategory
{
    /// <summary>Software vendor or maintainer.</summary>
    Vendor = 0,

    /// <summary>Linux distribution.</summary>
    Distribution = 1,

    /// <summary>CERT or security response team.</summary>
    Cert = 2,

    /// <summary>Security research organization.</summary>
    SecurityResearch = 3,

    /// <summary>Community project.</summary>
    Community = 4,

    /// <summary>Commercial security vendor.</summary>
    Commercial = 5,
}
|
||||
```
|
||||
|
||||
### 2.3 Signing Key Info
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Metadata describing one signing key registered for an issuer.
/// </summary>
public sealed record SigningKeyInfo
{
    /// <summary>Key fingerprint (SHA-256).</summary>
    public required string Fingerprint { get; init; }

    /// <summary>Key type (pgp, x509, sigstore).</summary>
    public required string KeyType { get; init; }

    /// <summary>Key algorithm (rsa, ecdsa, ed25519); optional.</summary>
    public string? Algorithm { get; init; }

    /// <summary>Key size in bits; optional.</summary>
    public int? KeySize { get; init; }

    /// <summary>Key creation date; optional.</summary>
    public DateTimeOffset? CreatedAt { get; init; }

    /// <summary>Key expiration date; optional.</summary>
    public DateTimeOffset? ExpiresAt { get; init; }

    /// <summary>Whether key is currently valid. Defaults to <c>true</c>.</summary>
    public bool IsValid { get; init; } = true;

    /// <summary>Public key location (URL or inline); optional.</summary>
    public string? PublicKeyUri { get; init; }
}
|
||||
```
|
||||
|
||||
## 3. Pre-Registered Issuers
|
||||
|
||||
### 3.1 Authoritative Tier (Trust Tier 0)
|
||||
|
||||
| Issuer ID | Display Name | Category | Authoritative For |
|
||||
|-----------|--------------|----------|-------------------|
|
||||
| `vendor:redhat` | Red Hat Product Security | Vendor | `pkg:rpm/redhat/*`, `pkg:oci/registry.redhat.io/*` |
|
||||
| `vendor:canonical` | Ubuntu Security Team | Distribution | `pkg:deb/ubuntu/*` |
|
||||
| `vendor:debian` | Debian Security Team | Distribution | `pkg:deb/debian/*` |
|
||||
| `vendor:suse` | SUSE Security Team | Distribution | `pkg:rpm/suse/*`, `pkg:rpm/opensuse/*` |
|
||||
| `vendor:microsoft` | Microsoft Security Response | Vendor | `pkg:nuget/*` (Microsoft packages) |
|
||||
| `vendor:oracle` | Oracle Security | Vendor | `pkg:maven/com.oracle.*/*` |
|
||||
| `vendor:apache` | Apache Security Team | Community | `pkg:maven/org.apache.*/*` |
|
||||
| `vendor:google` | Google Security Team | Vendor | `pkg:golang/google.golang.org/*` |
|
||||
|
||||
### 3.2 Trusted Tier (Trust Tier 1)
|
||||
|
||||
| Issuer ID | Display Name | Category |
|
||||
|-----------|--------------|----------|
|
||||
| `cert:cisa` | CISA | Cert |
|
||||
| `cert:nist` | NIST NVD | Cert |
|
||||
| `cert:github` | GitHub Security Advisories | SecurityResearch |
|
||||
| `cert:snyk` | Snyk Security | Commercial |
|
||||
| `research:oss-fuzz` | Google OSS-Fuzz | SecurityResearch |
|
||||
|
||||
### 3.3 Community Tier (Trust Tier 2)
|
||||
|
||||
| Issuer ID | Display Name | Category |
|
||||
|-----------|--------------|----------|
|
||||
| `community:osv` | OSV (Open Source Vulnerabilities) | Community |
|
||||
| `community:vulndb` | VulnDB | Community |
|
||||
|
||||
## 4. API Endpoints
|
||||
|
||||
### 4.1 List Issuers
|
||||
|
||||
```
|
||||
GET /api/v1/issuers
|
||||
```
|
||||
|
||||
Query Parameters:
|
||||
- `category`: Filter by category
|
||||
- `trust_tier`: Filter by trust tier
|
||||
- `active`: Filter by active status (default: true)
|
||||
- `limit`: Max results (default: 100)
|
||||
- `cursor`: Pagination cursor
|
||||
|
||||
### 4.2 Get Issuer
|
||||
|
||||
```
|
||||
GET /api/v1/issuers/{issuerId}
|
||||
```
|
||||
|
||||
### 4.3 Register Issuer (Admin)
|
||||
|
||||
```
|
||||
POST /api/v1/issuers
|
||||
Authorization: Bearer {admin_token}
|
||||
|
||||
{
|
||||
"issuerId": "vendor:acme",
|
||||
"category": "vendor",
|
||||
"displayName": "ACME Security",
|
||||
"trustTier": "trusted",
|
||||
"websiteUrl": "https://security.acme.example",
|
||||
"advisoryFeedUrl": "https://security.acme.example/feed.json",
|
||||
"authoritativeFor": ["pkg:npm/@acme/*"]
|
||||
}
|
||||
```
|
||||
|
||||
### 4.4 Register Signing Key (Admin)
|
||||
|
||||
```
|
||||
POST /api/v1/issuers/{issuerId}/keys
|
||||
Authorization: Bearer {admin_token}
|
||||
|
||||
{
|
||||
"fingerprint": "sha256:abc123...",
|
||||
"keyType": "pgp",
|
||||
"algorithm": "rsa",
|
||||
"keySize": 4096,
|
||||
"publicKeyUri": "https://security.acme.example/keys/signing.asc"
|
||||
}
|
||||
```
|
||||
|
||||
### 4.5 Lookup by Fingerprint
|
||||
|
||||
```
|
||||
GET /api/v1/issuers/by-fingerprint/{fingerprint}
|
||||
```
|
||||
|
||||
Returns the issuer associated with a signing key fingerprint.
|
||||
|
||||
## 5. Trust Tier Resolution
|
||||
|
||||
### 5.1 Automatic Assignment
|
||||
|
||||
When a VEX statement is received:
|
||||
|
||||
1. **Check signature:** If signed, lookup issuer by key fingerprint
|
||||
2. **Check domain:** Match issuer by advisory feed domain
|
||||
3. **Check authoritativeFor:** Match issuer by product PURL patterns
|
||||
4. **Fallback:** Assign `Unknown` tier if no match
|
||||
|
||||
### 5.2 Override Rules
|
||||
|
||||
Operators can configure trust overrides:
|
||||
|
||||
```yaml
|
||||
# etc/vexlens.yaml
|
||||
issuer_overrides:
|
||||
- issuer_id: "community:custom-feed"
|
||||
trust_tier: "trusted" # Promote community to trusted
|
||||
- issuer_id: "vendor:untrusted-vendor"
|
||||
trust_tier: "community" # Demote vendor to community
|
||||
```
|
||||
|
||||
## 6. Issuer Verification
|
||||
|
||||
### 6.1 PGP Signature Verification
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Verifies VEX document signatures against the signing keys registered for issuers.
/// </summary>
public interface IIssuerVerifier
{
    /// <summary>
    /// Verifies a VEX document signature against registered issuer keys.
    /// </summary>
    /// <param name="documentBytes">Bytes of the VEX document.</param>
    /// <param name="signatureBytes">Bytes of the signature to check.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The verification outcome.</returns>
    Task<IssuerVerificationResult> VerifyAsync(
        byte[] documentBytes,
        byte[] signatureBytes,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome returned by <c>IIssuerVerifier.VerifyAsync</c>.
/// Every property other than <see cref="IsValid"/> is optional.
/// </summary>
public sealed record IssuerVerificationResult
{
    /// <summary>Whether verification succeeded.</summary>
    public bool IsValid { get; init; }

    /// <summary>Identifier of the issuer, when available.</summary>
    public string? IssuerId { get; init; }

    /// <summary>Fingerprint of the key, when available.</summary>
    public string? KeyFingerprint { get; init; }

    /// <summary>Trust tier of the issuer, when available.</summary>
    public IssuerTrustTier? TrustTier { get; init; }

    /// <summary>Verification error text, when available.</summary>
    public string? VerificationError { get; init; }
}
|
||||
```
|
||||
|
||||
### 6.2 Sigstore Verification
|
||||
|
||||
For Sigstore-signed documents:
|
||||
|
||||
1. Verify Rekor inclusion proof
|
||||
2. Extract OIDC identity from certificate
|
||||
3. Match identity to registered issuer
|
||||
4. Return issuer info with trust tier
|
||||
|
||||
## 7. Database Schema
|
||||
|
||||
```sql
|
||||
-- Registry of known VEX issuers (one row per issuer).
CREATE TABLE vex.issuers (
    issuer_id         TEXT PRIMARY KEY,
    category          TEXT NOT NULL,
    display_name      TEXT NOT NULL,
    trust_tier        INT NOT NULL DEFAULT 3,   -- 3 = IssuerTrustTier.Unknown
    website_url       TEXT,
    advisory_feed_url TEXT,
    -- Defaulted columns are NOT NULL so rows always map onto the non-nullable model properties.
    authoritative_for TEXT[] NOT NULL DEFAULT '{}',
    is_active         BOOLEAN NOT NULL DEFAULT TRUE,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Signing keys registered per issuer; a fingerprint belongs to exactly one issuer.
CREATE TABLE vex.issuer_signing_keys (
    id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    issuer_id      TEXT NOT NULL REFERENCES vex.issuers(issuer_id),
    fingerprint    TEXT NOT NULL UNIQUE,        -- UNIQUE already provides the lookup index
    key_type       TEXT NOT NULL,
    algorithm      TEXT,
    key_size       INT,
    public_key_uri TEXT,
    is_valid       BOOLEAN NOT NULL DEFAULT TRUE,
    created_at     TIMESTAMPTZ,
    expires_at     TIMESTAMPTZ,
    registered_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- The UNIQUE constraint on fingerprint creates its own index, so a second index on the
-- same column would be redundant. The foreign key column is not indexed automatically.
CREATE INDEX idx_issuer_signing_keys_issuer_id ON vex.issuer_signing_keys(issuer_id);
CREATE INDEX idx_issuers_trust_tier ON vex.issuers(trust_tier);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release |
|
||||
271
docs/modules/excititor/schemas/vex_normalization_contract.md
Normal file
271
docs/modules/excititor/schemas/vex_normalization_contract.md
Normal file
@@ -0,0 +1,271 @@
|
||||
# VEX Normalization Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** VEX Lens Guild
|
||||
**Sprint:** SPRINT_0129_0001_0001 (unblocks VEXLENS-30-001 through 30-011)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
This contract defines the normalization rules for VEX (Vulnerability Exploitability eXchange) documents from multiple sources into a canonical StellaOps internal representation.
|
||||
|
||||
## 2. Supported Input Formats
|
||||
|
||||
| Format | Version | Parser |
|
||||
|--------|---------|--------|
|
||||
| OpenVEX | 0.2.0+ | `OpenVexParser` |
|
||||
| CycloneDX VEX | 1.5+ | `CycloneDxVexParser` |
|
||||
| CSAF VEX | 2.0 | `CsafVexParser` |
|
||||
|
||||
## 3. Canonical Representation
|
||||
|
||||
### 3.1 NormalizedVexStatement
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Canonical representation of a single VEX statement after normalization.
/// </summary>
public sealed record NormalizedVexStatement
{
    /// <summary>Unique statement identifier (deterministic hash).</summary>
    public required string StatementId { get; init; }

    /// <summary>CVE or vulnerability identifier.</summary>
    public required string VulnerabilityId { get; init; }

    /// <summary>Normalized status (not_affected, affected, fixed, under_investigation).</summary>
    public required VexStatus Status { get; init; }

    /// <summary>Justification code (when status = not_affected); optional.</summary>
    public VexJustification? Justification { get; init; }

    /// <summary>Human-readable impact statement; optional.</summary>
    public string? ImpactStatement { get; init; }

    /// <summary>Action statement for remediation; optional.</summary>
    public string? ActionStatement { get; init; }

    /// <summary>Products affected by this statement.</summary>
    public required ImmutableArray<ProductIdentifier> Products { get; init; }

    /// <summary>Source document metadata.</summary>
    public required VexSourceMetadata Source { get; init; }

    /// <summary>Statement timestamp (UTC, ISO-8601).</summary>
    public required DateTimeOffset Timestamp { get; init; }

    /// <summary>Issuer information.</summary>
    public required IssuerInfo Issuer { get; init; }
}
|
||||
```
|
||||
|
||||
### 3.2 VexStatus Enum
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Normalized VEX status of a product with respect to a vulnerability.
/// </summary>
public enum VexStatus
{
    /// <summary>Product is not affected by the vulnerability.</summary>
    NotAffected = 0,

    /// <summary>Product is affected and vulnerable.</summary>
    Affected = 1,

    /// <summary>Product was affected but is now fixed.</summary>
    Fixed = 2,

    /// <summary>Impact is being investigated.</summary>
    UnderInvestigation = 3,
}
|
||||
```
|
||||
|
||||
### 3.3 VexJustification Enum
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Normalized justification code accompanying a statement.
/// </summary>
public enum VexJustification
{
    /// <summary>Component is not present.</summary>
    ComponentNotPresent = 0,

    /// <summary>Vulnerable code is not present.</summary>
    VulnerableCodeNotPresent = 1,

    /// <summary>Vulnerable code is not in execute path.</summary>
    VulnerableCodeNotInExecutePath = 2,

    /// <summary>Vulnerable code cannot be controlled by adversary.</summary>
    VulnerableCodeCannotBeControlledByAdversary = 3,

    /// <summary>Inline mitigations exist.</summary>
    InlineMitigationsAlreadyExist = 4,
}
|
||||
```
|
||||
|
||||
## 4. Normalization Rules
|
||||
|
||||
### 4.1 Status Mapping
|
||||
|
||||
| Source Format | Source Value | Normalized Status |
|
||||
|---------------|--------------|-------------------|
|
||||
| OpenVEX | `not_affected` | NotAffected |
|
||||
| OpenVEX | `affected` | Affected |
|
||||
| OpenVEX | `fixed` | Fixed |
|
||||
| OpenVEX | `under_investigation` | UnderInvestigation |
|
||||
| CycloneDX | `not_affected` | NotAffected |
|
||||
| CycloneDX | `exploitable` | Affected |
|
||||
| CycloneDX | `resolved` | Fixed |
|
||||
| CycloneDX | `in_triage` | UnderInvestigation |
|
||||
| CSAF | `known_not_affected` | NotAffected |
|
||||
| CSAF | `known_affected` | Affected |
|
||||
| CSAF | `fixed` | Fixed |
|
||||
| CSAF | `under_investigation` | UnderInvestigation |
|
||||
|
||||
### 4.2 Justification Mapping
|
||||
|
||||
| Source Format | Source Value | Normalized Justification |
|
||||
|---------------|--------------|--------------------------|
|
||||
| OpenVEX | `component_not_present` | ComponentNotPresent |
|
||||
| OpenVEX | `vulnerable_code_not_present` | VulnerableCodeNotPresent |
|
||||
| OpenVEX | `vulnerable_code_not_in_execute_path` | VulnerableCodeNotInExecutePath |
|
||||
| OpenVEX | `vulnerable_code_cannot_be_controlled_by_adversary` | VulnerableCodeCannotBeControlledByAdversary |
|
||||
| OpenVEX | `inline_mitigations_already_exist` | InlineMitigationsAlreadyExist |
|
||||
| CycloneDX | Same as OpenVEX (camelCase) | Same mapping |
|
||||
| CSAF | `component_not_present` | ComponentNotPresent |
|
||||
| CSAF | `vulnerable_code_not_present` | VulnerableCodeNotPresent |
|
||||
| CSAF | `vulnerable_code_not_in_execute_path` | VulnerableCodeNotInExecutePath |
|
||||
| CSAF | `vulnerable_code_cannot_be_controlled_by_adversary` | VulnerableCodeCannotBeControlledByAdversary |
|
||||
| CSAF | `inline_mitigations_already_exist` | InlineMitigationsAlreadyExist |
|
||||
|
||||
### 4.3 Product Identifier Normalization
|
||||
|
||||
Products are normalized to PURL (Package URL) format:
|
||||
|
||||
```
|
||||
pkg:{ecosystem}/{namespace}/{name}@{version}?{qualifiers}#{subpath}
|
||||
```
|
||||
|
||||
| Source | Extraction Method |
|
||||
|--------|-------------------|
|
||||
| OpenVEX | Direct from `product.id` if PURL, else construct from `product.identifiers` |
|
||||
| CycloneDX | From `bom-ref` PURL or construct from `component.purl` |
|
||||
| CSAF | From `product_id` → `product_identification_helper.purl` |
|
||||
|
||||
### 4.4 Statement ID Generation
|
||||
|
||||
Statement IDs are deterministic SHA-256 hashes:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Builds the deterministic identifier of a normalized statement.
/// </summary>
/// <param name="vulnerabilityId">CVE or vulnerability identifier.</param>
/// <param name="status">Normalized status; its enum member name is part of the hashed input.</param>
/// <param name="productPurls">Product PURLs; the order supplied by the caller does not matter.</param>
/// <param name="issuerId">Issuer identifier.</param>
/// <param name="timestamp">Statement timestamp; normalized to UTC before hashing.</param>
/// <returns>
/// <c>stmt:</c> followed by the first 32 lowercase hex characters of the SHA-256 digest
/// of the pipe-delimited input.
/// </returns>
public static string GenerateStatementId(
    string vulnerabilityId,
    VexStatus status,
    IEnumerable<string> productPurls,
    string issuerId,
    DateTimeOffset timestamp)
{
    // Ordinal ordering keeps the ID independent of the host's current culture;
    // the default string comparer is culture-sensitive and can order PURLs differently
    // on different machines.
    var sortedPurls = string.Join(",", productPurls.OrderBy(p => p, StringComparer.Ordinal));

    // The same instant expressed with different offsets must hash identically.
    // Timestamps that are already UTC format exactly as before.
    var utcTimestamp = timestamp.ToUniversalTime();

    var input = $"{vulnerabilityId}|{status}|{sortedPurls}|{issuerId}|{utcTimestamp:O}";
    var hash = SHA256.HashData(Encoding.UTF8.GetBytes(input));

    return $"stmt:{Convert.ToHexString(hash).ToLowerInvariant()[..32]}";
}
|
||||
```
|
||||
|
||||
## 5. Issuer Directory Integration
|
||||
|
||||
Normalized statements include issuer information from the Issuer Directory:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Issuer details attached to a normalized statement, sourced from the Issuer Directory.
/// </summary>
public sealed record IssuerInfo
{
    /// <summary>Issuer identifier (e.g., "vendor:redhat", "vendor:canonical").</summary>
    public required string IssuerId { get; init; }

    /// <summary>Display name.</summary>
    public required string DisplayName { get; init; }

    /// <summary>Trust tier (authoritative, trusted, community, unknown).</summary>
    public required IssuerTrustTier TrustTier { get; init; }

    /// <summary>
    /// Issuer's signing key fingerprints (if signed). Defaults to an empty array:
    /// an uninitialized <c>ImmutableArray</c> (<c>IsDefault == true</c>) throws when accessed.
    /// </summary>
    public ImmutableArray<string> SigningKeyFingerprints { get; init; } = ImmutableArray<string>.Empty;
}
|
||||
|
||||
/// <summary>
/// Trust tier assigned to an issuer. A lower numeric value denotes a more trusted tier.
/// </summary>
public enum IssuerTrustTier
{
    /// <summary>Vendor/maintainer of the product.</summary>
    Authoritative = 0,

    /// <summary>Known security research org.</summary>
    Trusted = 1,

    /// <summary>Community contributor.</summary>
    Community = 2,

    /// <summary>Unverified source.</summary>
    Unknown = 3,
}
|
||||
```
|
||||
|
||||
## 6. API Governance
|
||||
|
||||
### 6.1 Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/api/v1/vex/statements` | GET | Query normalized statements |
|
||||
| `/api/v1/vex/statements/{id}` | GET | Get specific statement |
|
||||
| `/api/v1/vex/normalize` | POST | Normalize a VEX document |
|
||||
| `/api/v1/vex/issuers` | GET | List known issuers |
|
||||
| `/api/v1/vex/issuers/{id}` | GET | Get issuer details |
|
||||
|
||||
### 6.2 Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `vulnerability` | string | Filter by CVE/vulnerability ID |
|
||||
| `product` | string | Filter by PURL (URL-encoded) |
|
||||
| `status` | enum | Filter by VEX status |
|
||||
| `issuer` | string | Filter by issuer ID |
|
||||
| `since` | datetime | Statements after timestamp |
|
||||
| `limit` | int | Max results (default: 100, max: 1000) |
|
||||
| `cursor` | string | Pagination cursor |
|
||||
|
||||
### 6.3 Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"statements": [
|
||||
{
|
||||
"statementId": "stmt:a1b2c3d4e5f6...",
|
||||
"vulnerabilityId": "CVE-2024-1234",
|
||||
"status": "not_affected",
|
||||
"justification": "vulnerable_code_not_in_execute_path",
|
||||
"products": ["pkg:npm/lodash@4.17.21"],
|
||||
"issuer": {
|
||||
"issuerId": "vendor:lodash",
|
||||
"displayName": "Lodash Maintainers",
|
||||
"trustTier": "authoritative"
|
||||
},
|
||||
"timestamp": "2024-12-19T10:30:00Z"
|
||||
}
|
||||
],
|
||||
"cursor": "next_page_token",
|
||||
"total": 42
|
||||
}
|
||||
```
|
||||
|
||||
## 7. Precedence Rules
|
||||
|
||||
When multiple statements exist for the same vulnerability+product:
|
||||
|
||||
1. **Timestamp:** Later statements supersede earlier ones
|
||||
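2. **Trust Tier:** More trusted tiers take precedence (Authoritative > Trusted > Community > Unknown). Note that a more trusted tier has a *lower* `IssuerTrustTier` numeric value (Authoritative = 0, Unknown = 3).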
|
||||
3. **Specificity:** More specific product matches win (exact version > version range > package)
|
||||
|
||||
## 8. Validation
|
||||
|
||||
All normalized statements must pass:
|
||||
|
||||
1. `vulnerabilityId` matches CVE/GHSA/vendor pattern
|
||||
2. `status` is a valid enum value
|
||||
3. `products` contains at least one valid PURL
|
||||
4. `timestamp` is valid ISO-8601 UTC
|
||||
5. `issuer.issuerId` exists in Issuer Directory or is marked Unknown
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release |
|
||||
@@ -0,0 +1,529 @@
|
||||
# Staleness & Time Anchor Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** AirGap Guild + Findings Ledger Guild
|
||||
**Sprint:** SPRINT_0510_0001_0001 (unblocks LEDGER-AIRGAP-56-002, LEDGER-AIRGAP-57-001)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
This contract defines how air-gapped StellaOps installations maintain trusted time references, calculate data staleness, and enforce freshness policies. It enables deterministic vulnerability triage even when disconnected from external time sources.
|
||||
|
||||
## 2. Schema References
|
||||
|
||||
| Schema | Location |
|
||||
|--------|----------|
|
||||
| Time Anchor | `docs/schemas/time-anchor.schema.json` |
|
||||
| Ledger Staleness | `docs/schemas/ledger-airgap-staleness.schema.json` |
|
||||
| Sealed Mode | `docs/schemas/sealed-mode.schema.json` |
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Air-Gapped Environment │
|
||||
├─────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │
|
||||
│ │ Mirror │───▶│ AirGap │───▶│ AirGap Time │ │
|
||||
│ │ Bundle │ │ Controller │ │ Service │ │
|
||||
│ │ (time anchor)│ └──────────────┘ └──────────────────────┘ │
|
||||
│ └──────────────┘ │ │ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────────────────────────────────┐ │
|
||||
│ │ Staleness Calculator │ │
|
||||
│ │ (drift, budgets, validation) │ │
|
||||
│ └──────────────────────────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────────────┴─────────────────────┴───────────┐ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────────────┐ ┌──────────────────────┐ │
|
||||
│ │ Findings Ledger │ │ Policy Engine │ │
|
||||
│ │ (staleness tracking) │ │ (evaluation gating) │ │
|
||||
│ └──────────────────────┘ └──────────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## 4. Core Types
|
||||
|
||||
### 4.1 TimeAnchor
|
||||
|
||||
A cryptographically signed time reference:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// A cryptographically signed time reference.
/// </summary>
public sealed record TimeAnchor
{
    /// <summary>RFC 3339 timestamp of the anchor.</summary>
    public required DateTimeOffset AnchorTime { get; init; }

    /// <summary>Source of the time anchor.</summary>
    public required TimeSource Source { get; init; }

    /// <summary>Format identifier (roughtime-v1, rfc3161-v1).</summary>
    public required string Format { get; init; }

    /// <summary>SHA-256 digest of the time token.</summary>
    public required string TokenDigest { get; init; }

    /// <summary>Signing key fingerprint; optional.</summary>
    public string? SignatureFingerprint { get; init; }

    /// <summary>Verification status; optional.</summary>
    public VerificationStatus? Verification { get; init; }

    /// <summary>Monotonic counter for replay protection; optional.</summary>
    public long? MonotonicCounter { get; init; }
}
|
||||
|
||||
/// <summary>
/// Identifies where a <c>TimeAnchor</c> came from.
/// Numeric values are spelled out explicitly.
/// </summary>
public enum TimeSource
{
    Roughtime = 0,

    Rfc3161 = 1,

    HardwareClock = 2,

    AttestationTsa = 3,

    Manual = 4,

    Unknown = 5,
}
|
||||
|
||||
/// <summary>
/// Verification status recorded on a <c>TimeAnchor</c>.
/// </summary>
public sealed record VerificationStatus
{
    /// <summary>Verification state.</summary>
    public required VerificationState Status { get; init; }

    /// <summary>Reason text; optional.</summary>
    public string? Reason { get; init; }

    /// <summary>Verification timestamp; optional.</summary>
    public DateTimeOffset? VerifiedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// State carried by <c>VerificationStatus.Status</c>.
/// <see cref="Unknown"/> is the zero (default) value.
/// </summary>
public enum VerificationState
{
    Unknown = 0,

    Passed = 1,

    Failed = 2,
}
|
||||
```
|
||||
|
||||
### 4.2 StalenessBudget
|
||||
|
||||
Configuration for acceptable data freshness:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Configuration for acceptable data freshness within one domain.
/// </summary>
public sealed record StalenessBudget
{
    /// <summary>Budget identifier.</summary>
    public required string BudgetId { get; init; }

    /// <summary>Domain this budget applies to.</summary>
    public required string DomainId { get; init; }

    /// <summary>Maximum staleness (as a duration) before data is stale.</summary>
    public required TimeSpan FreshnessThreshold { get; init; }

    /// <summary>Warning threshold (percentage of freshness threshold). Defaults to 75.</summary>
    public decimal WarningThresholdPercent { get; init; } = 75m;

    /// <summary>Critical threshold (percentage of freshness threshold). Defaults to 90.</summary>
    public decimal CriticalThresholdPercent { get; init; } = 90m;

    /// <summary>Grace period after threshold before hard enforcement. Defaults to one day.</summary>
    public TimeSpan GracePeriod { get; init; } = TimeSpan.FromDays(1);

    /// <summary>Enforcement mode. Defaults to <c>Strict</c>.</summary>
    public EnforcementMode EnforcementMode { get; init; } = EnforcementMode.Strict;
}
|
||||
|
||||
/// <summary>
/// How a staleness budget is enforced.
/// </summary>
public enum EnforcementMode
{
    /// <summary>Block operations when stale.</summary>
    Strict = 0,

    /// <summary>Allow but log warnings.</summary>
    Warn = 1,

    /// <summary>No enforcement.</summary>
    Disabled = 2,
}
|
||||
```
|
||||
|
||||
### 4.3 StalenessEvaluation
|
||||
|
||||
Result of staleness calculation:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Result of a staleness calculation for one domain.
/// </summary>
public sealed record StalenessEvaluation
{
    /// <summary>Domain evaluated.</summary>
    public required string DomainId { get; init; }

    /// <summary>Current staleness duration.</summary>
    public required TimeSpan CurrentStaleness { get; init; }

    /// <summary>Configured threshold.</summary>
    public required TimeSpan Threshold { get; init; }

    /// <summary>Staleness as percentage of threshold.</summary>
    public required decimal PercentOfThreshold { get; init; }

    /// <summary>Overall status.</summary>
    public required StalenessStatus Status { get; init; }

    /// <summary>When data will become stale; optional.</summary>
    public DateTimeOffset? ProjectedStaleAt { get; init; }

    /// <summary>Time anchor used for calculation.</summary>
    public required TimeAnchor TimeAnchor { get; init; }

    /// <summary>Last bundle import timestamp.</summary>
    public required DateTimeOffset LastImportAt { get; init; }

    /// <summary>Source timestamp of last bundle.</summary>
    public required DateTimeOffset LastSourceTimestamp { get; init; }
}
|
||||
|
||||
/// <summary>
/// Overall staleness classification, ordered from freshest to most stale.
/// </summary>
public enum StalenessStatus
{
    /// <summary>Below the warning threshold.</summary>
    Fresh = 0,

    /// <summary>At or above the warning threshold, below the critical threshold.</summary>
    Warning = 1,

    /// <summary>At or above the critical threshold, below the freshness threshold.</summary>
    Critical = 2,

    /// <summary>At or above the freshness threshold, below threshold plus grace.</summary>
    Stale = 3,

    /// <summary>At or above threshold plus grace.</summary>
    Breached = 4,
}
|
||||
```
|
||||
|
||||
### 4.4 BundleProvenance
|
||||
|
||||
Provenance record for imported bundles:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Provenance record for an imported bundle.
/// </summary>
public sealed record BundleProvenance
{
    /// <summary>Unique bundle identifier.</summary>
    public required Guid BundleId { get; init; }

    /// <summary>Bundle domain (vex-advisories, vulnerability-feeds, etc.).</summary>
    public required string DomainId { get; init; }

    /// <summary>When bundle was imported.</summary>
    public required DateTimeOffset ImportedAt { get; init; }

    /// <summary>Original generation timestamp from source.</summary>
    public required DateTimeOffset SourceTimestamp { get; init; }

    /// <summary>Source environment identifier; optional.</summary>
    public string? SourceEnvironment { get; init; }

    /// <summary>SHA-256 digest of bundle contents.</summary>
    public required string BundleDigest { get; init; }

    /// <summary>SHA-256 digest of bundle manifest; optional.</summary>
    public string? ManifestDigest { get; init; }

    /// <summary>Staleness at import time.</summary>
    public required TimeSpan StalenessAtImport { get; init; }

    /// <summary>Time anchor used for staleness calculation.</summary>
    public required TimeAnchor TimeAnchor { get; init; }

    /// <summary>DSSE attestation covering this bundle; optional.</summary>
    public BundleAttestation? Attestation { get; init; }

    /// <summary>
    /// Exports included in this bundle. Defaults to an empty array: an uninitialized
    /// <c>ImmutableArray</c> (<c>IsDefault == true</c>) throws when accessed.
    /// </summary>
    public ImmutableArray<ExportRecord> Exports { get; init; } = ImmutableArray<ExportRecord>.Empty;
}
|
||||
```
|
||||
|
||||
## 5. Staleness Domains
|
||||
|
||||
| Domain ID | Description | Default Threshold | Default Grace |
|
||||
|-----------|-------------|-------------------|---------------|
|
||||
| `vulnerability-feeds` | Advisory and CVE data | 7 days | 1 day |
|
||||
| `vex-advisories` | VEX statements | 7 days | 1 day |
|
||||
| `scanner-signatures` | Scanner detection rules | 14 days | 3 days |
|
||||
| `policy-packs` | Policy bundles | 30 days | 7 days |
|
||||
| `trust-roots` | Certificate/key roots | 90 days | 14 days |
|
||||
| `runtime-evidence` | Runtime observation data | 1 day | 4 hours |
|
||||
|
||||
## 6. Time Anchor Verification
|
||||
|
||||
### 6.1 Roughtime Verification
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Verifier for Roughtime-based time anchors.
/// </summary>
public interface IRoughtimeVerifier
{
    /// <summary>
    /// Verifies a Roughtime response against trusted servers.
    /// </summary>
    /// <param name="roughtimeResponse">Bytes of the Roughtime response.</param>
    /// <param name="trustedRoots">Trusted roots to verify against.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The validation result.</returns>
    Task<TimeAnchorValidationResult> VerifyAsync(
        byte[] roughtimeResponse,
        RoughtimeRoot[] trustedRoots,
        CancellationToken cancellationToken = default);
}
|
||||
```
|
||||
|
||||
Roughtime provides:
|
||||
- Sub-second accuracy with 1-2 second uncertainty
|
||||
- Ed25519 signatures
|
||||
- Chain of trust via server public keys
|
||||
- Radius-based uncertainty bounds
|
||||
|
||||
### 6.2 RFC 3161 Verification
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Verifier for RFC 3161-based time anchors.
/// </summary>
public interface IRfc3161Verifier
{
    /// <summary>
    /// Verifies an RFC 3161 timestamp token.
    /// </summary>
    /// <param name="timestampToken">Bytes of the timestamp token.</param>
    /// <param name="trustedRoots">Trusted roots to verify against.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The validation result.</returns>
    Task<TimeAnchorValidationResult> VerifyAsync(
        byte[] timestampToken,
        Rfc3161Root[] trustedRoots,
        CancellationToken cancellationToken = default);
}
|
||||
```
|
||||
|
||||
RFC 3161 provides:
|
||||
- X.509 certificate-based trust
|
||||
- ASN.1/DER encoded tokens
|
||||
- Hash algorithm binding
|
||||
- Nonce for uniqueness
|
||||
|
||||
### 6.3 Validation Result
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Result returned by the time anchor verifiers.
/// </summary>
public sealed record TimeAnchorValidationResult
{
    /// <summary>Whether validation succeeded.</summary>
    public required bool IsValid { get; init; }

    /// <summary>The time anchor; must be set explicitly and may be <c>null</c>.</summary>
    public required TimeAnchor? Anchor { get; init; }

    /// <summary>Validation error; optional.</summary>
    public TimeAnchorError? Error { get; init; }

    /// <summary>Uncertainty of the time value; optional.</summary>
    public TimeSpan? Uncertainty { get; init; }
}
|
||||
|
||||
public enum TimeAnchorError
|
||||
{
|
||||
None = 0,
|
||||
SignatureInvalid = 1,
|
||||
RootNotTrusted = 2,
|
||||
TokenExpired = 3,
|
||||
TokenMalformed = 4,
|
||||
CounterReplay = 5,
|
||||
UncertaintyTooHigh = 6
|
||||
}
|
||||
```
|
||||
|
||||
## 7. API Endpoints
|
||||
|
||||
### 7.1 AirGap Time Service
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/v1/time/status` | GET | Current anchor metadata and drift |
|
||||
| `GET /api/v1/time/anchor` | GET | Active time anchor |
|
||||
| `POST /api/v1/time/anchor` | POST | Import new time anchor |
|
||||
| `GET /api/v1/time/metrics` | GET | Prometheus metrics |
|
||||
| `GET /api/v1/time/health` | GET | Health check |
|
||||
|
||||
### 7.2 Staleness Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/v1/staleness/domains` | GET | List all domain staleness |
|
||||
| `GET /api/v1/staleness/domains/{domainId}` | GET | Get domain staleness |
|
||||
| `POST /api/v1/staleness/validate` | POST | Validate staleness for context |
|
||||
| `GET /api/v1/staleness/config` | GET | Get staleness configuration |
|
||||
| `PUT /api/v1/staleness/config` | PUT | Update staleness configuration |
|
||||
|
||||
### 7.3 Response Formats
|
||||
|
||||
```json
|
||||
{
|
||||
"domainId": "vex-advisories",
|
||||
"currentStaleness": "PT172800S",
|
||||
"threshold": "PT604800S",
|
||||
"percentOfThreshold": 28.57,
|
||||
"status": "fresh",
|
||||
  "projectedStaleAt": "2025-12-24T10:00:00Z",
|
||||
"timeAnchor": {
|
||||
"anchorTime": "2025-12-19T10:00:00Z",
|
||||
"source": "roughtime",
|
||||
"format": "roughtime-v1",
|
||||
"tokenDigest": "sha256:abc123...",
|
||||
"verification": {
|
||||
"status": "passed",
|
||||
"verifiedAt": "2025-12-19T10:00:01Z"
|
||||
}
|
||||
},
|
||||
"lastImportAt": "2025-12-17T10:00:00Z",
|
||||
"lastSourceTimestamp": "2025-12-17T08:00:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
## 8. Integration Points
|
||||
|
||||
### 8.1 Findings Ledger Integration
|
||||
|
||||
The Ledger tracks staleness per projection:
|
||||
|
||||
```csharp
|
||||
public interface IStalenessValidationService
|
||||
{
|
||||
/// <summary>
|
||||
/// Validates that data is fresh enough for the given context.
|
||||
/// </summary>
|
||||
Task<StalenessValidationResult> ValidateAsync(
|
||||
string tenantId,
|
||||
string domainId,
|
||||
StalenessContext context,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Updates staleness tracking after bundle import.
|
||||
/// </summary>
|
||||
Task UpdateStalenessAsync(
|
||||
string tenantId,
|
||||
BundleProvenance provenance,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
public enum StalenessContext
|
||||
{
|
||||
Export = 0, // Generating exports
|
||||
Query = 1, // Querying data
|
||||
PolicyEval = 2, // Policy evaluation
|
||||
Attestation = 3 // Creating attestations
|
||||
}
|
||||
```
|
||||
|
||||
### 8.2 Policy Engine Integration
|
||||
|
||||
Policy Engine gates evaluations based on staleness:
|
||||
|
||||
```csharp
|
||||
public interface ISealedModeService
|
||||
{
|
||||
/// <summary>
|
||||
/// Checks if sealed mode should block the operation.
|
||||
/// </summary>
|
||||
Task<SealedModeDecision> CheckAsync(
|
||||
string tenantId,
|
||||
SealedModeContext context,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
public sealed record SealedModeDecision
|
||||
{
|
||||
public required bool IsBlocked { get; init; }
|
||||
public SealedModeReason? Reason { get; init; }
|
||||
public ImmutableArray<StalenessEvaluation> StaleDomains { get; init; }
|
||||
}
|
||||
|
||||
public enum SealedModeReason
|
||||
{
|
||||
None = 0,
|
||||
DataStale = 1,
|
||||
TimeAnchorMissing = 2,
|
||||
TimeAnchorExpired = 3,
|
||||
SignatureInvalid = 4
|
||||
}
|
||||
```
|
||||
|
||||
## 9. Telemetry
|
||||
|
||||
### 9.1 Metrics
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
|--------|------|--------|-------------|
|
||||
| `airgap_anchor_age_seconds` | gauge | - | Age of current time anchor |
|
||||
| `airgap_anchor_drift_seconds` | gauge | - | Drift from anchor time |
|
||||
| `airgap_anchor_expiry_seconds` | gauge | - | Seconds until anchor expires |
|
||||
| `airgap_staleness_seconds` | gauge | `domain` | Current staleness per domain |
|
||||
| `airgap_staleness_threshold_seconds` | gauge | `domain` | Threshold per domain |
|
||||
| `airgap_staleness_percent` | gauge | `domain` | Staleness as % of threshold |
|
||||
| `airgap_staleness_status` | gauge | `domain`, `status` | Current status (0=fresh, 3=stale) |
|
||||
| `airgap_bundle_imports_total` | counter | `domain`, `result` | Bundle imports |
|
||||
| `airgap_validation_total` | counter | `domain`, `context`, `result` | Staleness validations |
|
||||
|
||||
### 9.2 Alerts
|
||||
|
||||
```yaml
|
||||
# Recommended alerting rules
|
||||
groups:
|
||||
- name: airgap-staleness
|
||||
rules:
|
||||
- alert: AirGapDataApproachingStale
|
||||
expr: airgap_staleness_percent > 75
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.domain }} data approaching staleness"
|
||||
|
||||
- alert: AirGapDataStale
|
||||
expr: airgap_staleness_percent >= 100
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.domain }} data is stale"
|
||||
|
||||
- alert: AirGapTimeAnchorMissing
|
||||
expr: airgap_anchor_age_seconds > 86400
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Time anchor is older than 24 hours"
|
||||
```
|
||||
|
||||
## 10. Configuration
|
||||
|
||||
```yaml
|
||||
# etc/airgap.yaml
|
||||
AirGap:
|
||||
Time:
|
||||
Enabled: true
|
||||
TrustRootsPath: "/etc/stellaops/trust-roots.json"
|
||||
MaxAnchorAgeHours: 168 # 7 days
|
||||
MaxUncertaintyMs: 5000 # 5 seconds
|
||||
|
||||
Staleness:
|
||||
DefaultThresholdDays: 7
|
||||
DefaultGracePeriodDays: 1
|
||||
EnforcementMode: "Strict" # Strict, Warn, Disabled
|
||||
|
||||
Domains:
|
||||
vulnerability-feeds:
|
||||
ThresholdDays: 7
|
||||
GracePeriodDays: 1
|
||||
vex-advisories:
|
||||
ThresholdDays: 7
|
||||
GracePeriodDays: 1
|
||||
runtime-evidence:
|
||||
ThresholdDays: 1
|
||||
GracePeriodHours: 4
|
||||
|
||||
Notifications:
|
||||
- PercentOfThreshold: 75
|
||||
Severity: warning
|
||||
Channels: [slack, metric]
|
||||
- PercentOfThreshold: 90
|
||||
Severity: critical
|
||||
Channels: [email, slack, metric]
|
||||
```
|
||||
|
||||
## 11. Error Codes
|
||||
|
||||
| Code | Description | Resolution |
|
||||
|------|-------------|------------|
|
||||
| `ERR_AIRGAP_STALE` | Data exceeds staleness threshold | Import fresh bundle |
|
||||
| `ERR_AIRGAP_NO_BUNDLE` | No bundle imported for domain | Import initial bundle |
|
||||
| `ERR_AIRGAP_TIME_ANCHOR_MISSING` | No time anchor available | Import time anchor with bundle |
|
||||
| `ERR_AIRGAP_TIME_DRIFT` | Excessive drift detected | Re-verify time anchor |
|
||||
| `ERR_AIRGAP_ATTESTATION_INVALID` | Bundle attestation invalid | Verify bundle source |
|
||||
| `ERR_AIRGAP_SIGNATURE_INVALID` | Time token signature invalid | Check trust roots |
|
||||
| `ERR_AIRGAP_COUNTER_REPLAY` | Monotonic counter replay | Import newer anchor |
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release |
|
||||
472
docs/modules/policy/contracts/reachability-input-contract.md
Normal file
472
docs/modules/policy/contracts/reachability-input-contract.md
Normal file
@@ -0,0 +1,472 @@
|
||||
# Reachability Input Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** Policy Guild + Signals Guild
|
||||
**Sprint:** SPRINT_0126_0001_0001 (unblocks POLICY-ENGINE-80-001 through 80-004)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
This contract defines the integration between the Signals service (reachability analysis) and the Policy Engine. It specifies how reachability and exploitability facts flow into policy evaluation, enabling risk-aware decisions based on static analysis, runtime observations, and exploit intelligence.
|
||||
|
||||
## 2. Schema Reference
|
||||
|
||||
The canonical JSON schema is at:
|
||||
```
|
||||
docs/schemas/reachability-input.schema.json
|
||||
```
|
||||
|
||||
## 3. Data Flow
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌──────────────┐ ┌───────────────┐ ┌──────────────┐
|
||||
│ Scanner │────▶│ Signals │────▶│ Reachability │────▶│ Policy │
|
||||
│ (callgraph) │ │ Service │ │ Facts Store │ │ Engine │
|
||||
└─────────────┘ └──────────────┘ └───────────────┘ └──────────────┘
|
||||
│ ▲
|
||||
│ │
|
||||
┌──────▼──────┐ │
|
||||
│ Runtime │──────────────┘
|
||||
│ Agent │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## 4. Core Types
|
||||
|
||||
### 4.1 ReachabilityInput
|
||||
|
||||
The input payload submitted to Policy Engine for evaluation:
|
||||
|
||||
```csharp
|
||||
public sealed record ReachabilityInput
|
||||
{
|
||||
/// <summary>Subject being evaluated (component + vulnerability).</summary>
|
||||
public required Subject Subject { get; init; }
|
||||
|
||||
/// <summary>Static reachability analysis results.</summary>
|
||||
public required ImmutableArray<ReachabilityFact> ReachabilityFacts { get; init; }
|
||||
|
||||
/// <summary>Exploitability assessments from KEV, EPSS, vendor advisories.</summary>
|
||||
public ImmutableArray<ExploitabilityFact> ExploitabilityFacts { get; init; }
|
||||
|
||||
/// <summary>References to stored callgraphs.</summary>
|
||||
public ImmutableArray<CallgraphRef> CallgraphRefs { get; init; }
|
||||
|
||||
/// <summary>Runtime observation facts.</summary>
|
||||
public ImmutableArray<RuntimeFact> RuntimeFacts { get; init; }
|
||||
|
||||
/// <summary>Scanner entropy/trust score for confidence weighting.</summary>
|
||||
public EntropyScore? EntropyScore { get; init; }
|
||||
|
||||
/// <summary>Input timestamp (UTC).</summary>
|
||||
public required DateTimeOffset Timestamp { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
### 4.2 Subject
|
||||
|
||||
```csharp
|
||||
public sealed record Subject
|
||||
{
|
||||
/// <summary>Package URL of the component.</summary>
|
||||
public required string Purl { get; init; }
|
||||
|
||||
/// <summary>CVE identifier (e.g., CVE-2024-1234).</summary>
|
||||
public string? CveId { get; init; }
|
||||
|
||||
/// <summary>GitHub Security Advisory ID.</summary>
|
||||
public string? GhsaId { get; init; }
|
||||
|
||||
/// <summary>Internal vulnerability identifier.</summary>
|
||||
public string? VulnerabilityId { get; init; }
|
||||
|
||||
/// <summary>Vulnerable symbols/functions in the component.</summary>
|
||||
public ImmutableArray<string> AffectedSymbols { get; init; }
|
||||
|
||||
/// <summary>Affected version range (e.g., "<1.2.3").</summary>
|
||||
public string? VersionRange { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
### 4.3 ReachabilityFact
|
||||
|
||||
```csharp
|
||||
public sealed record ReachabilityFact
|
||||
{
|
||||
/// <summary>Reachability state determination.</summary>
|
||||
public required ReachabilityState State { get; init; }
|
||||
|
||||
/// <summary>Confidence score (0.0-1.0).</summary>
|
||||
public required decimal Confidence { get; init; }
|
||||
|
||||
/// <summary>Source of determination.</summary>
|
||||
public required ReachabilitySource Source { get; init; }
|
||||
|
||||
/// <summary>Analyzer that produced this fact.</summary>
|
||||
public string? Analyzer { get; init; }
|
||||
|
||||
/// <summary>Analyzer version.</summary>
|
||||
public string? AnalyzerVersion { get; init; }
|
||||
|
||||
/// <summary>Call path from entry point to vulnerable symbol.</summary>
|
||||
public CallPath? CallPath { get; init; }
|
||||
|
||||
/// <summary>Entry points that can reach vulnerable code.</summary>
|
||||
public ImmutableArray<EntryPoint> EntryPoints { get; init; }
|
||||
|
||||
/// <summary>Supporting evidence.</summary>
|
||||
public ReachabilityEvidence? Evidence { get; init; }
|
||||
|
||||
/// <summary>When this fact was evaluated.</summary>
|
||||
public DateTimeOffset? EvaluatedAt { get; init; }
|
||||
}
|
||||
|
||||
public enum ReachabilityState
|
||||
{
|
||||
Reachable = 0,
|
||||
Unreachable = 1,
|
||||
PotentiallyReachable = 2,
|
||||
Unknown = 3
|
||||
}
|
||||
|
||||
public enum ReachabilitySource
|
||||
{
|
||||
StaticAnalysis = 0,
|
||||
DynamicAnalysis = 1,
|
||||
SbomInference = 2,
|
||||
Manual = 3,
|
||||
External = 4
|
||||
}
|
||||
```
|
||||
|
||||
### 4.4 ExploitabilityFact
|
||||
|
||||
```csharp
|
||||
public sealed record ExploitabilityFact
|
||||
{
|
||||
/// <summary>Exploitability state.</summary>
|
||||
public required ExploitabilityState State { get; init; }
|
||||
|
||||
/// <summary>Confidence score (0.0-1.0).</summary>
|
||||
public required decimal Confidence { get; init; }
|
||||
|
||||
/// <summary>Source of determination.</summary>
|
||||
public required ExploitabilitySource Source { get; init; }
|
||||
|
||||
/// <summary>EPSS probability score (0.0-1.0).</summary>
|
||||
public decimal? EpssScore { get; init; }
|
||||
|
||||
/// <summary>EPSS percentile (0-100).</summary>
|
||||
public decimal? EpssPercentile { get; init; }
|
||||
|
||||
/// <summary>Listed in CISA Known Exploited Vulnerabilities.</summary>
|
||||
public bool? KevListed { get; init; }
|
||||
|
||||
/// <summary>KEV remediation due date.</summary>
|
||||
public DateOnly? KevDueDate { get; init; }
|
||||
|
||||
/// <summary>Exploit maturity level (per CVSS).</summary>
|
||||
public ExploitMaturity? ExploitMaturity { get; init; }
|
||||
|
||||
/// <summary>References to known exploits.</summary>
|
||||
public ImmutableArray<Uri> ExploitRefs { get; init; }
|
||||
|
||||
/// <summary>Conditions required for exploitation.</summary>
|
||||
public ImmutableArray<ExploitCondition> Conditions { get; init; }
|
||||
|
||||
/// <summary>When this fact was evaluated.</summary>
|
||||
public DateTimeOffset? EvaluatedAt { get; init; }
|
||||
}
|
||||
|
||||
public enum ExploitabilityState
|
||||
{
|
||||
Exploitable = 0,
|
||||
NotExploitable = 1,
|
||||
ConditionallyExploitable = 2,
|
||||
Unknown = 3
|
||||
}
|
||||
|
||||
public enum ExploitabilitySource
|
||||
{
|
||||
Kev = 0,
|
||||
Epss = 1,
|
||||
VendorAdvisory = 2,
|
||||
InternalAnalysis = 3,
|
||||
ExploitDb = 4
|
||||
}
|
||||
|
||||
public enum ExploitMaturity
|
||||
{
|
||||
NotDefined = 0,
|
||||
Unproven = 1,
|
||||
Poc = 2,
|
||||
Functional = 3,
|
||||
High = 4
|
||||
}
|
||||
```
|
||||
|
||||
### 4.5 RuntimeFact
|
||||
|
||||
```csharp
|
||||
public sealed record RuntimeFact
|
||||
{
|
||||
/// <summary>Type of runtime observation.</summary>
|
||||
public required RuntimeFactType Type { get; init; }
|
||||
|
||||
/// <summary>Observed symbol/function.</summary>
|
||||
public string? Symbol { get; init; }
|
||||
|
||||
/// <summary>Observed module.</summary>
|
||||
public string? Module { get; init; }
|
||||
|
||||
/// <summary>Number of times called.</summary>
|
||||
public int? CallCount { get; init; }
|
||||
|
||||
/// <summary>Last invocation time.</summary>
|
||||
public DateTimeOffset? LastCalled { get; init; }
|
||||
|
||||
/// <summary>When observation was recorded.</summary>
|
||||
public required DateTimeOffset ObservedAt { get; init; }
|
||||
|
||||
/// <summary>Observation window duration (e.g., "7d").</summary>
|
||||
public string? ObservationWindow { get; init; }
|
||||
|
||||
/// <summary>Environment where observed.</summary>
|
||||
public RuntimeEnvironment? Environment { get; init; }
|
||||
}
|
||||
|
||||
public enum RuntimeFactType
|
||||
{
|
||||
FunctionCalled = 0,
|
||||
FunctionNotCalled = 1,
|
||||
PathExecuted = 2,
|
||||
PathNotExecuted = 3,
|
||||
ModuleLoaded = 4,
|
||||
ModuleNotLoaded = 5
|
||||
}
|
||||
|
||||
public enum RuntimeEnvironment
|
||||
{
|
||||
Production = 0,
|
||||
Staging = 1,
|
||||
Development = 2,
|
||||
Test = 3
|
||||
}
|
||||
```
|
||||
|
||||
## 5. Policy Engine Integration
|
||||
|
||||
### 5.1 ReachabilityFactsJoiningService
|
||||
|
||||
The `ReachabilityFactsJoiningService` provides efficient batch lookups with caching:
|
||||
|
||||
```csharp
|
||||
public interface IReachabilityFactsJoiningService
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets reachability facts for a batch of component-advisory pairs.
|
||||
/// Uses cache-first strategy with store fallback.
|
||||
/// </summary>
|
||||
Task<ReachabilityFactsBatch> GetFactsBatchAsync(
|
||||
string tenantId,
|
||||
IReadOnlyList<ReachabilityFactsRequest> items,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Enriches signal context with reachability facts.
|
||||
/// </summary>
|
||||
Task<bool> EnrichSignalsAsync(
|
||||
string tenantId,
|
||||
string componentPurl,
|
||||
string advisoryId,
|
||||
IDictionary<string, object?> signals,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
```
|
||||
|
||||
### 5.2 SPL Predicates
|
||||
|
||||
Reachability is exposed in SPL (StellaOps Policy Language) via the `reachability` scope:
|
||||
|
||||
```yaml
|
||||
# Example SPL rule using reachability predicates
|
||||
rules:
|
||||
- name: "Suppress unreachable critical CVEs"
|
||||
when:
|
||||
all:
|
||||
- severity >= critical
|
||||
- reachability.state == "unreachable"
|
||||
- reachability.confidence >= 0.9
|
||||
then:
|
||||
effect: suppress
|
||||
justification: "Unreachable code path with high confidence"
|
||||
|
||||
- name: "Escalate reachable with exploit"
|
||||
when:
|
||||
all:
|
||||
- reachability.state == "reachable"
|
||||
- exploitability.kev_listed == true
|
||||
then:
|
||||
effect: escalate
|
||||
priority: critical
|
||||
```
|
||||
|
||||
Available predicates:
|
||||
| Predicate | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `reachability.state` | string | "reachable", "unreachable", "potentially_reachable", "unknown" |
|
||||
| `reachability.confidence` | decimal | Confidence score 0.0-1.0 |
|
||||
| `reachability.score` | decimal | Computed risk score |
|
||||
| `reachability.has_runtime_evidence` | bool | Whether runtime facts support determination |
|
||||
| `reachability.is_high_confidence` | bool | Confidence >= 0.8 |
|
||||
| `reachability.source` | string | Source of determination |
|
||||
| `reachability.method` | string | Analysis method used |
|
||||
| `exploitability.state` | string | "exploitable", "not_exploitable", "conditionally_exploitable", "unknown" |
|
||||
| `exploitability.epss_score` | decimal | EPSS probability 0.0-1.0 |
|
||||
| `exploitability.epss_percentile` | decimal | EPSS percentile 0-100 |
|
||||
| `exploitability.kev_listed` | bool | In CISA KEV catalog |
|
||||
| `exploitability.kev_due_date` | date | KEV remediation deadline |
|
||||
| `exploitability.maturity` | string | "not_defined", "unproven", "poc", "functional", "high" |
|
||||
|
||||
### 5.3 ReachabilityOutput
|
||||
|
||||
Policy evaluation produces enriched output:
|
||||
|
||||
```csharp
|
||||
public sealed record ReachabilityOutput
|
||||
{
|
||||
/// <summary>Subject evaluated.</summary>
|
||||
public required Subject Subject { get; init; }
|
||||
|
||||
/// <summary>Effective reachability state after policy rules.</summary>
|
||||
public required ReachabilityState EffectiveState { get; init; }
|
||||
|
||||
/// <summary>Effective exploitability after policy rules.</summary>
|
||||
public ExploitabilityState? EffectiveExploitability { get; init; }
|
||||
|
||||
/// <summary>Risk adjustment from policy evaluation.</summary>
|
||||
public required RiskAdjustment RiskAdjustment { get; init; }
|
||||
|
||||
/// <summary>Policy rule trace.</summary>
|
||||
public ImmutableArray<PolicyRuleTrace> PolicyTrace { get; init; }
|
||||
|
||||
/// <summary>When evaluation occurred.</summary>
|
||||
public required DateTimeOffset EvaluatedAt { get; init; }
|
||||
}
|
||||
|
||||
public sealed record RiskAdjustment
|
||||
{
|
||||
/// <summary>Risk multiplier (0=suppress, 1=neutral, >1=amplify).</summary>
|
||||
public required decimal Factor { get; init; }
|
||||
|
||||
/// <summary>Severity override if rules dictate.</summary>
|
||||
public Severity? SeverityOverride { get; init; }
|
||||
|
||||
/// <summary>Justification for adjustment.</summary>
|
||||
public string? Justification { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
## 6. API Endpoints
|
||||
|
||||
### 6.1 Signals Service Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `POST /signals/reachability/recompute` | POST | Recompute reachability for a subject |
|
||||
| `GET /signals/facts/{subjectKey}` | GET | Get reachability facts for a subject |
|
||||
| `POST /signals/runtime-facts` | POST | Ingest runtime observations |
|
||||
|
||||
### 6.2 Policy Engine Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `POST /api/policy/evaluate` | POST | Evaluate with reachability enrichment |
|
||||
| `POST /api/policy/simulate` | POST | Simulate with reachability overrides |
|
||||
| `GET /api/policy/reachability/stats` | GET | Get reachability integration metrics |
|
||||
|
||||
## 7. Caching Strategy
|
||||
|
||||
### 7.1 Cache Layers
|
||||
|
||||
1. **L1: In-Memory Overlay Cache**
|
||||
- Per-request deduplication
|
||||
- TTL: Request lifetime
|
||||
- Key: `{tenantId}:{componentPurl}:{advisoryId}`
|
||||
|
||||
2. **L2: Redis Distributed Cache**
|
||||
- Shared across Policy Engine instances
|
||||
- TTL: 5 minutes (configurable)
|
||||
- Key: `rf:{tenantId}:{sha256(purl+advisoryId)}`
|
||||
|
||||
3. **L3: Postgres Facts Store**
|
||||
- Authoritative source
|
||||
- Indexed by `(tenant_id, component_purl, advisory_id)`
|
||||
|
||||
### 7.2 Cache Invalidation
|
||||
|
||||
- Facts are invalidated when:
|
||||
- New callgraph is ingested
|
||||
- Runtime facts are updated
|
||||
- Manual override is applied
|
||||
- TTL expires
|
||||
|
||||
## 8. Telemetry
|
||||
|
||||
### 8.1 Metrics
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
|--------|------|--------|-------------|
|
||||
| `policy_reachability_applied_total` | counter | `state` | Facts applied to evaluations |
|
||||
| `policy_reachability_cache_hits_total` | counter | - | Cache hits |
|
||||
| `policy_reachability_cache_misses_total` | counter | - | Cache misses |
|
||||
| `policy_reachability_cache_hit_ratio` | gauge | - | Hit ratio (0.0-1.0) |
|
||||
| `policy_reachability_lookups_total` | counter | `outcome` | Lookup attempts |
|
||||
| `policy_reachability_lookup_seconds` | histogram | - | Lookup latency |
|
||||
|
||||
### 8.2 Traces
|
||||
|
||||
Activity: `reachability_facts.batch_lookup`
|
||||
Tags:
|
||||
- `tenant`: Tenant ID
|
||||
- `batch_size`: Number of items requested
|
||||
- `cache_hits`: Items found in cache
|
||||
- `cache_misses`: Items not in cache
|
||||
- `store_hits`: Items fetched from store
|
||||
|
||||
## 9. Configuration
|
||||
|
||||
```yaml
|
||||
# etc/policy-engine.yaml
|
||||
PolicyEngine:
|
||||
Reachability:
|
||||
Enabled: true
|
||||
CacheTtlSeconds: 300
|
||||
MaxBatchSize: 1000
|
||||
DefaultConfidenceThreshold: 0.7
|
||||
HighConfidenceThreshold: 0.9
|
||||
|
||||
ReachabilityCache:
|
||||
Type: "redis" # or "memory"
|
||||
RedisConnectionString: "${REDIS_URL}"
|
||||
KeyPrefix: "rf:"
|
||||
```
|
||||
|
||||
## 10. Validation Rules
|
||||
|
||||
1. `Subject.Purl` must be a valid Package URL
|
||||
2. `ReachabilityFact.Confidence` must be 0.0-1.0
|
||||
3. `ReachabilityFact.State` must be a valid enum value
|
||||
4. `Timestamp` must be a valid ISO-8601 timestamp in UTC
|
||||
5. At least one of `CveId`, `GhsaId`, or `VulnerabilityId` must be present
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release |
|
||||
346
docs/modules/signals/contracts/signals-provenance-contract.md
Normal file
346
docs/modules/signals/contracts/signals-provenance-contract.md
Normal file
@@ -0,0 +1,346 @@
|
||||
# Signals Provenance Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** Signals Guild + Platform Storage Guild
|
||||
**Sprint:** SPRINT_0140_0001_0001 (unblocks SIGNALS-24-002, 24-003, 24-004, 24-005)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
This contract defines the provenance tracking for runtime facts, callgraph storage, and CAS (Content-Addressable Storage) promotion policies. It enables deterministic, auditable signal processing with signed manifests and attestations.
|
||||
|
||||
## 2. Schema References
|
||||
|
||||
| Schema | Location |
|
||||
|--------|----------|
|
||||
| Provenance Feed | `docs/schemas/provenance-feed.schema.json` |
|
||||
| Runtime Facts | `docs/signals/runtime-facts.md` |
|
||||
| Reachability Input | `docs/modules/policy/contracts/reachability-input-contract.md` |
|
||||
|
||||
## 3. CAS Storage Architecture
|
||||
|
||||
### 3.1 Bucket Structure
|
||||
|
||||
```
|
||||
cas://signals/
|
||||
├── callgraphs/
|
||||
│ ├── {tenant}/
|
||||
│ │ ├── {graph_id}.ndjson.zst # Compressed callgraph
|
||||
│ │ └── {graph_id}.meta.json # Callgraph metadata
|
||||
│ └── global/
|
||||
│ └── ...
|
||||
├── manifests/
|
||||
│ ├── {graph_id}.json # Signed manifest
|
||||
│ └── {graph_id}.json.dsse # DSSE envelope
|
||||
├── runtime-facts/
|
||||
│ ├── {tenant}/
|
||||
│ │ ├── {batch_id}.ndjson.zst # Runtime fact batch
|
||||
│ │ └── {batch_id}.provenance.json # Provenance record
|
||||
│ └── global/
|
||||
│ └── ...
|
||||
└── attestations/
|
||||
└── {batch_id}.dsse # Batch attestation
|
||||
```
|
||||
|
||||
### 3.2 Access Policies
|
||||
|
||||
| Principal | callgraphs | manifests | runtime-facts | attestations |
|
||||
|-----------|------------|-----------|---------------|--------------|
|
||||
| Signals Service | read/write | read/write | read/write | read/write |
|
||||
| Policy Engine | read | read | read | read |
|
||||
| Scanner Worker | write | - | - | - |
|
||||
| Audit Service | read | read | read | read |
|
||||
| All Others | deny | deny | deny | deny |
|
||||
|
||||
### 3.3 Retention Policies
|
||||
|
||||
| Content Type | Retention | GC Policy |
|
||||
|--------------|-----------|-----------|
|
||||
| Manifests | Indefinite | Never delete |
|
||||
| Callgraphs (referenced) | Indefinite | Never delete |
|
||||
| Callgraphs (orphan) | 30 days | Rolling GC |
|
||||
| Runtime Facts | 90 days | Rolling GC |
|
||||
| Attestations | Indefinite | Never delete |
|
||||
|
||||
## 4. Manifest Schema
|
||||
|
||||
### 4.1 CallgraphManifest
|
||||
|
||||
```csharp
|
||||
public sealed record CallgraphManifest
|
||||
{
|
||||
/// <summary>Unique graph identifier (ULID).</summary>
|
||||
public required string GraphId { get; init; }
|
||||
|
||||
/// <summary>SHA-256 digest of callgraph content.</summary>
|
||||
public required string Digest { get; init; }
|
||||
|
||||
/// <summary>Programming language.</summary>
|
||||
public required string Language { get; init; }
|
||||
|
||||
/// <summary>Source identifier (scanner, analyzer, runtime agent).</summary>
|
||||
public required string Source { get; init; }
|
||||
|
||||
/// <summary>When the callgraph was created.</summary>
|
||||
public required DateTimeOffset CreatedAt { get; init; }
|
||||
|
||||
/// <summary>Tenant scope.</summary>
|
||||
public required string TenantId { get; init; }
|
||||
|
||||
/// <summary>Component PURL.</summary>
|
||||
public required string ComponentPurl { get; init; }
|
||||
|
||||
/// <summary>Entry points discovered.</summary>
|
||||
public ImmutableArray<string> EntryPoints { get; init; }
|
||||
|
||||
/// <summary>Node count in the graph.</summary>
|
||||
public int NodeCount { get; init; }
|
||||
|
||||
/// <summary>Edge count in the graph.</summary>
|
||||
public int EdgeCount { get; init; }
|
||||
|
||||
/// <summary>Signing key ID.</summary>
|
||||
public string? SignerKeyId { get; init; }
|
||||
|
||||
/// <summary>Signature (Base64).</summary>
|
||||
public string? Signature { get; init; }
|
||||
|
||||
/// <summary>Rekor log UUID if transparency-logged.</summary>
|
||||
public string? RekorUuid { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
### 4.2 JSON Example
|
||||
|
||||
```json
|
||||
{
|
||||
"graphId": "01HWXYZ123456789ABCDEFGHJK",
|
||||
"digest": "sha256:7d9cd5f1a2a0dd9a41a2c43a5b7d8a0bcd9e34cf39b3f43a70595c834f0a4aee",
|
||||
"language": "javascript",
|
||||
"source": "stella-callgraph-node",
|
||||
"createdAt": "2025-12-19T10:00:00Z",
|
||||
"tenantId": "tenant-001",
|
||||
"componentPurl": "pkg:npm/%40acme/backend@1.2.3",
|
||||
"entryPoints": ["src/index.js", "src/server.js"],
|
||||
"nodeCount": 1523,
|
||||
"edgeCount": 4892,
|
||||
"signerKeyId": "signals-signer-2025-001",
|
||||
"signature": "base64...",
|
||||
"rekorUuid": "24296fb24b8ad77a..."
|
||||
}
|
||||
```
|
||||
|
||||
## 5. Runtime Facts Provenance
|
||||
|
||||
### 5.1 RuntimeFactProvenance
|
||||
|
||||
```csharp
|
||||
public sealed record RuntimeFactProvenance
|
||||
{
|
||||
/// <summary>Provenance record ID (ULID).</summary>
|
||||
public required string ProvenanceId { get; init; }
|
||||
|
||||
/// <summary>Callgraph ID this fact batch relates to.</summary>
|
||||
public required string CallgraphId { get; init; }
|
||||
|
||||
/// <summary>Batch ID for this fact set.</summary>
|
||||
public required string BatchId { get; init; }
|
||||
|
||||
/// <summary>When facts were ingested.</summary>
|
||||
public required DateTimeOffset IngestedAt { get; init; }
|
||||
|
||||
/// <summary>When facts were received from source.</summary>
|
||||
public required DateTimeOffset ReceivedAt { get; init; }
|
||||
|
||||
/// <summary>Tenant scope.</summary>
|
||||
public required string TenantId { get; init; }
|
||||
|
||||
/// <summary>Source host/service.</summary>
|
||||
public required string Source { get; init; }
|
||||
|
||||
/// <summary>Pipeline version (git SHA or build ID).</summary>
|
||||
public required string PipelineVersion { get; init; }
|
||||
|
||||
/// <summary>SHA-256 of raw fact blob.</summary>
|
||||
public required string ProvenanceHash { get; init; }
|
||||
|
||||
/// <summary>Signing key ID.</summary>
|
||||
public string? SignerKeyId { get; init; }
|
||||
|
||||
    /// <summary>Rekor log UUID if transparency-logged.</summary>
|
||||
public string? RekorUuid { get; init; }
|
||||
|
||||
/// <summary>Skip reason if not transparency-logged.</summary>
|
||||
public string? SkipReason { get; init; }
|
||||
|
||||
/// <summary>Fact count in this batch.</summary>
|
||||
public int FactCount { get; init; }
|
||||
|
||||
/// <summary>Fact types included.</summary>
|
||||
public ImmutableArray<string> FactTypes { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
### 5.2 Enrichment Pipeline
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ Runtime Agent │────▶│ Signals Ingest │────▶│ CAS Storage │
|
||||
│ (runtime-facts) │ │ (provenance) │ │ (facts+prov) │
|
||||
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────┐
|
||||
│ DSSE Attestation │
|
||||
│ (per batch) │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## 6. API Endpoints
|
||||
|
||||
### 6.1 Callgraph Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `POST /signals/callgraphs` | POST | Store new callgraph |
|
||||
| `GET /signals/callgraphs/{graphId}` | GET | Retrieve callgraph |
|
||||
| `GET /signals/callgraphs/{graphId}/manifest` | GET | Get signed manifest |
|
||||
| `GET /signals/callgraphs/by-purl/{purl}` | GET | Find by component PURL |
|
||||
|
||||
### 6.2 Runtime Facts
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `POST /signals/runtime-facts` | POST | Ingest runtime fact batch |
|
||||
| `GET /signals/runtime-facts/{batchId}` | GET | Retrieve fact batch |
|
||||
| `GET /signals/runtime-facts/{batchId}/provenance` | GET | Get provenance record |
|
||||
| `GET /signals/runtime-facts/ndjson` | GET | Stream facts (with provenance) |
|
||||
|
||||
### 6.3 Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `tenant` | string | Filter by tenant |
|
||||
| `callgraph_id` | string | Filter by callgraph |
|
||||
| `since` | datetime | Facts after timestamp |
|
||||
| `include_provenance` | bool | Include provenance_hash and callgraph_id |
|
||||
|
||||
## 7. Signing and Attestation
|
||||
|
||||
### 7.1 Manifest Signing
|
||||
|
||||
When `Provenance.SignManifests` is enabled, callgraph manifests are signed using:
|
||||
- Algorithm: `ECDSA-P256-SHA256` or `Ed25519`
|
||||
- Key management: Via Authority service key registry
|
||||
- Transparency: Optional Sigstore Rekor logging
|
||||
|
||||
```csharp
|
||||
public interface IManifestSigner
|
||||
{
|
||||
Task<SignedManifest> SignAsync(
|
||||
CallgraphManifest manifest,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<bool> VerifyAsync(
|
||||
SignedManifest signedManifest,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
```
|
||||
|
||||
### 7.2 Batch Attestation
|
||||
|
||||
Runtime fact batches are attested using in-toto/DSSE:
|
||||
|
||||
```csharp
|
||||
public sealed record RuntimeFactAttestation
|
||||
{
|
||||
public required string PredicateType { get; init; } // "https://stella.ops/attestation/runtime-facts/v1"
|
||||
public required string BatchId { get; init; }
|
||||
public required string ProvenanceHash { get; init; }
|
||||
public required int FactCount { get; init; }
|
||||
public required DateTimeOffset Timestamp { get; init; }
|
||||
public required ImmutableArray<string> Subjects { get; init; } // callgraph IDs
|
||||
}
|
||||
```
|
||||
|
||||
## 8. Telemetry
|
||||
|
||||
### 8.1 Metrics
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
|--------|------|--------|-------------|
|
||||
| `signals_callgraphs_stored_total` | counter | `language`, `tenant` | Callgraphs stored |
|
||||
| `signals_callgraph_nodes_total` | histogram | `language` | Nodes per callgraph |
|
||||
| `signals_runtime_facts_ingested_total` | counter | `fact_type`, `tenant` | Facts ingested |
|
||||
| `signals_runtime_facts_batch_size` | histogram | - | Facts per batch |
|
||||
| `signals_provenance_records_total` | counter | - | Provenance records created |
|
||||
| `signals_attestations_created_total` | counter | `result` | DSSE attestations created, by result (`success`/`failure`) |
|
||||
| `signals_cas_operations_total` | counter | `operation`, `result` | CAS operations |
|
||||
|
||||
### 8.2 Alerts
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: signals-provenance
|
||||
rules:
|
||||
- alert: SignalsAttestationFailure
|
||||
expr: increase(signals_attestations_created_total{result="failure"}[5m]) > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Runtime fact attestation failures detected"
|
||||
|
||||
- alert: SignalsProvenanceMissing
|
||||
expr: signals_runtime_facts_ingested_total - signals_provenance_records_total > 100
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Runtime facts missing provenance records"
|
||||
```
|
||||
|
||||
## 9. Configuration
|
||||
|
||||
```yaml
|
||||
# etc/signals.yaml
|
||||
Signals:
|
||||
CAS:
|
||||
BucketPrefix: "cas://signals"
|
||||
WriteEnabled: true
|
||||
RetentionDays:
|
||||
RuntimeFacts: 90
|
||||
OrphanCallgraphs: 30
|
||||
|
||||
Provenance:
|
||||
Enabled: true
|
||||
SignManifests: true
|
||||
AttestBatches: true
|
||||
RekorEnabled: true # Set to false for air-gap
|
||||
|
||||
Signing:
|
||||
KeyId: "signals-signer-2025-001"
|
||||
Algorithm: "ECDSA-P256-SHA256"
|
||||
```
|
||||
|
||||
## 10. Validation Rules
|
||||
|
||||
1. `GraphId` must be valid ULID
|
||||
2. `Digest` must be valid `sha256:` prefixed hex
|
||||
3. `Language` must be known language identifier
|
||||
4. `TenantId` must exist in Authority tenant registry
|
||||
5. `ComponentPurl` must be valid Package URL
|
||||
6. `ProvenanceHash` must match recomputed hash of fact blob
|
||||
7. Manifests must have valid signature if `SignManifests: true`
|
||||
8. Attestations must have valid DSSE envelope
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release - unblocks SIGNALS-24-002 through 24-005 |
|
||||
@@ -0,0 +1,473 @@
|
||||
# OBS-50 Telemetry Baselines Contract v1.0.0
|
||||
|
||||
**Status:** APPROVED
|
||||
**Version:** 1.0.0
|
||||
**Effective:** 2025-12-19
|
||||
**Owner:** Observability Guild + Telemetry Core Guild
|
||||
**Sprint:** SPRINT_0170_0001_0001 (unblocks 51-002, ORCH-OBS-50-001)
|
||||
|
||||
---
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
This contract defines the baseline telemetry standards for all StellaOps services, ensuring consistent observability across the platform. It specifies common envelope schemas, metric naming conventions, trace span standards, log formats, and redaction requirements.
|
||||
|
||||
## 2. Schema References
|
||||
|
||||
| Schema | Location |
|
||||
|--------|----------|
|
||||
| Telemetry Config | `docs/modules/telemetry/schemas/telemetry-config.schema.json` |
|
||||
| Telemetry Bundle | `docs/modules/telemetry/schemas/telemetry-bundle.schema.json` |
|
||||
| Telemetry Standards | `docs/observability/telemetry-standards.md` |
|
||||
| Telemetry Bootstrap | `docs/observability/telemetry-bootstrap.md` |
|
||||
|
||||
## 3. Common Envelope Schema
|
||||
|
||||
### 3.1 Required Fields
|
||||
|
||||
All telemetry signals (traces, metrics, logs) MUST include these resource attributes:
|
||||
|
||||
```csharp
|
||||
public sealed record TelemetryEnvelope
|
||||
{
|
||||
/// <summary>W3C trace context identifier.</summary>
|
||||
public required string TraceId { get; init; }
|
||||
|
||||
/// <summary>W3C span identifier.</summary>
|
||||
public required string SpanId { get; init; }
|
||||
|
||||
/// <summary>W3C trace flags.</summary>
|
||||
public int TraceFlags { get; init; }
|
||||
|
||||
/// <summary>Tenant identifier.</summary>
|
||||
public required string TenantId { get; init; }
|
||||
|
||||
/// <summary>Service/workload name.</summary>
|
||||
public required string Workload { get; init; }
|
||||
|
||||
/// <summary>Deployment region.</summary>
|
||||
public required string Region { get; init; }
|
||||
|
||||
/// <summary>Environment (dev/stage/prod).</summary>
|
||||
public required string Environment { get; init; }
|
||||
|
||||
/// <summary>Service version (git SHA or semver).</summary>
|
||||
public required string Version { get; init; }
|
||||
|
||||
/// <summary>Module/component name.</summary>
|
||||
public required string Component { get; init; }
|
||||
|
||||
/// <summary>Operation name (verb/action).</summary>
|
||||
public required string Operation { get; init; }
|
||||
|
||||
/// <summary>UTC ISO-8601 timestamp.</summary>
|
||||
public required DateTimeOffset Timestamp { get; init; }
|
||||
|
||||
/// <summary>Outcome status.</summary>
|
||||
public required TelemetryStatus Status { get; init; }
|
||||
}
|
||||
|
||||
public enum TelemetryStatus
|
||||
{
|
||||
Ok = 0,
|
||||
Error = 1,
|
||||
Fault = 2,
|
||||
Throttle = 3
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 Optional Fields
|
||||
|
||||
```csharp
|
||||
public sealed record TelemetryContext
|
||||
{
|
||||
/// <summary>Correlation ID for request chains.</summary>
|
||||
public string? CorrelationId { get; init; }
|
||||
|
||||
/// <summary>Subject identifier (PURL, URI, or hashed ID).</summary>
|
||||
public string? Resource { get; init; }
|
||||
|
||||
/// <summary>Project identifier within tenant.</summary>
|
||||
public string? ProjectId { get; init; }
|
||||
|
||||
/// <summary>Actor identity (user/service).</summary>
|
||||
public string? Actor { get; init; }
|
||||
|
||||
/// <summary>Policy rule that was applied.</summary>
|
||||
public string? ImposedRule { get; init; }
|
||||
|
||||
/// <summary>Job/task run identifier.</summary>
|
||||
public string? RunId { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
### 3.3 JSON Example
|
||||
|
||||
```json
|
||||
{
|
||||
"trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",
|
||||
"span_id": "00f067aa0ba902b7",
|
||||
"trace_flags": 1,
|
||||
"tenant_id": "tenant-001",
|
||||
"workload": "StellaOps.Orchestrator",
|
||||
"region": "eu-west-1",
|
||||
"environment": "prod",
|
||||
"version": "1.2.3",
|
||||
"component": "scheduler",
|
||||
"operation": "job.dispatch",
|
||||
"timestamp": "2025-12-19T10:00:00.000Z",
|
||||
"status": "ok",
|
||||
"correlation_id": "req-abc123",
|
||||
"run_id": "run-xyz789"
|
||||
}
|
||||
```
|
||||
|
||||
## 4. Metric Naming Conventions
|
||||
|
||||
### 4.1 Naming Pattern
|
||||
|
||||
```
|
||||
{module}_{component}_{metric_type}_{unit}
|
||||
```
|
||||
|
||||
Examples:
|
||||
- `orchestrator_jobs_dispatched_total` (counter)
|
||||
- `scanner_analysis_duration_seconds` (histogram)
|
||||
- `policy_evaluations_active` (gauge)
|
||||
- `concelier_ingestion_bytes_total` (counter)
|
||||
|
||||
### 4.2 Required Labels
|
||||
|
||||
| Label | Description | Cardinality |
|
||||
|-------|-------------|-------------|
|
||||
| `tenant` | Tenant identifier | Low |
|
||||
| `workload` | Service name | Low |
|
||||
| `environment` | Deployment environment | Low |
|
||||
| `status` | Outcome (ok/error/fault/throttle) | Low |
|
||||
|
||||
### 4.3 Histogram Buckets
|
||||
|
||||
| Metric Type | Default Buckets |
|
||||
|-------------|-----------------|
|
||||
| Duration (seconds) | `[0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]` |
|
||||
| Size (bytes) | `[256, 512, 1024, 4096, 16384, 65536, 262144, 1048576]` |
|
||||
| Count | `[1, 5, 10, 25, 50, 100, 250, 500, 1000]` |
|
||||
|
||||
### 4.4 Golden Signal Metrics
|
||||
|
||||
Every service MUST expose these metrics:
|
||||
|
||||
| Metric | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `{service}_requests_total` | counter | Total requests by status |
|
||||
| `{service}_request_duration_seconds` | histogram | Request latency |
|
||||
| `{service}_errors_total` | counter | Error count by type |
|
||||
| `{service}_saturation_ratio` | gauge | Resource utilization (0.0-1.0) |
|
||||
|
||||
## 5. Trace Span Standards
|
||||
|
||||
### 5.1 Span Naming
|
||||
|
||||
```
|
||||
{component}.{operation}
|
||||
```
|
||||
|
||||
Examples:
|
||||
- `scheduler.dispatch`
|
||||
- `policy.evaluate`
|
||||
- `scanner.analyze`
|
||||
- `concelier.ingest`
|
||||
|
||||
### 5.2 Required Span Attributes
|
||||
|
||||
| Attribute | Description |
|
||||
|-----------|-------------|
|
||||
| `tenant.id` | Tenant identifier |
|
||||
| `workload` | Service name |
|
||||
| `component` | Module/subsystem |
|
||||
| `operation` | Action being performed |
|
||||
| `status.code` | OpenTelemetry status code |
|
||||
| `status.message` | Status description |
|
||||
|
||||
### 5.3 Span Events
|
||||
|
||||
Use span events for notable occurrences within a span:
|
||||
|
||||
```csharp
|
||||
public sealed record SpanEventContract
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public required DateTimeOffset Timestamp { get; init; }
|
||||
public ImmutableDictionary<string, object>? Attributes { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
Standard event names:
|
||||
- `exception` - Exception occurred
|
||||
- `retry` - Retry attempt
|
||||
- `cache.hit` / `cache.miss` - Cache interaction
|
||||
- `policy.applied` - Policy rule applied
|
||||
|
||||
## 6. Log Format Standards
|
||||
|
||||
### 6.1 Structured Log Fields
|
||||
|
||||
```csharp
|
||||
public sealed record StructuredLogEntry
|
||||
{
|
||||
/// <summary>UTC ISO-8601 timestamp.</summary>
|
||||
public required DateTimeOffset Timestamp { get; init; }
|
||||
|
||||
/// <summary>Log severity level.</summary>
|
||||
public required LogLevel Level { get; init; }
|
||||
|
||||
/// <summary>Log message template.</summary>
|
||||
public required string MessageTemplate { get; init; }
|
||||
|
||||
/// <summary>Rendered message.</summary>
|
||||
public required string Message { get; init; }
|
||||
|
||||
/// <summary>Exception details if present.</summary>
|
||||
public ExceptionInfo? Exception { get; init; }
|
||||
|
||||
/// <summary>Trace context.</summary>
|
||||
public required TraceContext TraceContext { get; init; }
|
||||
|
||||
/// <summary>Service context.</summary>
|
||||
public required ServiceContext ServiceContext { get; init; }
|
||||
|
||||
/// <summary>Additional properties.</summary>
|
||||
public ImmutableDictionary<string, object>? Properties { get; init; }
|
||||
}
|
||||
|
||||
public enum LogLevel
|
||||
{
|
||||
Trace = 0,
|
||||
Debug = 1,
|
||||
Information = 2,
|
||||
Warning = 3,
|
||||
Error = 4,
|
||||
Critical = 5
|
||||
}
|
||||
```
|
||||
|
||||
### 6.2 Log Rate Limits
|
||||
|
||||
| Level | Default Rate | Notes |
|
||||
|-------|--------------|-------|
|
||||
| Trace/Debug | 10/s per component | Disabled in production |
|
||||
| Information | 100/s per component | Sampled under pressure |
|
||||
| Warning | 500/s per component | Never sampled |
|
||||
| Error/Critical | Unlimited | Always emitted |
|
||||
|
||||
## 7. Redaction and Scrubbing
|
||||
|
||||
### 7.1 Denylist Patterns
|
||||
|
||||
The following patterns MUST be redacted before emission:
|
||||
|
||||
| Category | Patterns |
|
||||
|----------|----------|
|
||||
| Secrets | `authorization`, `bearer`, `token`, `api[-_]?key`, `secret`, `password`, `credential` |
|
||||
| PII | `email`, `phone`, `ssn`, `address`, `name` (when user-provided) |
|
||||
| Security | `private[-_]?key`, `certificate`, `session[-_]?id` |
|
||||
|
||||
### 7.2 Redaction Format
|
||||
|
||||
```json
|
||||
{
|
||||
"authorization": "[REDACTED]",
|
||||
"redaction": {
|
||||
"reason": "secret",
|
||||
"policy": "default-v1",
|
||||
"timestamp": "2025-12-19T10:00:00Z"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7.3 Hash Policy
|
||||
|
||||
When identifiers need to be preserved for correlation but hidden:
|
||||
|
||||
```csharp
|
||||
public sealed record HashedIdentifier
|
||||
{
|
||||
/// <summary>SHA-256 lowercase hex of original value.</summary>
|
||||
public required string Hash { get; init; }
|
||||
|
||||
/// <summary>Marker indicating this is a hash.</summary>
|
||||
public bool IsHashed { get; init; } = true;
|
||||
|
||||
/// <summary>Original field name.</summary>
|
||||
public required string FieldName { get; init; }
|
||||
}
|
||||
```
|
||||
|
||||
## 8. Sampling Policies
|
||||
|
||||
### 8.1 Trace Sampling
|
||||
|
||||
| Environment | Head Sampling | Error Boost | Audit Boost |
|
||||
|-------------|--------------|-------------|-------------|
|
||||
| Development | 100% | - | - |
|
||||
| Staging | 10% | 100% | 100% |
|
||||
| Production | 5% | 100% | 100% |
|
||||
|
||||
### 8.2 Audit Spans
|
||||
|
||||
Spans tagged `audit=true` are always sampled and retained for extended periods:
|
||||
|
||||
```csharp
|
||||
public interface IAuditableOperation
|
||||
{
|
||||
/// <summary>Mark span for audit trail.</summary>
|
||||
void MarkAudit(string reason);
|
||||
}
|
||||
```
|
||||
|
||||
## 9. Service Integration
|
||||
|
||||
### 9.1 Bootstrap Registration
|
||||
|
||||
```csharp
|
||||
public static class TelemetryBootstrap
|
||||
{
|
||||
public static IServiceCollection AddStellaOpsTelemetry(
|
||||
this IServiceCollection services,
|
||||
IConfiguration configuration,
|
||||
string serviceName,
|
||||
string serviceVersion,
|
||||
Action<TelemetryOptions>? configureOptions = null,
|
||||
Action<MeterProviderBuilder>? configureMetrics = null,
|
||||
Action<TracerProviderBuilder>? configureTracing = null);
|
||||
}
|
||||
|
||||
public sealed class TelemetryOptions
|
||||
{
|
||||
public CollectorOptions Collector { get; set; } = new();
|
||||
public SamplingOptions Sampling { get; set; } = new();
|
||||
public RedactionOptions Redaction { get; set; } = new();
|
||||
public bool SealedMode { get; set; }
|
||||
}
|
||||
```
|
||||
|
||||
### 9.2 Context Propagation
|
||||
|
||||
HTTP headers for W3C trace context:
|
||||
- `traceparent`: `{version}-{trace-id}-{parent-id}-{trace-flags}`
|
||||
- `tracestate`: Custom vendor state
|
||||
- `baggage`: Tenant/correlation context
|
||||
|
||||
gRPC metadata:
|
||||
- `x-trace-id`
|
||||
- `x-span-id`
|
||||
- `x-tenant-id`
|
||||
- `x-correlation-id`
|
||||
|
||||
## 10. Orchestrator Integration (ORCH-OBS-50-001)
|
||||
|
||||
### 10.1 Required Spans
|
||||
|
||||
The Orchestrator service MUST emit these trace spans:
|
||||
|
||||
| Span Name | Description |
|
||||
|-----------|-------------|
|
||||
| `scheduler.dispatch` | Job dispatch to worker |
|
||||
| `scheduler.schedule` | Job scheduling decision |
|
||||
| `controller.create_job` | Job creation API |
|
||||
| `controller.cancel_job` | Job cancellation API |
|
||||
| `worker.execute` | Worker job execution |
|
||||
|
||||
### 10.2 Required Metrics
|
||||
|
||||
| Metric | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `orchestrator_jobs_dispatched_total` | counter | Jobs dispatched by type |
|
||||
| `orchestrator_jobs_pending` | gauge | Jobs in queue |
|
||||
| `orchestrator_job_duration_seconds` | histogram | Job execution time |
|
||||
| `orchestrator_dispatch_latency_seconds` | histogram | Time to dispatch |
|
||||
| `orchestrator_worker_utilization` | gauge | Worker pool utilization |
|
||||
|
||||
### 10.3 Required Logs
|
||||
|
||||
| Event | Level | Fields |
|
||||
|-------|-------|--------|
|
||||
| Job scheduled | Information | `job_id`, `type`, `tenant_id`, `scheduled_at` |
|
||||
| Job started | Information | `job_id`, `worker_id`, `trace_id` |
|
||||
| Job completed | Information | `job_id`, `duration_ms`, `status` |
|
||||
| Job failed | Error | `job_id`, `error_code`, `error_message`, `retry_count` |
|
||||
|
||||
## 11. Telemetry
|
||||
|
||||
### 11.1 Self-Monitoring Metrics
|
||||
|
||||
| Metric | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `telemetry_exports_total` | counter | Export operations by status |
|
||||
| `telemetry_export_duration_seconds` | histogram | Export latency |
|
||||
| `telemetry_buffer_size` | gauge | Buffer utilization |
|
||||
| `telemetry_dropped_total` | counter | Dropped signals |
|
||||
|
||||
### 11.2 Alerts
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: telemetry-baselines
|
||||
rules:
|
||||
- alert: TelemetryExportFailure
|
||||
expr: increase(telemetry_exports_total{status="error"}[5m]) > 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Telemetry export failures detected"
|
||||
|
||||
- alert: TelemetryHighDropRate
|
||||
expr: rate(telemetry_dropped_total[5m]) > 100
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "High telemetry signal drop rate"
|
||||
```
|
||||
|
||||
## 12. Configuration
|
||||
|
||||
```yaml
|
||||
# etc/telemetry.yaml
|
||||
Telemetry:
|
||||
Collector:
|
||||
Enabled: true
|
||||
Endpoint: "https://otel-collector.example:4317"
|
||||
Protocol: "grpc"
|
||||
|
||||
Sampling:
|
||||
HeadSamplingRatio: 0.05
|
||||
ErrorBoost: true
|
||||
AuditBoost: true
|
||||
|
||||
Redaction:
|
||||
Enabled: true
|
||||
PolicyVersion: "v1"
|
||||
StrictMode: true
|
||||
|
||||
SealedMode: false # Enable for air-gap
|
||||
```
|
||||
|
||||
## 13. Validation Rules
|
||||
|
||||
1. All signals MUST include `trace_id`, `tenant_id`, `workload`
|
||||
2. Timestamps MUST be UTC ISO-8601 format
|
||||
3. Metric names MUST follow `{module}_{component}_{metric_type}_{unit}` pattern
|
||||
4. Span names MUST follow `{component}.{operation}` pattern
|
||||
5. Redaction MUST be applied before any external export
|
||||
6. Hash values MUST use SHA-256 lowercase hex
|
||||
7. Log messages MUST NOT contain raw PII/secrets
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Version | Date | Changes |
|
||||
|---------|------|---------|
|
||||
| 1.0.0 | 2025-12-19 | Initial release - unblocks 51-002, ORCH-OBS-50-001 |
|
||||
Reference in New Issue
Block a user