- Implement `SbomVexOrderingDeterminismProperties` for testing component list and vulnerability metadata hash consistency. - Create `UnicodeNormalizationDeterminismProperties` to validate NFC normalization and Unicode string handling. - Add project file for `StellaOps.Testing.Determinism.Properties` with necessary dependencies. - Introduce CI/CD template validation tests including YAML syntax checks and documentation content verification. - Create validation script for CI/CD templates ensuring all required files and structures are present.
456 lines
11 KiB
Markdown
456 lines
11 KiB
Markdown
# Budget Threshold Attestation
|
|
|
|
This document describes how the thresholds of the unknowns budget are attested in verdict bundles for reproducibility and audit purposes.
|
|
|
|
## Overview
|
|
|
|
Budget attestation captures the budget configuration applied during policy evaluation, enabling:
|
|
|
|
- **Auditability**: Verify what thresholds were enforced at decision time
|
|
- **Reproducibility**: Include all inputs for deterministic verification
|
|
- **Compliance**: Demonstrate policy enforcement for regulatory requirements
|
|
|
|
## Budget Check Predicate
|
|
|
|
The budget check is included in the verdict predicate:
|
|
|
|
```json
|
|
{
|
|
"_type": "https://stellaops.dev/predicates/policy-verdict@v1",
|
|
"tenantId": "tenant-1",
|
|
"policyId": "default-policy",
|
|
"policyVersion": 1,
|
|
"verdict": { ... },
|
|
"budgetCheck": {
|
|
"environment": "production",
|
|
"config": {
|
|
"maxUnknownCount": 10,
|
|
"maxCumulativeUncertainty": 2.5,
|
|
"action": "warn",
|
|
"reasonLimits": {
|
|
"Reachability": 5,
|
|
"Identity": 3
|
|
}
|
|
},
|
|
"actualCounts": {
|
|
"total": 3,
|
|
"cumulativeUncertainty": 1.2,
|
|
"byReason": {
|
|
"Reachability": 2,
|
|
"Identity": 1
|
|
}
|
|
},
|
|
"result": "pass",
|
|
"configHash": "sha256:abc123...",
|
|
"evaluatedAt": "2025-12-25T12:00:00Z",
|
|
"violations": []
|
|
}
|
|
}
|
|
```
|
|
|
|
## Fields
|
|
|
|
### budgetCheck.config
|
|
|
|
| Field | Type | Description |
|
|
|-------|------|-------------|
|
|
| `maxUnknownCount` | int | Maximum total unknowns allowed |
|
|
| `maxCumulativeUncertainty` | double | Maximum cumulative uncertainty score allowed |
|
|
| `action` | string | Action when exceeded: warn, block |
|
|
| `reasonLimits` | object | Per-reason code limits |
|
|
|
|
### budgetCheck.actualCounts
|
|
|
|
| Field | Type | Description |
|
|
|-------|------|-------------|
|
|
| `total` | int | Total unknowns observed |
|
|
| `cumulativeUncertainty` | double | Sum of uncertainty factors |
|
|
| `byReason` | object | Breakdown by reason code |
|
|
|
|
### budgetCheck.result
|
|
|
|
Possible values:
|
|
- `pass` - All limits satisfied
|
|
- `warn` - Limits exceeded but action is warn
|
|
- `fail` - Limits exceeded and action is block
|
|
|
|
### budgetCheck.configHash
|
|
|
|
SHA-256 hash of the budget configuration for determinism verification. Format: `sha256:{64 hex characters}`
|
|
|
|
### budgetCheck.violations
|
|
|
|
List of violations when limits are exceeded:
|
|
|
|
```json
|
|
{
|
|
"violations": [
|
|
{
|
|
"type": "total",
|
|
"limit": 10,
|
|
"actual": 15
|
|
},
|
|
{
|
|
"type": "reason",
|
|
"limit": 5,
|
|
"actual": 8,
|
|
"reason": "Reachability"
|
|
}
|
|
]
|
|
}
|
|
```
|
|
|
|
## Usage
|
|
|
|
### Extracting Budget Check from Verdict
|
|
|
|
```csharp
|
|
using StellaOps.Policy.Engine.Attestation;
|
|
|
|
// Parse verdict predicate from DSSE envelope
|
|
var predicate = VerdictPredicate.Parse(dssePayload);
|
|
|
|
// Access budget check
|
|
if (predicate.BudgetCheck is not null)
|
|
{
|
|
var check = predicate.BudgetCheck;
|
|
Console.WriteLine($"Environment: {check.Environment}");
|
|
Console.WriteLine($"Result: {check.Result}");
|
|
Console.WriteLine($"Total: {check.ActualCounts.Total}/{check.Config.MaxUnknownCount}");
|
|
Console.WriteLine($"Config Hash: {check.ConfigHash}");
|
|
}
|
|
```
|
|
|
|
### Verifying Configuration Hash
|
|
|
|
```csharp
|
|
// Compute expected hash from current configuration
|
|
var currentConfig = new VerdictBudgetConfig(
|
|
maxUnknownCount: 10,
|
|
maxCumulativeUncertainty: 2.5,
|
|
action: "warn");
|
|
|
|
var expectedHash = VerdictBudgetCheck.ComputeConfigHash(currentConfig);
|
|
|
|
// Compare with attested hash
|
|
if (predicate.BudgetCheck?.ConfigHash != expectedHash)
|
|
{
|
|
Console.WriteLine("Warning: Budget configuration has changed since attestation");
|
|
}
|
|
```
|
|
|
|
## Determinism
|
|
|
|
The config hash ensures reproducibility:
|
|
|
|
1. Configuration is serialized to JSON with canonical ordering
|
|
2. SHA-256 is computed over the UTF-8 bytes of the serialized JSON
|
|
3. Hash is prefixed with `sha256:` algorithm identifier
|
|
|
|
This allows verification that the same budget configuration was used across runs.
|
|
|
|
## Integration Points
|
|
|
|
### VerdictPredicateBuilder
|
|
|
|
Budget check is added when building verdict predicates:
|
|
|
|
```csharp
|
|
var budgetCheck = new VerdictBudgetCheck(
|
|
environment: context.Environment,
|
|
config: config,
|
|
actualCounts: counts,
|
|
result: budgetResult.Passed ? "pass" : budgetResult.Budget.Action.ToString(),
|
|
configHash: VerdictBudgetCheck.ComputeConfigHash(config),
|
|
evaluatedAt: DateTimeOffset.UtcNow,
|
|
violations: violations);
|
|
|
|
var predicate = new VerdictPredicate(
|
|
tenantId: trace.TenantId,
|
|
policyId: trace.PolicyId,
|
|
// ... other fields
|
|
budgetCheck: budgetCheck);
|
|
```
|
|
|
|
### UnknownBudgetService
|
|
|
|
The enhanced `BudgetCheckResult` includes all data needed for attestation:
|
|
|
|
```csharp
|
|
var result = await budgetService.CheckBudget(environment, unknowns);
|
|
|
|
// result.Budget - the configuration applied
|
|
// result.CountsByReason - breakdown for attestation
|
|
// result.CumulativeUncertainty - total uncertainty score
|
|
```
|
|
|
|
---
|
|
|
|
# Risk Budget Enforcement
|
|
|
|
This section describes the risk budget enforcement system that tracks and controls release risk accumulation over time.
|
|
|
|
## Overview
|
|
|
|
Risk budgets limit the cumulative risk accepted during a budget window (typically monthly). Each release consumes risk points based on the vulnerabilities it introduces or carries forward. Once consumption reaches the Red threshold, high-risk releases are frozen; when the budget is exhausted, only incident and security fixes are allowed.
|
|
|
|
## Key Concepts
|
|
|
|
### Service Tiers
|
|
|
|
Services are classified by criticality, which determines their risk budget allocation:
|
|
|
|
| Tier | Name | Monthly Allocation | Description |
|
|
|------|------|-------------------|-------------|
|
|
| 0 | Internal | 300 RP | Internal-only, low business impact |
|
|
| 1 | Customer-Facing Non-Critical | 200 RP | Customer-facing but non-critical |
|
|
| 2 | Customer-Facing Critical | 120 RP | Critical customer-facing services |
|
|
| 3 | Safety-Critical | 80 RP | Safety, financial, or data-critical |
|
|
|
|
### Budget Status Thresholds
|
|
|
|
Budget status transitions based on percentage consumed:
|
|
|
|
| Status | Threshold | Behavior |
|
|
|--------|-----------|----------|
|
|
| Green | < 40% consumed | Normal operations |
|
|
| Yellow | >= 40% and < 70% consumed | Increased caution, warnings triggered |
|
|
| Red | >= 70% and < 100% consumed | High-risk diffs frozen, only low-risk allowed |
|
|
| Exhausted | >= 100% consumed | Incident and security fixes only |
|
|
|
|
### Budget Windows
|
|
|
|
- **Default cadence**: Monthly (YYYY-MM format)
|
|
- **Reset behavior**: No carry-over; unused budget expires
|
|
- **Window boundary**: UTC midnight on the 1st of each month
|
|
|
|
## API Endpoints
|
|
|
|
### Check Budget Status
|
|
|
|
```http
|
|
GET /api/v1/policy/budget/status?serviceId={id}
|
|
```
|
|
|
|
Response:
|
|
```json
|
|
{
|
|
"budgetId": "budget:my-service:2025-12",
|
|
"serviceId": "my-service",
|
|
"tier": 1,
|
|
"window": "2025-12",
|
|
"allocated": 200,
|
|
"consumed": 85,
|
|
"remaining": 115,
|
|
"percentageUsed": 42.5,
|
|
"status": "Yellow"
|
|
}
|
|
```
|
|
|
|
### Record Consumption
|
|
|
|
```http
|
|
POST /api/v1/policy/budget/consume
|
|
Content-Type: application/json
|
|
|
|
{
|
|
"serviceId": "my-service",
|
|
"riskPoints": 25,
|
|
"releaseId": "v1.2.3"
|
|
}
|
|
```
|
|
|
|
### Adjust Allocation (Earned Capacity)
|
|
|
|
```http
|
|
POST /api/v1/policy/budget/adjust
|
|
Content-Type: application/json
|
|
|
|
{
|
|
"serviceId": "my-service",
|
|
"adjustment": 40,
|
|
"reason": "MTTR improvement over 2 months"
|
|
}
|
|
```
|
|
|
|
### View History
|
|
|
|
```http
|
|
GET /api/v1/policy/budget/history?serviceId={id}&window={yyyy-MM}
|
|
```
|
|
|
|
## CLI Commands
|
|
|
|
### Check Status
|
|
|
|
```bash
|
|
stella budget status --service my-service
|
|
```
|
|
|
|
Output:
|
|
```
|
|
Service: my-service
|
|
Window: 2025-12
|
|
Tier: Customer-Facing Non-Critical (1)
|
|
Status: Yellow
|
|
|
|
Budget: 85 / 200 RP (42.5%)
|
|
████████░░░░░░░░░░░░
|
|
|
|
Remaining: 115 RP
|
|
```
|
|
|
|
### Consume Budget
|
|
|
|
```bash
|
|
stella budget consume --service my-service --points 25 --reason "Release v1.2.3"
|
|
```
|
|
|
|
### List All Budgets
|
|
|
|
```bash
|
|
stella budget list --status Yellow,Red
|
|
```
|
|
|
|
## Earned Capacity Replenishment
|
|
|
|
Services demonstrating improved reliability can earn additional budget capacity:
|
|
|
|
### Eligibility Criteria
|
|
|
|
1. **MTTR Improvement**: Mean Time to Remediate must improve for 2 consecutive windows
|
|
2. **CFR Improvement**: Change Failure Rate must improve for 2 consecutive windows
|
|
3. **No Major Incidents**: No P1 incidents in the evaluation period
|
|
|
|
### Increase Calculation
|
|
|
|
- Minimum increase: 10% of base allocation
|
|
- Maximum increase: 20% of base allocation
|
|
- Scale: Proportional to improvement magnitude
|
|
|
|
### Example
|
|
|
|
```
|
|
Service: payment-api (Tier 2, base 120 RP)
|
|
MTTR: 48h → 36h → 24h (50% improvement)
|
|
CFR: 15% → 12% → 8% (47% improvement)
|
|
|
|
Earned capacity: +20% = 24 RP
|
|
New allocation: 144 RP for next window
|
|
```
|
|
|
|
## Notifications
|
|
|
|
Budget threshold transitions trigger notifications:
|
|
|
|
### Warning (Yellow)
|
|
|
|
Sent when budget reaches 40% consumption:
|
|
|
|
```
|
|
Subject: [Warning] Risk Budget at 40% for my-service
|
|
|
|
Your risk budget for my-service has reached the warning threshold.
|
|
|
|
Current: 80 / 200 RP (40%)
|
|
Status: Yellow
|
|
|
|
Consider pausing non-critical changes until the next budget window.
|
|
```
|
|
|
|
### Critical (Red/Exhausted)
|
|
|
|
Sent when budget consumption reaches 70% (Red) or 100% (Exhausted):
|
|
|
|
```
|
|
Subject: [Critical] Risk Budget Exhausted for my-service
|
|
|
|
Your risk budget for my-service has been exhausted.
|
|
|
|
Current: 200 / 200 RP (100%)
|
|
Status: Exhausted
|
|
|
|
Only security fixes and incident responses are allowed.
|
|
Contact the Platform team for emergency capacity.
|
|
```
|
|
|
|
### Channels
|
|
|
|
Notifications are sent via:
|
|
- Email (to service owners)
|
|
- Slack (to designated channel)
|
|
- Microsoft Teams (to designated channel)
|
|
- Webhooks (for integration)
|
|
|
|
## Database Schema
|
|
|
|
```sql
|
|
CREATE TABLE policy.budget_ledger (
|
|
budget_id TEXT PRIMARY KEY,
|
|
service_id TEXT NOT NULL,
|
|
tenant_id TEXT,
|
|
tier INTEGER NOT NULL,
|
|
window TEXT NOT NULL,
|
|
allocated INTEGER NOT NULL,
|
|
consumed INTEGER NOT NULL DEFAULT 0,
|
|
status TEXT NOT NULL DEFAULT 'green',
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
UNIQUE(service_id, window)
|
|
);
|
|
|
|
CREATE TABLE policy.budget_entries (
|
|
entry_id TEXT PRIMARY KEY,
|
|
service_id TEXT NOT NULL,
|
|
window TEXT NOT NULL,
|
|
release_id TEXT NOT NULL,
|
|
risk_points INTEGER NOT NULL,
|
|
consumed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
FOREIGN KEY (service_id, window) REFERENCES policy.budget_ledger(service_id, window)
|
|
);
|
|
|
|
CREATE INDEX idx_budget_entries_service_window ON policy.budget_entries(service_id, window);
|
|
```
|
|
|
|
## Configuration
|
|
|
|
```yaml
|
|
# etc/policy.yaml
|
|
policy:
|
|
riskBudget:
|
|
enabled: true
|
|
windowCadence: monthly # monthly | weekly | sprint
|
|
carryOver: false
|
|
defaultTier: 1
|
|
|
|
tiers:
|
|
0: { name: Internal, allocation: 300 }
|
|
1: { name: CustomerFacingNonCritical, allocation: 200 }
|
|
2: { name: CustomerFacingCritical, allocation: 120 }
|
|
3: { name: SafetyCritical, allocation: 80 }
|
|
|
|
thresholds:
|
|
yellow: 40
|
|
red: 70
|
|
exhausted: 100
|
|
|
|
notifications:
|
|
enabled: true
|
|
channels: [email, slack]
|
|
aggregationWindow: 1h # Debounce rapid transitions
|
|
|
|
earnedCapacity:
|
|
enabled: true
|
|
requiredImprovementWindows: 2
|
|
minIncreasePercent: 10
|
|
maxIncreasePercent: 20
|
|
```
|
|
|
|
## Related Documentation
|
|
|
|
- [Unknown Budget Gates](./unknowns-budget-gates.md)
|
|
- [Verdict Attestations](../attestor/verdict-format.md)
|
|
- [BudgetCheckPredicate Model](../../api/attestor/budget-check-predicate.md)
|
|
- [Risk Point Scoring](./risk-point-scoring.md)
|
|
- [Diff-Aware Release Gates](./diff-aware-gates.md)
|