Files
git.stella-ops.org/docs/schemas/ttfs-event.schema.json
StellaOps Bot b058dbe031 up
2025-12-14 23:20:14 +02:00

323 lines
8.5 KiB
JSON

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://stella-ops.org/schemas/ttfs-event.schema.json",
  "title": "Time-to-First-Signal (TTFS) Telemetry Event",
  "description": "Schema for tracking time-to-first-signal metrics across UI, CLI, and CI surfaces",
  "type": "object",
  "required": [
    "schema_version",
    "event_type",
    "timestamp",
    "tenant_id",
    "job_id",
    "surface",
    "ttfs_ms"
  ],
  "properties": {
    "schema_version": {
      "type": "string",
      "pattern": "^v[0-9]+\\.[0-9]+$",
      "description": "Schema version (e.g., v1.0)",
      "examples": ["v1.0"]
    },
    "event_type": {
      "type": "string",
      "enum": [
        "signal.start",
        "signal.rendered",
        "signal.timeout",
        "signal.error",
        "signal.cache_hit",
        "signal.cold_start"
      ],
      "description": "Type of TTFS event"
    },
    "timestamp": {
      "type": "string",
      "format": "date-time",
      "description": "ISO-8601 UTC timestamp when event occurred"
    },
    "tenant_id": {
      "type": "string",
      "minLength": 1,
      "description": "Tenant identifier for scoping"
    },
    "job_id": {
      "type": "string",
      "format": "uuid",
      "description": "Job identifier for the signal request"
    },
    "run_id": {
      "type": ["string", "null"],
      "format": "uuid",
      "description": "Run identifier if job is part of a run"
    },
    "correlation_id": {
      "type": ["string", "null"],
      "description": "Correlation ID for distributed tracing"
    },
    "surface": {
      "$ref": "#/$defs/signal_surface",
      "description": "Surface where the signal request originated"
    },
    "ttfs_ms": {
      "type": "integer",
      "minimum": 0,
      "description": "Time-to-first-signal in milliseconds"
    },
    "cache_hit": {
      "type": "boolean",
      "default": false,
      "description": "True if signal was served from cache"
    },
    "signal_source": {
      "anyOf": [
        { "$ref": "#/$defs/signal_source_type" },
        { "type": "null" }
      ],
      "description": "Source of the signal data"
    },
    "kind": {
      "anyOf": [
        { "$ref": "#/$defs/signal_kind" },
        { "type": "null" }
      ],
      "description": "Signal kind indicating current job state"
    },
    "phase": {
      "anyOf": [
        { "$ref": "#/$defs/signal_phase" },
        { "type": "null" }
      ],
      "description": "Current execution phase of the job"
    },
    "network_state": {
      "type": ["string", "null"],
      "description": "Client network state (e.g., '4g', 'wifi', 'offline')"
    },
    "device": {
      "type": ["string", "null"],
      "description": "Client device type (e.g., 'desktop', 'mobile', 'cli')"
    },
    "release": {
      "type": ["string", "null"],
      "description": "Application release version"
    },
    "trace_id": {
      "type": ["string", "null"],
      "pattern": "^[a-f0-9]{32}$",
      "description": "OpenTelemetry trace ID (32 hex characters)"
    },
    "span_id": {
      "type": ["string", "null"],
      "pattern": "^[a-f0-9]{16}$",
      "description": "OpenTelemetry span ID (16 hex characters)"
    },
    "error_code": {
      "type": ["string", "null"],
      "description": "Error code; required and must be a non-empty string when event_type is signal.error or signal.timeout"
    },
    "error_message": {
      "type": ["string", "null"],
      "description": "Human-readable error message"
    },
    "slo_target_ms": {
      "type": ["integer", "null"],
      "minimum": 0,
      "description": "SLO target in milliseconds for this request"
    },
    "slo_breach": {
      "type": "boolean",
      "default": false,
      "description": "True if this event represents an SLO breach"
    },
    "is_offline_mode": {
      "type": "boolean",
      "default": false,
      "description": "True if event occurred in offline/air-gap mode"
    },
    "metadata": {
      "type": "object",
      "additionalProperties": true,
      "description": "Additional context-specific metadata"
    }
  },
  "additionalProperties": false,
  "allOf": [
    {
      "$comment": "error_code is declared nullable under properties, so 'required' alone would still accept null. Failure events (signal.error, signal.timeout) must carry a non-empty string code.",
      "if": {
        "properties": {
          "event_type": { "enum": ["signal.error", "signal.timeout"] }
        },
        "required": ["event_type"]
      },
      "then": {
        "required": ["error_code"],
        "properties": {
          "error_code": { "type": "string", "minLength": 1 }
        }
      }
    }
  ],
  "examples": [
    {
      "schema_version": "v1.0",
      "event_type": "signal.rendered",
      "timestamp": "2025-12-14T10:30:00.000Z",
      "tenant_id": "tenant-123",
      "job_id": "550e8400-e29b-41d4-a716-446655440000",
      "run_id": "660e8400-e29b-41d4-a716-446655440001",
      "surface": "ui",
      "ttfs_ms": 850,
      "cache_hit": true,
      "signal_source": "snapshot",
      "kind": "started",
      "phase": "analyze",
      "slo_target_ms": 2000,
      "slo_breach": false,
      "is_offline_mode": false
    },
    {
      "schema_version": "v1.0",
      "event_type": "signal.cache_hit",
      "timestamp": "2025-12-14T10:30:01.000Z",
      "tenant_id": "tenant-123",
      "job_id": "550e8400-e29b-41d4-a716-446655440000",
      "surface": "cli",
      "ttfs_ms": 120,
      "cache_hit": true,
      "signal_source": "snapshot",
      "kind": "phase",
      "phase": "policy",
      "device": "cli",
      "release": "1.2.3"
    },
    {
      "schema_version": "v1.0",
      "event_type": "signal.cold_start",
      "timestamp": "2025-12-14T10:31:00.000Z",
      "tenant_id": "tenant-456",
      "job_id": "770e8400-e29b-41d4-a716-446655440002",
      "surface": "ci",
      "ttfs_ms": 3200,
      "cache_hit": false,
      "signal_source": "cold_start",
      "kind": "succeeded",
      "phase": "report",
      "slo_target_ms": 5000,
      "slo_breach": false
    },
    {
      "schema_version": "v1.0",
      "event_type": "signal.timeout",
      "timestamp": "2025-12-14T10:32:00.000Z",
      "tenant_id": "tenant-789",
      "job_id": "880e8400-e29b-41d4-a716-446655440003",
      "surface": "ui",
      "ttfs_ms": 5500,
      "cache_hit": false,
      "signal_source": "cold_start",
      "kind": "unavailable",
      "phase": "unknown",
      "error_code": "TTFS_TIMEOUT_EXCEEDED",
      "error_message": "Signal fetch exceeded 5s budget",
      "slo_target_ms": 5000,
      "slo_breach": true
    },
    {
      "schema_version": "v1.0",
      "event_type": "signal.error",
      "timestamp": "2025-12-14T10:33:00.000Z",
      "tenant_id": "tenant-abc",
      "job_id": "990e8400-e29b-41d4-a716-446655440004",
      "surface": "ui",
      "ttfs_ms": 1200,
      "cache_hit": false,
      "error_code": "TTFS_JOB_NOT_FOUND",
      "error_message": "Job not found or access denied",
      "trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",
      "span_id": "00f067aa0ba902b7"
    },
    {
      "schema_version": "v1.0",
      "event_type": "signal.rendered",
      "timestamp": "2025-12-14T10:34:00.000Z",
      "tenant_id": "tenant-airgap",
      "job_id": "aa0e8400-e29b-41d4-a716-446655440005",
      "surface": "ui",
      "ttfs_ms": 1800,
      "cache_hit": false,
      "signal_source": "failure_index",
      "kind": "failed",
      "phase": "analyze",
      "is_offline_mode": true,
      "metadata": {
        "failure_signature_id": "sig-001",
        "predicted_mttr_seconds": 300,
        "likely_cause": "Registry rate limiting"
      }
    }
  ],
  "$defs": {
    "signal_kind": {
      "type": "string",
      "enum": [
        "queued",
        "started",
        "phase",
        "blocked",
        "failed",
        "succeeded",
        "canceled",
        "unavailable"
      ],
      "description": "Enumeration of signal kinds"
    },
    "signal_phase": {
      "type": "string",
      "enum": [
        "resolve",
        "fetch",
        "restore",
        "analyze",
        "policy",
        "report",
        "unknown"
      ],
      "description": "Enumeration of execution phases"
    },
    "signal_surface": {
      "type": "string",
      "enum": ["ui", "cli", "ci"],
      "description": "Enumeration of request surfaces"
    },
    "signal_source_type": {
      "type": "string",
      "enum": ["snapshot", "cold_start", "failure_index"],
      "description": "Enumeration of signal data sources"
    }
  }
}