Add Policy DSL Validator, Schema Exporter, and Simulation Smoke tools
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implemented PolicyDslValidator with command-line options for strict mode and JSON output.
- Created PolicySchemaExporter to generate JSON schemas for policy-related models.
- Developed PolicySimulationSmoke tool to validate policy simulations against expected outcomes.
- Added project files and necessary dependencies for each tool.
- Ensured proper error handling and usage instructions across tools.
This commit is contained in:
33
deploy/telemetry/storage/README.md
Normal file
33
deploy/telemetry/storage/README.md
Normal file
@@ -0,0 +1,33 @@
|
||||
# Telemetry Storage Stack
|
||||
|
||||
Configuration snippets for the default StellaOps observability backends used in
staging and production environments. The stack comprises:
|
||||
|
||||
- **Prometheus** for metrics (scraping the collector's Prometheus exporter)
- **Tempo** for traces (OTLP ingest via mTLS)
- **Loki** for logs (HTTP ingest with tenant isolation)
|
||||
|
||||
## Files
|
||||
|
||||
| Path | Description |
| ---- | ----------- |
| `prometheus.yaml` | Scrape configuration for the collector (mTLS + bearer token placeholder). |
| `tempo.yaml` | Tempo configuration with multitenancy enabled and local storage paths. |
| `loki.yaml` | Loki configuration enabling per-tenant overrides and boltdb-shipper storage. |
| `tenants/tempo-overrides.yaml` | Example tenant overrides for Tempo (retention, limits). |
| `tenants/loki-overrides.yaml` | Example tenant overrides for Loki (rate limits, retention). |
| `auth/` | Placeholder directory for Prometheus bearer token files (e.g., `token`). |
|
||||
|
||||
These configurations are referenced by the Docker Compose overlay
(`deploy/compose/docker-compose.telemetry-storage.yaml`) and by the staging rollout documented in
`docs/ops/telemetry-storage.md`. Adjust paths, credentials, and overrides before running in
connected environments. When using the Compose overlay, place the Prometheus bearer token in
`auth/token`; the `auth/` directory ships with only a `.gitkeep` placeholder, and its other
contents are gitignored by default.
|
||||
|
||||
## Security
|
||||
|
||||
- Both Tempo and Loki require mutual TLS.
- Prometheus uses mTLS plus a bearer token that should be minted by Authority.
- Update the overrides files to enforce per-tenant retention/ingestion limits.
|
||||
|
||||
For comprehensive deployment steps see `docs/ops/telemetry-storage.md`.
|
||||
0
deploy/telemetry/storage/auth/.gitkeep
Normal file
0
deploy/telemetry/storage/auth/.gitkeep
Normal file
48
deploy/telemetry/storage/loki.yaml
Normal file
48
deploy/telemetry/storage/loki.yaml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Loki configuration for the telemetry storage stack (single instance,
# filesystem-backed storage under /var/loki).
#
# NOTE(review): the accompanying README states that Loki requires mutual TLS,
# but no TLS settings are present in this file - confirm whether TLS is
# terminated elsewhere (e.g. by a proxy) or needs to be added under `server`.

# Multi-tenant mode; the README describes log ingest "with tenant isolation".
auth_enabled: true

server:
  http_listen_port: 3100
  log_level: info

common:
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      # In-memory ring state: suitable for a single instance only.
      store: inmemory
  replication_factor: 1
  path_prefix: /var/loki

schema_config:
  configs:
    # Quoted so generic YAML tooling keeps the value a string instead of
    # resolving it to a date/timestamp type.
    - from: "2024-01-01"
      store: boltdb-shipper
      object_store: filesystem
      schema: v13
      index:
        prefix: loki_index_
        period: 24h

storage_config:
  filesystem:
    directory: /var/loki/chunks
  boltdb_shipper:
    active_index_directory: /var/loki/index
    cache_location: /var/loki/index_cache
    # NOTE(review): `shared_store` is presumably only accepted by older Loki
    # releases - verify against the Loki version actually deployed.
    shared_store: filesystem

ruler:
  storage:
    type: local
    local:
      directory: /var/loki/rules
  rule_path: /tmp/loki-rules
  enable_api: true

limits_config:
  # NOTE(review): `enforce_metric_name` is presumably version-dependent as
  # well - verify against the deployed Loki release.
  enforce_metric_name: false
  reject_old_samples: true
  # 168h = 7 days.
  reject_old_samples_max_age: 168h
  max_entries_limit_per_query: 5000
  ingestion_rate_mb: 10
  ingestion_burst_size_mb: 20
  # Per-tenant limits are loaded from this file (see tenants/loki-overrides.yaml).
  per_tenant_override_config: /etc/telemetry/tenants/loki-overrides.yaml
|
||||
19
deploy/telemetry/storage/prometheus.yaml
Normal file
19
deploy/telemetry/storage/prometheus.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# Prometheus scrape configuration for the StellaOps OpenTelemetry collector.
#
# The `${VAR:-default}` values below are placeholders. Prometheus does not
# expand environment variables in most of its configuration file, so this file
# must be rendered (variables substituted) before Prometheus loads it.
# NOTE(review): confirm the Compose overlay / rollout performs that rendering;
# otherwise replace the placeholders with the literal default paths.
# Placeholders are quoted so an empty or odd-looking expansion stays a string.
global:
  scrape_interval: 15s
  evaluation_interval: 30s

scrape_configs:
  - job_name: "stellaops-otel-collector"
    scheme: https
    # NOTE(review): metrics are scraped from the root path - confirm the
    # collector's Prometheus exporter serves on `/` rather than `/metrics`.
    metrics_path: /
    # Mutual TLS: CA to verify the collector, plus a client certificate/key.
    tls_config:
      ca_file: "${PROMETHEUS_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}"
      cert_file: "${PROMETHEUS_TLS_CERT_FILE:-/etc/telemetry/tls/client.crt}"
      key_file: "${PROMETHEUS_TLS_KEY_FILE:-/etc/telemetry/tls/client.key}"
      insecure_skip_verify: false
    # Bearer token is read from a file so the secret never lives in this config.
    authorization:
      type: Bearer
      credentials_file: "${PROMETHEUS_BEARER_TOKEN_FILE:-/etc/telemetry/auth/token}"
    static_configs:
      - targets:
          - "${PROMETHEUS_COLLECTOR_TARGET:-stellaops-otel-collector:9464}"
|
||||
56
deploy/telemetry/storage/tempo.yaml
Normal file
56
deploy/telemetry/storage/tempo.yaml
Normal file
@@ -0,0 +1,56 @@
|
||||
# Tempo configuration for the telemetry storage stack: multi-tenant, OTLP
# ingest over TLS with client certificates, local trace storage.
#
# The `${VAR:-default}` placeholders rely on environment expansion when the
# config is loaded.
# NOTE(review): confirm Tempo is started with config env expansion enabled.
multitenancy_enabled: true

usage_report:
  reporting_enabled: false

server:
  http_listen_port: 3200
  log_level: info

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          tls:
            cert_file: "${TEMPO_TLS_CERT_FILE:-/etc/telemetry/tls/server.crt}"
            key_file: "${TEMPO_TLS_KEY_FILE:-/etc/telemetry/tls/server.key}"
            client_ca_file: "${TEMPO_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}"
            # NOTE(review): verify `require_client_cert` is a key the OTLP
            # receiver's TLS settings accept.
            require_client_cert: true
        http:
          tls:
            cert_file: "${TEMPO_TLS_CERT_FILE:-/etc/telemetry/tls/server.crt}"
            key_file: "${TEMPO_TLS_KEY_FILE:-/etc/telemetry/tls/server.key}"
            client_ca_file: "${TEMPO_TLS_CA_FILE:-/etc/telemetry/tls/ca.crt}"
            # NOTE(review): same check as for the gRPC receiver above.
            require_client_cert: true

ingester:
  lifecycler:
    ring:
      # NOTE(review): nesting reconstructed - confirm this key belongs under
      # `lifecycler.ring` for the deployed Tempo version.
      instance_availability_zone: "${TEMPO_ZONE:-zone-a}"
  trace_idle_period: 10s
  # 1 MiB. Written without digit separators: `1_048_576` is only read as an
  # integer by some YAML parsers, and the overrides section below already uses
  # the plain form.
  max_block_bytes: 1048576

compactor:
  compaction:
    # 168h = 7 days.
    block_retention: 168h

metrics_generator:
  registry:
    external_labels:
      cluster: stellaops

storage:
  trace:
    backend: local
    local:
      path: /var/tempo/traces
    wal:
      path: /var/tempo/wal
  # NOTE(review): `storage.metrics` does not look like a documented Tempo
  # option and its intended nesting is unclear - verify or remove, since an
  # unknown key may be rejected at startup.
  metrics:
    backend: prometheus

overrides:
  # NOTE(review): confirm whether the deployed Tempo version expects these
  # limits directly under `overrides` (legacy format) or under the grouped
  # `defaults` structure.
  defaults:
    ingestion_rate_limit_bytes: 1048576
    max_traces_per_user: 200000
  # Per-tenant limits are loaded from this file (see tenants/tempo-overrides.yaml).
  per_tenant_override_config: /etc/telemetry/tenants/tempo-overrides.yaml
|
||||
19
deploy/telemetry/storage/tenants/loki-overrides.yaml
Normal file
19
deploy/telemetry/storage/tenants/loki-overrides.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# Example Loki per-tenant overrides
# Adjust according to https://grafana.com/docs/loki/latest/configuration/#limits_config
#
# Tenants are keyed by tenant ID under the top-level `overrides` mapping, which
# is the layout Loki expects for the per-tenant override file.
overrides:
  stellaops-dev:
    ingestion_rate_mb: 10
    ingestion_burst_size_mb: 20
    max_global_streams_per_user: 5000
    # 168h = 7 days.
    retention_period: 168h

  stellaops-stage:
    ingestion_rate_mb: 20
    ingestion_burst_size_mb: 40
    max_global_streams_per_user: 10000
    # 336h = 14 days.
    retention_period: 336h

  # NOTE(review): Loki presumably treats `__default__` as a literal tenant ID
  # rather than a fallback; limits for unlisted tenants normally come from
  # `limits_config` in loki.yaml - confirm the intent.
  __default__:
    ingestion_rate_mb: 5
    ingestion_burst_size_mb: 10
    # 72h = 3 days.
    retention_period: 72h
|
||||
16
deploy/telemetry/storage/tenants/tempo-overrides.yaml
Normal file
16
deploy/telemetry/storage/tenants/tempo-overrides.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
# Example Tempo per-tenant overrides
# Consult https://grafana.com/docs/tempo/latest/configuration/#limits-configuration
# before applying in production.
#
# Tenants are keyed by tenant ID under the top-level `overrides` mapping, which
# is the layout Tempo expects for the per-tenant override file.
# NOTE(review): `traces_per_second_limit` does not appear among Tempo's
# documented limits - verify every key name against the deployed version.
overrides:
  stellaops-dev:
    traces_per_second_limit: 100000
    # 10 MiB
    max_bytes_per_trace: 10485760
    # 20 MiB
    max_search_bytes_per_trace: 20971520

  stellaops-stage:
    traces_per_second_limit: 200000
    # 20 MiB
    max_bytes_per_trace: 20971520

  # NOTE(review): Tempo presumably treats `__default__` as a literal tenant ID;
  # confirm whether the wildcard tenant ("*") was intended as the fallback.
  __default__:
    traces_per_second_limit: 50000
    # 5 MiB
    max_bytes_per_trace: 5242880
|
||||
Reference in New Issue
Block a user