# =============================================================================
# STELLA OPS - TELEMETRY STACK
# =============================================================================
# All-in-one observability: OpenTelemetry Collector, Prometheus, Tempo, Loki
#
# Usage:
#   docker compose -f devops/compose/docker-compose.telemetry.yml up -d
#
# With main stack:
#   docker compose -f devops/compose/docker-compose.stella-ops.yml \
#     -f devops/compose/docker-compose.telemetry.yml up -d
#
# =============================================================================

# Reusable label mapping. The anchor lets services attach the same labels
# with `labels: *telemetry-labels` instead of repeating them.
x-telemetry-labels: &telemetry-labels
  com.stellaops.component: 'telemetry'
  com.stellaops.profile: 'observability'

# Two networks:
#   - stellaops-telemetry: bridge network defined by this file.
#   - stellaops: declared external, so Compose expects it to exist already
#     and will not create it.
networks:
  stellaops-telemetry:
    name: stellaops-telemetry
    driver: bridge
  stellaops:
    name: stellaops
    external: true

# Named volumes with default settings, one per storage backend
# (mounted by the prometheus, tempo and loki services respectively).
volumes:
  prometheus-data: {}
  tempo-data: {}
  loki-data: {}

services:
# ---------------------------------------------------------------------------
# OpenTelemetry Collector - Unified telemetry ingestion
# ---------------------------------------------------------------------------
  otel-collector:
    image: otel/opentelemetry-collector:0.105.0
    container_name: stellaops-otel-collector
    restart: unless-stopped
    command:
      - "--config=/etc/otel-collector/config.yaml"
    # Every *_CERT / *_KEY / *_CA path below points into
    # /etc/otel-collector/tls, the read-only mount of ../telemetry/certs.
    # Presumably the mounted collector config consumes these variables;
    # verify against ../telemetry/otel-collector-config.yaml.
    environment:
      STELLAOPS_OTEL_TLS_CERT: /etc/otel-collector/tls/collector.crt
      STELLAOPS_OTEL_TLS_KEY: /etc/otel-collector/tls/collector.key
      STELLAOPS_OTEL_TLS_CA: /etc/otel-collector/tls/ca.crt
      STELLAOPS_OTEL_PROMETHEUS_ENDPOINT: "0.0.0.0:9464"
      # Quoted so the value stays the string "true" rather than a YAML boolean.
      STELLAOPS_OTEL_REQUIRE_CLIENT_CERT: "true"
      # Falls back to "default" when STELLAOPS_TENANT_ID is unset or empty.
      STELLAOPS_TENANT_ID: "${STELLAOPS_TENANT_ID:-default}"
      # NOTE(review): both backend endpoints use http:// although client
      # cert/key/CA files are supplied for them - confirm the intended scheme.
      # NOTE(review): 3200 is the port the tempo service publishes; confirm
      # the collector's trace exporter is meant to target it rather than an
      # OTLP receiver port.
      STELLAOPS_TEMPO_ENDPOINT: http://tempo:3200
      STELLAOPS_TEMPO_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_TEMPO_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_TEMPO_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
      STELLAOPS_LOKI_ENDPOINT: http://loki:3100/loki/api/v1/push
      STELLAOPS_LOKI_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_LOKI_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_LOKI_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
    volumes:
      - ../telemetry/otel-collector-config.yaml:/etc/otel-collector/config.yaml:ro
      - ../telemetry/certs:/etc/otel-collector/tls:ro
    # Host ports are overridable through the OTEL_*_PORT variables.
    # NOTE(review): no host IP is given, so every port (including pprof) is
    # published on all host interfaces - confirm that is intended.
    ports:
      - "${OTEL_GRPC_PORT:-4317}:4317"  # OTLP gRPC
      - "${OTEL_HTTP_PORT:-4318}:4318"  # OTLP HTTP
      - "${OTEL_PROMETHEUS_PORT:-9464}:9464"  # Prometheus exporter
      - "${OTEL_HEALTH_PORT:-13133}:13133"  # Health check
      - "${OTEL_PPROF_PORT:-1777}:1777"  # pprof
    # No container-level healthcheck: the otel/opentelemetry-collector image
    # is built from scratch and ships neither curl nor a shell, so an exec
    # probe such as `curl -f http://localhost:13133/healthz` can never succeed
    # and would leave the container permanently "unhealthy". Probe the
    # published health port (13133) from the host or the orchestrator instead.
    networks:
      - stellaops-telemetry
      - stellaops
    labels: *telemetry-labels

# ---------------------------------------------------------------------------
# Prometheus - Metrics storage
# ---------------------------------------------------------------------------
  prometheus:
    container_name: stellaops-prometheus
    image: prom/prometheus:v2.53.0
    restart: unless-stopped
    labels: *telemetry-labels
    networks:
      - stellaops-telemetry
    # Short-form dependency: only start order relative to the collector.
    depends_on:
      - otel-collector
    # Host port defaults to 9090; override with PROMETHEUS_PORT.
    ports:
      - '${PROMETHEUS_PORT:-9090}:9090'
    environment:
      PROMETHEUS_COLLECTOR_TARGET: otel-collector:9464
    volumes:
      - ../telemetry/storage/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
      # TSDB directory (see --storage.tsdb.path) lives in a named volume.
      - prometheus-data:/prometheus
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - ../telemetry/storage/auth:/etc/telemetry/auth:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yaml'
      - '--storage.tsdb.path=/prometheus'
      # Retention defaults to 15d; override with PROMETHEUS_RETENTION.
      - '--storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-15d}'
      - '--web.enable-lifecycle'

# ---------------------------------------------------------------------------
# Tempo - Distributed tracing backend
# ---------------------------------------------------------------------------
  tempo:
    container_name: stellaops-tempo
    image: grafana/tempo:2.5.0
    restart: unless-stopped
    labels: *telemetry-labels
    networks:
      - stellaops-telemetry
    # Host port defaults to 3200; override with TEMPO_PORT.
    ports:
      - '${TEMPO_PORT:-3200}:3200'
    environment:
      TEMPO_ZONE: docker
    volumes:
      # Main config and per-tenant overrides, both mounted read-only.
      - ../telemetry/storage/tempo.yaml:/etc/tempo/tempo.yaml:ro
      - ../telemetry/storage/tenants/tempo-overrides.yaml:/etc/telemetry/tenants/tempo-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      # /var/tempo is backed by the tempo-data named volume.
      - tempo-data:/var/tempo
    command:
      - '-config.file=/etc/tempo/tempo.yaml'

# ---------------------------------------------------------------------------
# Loki - Log aggregation
# ---------------------------------------------------------------------------
  loki:
    container_name: stellaops-loki
    image: grafana/loki:3.1.0
    restart: unless-stopped
    labels: *telemetry-labels
    networks:
      - stellaops-telemetry
    # Host port defaults to 3100; override with LOKI_PORT.
    ports:
      - '${LOKI_PORT:-3100}:3100'
    volumes:
      # Main config and per-tenant overrides, both mounted read-only.
      - ../telemetry/storage/loki.yaml:/etc/loki/loki.yaml:ro
      - ../telemetry/storage/tenants/loki-overrides.yaml:/etc/telemetry/tenants/loki-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      # /var/loki is backed by the loki-data named volume.
      - loki-data:/var/loki
    command:
      - '-config.file=/etc/loki/loki.yaml'