# Source: git.stella-ops.org/deploy/compose/docker-compose.telemetry.yml
# Timestamp: 2026-01-25 23:27:41 +02:00 (145 lines, 5.3 KiB, YAML)

# =============================================================================
# STELLA OPS - TELEMETRY STACK
# =============================================================================
# All-in-one observability: OpenTelemetry Collector, Prometheus, Tempo, Loki
#
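# Usage:
#   docker compose -f devops/compose/docker-compose.telemetry.yml up -d
#
# With main stack:
#   docker compose -f devops/compose/docker-compose.stella-ops.yml \
#     -f devops/compose/docker-compose.telemetry.yml up -d
#
# NOTE: adjust the paths above if this file lives elsewhere in your checkout
# (for example under deploy/compose/).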
#
# =============================================================================
# Shared label set. Each service below attaches it with `labels: *telemetry-labels`.
# The two labels must be nested under the anchored key; otherwise the anchor
# binds to an empty value and the alias resolves to null.
x-telemetry-labels: &telemetry-labels
  com.stellaops.component: "telemetry"
  com.stellaops.profile: "observability"
networks:
  # Bridge network created by this file; every telemetry service joins it.
  stellaops-telemetry:
    driver: bridge
    name: stellaops-telemetry
  # Declared external: Compose will not create it and `up` fails if it does
  # not already exist. Presumably created by the main Stella Ops stack; when
  # running this file on its own, create it first (`docker network create stellaops`).
  stellaops:
    external: true
    name: stellaops
# Named volumes with default driver settings, one per storage backend.
# `{}` is written explicitly instead of a bare key so the "no options" intent
# is unambiguous; Compose treats both forms the same.
volumes:
  prometheus-data: {}
  tempo-data: {}
  loki-data: {}
services:
# ---------------------------------------------------------------------------
# OpenTelemetry Collector - Unified telemetry ingestion
# ---------------------------------------------------------------------------
  otel-collector:
    image: otel/opentelemetry-collector:0.105.0
    container_name: stellaops-otel-collector
    restart: unless-stopped
    command:
      - "--config=/etc/otel-collector/config.yaml"
    # Passed to the collector process as environment variables. How each one
    # is used is decided by the mounted config.yaml, which is not part of this
    # file.
    environment:
      # Server-side TLS material, read from the certs mount below.
      STELLAOPS_OTEL_TLS_CERT: /etc/otel-collector/tls/collector.crt
      STELLAOPS_OTEL_TLS_KEY: /etc/otel-collector/tls/collector.key
      STELLAOPS_OTEL_TLS_CA: /etc/otel-collector/tls/ca.crt
      STELLAOPS_OTEL_PROMETHEUS_ENDPOINT: 0.0.0.0:9464
      # Quoted so the container receives the string "true", not a YAML boolean.
      STELLAOPS_OTEL_REQUIRE_CLIENT_CERT: "true"
      # Falls back to "default" when STELLAOPS_TENANT_ID is unset or empty.
      STELLAOPS_TENANT_ID: ${STELLAOPS_TENANT_ID:-default}
      # NOTE(review): the Tempo and Loki endpoints use plain http:// while
      # client certificate/key/CA files are also supplied - confirm which
      # transport the backends actually expect. Also confirm that tempo:3200
      # is the port Tempo's trace receiver listens on in tempo.yaml.
      STELLAOPS_TEMPO_ENDPOINT: http://tempo:3200
      STELLAOPS_TEMPO_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_TEMPO_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_TEMPO_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
      STELLAOPS_LOKI_ENDPOINT: http://loki:3100/loki/api/v1/push
      STELLAOPS_LOKI_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_LOKI_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_LOKI_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
    # Both mounts are read-only; host paths are relative to this compose file.
    volumes:
      - ../telemetry/otel-collector-config.yaml:/etc/otel-collector/config.yaml:ro
      - ../telemetry/certs:/etc/otel-collector/tls:ro
    # Host ports are overridable through the named variables; container ports
    # are fixed.
    ports:
      - "${OTEL_GRPC_PORT:-4317}:4317"  # OTLP gRPC
      - "${OTEL_HTTP_PORT:-4318}:4318"  # OTLP HTTP
      - "${OTEL_PROMETHEUS_PORT:-9464}:9464"  # Prometheus exporter
      - "${OTEL_HEALTH_PORT:-13133}:13133"  # Health check
      # NOTE(review): this publishes the profiling endpoint on every host
      # interface - confirm that is intended outside local development.
      - "${OTEL_PPROF_PORT:-1777}:1777"  # pprof
    # NOTE(review): the probe runs `curl` inside the container. The upstream
    # otel/opentelemetry-collector image is, as far as I know, built without
    # curl or a shell - confirm the probe can actually execute, otherwise the
    # container will permanently report "unhealthy".
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:13133/healthz"]
      interval: 30s
      start_period: 15s
      timeout: 5s
      retries: 3
    # Attached to the private telemetry network and to the external
    # `stellaops` network.
    networks:
      - stellaops-telemetry
      - stellaops
    labels: *telemetry-labels
# ---------------------------------------------------------------------------
# Prometheus - Metrics storage
# ---------------------------------------------------------------------------
  prometheus:
    image: prom/prometheus:v2.53.0
    container_name: stellaops-prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yaml"
      - "--storage.tsdb.path=/prometheus"
      # Retention defaults to 15d when PROMETHEUS_RETENTION is unset or empty.
      - "--storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-15d}"
      # Enables the HTTP reload/quit management endpoints. They are reachable
      # through the published port below, so restrict access accordingly.
      - "--web.enable-lifecycle"
    volumes:
      - ../telemetry/storage/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
      # TSDB data lives in the named volume (matches --storage.tsdb.path).
      - prometheus-data:/prometheus
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - ../telemetry/storage/auth:/etc/telemetry/auth:ro
    # NOTE(review): Prometheus does not, to my knowledge, expand environment
    # variables inside its config file by default - confirm something actually
    # consumes this value.
    environment:
      PROMETHEUS_COLLECTOR_TARGET: otel-collector:9464
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    # Short-form depends_on: orders startup after otel-collector but does not
    # wait for it to become healthy.
    depends_on:
      - otel-collector
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels
# ---------------------------------------------------------------------------
# Tempo - Distributed tracing backend
# ---------------------------------------------------------------------------
  tempo:
    image: grafana/tempo:2.5.0
    container_name: stellaops-tempo
    restart: unless-stopped
    command:
      - "-config.file=/etc/tempo/tempo.yaml"
    # Config, per-tenant overrides and TLS material are mounted read-only;
    # only the named data volume is writable.
    volumes:
      - ../telemetry/storage/tempo.yaml:/etc/tempo/tempo.yaml:ro
      - ../telemetry/storage/tenants/tempo-overrides.yaml:/etc/telemetry/tenants/tempo-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - tempo-data:/var/tempo
    # Presumably referenced from tempo.yaml - verify against that file.
    environment:
      TEMPO_ZONE: docker
    # Host port is overridable via TEMPO_PORT; the container port is fixed.
    ports:
      - "${TEMPO_PORT:-3200}:3200"
    # Only on the private telemetry network.
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels
# ---------------------------------------------------------------------------
# Loki - Log aggregation
# ---------------------------------------------------------------------------
  loki:
    image: grafana/loki:3.1.0
    container_name: stellaops-loki
    restart: unless-stopped
    command:
      - "-config.file=/etc/loki/loki.yaml"
    # Config, per-tenant overrides and TLS material are mounted read-only;
    # only the named data volume is writable.
    volumes:
      - ../telemetry/storage/loki.yaml:/etc/loki/loki.yaml:ro
      - ../telemetry/storage/tenants/loki-overrides.yaml:/etc/telemetry/tenants/loki-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - loki-data:/var/loki
    # Host port is overridable via LOKI_PORT; the container port is fixed.
    ports:
      - "${LOKI_PORT:-3100}:3100"
    # Only on the private telemetry network.
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels