# =============================================================================
# STELLA OPS - TELEMETRY STACK
# =============================================================================
# All-in-one observability: OpenTelemetry Collector, Prometheus, Tempo, Loki
#
# Usage:
#   docker compose -f devops/compose/docker-compose.telemetry.yml up -d
#
# With main stack:
#   docker compose -f devops/compose/docker-compose.stella-ops.yml \
#                  -f devops/compose/docker-compose.telemetry.yml up -d
#
# =============================================================================

# Labels applied to every service in this file via the *telemetry-labels alias.
x-telemetry-labels: &telemetry-labels
  com.stellaops.component: "telemetry"
  com.stellaops.profile: "observability"

networks:
  # Private bridge network created by this file for the telemetry services.
  stellaops-telemetry:
    driver: bridge
    name: stellaops-telemetry
  # Pre-existing network (external: true); it is not created by this file and
  # must already exist before `up`.
  stellaops:
    external: true
    name: stellaops

# Named volumes backing the data directories of the storage services below.
volumes:
  prometheus-data:
  tempo-data:
  loki-data:

services:
  # ---------------------------------------------------------------------------
  # OpenTelemetry Collector - Unified telemetry ingestion
  # ---------------------------------------------------------------------------
  otel-collector:
    image: otel/opentelemetry-collector:0.105.0
    container_name: stellaops-otel-collector
    restart: unless-stopped
    command:
      - "--config=/etc/otel-collector/config.yaml"
    # All paths below point inside the read-only /etc/otel-collector/tls mount.
    # Presumably these variables are expanded by otel-collector-config.yaml;
    # verify against that file, which is not part of this compose definition.
    environment:
      STELLAOPS_OTEL_TLS_CERT: /etc/otel-collector/tls/collector.crt
      STELLAOPS_OTEL_TLS_KEY: /etc/otel-collector/tls/collector.key
      STELLAOPS_OTEL_TLS_CA: /etc/otel-collector/tls/ca.crt
      STELLAOPS_OTEL_PROMETHEUS_ENDPOINT: 0.0.0.0:9464
      # Quoted so the value stays a string rather than a YAML boolean.
      STELLAOPS_OTEL_REQUIRE_CLIENT_CERT: "true"
      STELLAOPS_TENANT_ID: ${STELLAOPS_TENANT_ID:-default}
      # NOTE(review): plain http:// is used together with client cert/key
      # settings, and 3200 is the port the tempo service publishes below.
      # Confirm scheme and ingestion port against tempo.yaml.
      STELLAOPS_TEMPO_ENDPOINT: http://tempo:3200
      STELLAOPS_TEMPO_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_TEMPO_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_TEMPO_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
      # NOTE(review): same http:// vs. client-certificate question as Tempo.
      STELLAOPS_LOKI_ENDPOINT: http://loki:3100/loki/api/v1/push
      STELLAOPS_LOKI_TLS_CERT_FILE: /etc/otel-collector/tls/client.crt
      STELLAOPS_LOKI_TLS_KEY_FILE: /etc/otel-collector/tls/client.key
      STELLAOPS_LOKI_TLS_CA_FILE: /etc/otel-collector/tls/ca.crt
    volumes:
      - ../telemetry/otel-collector-config.yaml:/etc/otel-collector/config.yaml:ro
      - ../telemetry/certs:/etc/otel-collector/tls:ro
    ports:
      - "${OTEL_GRPC_PORT:-4317}:4317"  # OTLP gRPC
      - "${OTEL_HTTP_PORT:-4318}:4318"  # OTLP HTTP
      - "${OTEL_PROMETHEUS_PORT:-9464}:9464"  # Prometheus exporter
      - "${OTEL_HEALTH_PORT:-13133}:13133"  # Health check
      - "${OTEL_PPROF_PORT:-1777}:1777"  # pprof
    # The otel/opentelemetry-collector image contains only the collector
    # binary (no shell, no curl), so an in-container `curl` probe can never
    # execute and would leave the container permanently "unhealthy".
    # The in-container check is therefore disabled; probe the published
    # health port (13133) from the host or orchestrator instead.
    healthcheck:
      disable: true
    # Attached to both networks: the private telemetry network and the
    # external `stellaops` network.
    networks:
      - stellaops-telemetry
      - stellaops
    labels: *telemetry-labels

  # ---------------------------------------------------------------------------
  # Prometheus - Metrics storage
  # ---------------------------------------------------------------------------
  prometheus:
    image: prom/prometheus:v2.53.0
    container_name: stellaops-prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yaml"
      - "--storage.tsdb.path=/prometheus"
      # Retention is overridable through PROMETHEUS_RETENTION (default 15d).
      - "--storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-15d}"
      - "--web.enable-lifecycle"
    volumes:
      - ../telemetry/storage/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
      - prometheus-data:/prometheus
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - ../telemetry/storage/auth:/etc/telemetry/auth:ro
    environment:
      # Matches the collector's Prometheus exporter port (9464) above.
      PROMETHEUS_COLLECTOR_TARGET: otel-collector:9464
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    # Short-form depends_on: orders startup only, does not wait for health.
    depends_on:
      - otel-collector
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels

  # ---------------------------------------------------------------------------
  # Tempo - Distributed tracing backend
  # ---------------------------------------------------------------------------
  tempo:
    image: grafana/tempo:2.5.0
    container_name: stellaops-tempo
    restart: unless-stopped
    command:
      - "-config.file=/etc/tempo/tempo.yaml"
    volumes:
      - ../telemetry/storage/tempo.yaml:/etc/tempo/tempo.yaml:ro
      - ../telemetry/storage/tenants/tempo-overrides.yaml:/etc/telemetry/tenants/tempo-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - tempo-data:/var/tempo
    environment:
      TEMPO_ZONE: docker
    ports:
      - "${TEMPO_PORT:-3200}:3200"
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels

  # ---------------------------------------------------------------------------
  # Loki - Log aggregation
  # ---------------------------------------------------------------------------
  loki:
    image: grafana/loki:3.1.0
    container_name: stellaops-loki
    restart: unless-stopped
    command:
      - "-config.file=/etc/loki/loki.yaml"
    volumes:
      - ../telemetry/storage/loki.yaml:/etc/loki/loki.yaml:ro
      - ../telemetry/storage/tenants/loki-overrides.yaml:/etc/telemetry/tenants/loki-overrides.yaml:ro
      - ../telemetry/certs:/etc/telemetry/tls:ro
      - loki-data:/var/loki
    ports:
      - "${LOKI_PORT:-3100}:3100"
    networks:
      - stellaops-telemetry
    labels: *telemetry-labels