# mgmt/monitoring-stack.yml (131 lines, 3.2 KiB, YAML)

version: '3.8'

# Monitoring services: Prometheus and Grafana are published through Traefik;
# node-exporter and cAdvisor run on every node (deploy.mode: global) and are
# only reachable on the internal `monitoring` network.
# NOTE(review): every image uses the floating `latest` tag, so redeploys can
# silently change versions — consider pinning explicit tags.
services:
  # Prometheus: metrics collection and storage
  prometheus:
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Keep samples for 200 hours (a little over 8 days).
      - '--storage.tsdb.retention.time=200h'
      # Enables the HTTP reload/quit endpoints.
      # NOTE(review): these are also reachable through the Traefik route
      # below — confirm access to prometheus.local is restricted.
      - '--web.enable-lifecycle'
    networks:
      - traefik-public
      - monitoring
    configs:
      - source: prometheus-config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - prometheus-data:/prometheus
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        # The service is attached to two networks; tell Traefik which one to
        # use to reach it, otherwise it may pick the address on `monitoring`.
        # (With the Traefik v3 Swarm provider the equivalent label is
        # traefik.swarm.network.)
        - traefik.docker.network=traefik-public
        - traefik.http.routers.prometheus.rule=Host(`prometheus.local`)
        - traefik.http.routers.prometheus.entrypoints=web
        - traefik.http.services.prometheus.loadbalancer.server.port=9090
      restart_policy:
        condition: on-failure

  # Grafana: dashboards and visualization
  grafana:
    image: grafana/grafana:latest
    environment:
      # The admin password is taken from GRAFANA_ADMIN_PASSWORD in the
      # deploying shell; the previous hard-coded value remains the fallback
      # for backward compatibility.
      # NOTE(review): the fallback is a weak, committed credential — set the
      # variable (or move to a Docker secret) before exposing Grafana.
      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}'
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - traefik-public
      - monitoring
    volumes:
      - grafana-data:/var/lib/grafana
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        # Same two-network situation as Prometheus: pin the network Traefik
        # should use to reach this service.
        - traefik.docker.network=traefik-public
        - traefik.http.routers.grafana.rule=Host(`grafana.local`)
        - traefik.http.routers.grafana.entrypoints=web
        - traefik.http.services.grafana.loadbalancer.server.port=3000
      restart_policy:
        condition: on-failure

  # Node Exporter: host-level system metrics
  node-exporter:
    image: prom/node-exporter:latest
    command:
      # Point the exporter at the host filesystems mounted read-only below.
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `mount-points-exclude` replaces the deprecated
      # `--collector.filesystem.ignored-mount-points` flag.
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring
    deploy:
      # One task per node.
      mode: global
      restart_policy:
        condition: on-failure

  # cAdvisor: per-container metrics
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      # Read-only is sufficient here and matches the upstream cAdvisor
      # invocation; the original mounted this read-write.
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - monitoring
    deploy:
      # One task per node.
      mode: global
      restart_policy:
        condition: on-failure
networks:
  # Declared external: this stack does not create it, so it must already
  # exist at deploy time. Presumably the network Traefik is attached to.
  traefik-public:
    external: true
  # Overlay network created by this stack; the only network the
  # node-exporter and cadvisor services are attached to.
  monitoring:
    driver: overlay
volumes:
  # Named volumes with default settings (explicit `{}` instead of a bare key).
  # Mounted at /prometheus by the prometheus service.
  prometheus-data: {}
  # Mounted at /var/lib/grafana by the grafana service.
  grafana-data: {}
configs:
  # Prometheus configuration, mounted by the prometheus service at
  # /etc/prometheus/prometheus.yml. It defines four scrape jobs: traefik,
  # prometheus itself, node-exporter and cadvisor. The last two are discovered
  # through DNS A records of `tasks.<service>`.
  # NOTE(review): inline `content` is a Compose-spec feature; confirm the
  # deploy tooling in use supports it, otherwise switch to `file:`.
  # NOTE(review): the `traefik:8080` target assumes a service resolvable as
  # `traefik` that serves metrics on port 8080 — verify against the Traefik
  # stack.
  prometheus-config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s
      scrape_configs:
        # Traefik 指标
        - job_name: 'traefik'
          static_configs:
            - targets: ['traefik:8080']
          metrics_path: /metrics
        # Prometheus 自身
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']
        # Node Exporter
        - job_name: 'node-exporter'
          dns_sd_configs:
            - names:
                - 'tasks.node-exporter'
              type: 'A'
              port: 9100
        # cAdvisor
        - job_name: 'cadvisor'
          dns_sd_configs:
            - names:
                - 'tasks.cadvisor'
              type: 'A'
              port: 8080