---
# Monitoring stack: Prometheus, Grafana, node-exporter and cAdvisor.
#
# NOTE(review): the `deploy:` sections and the external `traefik-public`
# network suggest this file is deployed as a Docker Swarm stack - confirm.
# NOTE(review): every image uses the floating `latest` tag; consider pinning
# explicit versions so redeploys are reproducible.
version: '3.8'

services:
  # Prometheus (metrics collection and storage)
  prometheus:
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    networks:
      - traefik-public
      - monitoring
    configs:
      # Mounts the `prometheus-config` config defined at the bottom of this
      # file at the path given to --config.file above.
      - source: prometheus-config
        target: /etc/prometheus/prometheus.yml
    volumes:
      # Same path as --storage.tsdb.path, so the TSDB lives on a named volume.
      - prometheus-data:/prometheus
    deploy:
      replicas: 1
      # NOTE(review): this service is attached to two networks; Traefik may
      # need an explicit network hint (label or provider setting) to pick
      # `traefik-public` - verify against the Traefik version in use.
      labels:
        - traefik.enable=true
        - traefik.http.routers.prometheus.rule=Host(`prometheus.local`)
        - traefik.http.routers.prometheus.entrypoints=web
        - traefik.http.services.prometheus.loadbalancer.server.port=9090
      restart_policy:
        condition: on-failure

  # Grafana (dashboards)
  grafana:
    image: grafana/grafana:latest
    environment:
      # Admin password is taken from the deploy-time environment; the
      # previous hard-coded value remains the fallback so existing
      # deployments keep working. Override it outside of development.
      - 'GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin123}'
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - traefik-public
      - monitoring
    volumes:
      - grafana-data:/var/lib/grafana
    deploy:
      replicas: 1
      # NOTE(review): same two-network consideration as for prometheus.
      labels:
        - traefik.enable=true
        - traefik.http.routers.grafana.rule=Host(`grafana.local`)
        - traefik.http.routers.grafana.entrypoints=web
        - traefik.http.services.grafana.loadbalancer.server.port=3000
      restart_policy:
        condition: on-failure

  # Node Exporter (host/system metrics)
  node-exporter:
    image: prom/node-exporter:latest
    command:
      # Point the exporter at the host filesystems bind-mounted below.
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `mount-points-exclude` replaces the deprecated
      # `--collector.filesystem.ignored-mount-points` flag.
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring
    deploy:
      # One task per node.
      mode: global
      restart_policy:
        condition: on-failure

  # cAdvisor (container metrics)
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      # Read-only, matching the upstream cAdvisor run instructions.
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - monitoring
    deploy:
      # One task per node.
      mode: global
      restart_policy:
        condition: on-failure

networks:
  # Created outside this file; must exist before the stack is deployed.
  traefik-public:
    external: true
  monitoring:
    driver: overlay

volumes:
  prometheus-data:
  grafana-data:

configs:
  # NOTE(review): inline `content:` is a Compose Specification feature;
  # confirm the deployment tool in use accepts it (otherwise move the text
  # to a file and reference it with `file:`).
  prometheus-config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s

      scrape_configs:
        # Traefik 指标
        - job_name: 'traefik'
          static_configs:
            - targets: ['traefik:8080']
          metrics_path: /metrics

        # Prometheus 自身
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']

        # Node Exporter
        - job_name: 'node-exporter'
          dns_sd_configs:
            - names:
                - 'tasks.node-exporter'
              type: 'A'
              port: 9100

        # cAdvisor
        - job_name: 'cadvisor'
          dns_sd_configs:
            - names:
                - 'tasks.cadvisor'
              type: 'A'
              port: 8080