# Monitoring stack: Grafana, Prometheus and node-exporter, one group each.
# Every task launches a binary by absolute path through the exec driver.
job "monitoring-stack" {
  datacenters = ["dc1"]
  type        = "service"

  # Grafana service group
  group "grafana" {
    count = 1

    # Pin this group to the node whose unique name is "influxdb".
    constraint {
      attribute = "${node.unique.name}"
      operator  = "="
      value     = "influxdb"
    }

    # Writable host volume; mounted into the task at /var/lib/grafana below.
    volume "grafana-data" {
      type      = "host"
      read_only = false
      source    = "grafana-data"
    }

    network {
      # Static port 3000, referenced by the service block as "http".
      port "http" {
        static = 3000
        to     = 3000
      }
    }

    task "grafana" {
      driver = "exec"

      volume_mount {
        volume      = "grafana-data"
        destination = "/var/lib/grafana"
        read_only   = false
      }

      config {
        command = "/usr/sbin/grafana-server"
        args = [
          "--config", "/etc/grafana/grafana.ini",
          "--homepath", "/usr/share/grafana",
          # Trailing cfg: arguments override the default path settings.
          "cfg:default.paths.data=/var/lib/grafana",
          "cfg:default.paths.logs=/var/log/grafana",
          "cfg:default.paths.plugins=/var/lib/grafana/plugins",
          "cfg:default.paths.provisioning=/etc/grafana/provisioning"
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      env {
        # NOTE(review): the admin password is committed here in plaintext.
        # Consider sourcing it from a secret store (e.g. Nomad Variables or
        # Vault via a template block) and rotating this value.
        GF_SECURITY_ADMIN_PASSWORD = "admin123"
        GF_SERVER_DOMAIN           = "grafana.tailnet-68f9.ts.net"
        GF_SERVER_ROOT_URL         = "http://grafana.tailnet-68f9.ts.net:3000"
      }

      service {
        name = "grafana"
        port = "http"
        tags = [
          "grafana",
          "monitoring",
          "dashboard"
        ]

        # HTTP health check against /api/health every 30s, 5s timeout.
        check {
          type     = "http"
          path     = "/api/health"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Prometheus service group
  group "prometheus" {
    count = 1

    # Pin this group to the node whose unique name is "influxdb".
    constraint {
      attribute = "${node.unique.name}"
      operator  = "="
      value     = "influxdb"
    }

    # Writable host volume; mounted into the task at /var/lib/prometheus below.
    volume "prometheus-data" {
      type      = "host"
      read_only = false
      source    = "prometheus-data"
    }

    network {
      # Static port 9090, referenced by the service block as "http".
      port "http" {
        static = 9090
        to     = 9090
      }
    }

    task "prometheus" {
      driver = "exec"

      volume_mount {
        volume      = "prometheus-data"
        destination = "/var/lib/prometheus"
        read_only   = false
      }

      config {
        command = "/usr/bin/prometheus"
        args = [
          "--config.file=/etc/prometheus/prometheus.yml",
          # TSDB path matches the volume_mount destination above.
          "--storage.tsdb.path=/var/lib/prometheus",
          "--web.console.libraries=/etc/prometheus/console_libraries",
          "--web.console.templates=/etc/prometheus/consoles",
          "--storage.tsdb.retention.time=15d",
          "--web.enable-lifecycle"
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      service {
        name = "prometheus"
        port = "http"
        tags = [
          "prometheus",
          "monitoring",
          "metrics"
        ]

        # HTTP health check against /-/healthy every 30s, 5s timeout.
        check {
          type     = "http"
          path     = "/-/healthy"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Node Exporter service group
  # NOTE(review): count = 1 with no constraint, so a single instance runs on
  # whichever node is chosen. If every node should export metrics, a system
  # job is presumably the intended shape - confirm.
  group "node-exporter" {
    count = 1

    network {
      # Static port 9100, referenced by the service block as "metrics".
      port "metrics" {
        static = 9100
        to     = 9100
      }
    }

    task "node-exporter" {
      driver = "exec"

      config {
        command = "/usr/bin/prometheus-node-exporter"
        args = [
          "--path.procfs=/proc",
          "--path.rootfs=/",
          "--path.sysfs=/sys",
          # NOTE(review): the "$$" looks like a Docker Compose escape carried
          # over; here it presumably reaches the binary as a doubled end
          # anchor. Confirm before simplifying it to a single "$".
          "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
        ]
      }

      resources {
        cpu    = 100
        memory = 256
      }

      service {
        name = "node-exporter"
        port = "metrics"
        tags = [
          "node-exporter",
          "monitoring",
          "metrics"
        ]

        # HTTP health check against /metrics every 30s, 5s timeout.
        check {
          type     = "http"
          path     = "/metrics"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }
}