# Nomad service job that runs a small monitoring stack: Grafana, Prometheus
# and Node Exporter. Each component lives in its own group with a single
# instance, a statically assigned port, and a registered service with an
# HTTP health check.
job "monitoring-stack" {
  datacenters = ["dc1"]
  type        = "service"

  # Grafana service group
  group "grafana" {
    count = 1

    # Host volume holding Grafana's data directory; a host volume named
    # "grafana-data" must be available on the client that runs this group.
    volume "grafana-data" {
      type      = "host"
      read_only = false
      source    = "grafana-data"
    }

    network {
      # Grafana web UI / API on a fixed port.
      port "http" {
        static = 3000
        to     = 3000
      }
    }

    task "grafana" {
      driver = "exec"

      # Mount the host volume at the path passed as cfg:default.paths.data.
      volume_mount {
        volume      = "grafana-data"
        destination = "/opt/grafana/data"
        read_only   = false
      }

      config {
        command = "/opt/grafana/bin/grafana-server"
        args = [
          "--config", "/opt/grafana/conf/grafana.ini",
          "--homepath", "/opt/grafana",
          "cfg:default.paths.data=/opt/grafana/data",
          "cfg:default.paths.logs=/opt/grafana/logs",
          "cfg:default.paths.plugins=/opt/grafana/plugins",
          "cfg:default.paths.provisioning=/opt/grafana/conf/provisioning"
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      env {
        # NOTE(review): the admin password is hard-coded in plain text in the
        # job file. Consider sourcing it from a secret store (for example a
        # `template` block with `env = true`) and rotating this value.
        GF_SECURITY_ADMIN_PASSWORD = "admin123"
        # NOTE(review): GF_INSTALL_PLUGINS is normally consumed by the Grafana
        # Docker image entrypoint; confirm it has an effect when
        # grafana-server is launched directly as it is here.
        GF_INSTALL_PLUGINS  = "grafana-piechart-panel"
        GF_SERVER_DOMAIN    = "grafana.tailnet-68f9.ts.net"
        GF_SERVER_ROOT_URL  = "http://grafana.tailnet-68f9.ts.net:3000"
      }

      service {
        name = "grafana"
        port = "http"

        tags = [
          "grafana",
          "monitoring",
          "dashboard"
        ]

        # HTTP health probe against Grafana's health endpoint.
        check {
          type     = "http"
          path     = "/api/health"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Prometheus service group
  group "prometheus" {
    count = 1

    # Host volume holding the Prometheus TSDB; a host volume named
    # "prometheus-data" must be available on the client that runs this group.
    volume "prometheus-data" {
      type      = "host"
      read_only = false
      source    = "prometheus-data"
    }

    network {
      # Prometheus web UI / API on a fixed port.
      port "http" {
        static = 9090
        to     = 9090
      }
    }

    task "prometheus" {
      driver = "exec"

      # Mount the host volume at the path passed as --storage.tsdb.path.
      volume_mount {
        volume      = "prometheus-data"
        destination = "/opt/prometheus/data"
        read_only   = false
      }

      config {
        command = "/opt/prometheus/prometheus"
        args = [
          "--config.file=/opt/prometheus/prometheus.yml",
          "--storage.tsdb.path=/opt/prometheus/data",
          "--web.console.libraries=/opt/prometheus/console_libraries",
          "--web.console.templates=/opt/prometheus/consoles",
          # Keep 15 days of samples.
          "--storage.tsdb.retention.time=15d",
          # NOTE(review): this enables the HTTP reload/shutdown endpoints on
          # the statically exposed port; confirm access to it is restricted.
          "--web.enable-lifecycle"
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      service {
        name = "prometheus"
        port = "http"

        tags = [
          "prometheus",
          "monitoring",
          "metrics"
        ]

        # HTTP health probe against Prometheus' health endpoint.
        check {
          type     = "http"
          path     = "/-/healthy"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Node Exporter service group
  # NOTE(review): with count = 1 in a "service" job only a single exporter
  # instance is scheduled. Also confirm that, under the exec driver, the
  # /proc, /sys and / paths below reflect the host rather than the task's
  # isolated filesystem.
  group "node-exporter" {
    count = 1

    network {
      # Exporter metrics endpoint on a fixed port.
      port "metrics" {
        static = 9100
        to     = 9100
      }
    }

    task "node-exporter" {
      driver = "exec"

      config {
        command = "/opt/node-exporter/node_exporter"
        args = [
          "--path.procfs=/proc",
          "--path.rootfs=/",
          "--path.sysfs=/sys",
          # A single `$` is the regex end-of-string anchor. HCL only treats
          # `${` as the start of an interpolation, so no `$$` escaping (a
          # docker-compose convention) is needed here.
          "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/)"
        ]
      }

      resources {
        cpu    = 100
        memory = 256
      }

      service {
        name = "node-exporter"
        port = "metrics"

        tags = [
          "node-exporter",
          "monitoring",
          "metrics"
        ]

        # The metrics endpoint doubles as the health probe target.
        check {
          type     = "http"
          path     = "/metrics"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }
}