# Nomad service job for the monitoring stack. Each component (Grafana,
# Prometheus, Loki) runs in its own task group with a single instance, a
# dedicated host volume and a statically assigned HTTP port.
job "monitoring-stack" {
  datacenters = ["dc1"]
  type        = "service"

  # Grafana service group
  group "grafana" {
    count = 1

    # Place this group only on the client node named "influxdb".
    constraint {
      attribute = "${node.unique.name}"
      operator  = "="
      value     = "influxdb"
    }

    # Read-write host volume backing Grafana's data directory.
    volume "grafana-data" {
      type      = "host"
      read_only = false
      source    = "grafana-data"
    }

    network {
      port "http" {
        static = 3000
        to     = 3000
      }
    }

    task "grafana" {
      driver = "exec"

      volume_mount {
        volume      = "grafana-data"
        destination = "/opt/grafana/data"
        read_only   = false
      }

      # The data path points at the mounted volume; config, home and
      # provisioning paths point at /etc and /usr/share.
      # NOTE(review): the logs and plugins paths live under /var — confirm they
      # are reachable inside the exec driver's filesystem isolation.
      config {
        command = "/usr/sbin/grafana-server"
        args = [
          "--config",
          "/etc/grafana/grafana.ini",
          "--homepath",
          "/usr/share/grafana",
          "cfg:default.paths.data=/opt/grafana/data",
          "cfg:default.paths.logs=/var/log/grafana",
          "cfg:default.paths.plugins=/var/lib/grafana/plugins",
          "cfg:default.paths.provisioning=/etc/grafana/provisioning"
        ]
      }

      resources {
        cpu    = 300
        memory = 512
      }

      # NOTE(review): the admin password is committed here in plaintext —
      # consider sourcing it from a secret store instead.
      # NOTE(review): GF_INSTALL_PLUGINS is presumably only acted on by the
      # Grafana container entrypoint; verify it has any effect when
      # grafana-server is launched directly as it is here.
      env {
        GF_SECURITY_ADMIN_PASSWORD = "admin123"
        GF_INSTALL_PLUGINS         = "grafana-piechart-panel"
        GF_SERVER_DOMAIN           = "grafana.tailnet-68f9.ts.net"
        GF_SERVER_ROOT_URL         = "http://grafana.tailnet-68f9.ts.net:3000"
      }

      # Registers the "http" port as service "grafana" with an HTTP health
      # check on /api/health (every 30s, 5s timeout).
      service {
        name = "grafana"
        port = "http"

        tags = [
          "grafana",
          "monitoring",
          "dashboard"
        ]

        check {
          type     = "http"
          path     = "/api/health"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Prometheus service group
  group "prometheus" {
    count = 1

    # Place this group only on the client node named "influxdb".
    constraint {
      attribute = "${node.unique.name}"
      operator  = "="
      value     = "influxdb"
    }

    # Read-write host volume backing the Prometheus TSDB directory.
    volume "prometheus-data" {
      type      = "host"
      read_only = false
      source    = "prometheus-data"
    }

    network {
      port "http" {
        static = 9090
        to     = 9090
      }
    }

    task "prometheus" {
      driver = "exec"

      volume_mount {
        volume      = "prometheus-data"
        destination = "/opt/prometheus/data"
        read_only   = false
      }

      # TSDB storage points at the mounted volume, with 15d retention.
      # NOTE(review): the command is a bare name rather than an absolute path
      # (unlike the Grafana task) — confirm it resolves on the target node.
      # NOTE(review): --web.enable-lifecycle is set while the port is statically
      # exposed — confirm that exposure is intended.
      config {
        command = "prometheus"
        args = [
          "--config.file=/etc/prometheus/prometheus.yml",
          "--storage.tsdb.path=/opt/prometheus/data",
          "--web.console.libraries=/usr/share/prometheus/console_libraries",
          "--web.console.templates=/usr/share/prometheus/consoles",
          "--storage.tsdb.retention.time=15d",
          "--web.enable-lifecycle"
        ]
      }

      resources {
        cpu    = 300
        memory = 512
      }

      # Registers the "http" port as service "prometheus" with an HTTP health
      # check on /-/healthy (every 30s, 5s timeout).
      service {
        name = "prometheus"
        port = "http"

        tags = [
          "prometheus",
          "monitoring",
          "metrics"
        ]

        check {
          type     = "http"
          path     = "/-/healthy"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Loki service group
  group "loki" {
    count = 1

    # Place this group only on the client node named "influxdb".
    constraint {
      attribute = "${node.unique.name}"
      operator  = "="
      value     = "influxdb"
    }

    volume "loki-data" {
      type      = "host"
      read_only = false
      source    = "loki-data"
    }

    network {
      port "http" {
        static = 3100
        to     = 3100
      }
    }

    task "loki" {
      driver = "exec"

      volume_mount {
        volume      = "loki-data"
        destination = "/opt/loki/data"
        read_only   = false
      }

      template {
        data = <