# Nomad service job that runs a small monitoring stack. Each component lives
# in its own group and is started through the `exec` driver by a bash wrapper
# that writes the component's config file and then exec's the binary.
job "monitoring-stack" {
  datacenters = ["dc1"]
  type        = "service"

  # Grafana service group
  group "grafana" {
    count = 1

    # Host volume that holds Grafana's data directory (sqlite DB lives here,
    # see `path` in grafana.ini below).
    volume "grafana-data" {
      type      = "host"
      read_only = false
      source    = "grafana-data"
    }

    network {
      port "http" {
        static = 3000
        to     = 3000
      }
    }

    task "grafana" {
      driver = "exec"

      volume_mount {
        volume      = "grafana-data"
        destination = "/opt/grafana/data"
        read_only   = false
      }

      # Download and install Grafana
      artifact {
        source      = "https://dl.grafana.com/oss/release/grafana-10.2.0.linux-amd64.tar.gz"
        destination = "local/"
        mode        = "any"
      }

      # The wrapper script writes grafana.ini and then replaces itself with
      # grafana-server.
      #
      # NOTE(review): the heredoc opener was damaged in the stored file (a bare
      # `<` followed by the ini path). It has been restored as `<<EOF` plus
      # `cat > ... << 'INICONF'`, which is what the surviving INICONF/EOF
      # terminators imply. `mkdir -p` was added so the redirect cannot fail on
      # a missing directory.
      # NOTE(review): the artifact is fetched into local/, but the script runs
      # /opt/grafana/bin/grafana-server; the step that installs the release
      # into /opt/grafana is not present here — confirm and restore it.
      # NOTE(review): admin_password is hard-coded both in grafana.ini and in
      # the env block below; consider sourcing it from a secret store.
      config {
        command = "/bin/bash"
        args = [
          "-c",
          <<EOF
mkdir -p /opt/grafana/conf
cat > /opt/grafana/conf/grafana.ini << 'INICONF'
[server]
http_port = 3000
domain = grafana.tailnet-68f9.ts.net
root_url = http://grafana.tailnet-68f9.ts.net:3000

[database]
type = sqlite3
path = /opt/grafana/data/grafana.db

[security]
admin_password = admin123

[users]
allow_sign_up = false

[log]
mode = console
level = info
INICONF

# 启动 Grafana
exec /opt/grafana/bin/grafana-server --config /opt/grafana/conf/grafana.ini
EOF
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      # These GF_* variables repeat the password, domain and root URL that are
      # also written to grafana.ini above; keep the two in sync.
      env {
        GF_SECURITY_ADMIN_PASSWORD = "admin123"
        GF_SERVER_DOMAIN           = "grafana.tailnet-68f9.ts.net"
        GF_SERVER_ROOT_URL         = "http://grafana.tailnet-68f9.ts.net:3000"
      }

      service {
        name = "grafana"
        port = "http"

        tags = [
          "grafana",
          "monitoring",
          "dashboard"
        ]

        check {
          type     = "http"
          path     = "/api/health"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Prometheus service group
  group "prometheus" {
    count = 1

    # Host volume used as the TSDB storage path (--storage.tsdb.path below).
    volume "prometheus-data" {
      type      = "host"
      read_only = false
      source    = "prometheus-data"
    }

    network {
      port "http" {
        static = 9090
        to     = 9090
      }
    }

    task "prometheus" {
      driver = "exec"

      volume_mount {
        volume      = "prometheus-data"
        destination = "/opt/prometheus/data"
        read_only   = false
      }

      # Download and install Prometheus
      artifact {
        source      = "https://github.com/prometheus/prometheus/releases/download/v2.48.0/prometheus-2.48.0.linux-amd64.tar.gz"
        destination = "local/"
        mode        = "any"
      }

      # The wrapper script writes prometheus.yml (static scrape targets only)
      # and then replaces itself with the prometheus binary.
      #
      # NOTE(review): the heredoc opener was damaged in the stored file (a bare
      # `<` followed by the yml path). It has been restored as `<<EOF` plus
      # `cat > ... << 'PROMCONF'`, matching the surviving PROMCONF/EOF
      # terminators; YAML nesting was rebuilt from the key order. `mkdir -p`
      # was added so the redirect cannot fail on a missing directory.
      # NOTE(review): the artifact is fetched into local/, but the script runs
      # /opt/prometheus/prometheus and points at /opt/prometheus/consoles and
      # /opt/prometheus/console_libraries; the step that installs the release
      # into /opt/prometheus is not present here — confirm and restore it.
      config {
        command = "/bin/bash"
        args = [
          "-c",
          <<EOF
mkdir -p /opt/prometheus
cat > /opt/prometheus/prometheus.yml << 'PROMCONF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter.tailnet-68f9.ts.net:9100']

  - job_name: 'consul'
    static_configs:
      - targets:
        - 'ch4.tailnet-68f9.ts.net:8500'
        - 'ash3c.tailnet-68f9.ts.net:8500'
        - 'warden.tailnet-68f9.ts.net:8500'

  - job_name: 'nomad'
    static_configs:
      - targets:
        - 'semaphore.tailnet-68f9.ts.net:4646'
        - 'ash1d.tailnet-68f9.ts.net:4646'
        - 'ash2e.tailnet-68f9.ts.net:4646'
        - 'ch2.tailnet-68f9.ts.net:4646'
        - 'ch3.tailnet-68f9.ts.net:4646'
        - 'onecloud1.tailnet-68f9.ts.net:4646'
        - 'de.tailnet-68f9.ts.net:4646'

  - job_name: 'vault'
    static_configs:
      - targets:
        - 'master.tailnet-68f9.ts.net:8200'
        - 'ash3c.tailnet-68f9.ts.net:8200'
        - 'hcp1.tailnet-68f9.ts.net:8200'

  - job_name: 'influxdb'
    static_configs:
      - targets: ['influxdb1.tailnet-68f9.ts.net:8086']
PROMCONF

# 启动 Prometheus
exec /opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data --web.console.libraries=/opt/prometheus/console_libraries --web.console.templates=/opt/prometheus/consoles --storage.tsdb.retention.time=15d --web.enable-lifecycle
EOF
        ]
      }

      resources {
        cpu    = 500
        memory = 1024
      }

      service {
        name = "prometheus"
        port = "http"

        tags = [
          "prometheus",
          "monitoring",
          "metrics"
        ]

        check {
          type     = "http"
          path     = "/-/healthy"
          interval = "30s"
          timeout  = "5s"
        }
      }
    }
  }

  # Node Exporter service group
  group "node-exporter" {
    count = 1

    network {
      port "metrics" {
        static = 9100
        to     = 9100
      }
    }

    task "node-exporter" {
      driver = "exec"

      # Download and install Node Exporter
      artifact {
        source      = "https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz"
        destination = "local/"
        mode        = "any"
      }

      # NOTE(review): the heredoc opener below is damaged (a bare `<`, which is
      # not valid HCL). Restore the `<<EOF ... EOF` start script from the
      # original job definition, and confirm that this task, group and the job
      # itself are closed properly, before submitting the job.
      config {
        command = "/bin/bash"
        args = [
          "-c",
          <