# mgmt/monitoring-stack-simple.nomad
#
# File-viewer metadata: 198 lines, 3.9 KiB, HCL

job "monitoring-stack" {
datacenters = ["dc1"]
type = "service"
# Grafana service group: a single Grafana instance pinned to the node named
# "influxdb", listening on static port 3000 and persisting to a host volume.
group "grafana" {
  count = 1

  # Restrict placement to the client whose unique node name is "influxdb".
  constraint {
    attribute = "${node.unique.name}"
    operator  = "="
    value     = "influxdb"
  }

  # Reserve port 3000 under the label "http" (referenced by the service below).
  network {
    port "http" {
      static = 3000
      to     = 3000
    }
  }

  # Writable host volume named "grafana-data".
  # NOTE(review): presumably this host volume is declared in the client
  # configuration of the "influxdb" node — confirm before deploying.
  volume "grafana-data" {
    type      = "host"
    source    = "grafana-data"
    read_only = false
  }

  task "grafana" {
    driver = "exec"

    resources {
      cpu    = 500
      memory = 1024
    }

    # Mount the group volume read-write at Grafana's data directory.
    volume_mount {
      volume      = "grafana-data"
      destination = "/var/lib/grafana"
      read_only   = false
    }

    env {
      # NOTE(review): the admin password is stored in plain text in the job
      # file; consider sourcing it from a secret store instead.
      GF_SECURITY_ADMIN_PASSWORD = "admin123"
      GF_SERVER_DOMAIN           = "grafana.tailnet-68f9.ts.net"
      GF_SERVER_ROOT_URL         = "http://grafana.tailnet-68f9.ts.net:3000"
    }

    # Launch grafana-server with an explicit config file and home path, plus
    # cfg: overrides for the data, logs, plugins and provisioning paths.
    config {
      command = "/usr/sbin/grafana-server"
      args = [
        "--config",
        "/etc/grafana/grafana.ini",
        "--homepath",
        "/usr/share/grafana",
        "cfg:default.paths.data=/var/lib/grafana",
        "cfg:default.paths.logs=/var/log/grafana",
        "cfg:default.paths.plugins=/var/lib/grafana/plugins",
        "cfg:default.paths.provisioning=/etc/grafana/provisioning",
      ]
    }

    # Register the "grafana" service on the "http" port with an HTTP health
    # check against /api/health every 30s (5s timeout).
    service {
      name = "grafana"
      port = "http"
      tags = ["grafana", "monitoring", "dashboard"]

      check {
        type     = "http"
        path     = "/api/health"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Prometheus service group: a single Prometheus server pinned to the node
# named "influxdb", listening on static port 9090 with a host-volume TSDB.
group "prometheus" {
  count = 1

  # Restrict placement to the client whose unique node name is "influxdb".
  constraint {
    attribute = "${node.unique.name}"
    operator  = "="
    value     = "influxdb"
  }

  # Reserve port 9090 under the label "http" (referenced by the service below).
  network {
    port "http" {
      static = 9090
      to     = 9090
    }
  }

  # Writable host volume named "prometheus-data".
  # NOTE(review): presumably this host volume is declared in the client
  # configuration of the "influxdb" node — confirm before deploying.
  volume "prometheus-data" {
    type      = "host"
    source    = "prometheus-data"
    read_only = false
  }

  task "prometheus" {
    driver = "exec"

    resources {
      cpu    = 500
      memory = 1024
    }

    # Mount the group volume read-write at the path given to
    # --storage.tsdb.path below.
    volume_mount {
      volume      = "prometheus-data"
      destination = "/var/lib/prometheus"
      read_only   = false
    }

    # Start Prometheus with its config file, TSDB location, console asset
    # directories and a 15-day retention window.
    # NOTE(review): --web.enable-lifecycle is passed while the port is
    # statically exposed; confirm that access to port 9090 is restricted.
    config {
      command = "/usr/bin/prometheus"
      args = [
        "--config.file=/etc/prometheus/prometheus.yml",
        "--storage.tsdb.path=/var/lib/prometheus",
        "--web.console.libraries=/etc/prometheus/console_libraries",
        "--web.console.templates=/etc/prometheus/consoles",
        "--storage.tsdb.retention.time=15d",
        "--web.enable-lifecycle",
      ]
    }

    # Register the "prometheus" service on the "http" port with an HTTP
    # health check against /-/healthy every 30s (5s timeout).
    service {
      name = "prometheus"
      port = "http"
      tags = ["prometheus", "monitoring", "metrics"]

      check {
        type     = "http"
        path     = "/-/healthy"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Node Exporter service group: a single prometheus-node-exporter instance
# exposing metrics on static port 9100.
# NOTE(review): count is 1 and this group has no constraint, so only one node
# chosen by the scheduler gets an exporter — confirm that is intended.
group "node-exporter" {
  count = 1

  # Reserve port 9100 under the label "metrics" (referenced by the service below).
  network {
    port "metrics" {
      static = 9100
      to     = 9100
    }
  }

  task "node-exporter" {
    # NOTE(review): the exec driver is assumed to run the task with an
    # isolated filesystem view, in which case --path.rootfs=/ would not be
    # the host root — confirm against the client's driver configuration.
    driver = "exec"

    resources {
      cpu    = 100
      memory = 256
    }

    config {
      command = "/usr/bin/prometheus-node-exporter"
      args = [
        "--path.procfs=/proc",
        "--path.rootfs=/",
        "--path.sysfs=/sys",
        # Single "$" end anchor: HCL only needs a doubled dollar to escape a
        # dollar-brace sequence, so the former "$$" (a docker-compose
        # escaping convention) was passed to the exporter as two anchors.
        "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/)",
      ]
    }

    # Register the "node-exporter" service on the "metrics" port with an HTTP
    # health check against /metrics every 30s (5s timeout).
    service {
      name = "node-exporter"
      port = "metrics"
      tags = ["node-exporter", "monitoring", "metrics"]

      check {
        type     = "http"
        path     = "/metrics"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
}