# mgmt/monitoring-stack.nomad
#
# Viewer metadata captured with this file: 187 lines, 3.7 KiB, HCL

job "monitoring-stack" {
datacenters = ["dc1"]
type = "service"
# Grafana service group: a single Grafana server with its data directory on a
# host volume and its HTTP listener on a fixed host port.
group "grafana" {
  count = 1

  network {
    # Static port 3000 matches the port used in GF_SERVER_ROOT_URL below.
    port "http" {
      static = 3000
      to     = 3000
    }
  }

  # Host volume "grafana-data", mounted read-write into the task below.
  volume "grafana-data" {
    type      = "host"
    source    = "grafana-data"
    read_only = false
  }

  task "grafana" {
    driver = "exec"

    config {
      command = "/opt/grafana/bin/grafana-server"
      # All path overrides point below /opt/grafana; only the data path is
      # backed by the host volume mounted further down.
      args = [
        "--config", "/opt/grafana/conf/grafana.ini",
        "--homepath", "/opt/grafana",
        "cfg:default.paths.data=/opt/grafana/data",
        "cfg:default.paths.logs=/opt/grafana/logs",
        "cfg:default.paths.plugins=/opt/grafana/plugins",
        "cfg:default.paths.provisioning=/opt/grafana/conf/provisioning"
      ]
    }

    env {
      # NOTE(review): the admin password is committed in plain text; consider
      # sourcing it from a secret store instead of the job file.
      GF_SECURITY_ADMIN_PASSWORD = "admin123"
      # NOTE(review): GF_INSTALL_PLUGINS is presumably handled only by the
      # official Docker image entrypoint; confirm it has any effect when
      # grafana-server is started directly as it is here.
      GF_INSTALL_PLUGINS  = "grafana-piechart-panel"
      GF_SERVER_DOMAIN    = "grafana.tailnet-68f9.ts.net"
      GF_SERVER_ROOT_URL  = "http://grafana.tailnet-68f9.ts.net:3000"
    }

    # Persist Grafana's data path on the group's host volume.
    volume_mount {
      volume      = "grafana-data"
      destination = "/opt/grafana/data"
      read_only   = false
    }

    resources {
      cpu    = 500
      memory = 1024
    }

    # Registers the "http" port as service "grafana" with an HTTP health check.
    service {
      name = "grafana"
      port = "http"
      tags = ["grafana", "monitoring", "dashboard"]

      check {
        type     = "http"
        path     = "/api/health"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Prometheus service group: a single Prometheus server with its TSDB directory
# on a host volume and its HTTP listener on a fixed host port.
group "prometheus" {
  count = 1

  network {
    port "http" {
      static = 9090
      to     = 9090
    }
  }

  # Host volume "prometheus-data", mounted read-write into the task below.
  volume "prometheus-data" {
    type      = "host"
    source    = "prometheus-data"
    read_only = false
  }

  task "prometheus" {
    driver = "exec"

    config {
      command = "/opt/prometheus/prometheus"
      # The TSDB path is the same directory the host volume is mounted at.
      # NOTE(review): --web.enable-lifecycle should expose HTTP reload/quit
      # endpoints (verify against the Prometheus docs); confirm that static
      # port 9090 is not reachable by untrusted clients.
      args = [
        "--config.file=/opt/prometheus/prometheus.yml",
        "--storage.tsdb.path=/opt/prometheus/data",
        "--web.console.libraries=/opt/prometheus/console_libraries",
        "--web.console.templates=/opt/prometheus/consoles",
        "--storage.tsdb.retention.time=15d",
        "--web.enable-lifecycle"
      ]
    }

    # Persist the TSDB directory on the group's host volume.
    volume_mount {
      volume      = "prometheus-data"
      destination = "/opt/prometheus/data"
      read_only   = false
    }

    resources {
      cpu    = 500
      memory = 1024
    }

    # Registers the "http" port as service "prometheus" with an HTTP health check.
    service {
      name = "prometheus"
      port = "http"
      tags = ["prometheus", "monitoring", "metrics"]

      check {
        type     = "http"
        path     = "/-/healthy"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Node Exporter service group: runs node_exporter and publishes its metrics
# endpoint on a fixed host port.
#
# NOTE(review): with count = 1 inside a job of type "service", only one
# instance is scheduled, so only one node is exported. If every client node
# should be monitored, this group presumably belongs in a separate job of
# type "system" - confirm the intent.
group "node-exporter" {
  count = 1

  network {
    port "metrics" {
      static = 9100
      to     = 9100
    }
  }

  task "node-exporter" {
    # NOTE(review): under the exec driver the task presumably runs with an
    # isolated filesystem view, so "/" may not be the host root; verify that
    # the filesystem metrics reflect the host as intended.
    driver = "exec"

    config {
      command = "/opt/node-exporter/node_exporter"
      # The end-of-string anchor in the exclude regex is a single "$".
      # The previous "$$" was a docker-compose style escape; HCL only treats
      # "$${" as an escape, so the doubled anchor was passed through verbatim.
      args = [
        "--path.procfs=/proc",
        "--path.rootfs=/",
        "--path.sysfs=/sys",
        "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/)"
      ]
    }

    resources {
      cpu    = 100
      memory = 256
    }

    # Registers the "metrics" port as service "node-exporter"; the health
    # check requests the metrics path itself.
    service {
      name = "node-exporter"
      port = "metrics"
      tags = ["node-exporter", "monitoring", "metrics"]

      check {
        type     = "http"
        path     = "/metrics"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
}