# mgmt/monitoring-stack-exec.nomad
#
# 292 lines
# 6.1 KiB
# HCL

job "monitoring-stack" {
datacenters = ["dc1"]
type = "service"
# Grafana service group: a single exec-driver task that installs Grafana OSS
# 10.2.0 under /opt/grafana and serves it on static port 3000.
group "grafana" {
  count = 1

  # Host volume mounted at /opt/grafana/data (where grafana.db is written).
  volume "grafana-data" {
    type      = "host"
    read_only = false
    source    = "grafana-data"
  }

  network {
    port "http" {
      static = 3000
      to     = 3000
    }
  }

  task "grafana" {
    driver = "exec"

    volume_mount {
      volume      = "grafana-data"
      destination = "/opt/grafana/data"
      read_only   = false
    }

    # Download the Grafana release tarball.
    # Nomad unpacks archive artifacts automatically; archive = false keeps the
    # .tar.gz intact in local/ so the start script can extract it itself with a
    # controlled target directory.
    artifact {
      source      = "https://dl.grafana.com/oss/release/grafana-10.2.0.linux-amd64.tar.gz"
      destination = "local/"
      mode        = "any"

      options {
        archive = false
      }
    }

    config {
      command = "/bin/bash"
      args = [
        "-c",
        <<EOF
set -e
cd local

# Unpack straight into the install directory. --strip-components=1 drops the
# archive's top-level folder, so the script does not depend on its exact name.
mkdir -p /opt/grafana
tar -xzf grafana-10.2.0.linux-amd64.tar.gz --strip-components=1 -C /opt/grafana
mkdir -p /opt/grafana/data /opt/grafana/logs /opt/grafana/plugins
chmod +x /opt/grafana/bin/grafana-server

# Write the Grafana configuration file.
# NOTE(review): admin_password is hard-coded here and in the task env block;
# consider sourcing it from a secret store instead of the job file.
cat > /opt/grafana/conf/grafana.ini << 'INICONF'
[server]
http_port = 3000
domain = grafana.tailnet-68f9.ts.net
root_url = http://grafana.tailnet-68f9.ts.net:3000
[database]
type = sqlite3
path = /opt/grafana/data/grafana.db
[security]
admin_password = admin123
[users]
allow_sign_up = false
[log]
mode = console
level = info
INICONF

# Start Grafana. --homepath is required because the working directory is the
# task's local/ directory, not the install directory holding conf/defaults.ini.
exec /opt/grafana/bin/grafana-server --homepath=/opt/grafana --config /opt/grafana/conf/grafana.ini
EOF
      ]
    }

    resources {
      cpu    = 500
      memory = 1024
    }

    # These mirror the values written to grafana.ini above.
    # NOTE(review): plaintext admin password in the job spec; rotate it and move
    # it to a secret store.
    env {
      GF_SECURITY_ADMIN_PASSWORD = "admin123"
      GF_SERVER_DOMAIN           = "grafana.tailnet-68f9.ts.net"
      GF_SERVER_ROOT_URL         = "http://grafana.tailnet-68f9.ts.net:3000"
    }

    service {
      name = "grafana"
      port = "http"
      tags = [
        "grafana",
        "monitoring",
        "dashboard"
      ]

      # HTTP health probe against Grafana's /api/health endpoint.
      check {
        type     = "http"
        path     = "/api/health"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Prometheus service group: a single exec-driver task that installs Prometheus
# 2.48.0 under /opt/prometheus and serves it on static port 9090.
group "prometheus" {
  count = 1

  # Host volume mounted at /opt/prometheus/data (the TSDB storage path).
  volume "prometheus-data" {
    type      = "host"
    read_only = false
    source    = "prometheus-data"
  }

  network {
    port "http" {
      static = 9090
      to     = 9090
    }
  }

  task "prometheus" {
    driver = "exec"

    volume_mount {
      volume      = "prometheus-data"
      destination = "/opt/prometheus/data"
      read_only   = false
    }

    # Download the Prometheus release tarball.
    # Nomad unpacks archive artifacts automatically; archive = false keeps the
    # .tar.gz intact in local/ so the start script can extract it itself.
    artifact {
      source      = "https://github.com/prometheus/prometheus/releases/download/v2.48.0/prometheus-2.48.0.linux-amd64.tar.gz"
      destination = "local/"
      mode        = "any"

      options {
        archive = false
      }
    }

    config {
      command = "/bin/bash"
      args = [
        "-c",
        <<EOF
set -e
cd local

# Unpack straight into the install directory. The archive's top-level folder is
# prometheus-2.48.0.linux-amd64/; --strip-components=1 drops it so the binary,
# consoles/ and console_libraries/ land directly in /opt/prometheus.
mkdir -p /opt/prometheus
tar -xzf prometheus-2.48.0.linux-amd64.tar.gz --strip-components=1 -C /opt/prometheus
chmod +x /opt/prometheus/prometheus

# Write the Prometheus configuration (YAML: nesting is significant).
cat > /opt/prometheus/prometheus.yml << 'PROMCONF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter.tailnet-68f9.ts.net:9100']

  - job_name: 'consul'
    static_configs:
      - targets:
          - 'ch4.tailnet-68f9.ts.net:8500'
          - 'ash3c.tailnet-68f9.ts.net:8500'
          - 'warden.tailnet-68f9.ts.net:8500'

  - job_name: 'nomad'
    static_configs:
      - targets:
          - 'semaphore.tailnet-68f9.ts.net:4646'
          - 'ash1d.tailnet-68f9.ts.net:4646'
          - 'ash2e.tailnet-68f9.ts.net:4646'
          - 'ch2.tailnet-68f9.ts.net:4646'
          - 'ch3.tailnet-68f9.ts.net:4646'
          - 'onecloud1.tailnet-68f9.ts.net:4646'
          - 'de.tailnet-68f9.ts.net:4646'

  - job_name: 'vault'
    static_configs:
      - targets:
          - 'master.tailnet-68f9.ts.net:8200'
          - 'ash3c.tailnet-68f9.ts.net:8200'
          - 'hcp1.tailnet-68f9.ts.net:8200'

  - job_name: 'influxdb'
    static_configs:
      - targets: ['influxdb1.tailnet-68f9.ts.net:8086']
PROMCONF

# Start Prometheus with 15 days of retention and the lifecycle API enabled.
exec /opt/prometheus/prometheus \
  --config.file=/opt/prometheus/prometheus.yml \
  --storage.tsdb.path=/opt/prometheus/data \
  --web.console.libraries=/opt/prometheus/console_libraries \
  --web.console.templates=/opt/prometheus/consoles \
  --storage.tsdb.retention.time=15d \
  --web.enable-lifecycle
EOF
      ]
    }

    resources {
      cpu    = 500
      memory = 1024
    }

    service {
      name = "prometheus"
      port = "http"
      tags = [
        "prometheus",
        "monitoring",
        "metrics"
      ]

      # HTTP health probe against Prometheus' /-/healthy endpoint.
      check {
        type     = "http"
        path     = "/-/healthy"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
# Node Exporter service group: a single exec-driver task that installs
# node_exporter 1.7.0 under /opt/node-exporter and serves metrics on static
# port 9100.
# NOTE(review): the exec driver isolates the task's filesystem, so
# --path.rootfs=/ may describe the task's view rather than the host's; confirm
# this reports the metrics you expect.
group "node-exporter" {
  count = 1

  network {
    port "metrics" {
      static = 9100
      to     = 9100
    }
  }

  task "node-exporter" {
    driver = "exec"

    # Download the Node Exporter release tarball.
    # Nomad unpacks archive artifacts automatically; archive = false keeps the
    # .tar.gz intact in local/ so the start script can extract it itself.
    artifact {
      source      = "https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz"
      destination = "local/"
      mode        = "any"

      options {
        archive = false
      }
    }

    config {
      command = "/bin/bash"
      args = [
        "-c",
        <<EOF
set -e
cd local

# Create the install directory (nothing else creates it) and unpack into it.
# The archive's top-level folder is node_exporter-1.7.0.linux-amd64/;
# --strip-components=1 drops it so the binary lands in /opt/node-exporter.
mkdir -p /opt/node-exporter
tar -xzf node_exporter-1.7.0.linux-amd64.tar.gz --strip-components=1 -C /opt/node-exporter
chmod +x /opt/node-exporter/node_exporter

# Start Node Exporter. The exclude regex is single-quoted so bash does not
# interpret its parentheses, pipes or the end-of-string anchor.
exec /opt/node-exporter/node_exporter \
  --path.procfs=/proc \
  --path.rootfs=/ \
  --path.sysfs=/sys \
  '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/)'
EOF
      ]
    }

    resources {
      cpu    = 100
      memory = 256
    }

    service {
      name = "node-exporter"
      port = "metrics"
      tags = [
        "node-exporter",
        "monitoring",
        "metrics"
      ]

      # HTTP probe against the metrics endpoint itself.
      check {
        type     = "http"
        path     = "/metrics"
        interval = "30s"
        timeout  = "5s"
      }
    }
  }
}
}