🎉 Complete Nomad monitoring infrastructure project
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 29s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 11s
Simple Test / test (push) Successful in 1s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 29s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 11s
Simple Test / test (push) Successful in 1s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
✅ Major Achievements: - Deployed complete observability stack (Prometheus + Loki + Grafana) - Established rapid troubleshooting capabilities (3-step process) - Created heatmap dashboard for log correlation analysis - Unified logging system (systemd-journald across all nodes) - Configured API access with Service Account tokens 🧹 Project Cleanup: - Intelligent cleanup based on Git modification frequency - Organized files into proper directory structure - Removed deprecated webhook deployment scripts - Eliminated 70+ temporary/test files (43% reduction) 📊 Infrastructure Status: - Prometheus: 13 nodes monitored - Loki: 12 nodes logging - Grafana: Heatmap dashboard + API access - Promtail: Deployed to 12/13 nodes 🚀 Ready for Terraform transition (静默一周后切换) Project Status: COMPLETED ✅
This commit is contained in:
427
infrastructure/nomad/nomad-jobs/vault-single/vault-single.nomad
Normal file
427
infrastructure/nomad/nomad-jobs/vault-single/vault-single.nomad
Normal file
@@ -0,0 +1,427 @@
|
||||
job "vault-single-nomad" {
|
||||
datacenters = ["dc1"]
|
||||
type = "service"
|
||||
|
||||
group "vault-warden" {
|
||||
count = 1
|
||||
|
||||
volume "vault-storage" {
|
||||
type = "host"
|
||||
read_only = false
|
||||
source = "vault-storage"
|
||||
}
|
||||
|
||||
constraint {
|
||||
attribute = "${node.unique.name}"
|
||||
operator = "="
|
||||
value = "warden"
|
||||
}
|
||||
|
||||
network {
|
||||
port "http" {
|
||||
static = 8200
|
||||
to = 8200
|
||||
}
|
||||
}
|
||||
|
||||
task "vault" {
|
||||
driver = "exec"
|
||||
|
||||
volume_mount {
|
||||
volume = "vault-storage"
|
||||
destination = "/opt/nomad/data/vault-storage"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 1024
|
||||
}
|
||||
|
||||
env {
|
||||
VAULT_ADDR = "http://127.0.0.1:8200"
|
||||
}
|
||||
|
||||
service {
|
||||
name = "vault"
|
||||
port = "http"
|
||||
tags = ["vault-server"]
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/v1/sys/health"
|
||||
interval = "30s"
|
||||
timeout = "5s"
|
||||
}
|
||||
}
|
||||
|
||||
# Vault配置 - 使用Consul存储
|
||||
template {
|
||||
data = <<EOF
|
||||
ui = true
|
||||
disable_mlock = true
|
||||
|
||||
# 使用Consul作为存储后端
|
||||
storage "consul" {
|
||||
address = "100.122.197.112:8500"
|
||||
path = "vault/"
|
||||
|
||||
# 集群配置
|
||||
datacenter = "dc1"
|
||||
service = "vault"
|
||||
service_tags = "vault-server"
|
||||
|
||||
# 会话配置
|
||||
session_ttl = "15s"
|
||||
lock_wait_time = "15s"
|
||||
}
|
||||
|
||||
listener "tcp" {
|
||||
address = "100.122.197.112:8200"
|
||||
tls_disable = 1
|
||||
}
|
||||
|
||||
# API地址 - 使用Tailscale网络
|
||||
api_addr = "http://warden.tailnet-68f9.ts.net:8200"
|
||||
|
||||
# 集群名称
|
||||
cluster_name = "vault-cluster"
|
||||
|
||||
# 日志配置
|
||||
log_level = "INFO"
|
||||
EOF
|
||||
destination = "local/vault.hcl"
|
||||
perms = "644"
|
||||
}
|
||||
|
||||
# 自动解封脚本
|
||||
template {
|
||||
data = <<EOF
|
||||
#!/bin/bash
|
||||
# 启动Vault
|
||||
vault server -config=/local/vault.hcl &
|
||||
VAULT_PID=$!
|
||||
|
||||
# 等待Vault启动
|
||||
sleep 10
|
||||
|
||||
# 自动解封Vault - 使用本地地址,通过Consul发现其他节点
|
||||
echo "Auto-unsealing Vault..."
|
||||
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
|
||||
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
|
||||
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
|
||||
|
||||
echo "Vault auto-unsealed successfully"
|
||||
wait $VAULT_PID
|
||||
EOF
|
||||
destination = "local/start-vault.sh"
|
||||
perms = "755"
|
||||
}
|
||||
|
||||
config {
|
||||
command = "/bin/bash"
|
||||
args = [
|
||||
"/local/start-vault.sh"
|
||||
]
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
progress_deadline = "10m"
|
||||
auto_revert = true
|
||||
canary = 0
|
||||
}
|
||||
|
||||
migrate {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
}
|
||||
}
|
||||
|
||||
group "vault-ch4" {
|
||||
count = 1
|
||||
|
||||
constraint {
|
||||
attribute = "${node.unique.name}"
|
||||
operator = "="
|
||||
value = "ch4"
|
||||
}
|
||||
|
||||
network {
|
||||
port "http" {
|
||||
static = 8200
|
||||
to = 8200
|
||||
}
|
||||
}
|
||||
|
||||
task "vault" {
|
||||
driver = "exec"
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 1024
|
||||
}
|
||||
|
||||
env {
|
||||
VAULT_ADDR = "http://127.0.0.1:8200"
|
||||
}
|
||||
|
||||
service {
|
||||
name = "vault"
|
||||
port = "http"
|
||||
tags = ["vault-server"]
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/v1/sys/health"
|
||||
interval = "30s"
|
||||
timeout = "5s"
|
||||
}
|
||||
}
|
||||
|
||||
# Vault配置 - 使用Consul存储
|
||||
template {
|
||||
data = <<EOF
|
||||
ui = true
|
||||
disable_mlock = true
|
||||
|
||||
# 使用Consul作为存储后端
|
||||
storage "consul" {
|
||||
address = "100.117.106.136:8500"
|
||||
path = "vault/"
|
||||
|
||||
# 集群配置
|
||||
datacenter = "dc1"
|
||||
service = "vault"
|
||||
service_tags = "vault-server"
|
||||
|
||||
# 会话配置
|
||||
session_ttl = "15s"
|
||||
lock_wait_time = "15s"
|
||||
}
|
||||
|
||||
listener "tcp" {
|
||||
address = "100.117.106.136:8200"
|
||||
tls_disable = 1
|
||||
}
|
||||
|
||||
# API地址 - 使用Tailscale网络
|
||||
api_addr = "http://ch4.tailnet-68f9.ts.net:8200"
|
||||
|
||||
# 集群名称
|
||||
cluster_name = "vault-cluster"
|
||||
|
||||
# 日志配置
|
||||
log_level = "INFO"
|
||||
EOF
|
||||
destination = "local/vault.hcl"
|
||||
perms = "644"
|
||||
}
|
||||
|
||||
# 自动解封脚本
|
||||
template {
|
||||
data = <<EOF
|
||||
#!/bin/bash
|
||||
# 启动Vault
|
||||
vault server -config=/local/vault.hcl &
|
||||
VAULT_PID=$!
|
||||
|
||||
# 等待Vault启动
|
||||
sleep 10
|
||||
|
||||
# 自动解封Vault - 使用本地地址,通过Consul发现其他节点
|
||||
echo "Auto-unsealing Vault..."
|
||||
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
|
||||
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
|
||||
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
|
||||
|
||||
echo "Vault auto-unsealed successfully"
|
||||
wait $VAULT_PID
|
||||
EOF
|
||||
destination = "local/start-vault.sh"
|
||||
perms = "755"
|
||||
}
|
||||
|
||||
config {
|
||||
command = "/bin/bash"
|
||||
args = [
|
||||
"/local/start-vault.sh"
|
||||
]
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
progress_deadline = "10m"
|
||||
auto_revert = true
|
||||
canary = 0
|
||||
}
|
||||
|
||||
migrate {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
}
|
||||
}
|
||||
|
||||
group "vault-ash3c" {
|
||||
count = 1
|
||||
|
||||
constraint {
|
||||
attribute = "${node.unique.name}"
|
||||
operator = "="
|
||||
value = "ash3c"
|
||||
}
|
||||
|
||||
network {
|
||||
port "http" {
|
||||
static = 8200
|
||||
to = 8200
|
||||
}
|
||||
}
|
||||
|
||||
task "vault" {
|
||||
driver = "exec"
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 1024
|
||||
}
|
||||
|
||||
env {
|
||||
VAULT_ADDR = "http://127.0.0.1:8200"
|
||||
}
|
||||
|
||||
service {
|
||||
name = "vault"
|
||||
port = "http"
|
||||
tags = ["vault-server"]
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/v1/sys/health"
|
||||
interval = "30s"
|
||||
timeout = "5s"
|
||||
}
|
||||
}
|
||||
|
||||
# Vault配置 - 使用Consul存储
|
||||
template {
|
||||
data = <<EOF
|
||||
ui = true
|
||||
disable_mlock = true
|
||||
|
||||
# 使用Consul作为存储后端
|
||||
storage "consul" {
|
||||
address = "100.116.80.94:8500"
|
||||
path = "vault/"
|
||||
|
||||
# 集群配置
|
||||
datacenter = "dc1"
|
||||
service = "vault"
|
||||
service_tags = "vault-server"
|
||||
|
||||
# 会话配置
|
||||
session_ttl = "15s"
|
||||
lock_wait_time = "15s"
|
||||
}
|
||||
|
||||
listener "tcp" {
|
||||
address = "100.116.80.94:8200"
|
||||
tls_disable = 1
|
||||
}
|
||||
|
||||
# API地址 - 使用Tailscale网络
|
||||
api_addr = "http://ash3c.tailnet-68f9.ts.net:8200"
|
||||
|
||||
# 集群名称
|
||||
cluster_name = "vault-cluster"
|
||||
|
||||
# 日志配置
|
||||
log_level = "INFO"
|
||||
EOF
|
||||
destination = "local/vault.hcl"
|
||||
perms = "644"
|
||||
}
|
||||
|
||||
# 自动解封脚本
|
||||
template {
|
||||
data = <<EOF
|
||||
#!/bin/bash
|
||||
# 启动Vault
|
||||
vault server -config=/local/vault.hcl &
|
||||
VAULT_PID=$!
|
||||
|
||||
# 等待Vault启动
|
||||
sleep 10
|
||||
|
||||
# 自动解封Vault - 使用本地地址,通过Consul发现其他节点
|
||||
echo "Auto-unsealing Vault..."
|
||||
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
|
||||
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
|
||||
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
|
||||
|
||||
echo "Vault auto-unsealed successfully"
|
||||
wait $VAULT_PID
|
||||
EOF
|
||||
destination = "local/start-vault.sh"
|
||||
perms = "755"
|
||||
}
|
||||
|
||||
config {
|
||||
command = "/bin/bash"
|
||||
args = [
|
||||
"/local/start-vault.sh"
|
||||
]
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
progress_deadline = "10m"
|
||||
auto_revert = true
|
||||
canary = 0
|
||||
}
|
||||
|
||||
migrate {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user