47 lines
1.3 KiB
Bash
Executable File
47 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
#
# Nomad node status check-and-repair script.
#
# For every node listed in NODES this script asks the Nomad HTTP API
# (NOMAD_ADDR/v1/nodes) for the node's Status. A node reported as "down" gets
# `sudo systemctl restart nomad` run on it over SSH, after which the status is
# queried once more and printed.
#
# Requires: curl, jq, ssh.

set -uo pipefail

NOMAD_ADDR="http://ch2.tailnet-68f9.ts.net:4646"
TAILNET_DOMAIN="tailnet-68f9.ts.net"
NODES=("ash2e" "ch4" "warden" "hcp1" "ash3c")

# Seconds to wait after a restart before the status is queried again.
RESTART_WAIT_SECONDS=10

#######################################
# Print the Nomad status of one node.
# Globals:   NOMAD_ADDR (read)
# Arguments: $1 - node name as it appears in the API's .Name field
# Outputs:   the node's .Status on stdout; nothing if the node is not listed
# Returns:   non-zero if the API request or the JSON parsing fails
#######################################
node_status() {
  local name=$1
  local nodes_json

  # -f turns HTTP errors into a non-zero exit instead of feeding an error page
  # to jq; --max-time keeps an unreachable API from hanging the whole run.
  nodes_json=$(curl -sf --max-time 10 "$NOMAD_ADDR/v1/nodes") || return 1

  # Pass the name as a jq variable rather than splicing it into the filter.
  jq -r --arg name "$name" \
    '.[] | select(.Name == $name) | .Status' <<<"$nodes_json"
}

# Fail early with a clear message instead of reporting bogus node states.
for tool in curl jq ssh; do
  if ! command -v "$tool" >/dev/null; then
    echo "❌ 缺少依赖命令: $tool" >&2
    exit 1
  fi
done

echo "🔍 检查 Nomad 节点状态..."

for node in "${NODES[@]}"; do
  echo "📊 检查节点: $node"

  # Check the node status. An empty result means the API was unreachable or
  # the node is not registered; that must not be reported as healthy.
  if ! status=$(node_status "$node") || [[ -z "$status" ]]; then
    echo "⚠️ 无法获取节点 $node 的状态"
    echo "---"
    continue
  fi

  if [[ "$status" == "down" ]]; then
    echo "❌ 节点 $node 状态: $status"

    # Try to repair the node by restarting the Nomad service over SSH.
    echo "🔄 尝试修复节点 $node..."

    # stderr is left visible so the reason for a failed restart can be seen;
    # ConnectTimeout bounds the wait on an unreachable host.
    if ssh -o ConnectTimeout=10 "$node.$TAILNET_DOMAIN" \
      "sudo systemctl restart nomad"; then
      echo "✅ 节点 $node 服务重启成功"
    else
      echo "❌ 节点 $node 服务重启失败"
    fi

    # Give the service time to come up.
    sleep "$RESTART_WAIT_SECONDS"

    # Check the status again.
    new_status=$(node_status "$node") || new_status=""
    echo "📊 节点 $node 新状态: ${new_status:-未知}"
  else
    echo "✅ 节点 $node 状态: $status"
  fi

  echo "---"
done

echo "🎯 检查完成!"