37 lines
1.1 KiB
Bash
Executable File
37 lines
1.1 KiB
Bash
Executable File
#!/bin/bash
#
# Nomad cluster health check.
#
# Runs five read-only diagnostics and prints their results to stdout:
#   1. `systemctl is-active nomad` on every host of the Ansible group
#      `nomad_cluster`
#   2. TCP reachability of ports 4646, 4647 and 4648 on each cluster node
#   3. `nomad server members`
#   4. `nomad node status`
#   5. the last five journal lines of the local `nomad` unit
#
# Environment:
#   NOMAD_INVENTORY  Path to the Ansible inventory file. Defaults to the
#                    production inventory under /root/mgmt.
#
# `set -e` is deliberately NOT enabled: every step is a best-effort probe
# that is expected to return non-zero when the cluster is unhealthy, and the
# remaining checks must still run in that case.
set -u

readonly INVENTORY="${NOMAD_INVENTORY:-/root/mgmt/configuration/inventories/production/nomad-cluster.ini}"
readonly -a NODE_IPS=(100.116.158.95 100.117.106.136 100.116.80.94)
# 4646 / 4647 / 4648 are Nomad's default HTTP API, RPC and Serf gossip ports.
readonly -a NOMAD_PORTS=(4646 4647 4648)
# Upper bound, in seconds, for a single port probe.
readonly PROBE_TIMEOUT=5

#######################################
# Probe one TCP port on one host with `nc -zv`.
# Arguments: $1 - host or IP address
#            $2 - TCP port
# Outputs:   on success, the nc line reporting "succeeded" or "open";
#            otherwise an explicit "unreachable" line, so that a closed
#            port is visible in the report instead of producing no output.
# Returns:   always 0; a failed probe must not stop the other checks.
#######################################
check_port() {
  local host=$1
  local port=$2

  # nc writes its verdict to stderr, hence 2>&1. Without pipefail the
  # pipeline status is grep's: 0 only when a success line was printed.
  if ! timeout "$PROBE_TIMEOUT" nc -zv "$host" "$port" 2>&1 \
    | grep -E "(succeeded|open)"; then
    echo "端口 $port 不可达"
  fi
  return 0
}

echo "=== Nomad 集群状态检查 ==="

# Service state on every node. Ansible's stderr is discarded so that only
# the per-host results show up in the report.
echo "1. 检查服务状态..."
ansible nomad_cluster -i "$INVENTORY" -m shell -a "systemctl is-active nomad" 2>/dev/null

# Network connectivity to every node on every Nomad port.
printf '\n%s\n' "2. 检查网络连通性..."
for ip in "${NODE_IPS[@]}"; do
  echo "检查到 $ip 的连接..."
  for port in "${NOMAD_PORTS[@]}"; do
    check_port "$ip" "$port"
  done
done

# Server membership. The failure message mentions a missing leader as a
# possible cause; the actual CLI error is hidden by 2>/dev/null.
printf '\n%s\n' "3. 检查 Nomad 集群成员..."
if nomad server members 2>/dev/null; then
  echo "集群成员查询成功"
else
  echo "无法查询集群成员 - 可能没有 leader"
fi

# Client node status.
printf '\n%s\n' "4. 检查节点状态..."
if nomad node status 2>/dev/null; then
  echo "节点状态查询成功"
else
  echo "无法查询节点状态"
fi

# Recent log lines. journalctl reads the journal of the host this script
# runs on.
# NOTE(review): the heading implies that host is the Semaphore node - confirm.
printf '\n%s\n' "5. 检查最近的日志..."
echo "=== Semaphore 节点日志 ==="
# NOTE(review): `tail -5` presumably trims a header line that some
# journalctl versions print before the entries - confirm before removing.
journalctl -u nomad -n 5 --no-pager 2>/dev/null | tail -5

printf '\n%s\n' "=== 检查完成 ==="