#!/bin/bash
#
# Consul cluster synchronisation diagnostics.
#
# Queries every node in CONSUL_NODES over the Consul HTTP API and prints:
#   1. the Raft leader and peer count each node reports,
#   2. catalog / agent services whose names match consul-lb or traefik,
#   3. the status of agent health checks whose id contains "traefik",
#   4. an HTTP reachability probe against the Traefik endpoints,
#   5. suggested follow-up actions.
#
# Requires: curl, jq.
# Optional environment:
#   CONSUL_DIAG_CONNECT_TIMEOUT  seconds allowed for TCP connect   (default 3)
#   CONSUL_DIAG_MAX_TIME         seconds allowed per HTTP request  (default 10)

# No `set -e`: this is a best-effort report and must keep going when a node is
# down. `pipefail` is required so that a failing curl/jq/grep inside a pipeline
# is visible to the fallback branches instead of being masked by the last stage.
set -uo pipefail

readonly CONSUL_NODES=(
  "master.tailnet-68f9.ts.net:8500"
  "warden.tailnet-68f9.ts.net:8500"
  "ash3c.tailnet-68f9.ts.net:8500"
)

readonly TRAEFIK_ADDR="100.97.62.111"
readonly CONNECT_TIMEOUT="${CONSUL_DIAG_CONNECT_TIMEOUT:-3}"
readonly MAX_TIME="${CONSUL_DIAG_MAX_TIME:-10}"

#######################################
# GET a Consul HTTP API path from one node.
# Arguments: $1 - node as host:port
#            $2 - API path starting with "/"
# Outputs:   response body on stdout
# Returns:   curl's exit status; non-zero on connection failure, timeout,
#            or an HTTP status >= 400 (because of -f)
#######################################
consul_get() {
  local node=$1 path=$2
  curl -sf \
    --connect-timeout "$CONNECT_TIMEOUT" \
    --max-time "$MAX_TIME" \
    "http://${node}${path}" 2>/dev/null
}

#######################################
# Print the service names under an API path that match a regex, or a
# fallback message when the request fails or nothing matches.
# Arguments: $1 - node as host:port
#            $2 - API path returning a JSON object keyed by service name/id
#            $3 - extended regex applied to the keys
#            $4 - message printed when there is nothing to show
#######################################
print_matching_services() {
  local node=$1 path=$2 pattern=$3 fallback=$4
  local names
  # Under pipefail the pipeline fails if curl fails, jq rejects the body,
  # or grep finds no match; all three cases take the fallback branch.
  if names=$(consul_get "$node" "$path" \
    | jq -r 'keys[]' 2>/dev/null \
    | grep -E -- "$pattern"); then
    printf '%s\n' "$names" | sed 's/^/ /'
  else
    echo "$fallback"
  fi
}

#######################################
# Probe a URL and print its HTTP status code and total response time.
# HTTP error statuses are still reported (no -f); only a transport-level
# failure prints the failure message.
# Arguments: $1 - URL to request
#            $2 - message printed when curl itself fails
#######################################
probe_url() {
  local url=$1 failure_msg=$2
  curl -s \
    --connect-timeout "$CONNECT_TIMEOUT" \
    --max-time "$MAX_TIME" \
    -w " HTTP %{http_code} - 响应时间: %{time_total}s\n" \
    -o /dev/null "$url" \
    || echo "$failure_msg"
}

# Section 1: leader and peer count as reported by each node.
check_cluster_status() {
  local node leader peers
  echo "1. 检查集群状态"
  echo "=================="
  for node in "${CONSUL_NODES[@]}"; do
    leader=$(consul_get "$node" /v1/status/leader) || leader='ERROR'
    peers=$(consul_get "$node" /v1/status/peers | jq length 2>/dev/null) \
      || peers='ERROR'
    # jq exits 0 with no output on an empty body; treat that as an error too.
    [[ -n "$peers" ]] || peers='ERROR'
    echo "节点: $node"
    echo " Leader: $leader"
    echo " Peers: $peers"
    echo ""
  done
}

# Section 2: catalog-wide and agent-local service registrations.
check_service_registration() {
  local node
  echo "2. 检查服务注册"
  echo "================"
  for node in "${CONSUL_NODES[@]}"; do
    echo "节点: $node"
    echo " Catalog 服务:"
    print_matching_services "$node" /v1/catalog/services \
      "(consul-lb|traefik)" " ERROR 或无服务"
    echo " Agent 服务:"
    print_matching_services "$node" /v1/agent/services \
      "traefik" " 无本地服务"
    echo ""
  done
}

# Section 3: status of agent checks whose id contains "traefik".
check_health_status() {
  local node checks report
  echo "3. 检查健康状态"
  echo "================"
  for node in "${CONSUL_NODES[@]}"; do
    echo "节点: $node"
    # Test the assignment directly so curl's status cannot be lost.
    if checks=$(consul_get "$node" /v1/agent/checks); then
      report=$(jq -r 'to_entries[] | select(.key | contains("traefik")) | " \(.key): \(.value.Status)"' \
        <<<"$checks" 2>/dev/null)
      # jq exits 0 when nothing matches, so decide on the output instead.
      if [[ -n "$report" ]]; then
        printf '%s\n' "$report"
      else
        echo " 无 Traefik 健康检查"
      fi
    else
      echo " ERROR: 无法连接"
    fi
    echo ""
  done
}

# Section 4: reachability of the Traefik entrypoint and dashboard API.
check_traefik_connectivity() {
  echo "4. 网络连通性测试"
  echo "=================="
  echo "测试从当前节点到 Traefik 的连接:"
  probe_url "http://${TRAEFIK_ADDR}:80/" \
    " ERROR: 无法连接到 Traefik"
  probe_url "http://${TRAEFIK_ADDR}:8080/api/overview" \
    " ERROR: 无法连接到 Traefik Dashboard"
  echo ""
}

# Section 5: static list of suggested remediation steps.
print_suggestions() {
  echo "5. 建议操作"
  echo "==========="
  # Quoted delimiter: the nomad command line is printed literally, not run.
  cat <<'EOF'
如果发现问题:
 1. 重新注册服务: ./scripts/register-traefik-to-all-consul.sh
 2. 检查 Consul 日志: nomad alloc logs $(nomad job allocs consul-cluster-nomad | grep warden | awk '{print $1}') consul
 3. 重启有问题的 Consul 节点
 4. 检查网络连通性和防火墙设置
EOF
}

main() {
  local tool

  echo "=== Consul 集群同步诊断 ==="
  echo "时间: $(date)"
  echo ""

  # Warn (on stderr) rather than abort: the report still shows which checks
  # could not run, via their ERROR fallbacks.
  for tool in curl jq; do
    command -v "$tool" >/dev/null 2>&1 \
      || printf '警告: 未找到 %s，相关检查将显示 ERROR\n' "$tool" >&2
  done

  check_cluster_status
  check_service_registration
  check_health_status
  check_traefik_connectivity
  print_suggestions
}

main "$@"