mgmt/scripts/utilities/fix-ash3c-ip.sh

137 lines
3.2 KiB
Bash
Executable File

#!/bin/bash
# ash3c IP address repair script.
#
# Regenerates the Nomad agent configuration for the ash3c node so that it
# binds to the expected 100.x address, uploads it over SSH, replaces
# /etc/nomad.d/nomad.hcl on the node, wipes /opt/nomad/data and restarts the
# nomad service.
#
# WARNING: destructive. The remote data directory is emptied on every run.
#
# NOTE(review): the sudo password and the Nomad gossip encryption key are
# hardcoded below exactly as in the previous revision. They should be moved
# out of the script (environment, secret store, NOPASSWD sudoers rule) and
# rotated; they are left in place here only to keep behavior unchanged.
set -euo pipefail

# IP address the Nomad agent on ash3c must bind to.
readonly CORRECT_IP="100.116.80.94"
# Address used to reach ash3c over SSH.
readonly ASH3C_HOST="100.116.80.94"

readonly SSH_TARGET="ben@${ASH3C_HOST}"
# Assigned unquoted on its own line so the tilde is expanded.
SSH_KEY=~/.ssh/id_ed25519
readonly SSH_KEY
readonly -a SSH_OPTS=(-p 22 -i "${SSH_KEY}")
readonly -a SCP_OPTS=(-P 22 -i "${SSH_KEY}")
# Staging path on ash3c; must match the path used by the remote script in
# apply_remote_fix (that heredoc is quoted, so it cannot expand this variable).
readonly REMOTE_STAGING="/tmp/ash3c-nomad.hcl"

# Private scratch directory holding the generated config; set in main.
WORK_DIR=""

#######################################
# Remove the local scratch directory. Safe to call more than once.
# Globals:   WORK_DIR (read, reset to empty)
#######################################
cleanup() {
  if [[ -n "${WORK_DIR}" ]]; then
    rm -rf -- "${WORK_DIR}"
    WORK_DIR=""
  fi
}
# Runs on every exit path so the config (which contains the gossip key) is
# never left behind when a step fails.
trap cleanup EXIT

#######################################
# Run a fixed command on ash3c under sudo.
# Arguments: the command words; they are joined with spaces and interpreted
#            by the remote shell, so pass trusted literals only.
# Returns:   exit status of ssh / the remote command.
#######################################
remote_sudo() {
  ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "echo '3131' | sudo -S $*"
}

#######################################
# Write the repaired Nomad agent configuration.
# Globals:   CORRECT_IP (read)
# Arguments: $1 - local path of the file to create
#######################################
write_config() {
  local dest=$1
  # NOTE(review): Nomad's agent configuration documents the web UI block as
  # `ui`, while `ui_config` is the Consul spelling. The generated content is
  # kept unchanged here; confirm against the Nomad version in use.
  cat > "${dest}" << EOF
# 🔧 ash3c 修复后的 Nomad 配置
datacenter = "dc1"
region = "global"
data_dir = "/opt/nomad/data"
# 强制使用正确的 Tailscale IP
bind_addr = "${CORRECT_IP}"
# 日志配置
log_level = "INFO"
log_file = "/var/log/nomad/nomad.log"
server {
enabled = true
bootstrap_expect = 3
encrypt = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
server_join {
retry_join = [
"100.116.158.95:4647",
"100.117.106.136:4647",
"100.116.80.94:4647"
]
retry_max = 10
retry_interval = "15s"
}
# 更宽松的心跳配置
heartbeat_grace = "30s"
min_heartbeat_ttl = "10s"
}
client {
enabled = true
network_interface = "tailscale0"
}
ui_config {
enabled = true
}
addresses {
http = "0.0.0.0"
rpc = "${CORRECT_IP}"
serf = "${CORRECT_IP}"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
plugin "docker" {
config {
allow_privileged = true
volumes {
enabled = true
}
}
}
EOF
}

#######################################
# Stop Nomad on ash3c, install the staged config, wipe the data directory
# and start the service again.
# Returns:   non-zero if any mandatory remote step fails.
#######################################
apply_remote_fix() {
  # An explicit `bash -s` runs the script in a known shell instead of feeding
  # it to whatever the login shell happens to be.
  ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" 'bash -s' << 'REMOTE_SCRIPT'
# Abort on the first failed mandatory step so the caller sees the failure.
set -eu
run_sudo() { echo '3131' | sudo -S "$@"; }

# Best effort: the service may already be stopped / no process may exist.
run_sudo systemctl stop nomad || true
# -x matches the process name exactly; -f would also match this very
# "sudo ... pkill ... nomad" command line and unrelated processes.
run_sudo pkill -x nomad || true
sleep 5
# Back up the old config (best effort: it may not exist yet).
run_sudo cp /etc/nomad.d/nomad.hcl "/etc/nomad.d/nomad.hcl.backup.$(date +%Y%m%d_%H%M%S)" || true
# Install the new config with owner and mode set in one step.
run_sudo install -o nomad -g nomad -m 640 /tmp/ash3c-nomad.hcl /etc/nomad.d/nomad.hcl
# The staged copy contains the gossip key; do not leave it in /tmp.
rm -f /tmp/ash3c-nomad.hcl
# Clear the data directory. The glob is expanded by the root shell, so it
# works even when the login user cannot list /opt/nomad/data.
run_sudo sh -c 'rm -rf -- /opt/nomad/data/*'
# Restart the service.
run_sudo systemctl daemon-reload
run_sudo systemctl enable nomad
run_sudo systemctl start nomad
echo "✅ ash3c 配置修复完成"
REMOTE_SCRIPT
}

#######################################
# Entry point: diagnose, generate, upload, apply, verify, clean up.
# Globals:   WORK_DIR (written)
#######################################
main() {
  local config_file

  echo "🔧 ash3c IP 地址问题修复脚本"
  echo ""
  echo "📡 检查 ash3c 节点的网络配置..."
  # Informational checks only: failures are reported but do not abort.
  echo "🔍 检查 ash3c 节点的 IP 地址绑定..."
  # Anchored on "inet 100." so addresses such as 192.168.100.x do not match.
  remote_sudo ip addr show | grep -E "inet 100\." || echo "❌ 未找到 Tailscale IP"
  echo ""
  echo "🔍 检查 Tailscale 状态..."
  remote_sudo tailscale status || echo "❌ Tailscale 状态检查失败"
  echo ""
  echo "🔧 修复 ash3c 的 Nomad 配置..."
  # mktemp -d gives an unpredictable, owner-only directory.
  WORK_DIR=$(mktemp -d)
  config_file="${WORK_DIR}/ash3c-nomad.hcl"
  write_config "${config_file}"
  echo "📤 上传修复后的配置到 ash3c..."
  scp "${SCP_OPTS[@]}" "${config_file}" "${SSH_TARGET}:${REMOTE_STAGING}"
  echo "🔧 在 ash3c 上应用修复..."
  apply_remote_fix
  echo ""
  echo "⏰ 等待 ash3c 服务启动..."
  sleep 15
  echo ""
  echo "🔍 检查 ash3c 服务状态..."
  remote_sudo systemctl status nomad --no-pager || echo "❌ 服务状态检查失败"
  echo ""
  echo "🧹 清理临时文件..."
  cleanup
  echo ""
  echo "✅ ash3c IP 修复完成!"
  echo ""
  echo "下一步:"
  echo "1. 检查集群状态: nomad server members"
  echo "2. 如果还有问题,运行核弹级重置: ./scripts/utilities/nuclear-reset.sh"
}

main