This commit is contained in:
2025-10-09 01:22:22 +00:00
parent 1c994f9f60
commit eab95c8c80
136 changed files with 11001 additions and 849 deletions

View File

@@ -0,0 +1,44 @@
#!/bin/bash
# NFS CSI plugin deployment script.
#
# Runs the Ansible playbook that installs the NFS CSI plugin, waits for Nomad
# to come back, registers the CSI volume and prints plugin/volume status so
# the NFS storage becomes visible in the Nomad UI.
#
# Paths are relative, so the script has to be started from the directory that
# contains deployment/ and components/.

set -e

inventory="deployment/ansible/inventories/production/hosts"
playbook="deployment/ansible/playbooks/install/install-nfs-csi-plugin.yml"
volume_spec="components/nomad/volumes/nfs-csi-volume.hcl"

printf '%s\n' "🚀 开始部署NFS CSI Plugin..."

# Must run as root; stop early otherwise.
if ((EUID != 0)); then
  printf '%s\n' "❌ 请以root用户运行此脚本"
  exit 1
fi

# Step 1: install the CSI plugin through Ansible.
printf '%s\n' "📦 安装NFS CSI插件..."
ansible-playbook -i "$inventory" "$playbook"

# Step 2: fixed pause while the Nomad service restarts.
printf '%s\n' "⏳ 等待Nomad服务重启..."
sleep 30

# Step 3: register the CSI volume definition.
printf '%s\n' "📝 注册CSI Volume..."
nomad volume register "$volume_spec"

# Step 4: show the CSI plugin status.
printf '%s\n' "✅ 验证CSI插件状态..."
nomad plugin status

# Step 5: list the CSI volumes.
printf '%s\n' "📊 显示CSI volumes..."
nomad volume status

printf '%s\n' "🎉 NFS CSI Plugin部署完成"
printf '%s\n' "现在您可以在Nomad UI中看到CSI插件和volumes了"

View File

@@ -1,62 +0,0 @@
#!/bin/bash
# Consul cluster sync diagnostics.
#
# For every node in CONSUL_NODES this script queries the Consul HTTP API and
# prints:
#   1. the raft leader and the number of peers
#   2. catalog / agent services whose name matches consul-lb or traefik
#   3. agent health checks whose id contains "traefik"
# It then probes two hard-coded Traefik endpoints and prints suggested
# follow-up actions.
#
# Requires: curl, jq.

echo "=== Consul 集群同步诊断 ==="
echo "时间: $(date)"
echo ""

CONSUL_NODES=(
  "master.tailnet-68f9.ts.net:8500"
  "warden.tailnet-68f9.ts.net:8500"
  "ash3c.tailnet-68f9.ts.net:8500"
)

# Silent, and time-bounded so one unreachable node cannot stall the whole run.
CURL_OPTS=(-s --connect-timeout 5 --max-time 10)

# consul_get NODE PATH
# Writes the body of http://NODE/PATH to stdout; returns curl's exit status.
consul_get() {
  curl "${CURL_OPTS[@]}" "http://$1$2" 2>/dev/null
}

# print_or_fallback TEXT FALLBACK
# Prints TEXT when it is non-empty, otherwise FALLBACK.
# A pipeline's exit status is that of its last command, so `... | sed ... ||
# echo fallback` never reaches the fallback; testing the captured output does.
print_or_fallback() {
  if [[ -n "$1" ]]; then
    printf '%s\n' "$1"
  else
    echo "$2"
  fi
}

echo "1. 检查集群状态"
echo "=================="
for node in "${CONSUL_NODES[@]}"; do
  echo "节点: $node"

  leader=$(consul_get "$node" /v1/status/leader) || leader='ERROR'
  echo " Leader: $leader"

  # jq prints nothing and exits 0 on empty input, so a failed request must be
  # detected by the empty result rather than by jq's exit status.
  peer_count=$(consul_get "$node" /v1/status/peers | jq length 2>/dev/null)
  echo " Peers: ${peer_count:-ERROR}"
  echo ""
done

echo "2. 检查服务注册"
echo "================"
for node in "${CONSUL_NODES[@]}"; do
  echo "节点: $node"

  echo " Catalog 服务:"
  catalog_services=$(consul_get "$node" /v1/catalog/services \
    | jq -r 'keys[]' 2>/dev/null \
    | grep -E "(consul-lb|traefik)" \
    | sed 's/^/ /')
  print_or_fallback "$catalog_services" " ERROR 或无服务"

  echo " Agent 服务:"
  agent_services=$(consul_get "$node" /v1/agent/services \
    | jq -r 'keys[]' 2>/dev/null \
    | grep -E "traefik" \
    | sed 's/^/ /')
  print_or_fallback "$agent_services" " 无本地服务"
  echo ""
done

echo "3. 检查健康状态"
echo "================"
for node in "${CONSUL_NODES[@]}"; do
  echo "节点: $node"
  if checks=$(consul_get "$node" /v1/agent/checks); then
    # jq exits 0 even when no check matches, so decide on the output itself.
    traefik_checks=$(jq -r 'to_entries[] | select(.key | contains("traefik")) | " \(.key): \(.value.Status)"' <<<"$checks" 2>/dev/null)
    print_or_fallback "$traefik_checks" " 无 Traefik 健康检查"
  else
    echo " ERROR: 无法连接"
  fi
  echo ""
done

echo "4. 网络连通性测试"
echo "=================="
echo "测试从当前节点到 Traefik 的连接:"
curl "${CURL_OPTS[@]}" -w " HTTP %{http_code} - 响应时间: %{time_total}s\n" -o /dev/null "http://100.97.62.111:80/" || echo " ERROR: 无法连接到 Traefik"
curl "${CURL_OPTS[@]}" -w " HTTP %{http_code} - 响应时间: %{time_total}s\n" -o /dev/null "http://100.97.62.111:8080/api/overview" || echo " ERROR: 无法连接到 Traefik Dashboard"
echo ""

echo "5. 建议操作"
echo "==========="
echo "如果发现问题:"
echo " 1. 重新注册服务: ./scripts/register-traefik-to-all-consul.sh"
echo " 2. 检查 Consul 日志: nomad alloc logs \$(nomad job allocs consul-cluster-nomad | grep warden | awk '{print \$1}') consul"
echo " 3. 重启有问题的 Consul 节点"
echo " 4. 检查网络连通性和防火墙设置"

View File

@@ -4,7 +4,7 @@
# Addresses the Consul leader rotation problem
# host:port entries for the Consul nodes; presumably HTTP API addresses
# (8500 is Consul's default HTTP port) — confirm against the code that
# consumes this array.
# NOTE(review): both ch4 and warden are listed here; confirm which nodes
# belong to the current cluster.
CONSUL_NODES=(
"master.tailnet-68f9.ts.net:8500"
"ch4.tailnet-68f9.ts.net:8500"
"warden.tailnet-68f9.ts.net:8500"
"ash3c.tailnet-68f9.ts.net:8500"
)

View File

@@ -1,43 +0,0 @@
#!/bin/bash
# Smoke test: install and check Consul from the HashiCorp APT repository on
# one node over SSH.
#
# Steps: install the repository signing key, add the APT source, refresh the
# package lists, show the available Consul versions, install the pinned
# version, then print the installed version, binary path and service status.
#
# Requires: SSH access to TEST_NODE with sudo rights on that node.

echo "🧪 测试 Consul APT 安装流程"
echo "================================"

# Node the test runs against.
TEST_NODE="hcp1.tailnet-68f9.ts.net"
# Keyring that the APT source entry refers to via signed-by.
KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"

# fail MESSAGE...
# Prints the message to stderr and aborts the test with status 1.
fail() {
  echo "❌ $*" >&2
  exit 1
}

echo "1. 测试 HashiCorp 源配置..."
# The pipeline's status is tee's, so also require a non-empty keyring file to
# catch a failed download or dearmor step.
ssh "$TEST_NODE" "curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee $KEYRING > /dev/null && test -s $KEYRING" \
  || fail "HashiCorp GPG 密钥安装失败"

echo "2. 添加 APT 源..."
# \$(lsb_release -cs) is escaped so it runs on the target node; unescaped it
# would insert the local machine's release codename.
# trusted=yes is omitted on purpose: it makes APT accept the repository even
# when signature verification fails, which defeats signed-by.
ssh "$TEST_NODE" "echo \"deb [signed-by=$KEYRING] https://apt.releases.hashicorp.com \$(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list" \
  || fail "APT 源添加失败"

echo "3. 更新包列表..."
# sudo for consistency with the steps above, which already rely on it.
ssh "$TEST_NODE" "sudo apt update" || fail "包列表更新失败"

echo "4. 检查可用的 Consul 版本..."
ssh "$TEST_NODE" "apt-cache policy consul"

echo "5. 测试安装 Consul..."
# The version pattern is single-quoted for the remote shell so it reaches apt
# unchanged instead of being treated as a filename glob.
if ssh "$TEST_NODE" "sudo apt install -y 'consul=1.21.5-*'"; then
  echo "✅ Consul 安装成功"

  echo "6. 验证安装..."
  ssh "$TEST_NODE" "consul version"
  ssh "$TEST_NODE" "command -v consul"

  echo "7. 检查服务状态..."
  # Informational only: systemctl exits non-zero for an inactive unit.
  ssh "$TEST_NODE" "systemctl status consul --no-pager"
else
  echo "❌ Consul 安装失败"
  exit 1
fi

echo ""
echo "🎉 测试完成!"
echo "现在可以运行完整的 Ansible playbook"