mgmt/scripts/utilities/cleanup-retired-nodes.sh

69 lines
1.8 KiB
Bash

#!/bin/bash
# Retired-node cleanup script.
# Created:       2025-09-27
# Scheduled run: 2025-10-27 (one month later)
#
# Environment:
#   NOMAD_ADDR  Nomad API address; defaults to http://100.116.158.95:4646
#               when unset or empty.
set -e

# Export the address so that child processes started by this script (the
# nomad CLI) inherit it. A plain assignment is only visible to this shell
# when the variable was not already present in the environment, which would
# leave the CLI talking to a different address than the one printed below.
export NOMAD_ADDR="${NOMAD_ADDR:-http://100.116.158.95:4646}"

# Banner: what is running, when, and against which Nomad endpoint.
echo "=== 清理退役节点脚本 ==="
echo "执行时间: $(date)"
echo "Nomad 地址: $NOMAD_ADDR"
echo ""
# Retired nodes to process. Each entry is one colon-separated record:
#   <node_id>:<node_name>:<reason>
RETIRED_NODES=()
RETIRED_NODES+=("583f1b77:semaphore:已转为纯server")
RETIRED_NODES+=("06bb8a3a:hcs:华为云节点退役")
# Show the operator exactly which nodes are about to be processed.
echo "准备清理以下退役节点:"
for node_info in "${RETIRED_NODES[@]}"; do
  # Split "<id>:<name>:<reason>"; the last variable receives the remainder,
  # so a reason containing ':' is kept intact.
  IFS=':' read -r node_id node_name reason <<< "$node_info"
  echo " - $node_name ($node_id): $reason"
done
echo ""

# Ask for explicit confirmation; anything other than a single y/Y cancels.
# -r keeps backslashes in the answer literal. A failed read (EOF / closed
# stdin) is reported explicitly instead of aborting silently via `set -e`;
# an answer that arrived without a trailing newline is still accepted.
confirm=""
if ! read -r -p "确认要清理这些节点吗? (y/N): " confirm && [[ -z "$confirm" ]]; then
  echo "未能读取确认输入 (标准输入已关闭?), 操作已取消" >&2
  exit 1
fi
if [[ "$confirm" != [yY] ]]; then
  echo "操作已取消"
  exit 0
fi
#######################################
# Drain one node and mark it ineligible for scheduling.
# Arguments:
#   $1 - node ID passed to the nomad CLI
#   $2 - human-readable node name (used in messages only)
# Outputs:
#   Progress messages on stdout; a warning on stderr when a step fails.
# Returns:
#   0 if the node was processed successfully or the status query failed
#   (treated as "node absent"), 1 if the drain or eligibility step failed.
#######################################
cleanup_node() {
  local node_id=$1
  local node_name=$2
  local step_failed=0

  echo "处理节点: $node_name ($node_id)"

  # Any failure of the status query lands here, so an unreachable server or
  # a missing nomad binary is indistinguishable from an absent node.
  if ! nomad node status "$node_id" >/dev/null 2>&1; then
    echo " - 节点不存在或已被清理"
    return 0
  fi

  echo " - 节点存在,开始清理..."

  # Both steps are best-effort: a failure must not abort the whole run, but
  # it is remembered so that success is not reported falsely below.
  echo " - 确保节点已 drain..."
  nomad node drain -enable -yes "$node_id" || step_failed=1

  echo " - 禁用调度资格..."
  nomad node eligibility -disable "$node_id" || step_failed=1

  # Fixed grace period for allocations to move off the node.
  echo " - 等待任务迁移完成..."
  sleep 10

  if (( step_failed )); then
    echo " - 警告: 节点 $node_name 的部分清理步骤失败, 请手动检查" >&2
    return 1
  fi
  echo " - 节点 $node_name 已成功清理"
}

echo "开始清理退役节点..."
failed_nodes=()
for node_info in "${RETIRED_NODES[@]}"; do
  # Entries are "<id>:<name>:<reason>"; the reason is not needed here.
  IFS=':' read -r node_id node_name _ <<< "$node_info"
  if ! cleanup_node "$node_id" "$node_name"; then
    failed_nodes+=("$node_name")
  fi
  echo ""
done

# Keep the run best-effort (no non-zero exit), but make failures visible.
if (( ${#failed_nodes[@]} > 0 )); then
  echo "警告: 以下节点未能完全清理: ${failed_nodes[*]}" >&2
fi
# Closing summary: point the operator at the commands for verifying cluster
# state by hand, and note that removing node records is out of scope here.
printf '%s\n' \
  "=== 清理完成 ===" \
  "请手动验证集群状态:" \
  " nomad node status" \
  " nomad server members" \
  "" \
  "如需彻底删除节点记录,请联系管理员"