feat: 迁移基础设施到Nomad和Podman并重构配置
refactor: 更新Ansible Playbooks以支持Nomad集群
docs: 更新文档反映从Docker Swarm到Nomad的迁移
ci: 更新Gitea工作流以支持Podman构建
test: 添加Nomad作业测试文件
build: 更新Makefile以支持Podman操作
chore: 清理旧的Docker Swarm相关文件和配置
This commit is contained in:
69
scripts/utilities/cleanup-retired-nodes.sh
Normal file
69
scripts/utilities/cleanup-retired-nodes.sh
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
# Retired-node cleanup script.
# Created:       2025-09-27
# Scheduled run: 2025-10-27 (one month later)
#
# For every entry in RETIRED_NODES that `nomad node status` can still find,
# enable drain on the node and mark it ineligible for scheduling. Node records
# themselves are not deleted by this script.
#
# Environment:
#   NOMAD_ADDR  Nomad HTTP API address; falls back to the default below.

set -e

# Export the address: a plain shell variable is not inherited by child
# processes, so without this the nomad CLI would ignore the default printed
# below and use its own built-in address instead.
export NOMAD_ADDR="${NOMAD_ADDR:-http://100.116.158.95:4646}"

echo "=== 清理退役节点脚本 ==="
echo "执行时间: $(date)"
echo "Nomad 地址: $NOMAD_ADDR"
echo ""

# Retired nodes, one "node_id:node_name:reason" entry per node.
RETIRED_NODES=(
  "583f1b77:semaphore:已转为纯server"
  "06bb8a3a:hcs:华为云节点退役"
)

echo "准备清理以下退役节点:"
for node_info in "${RETIRED_NODES[@]}"; do
  IFS=':' read -r node_id node_name reason <<< "$node_info"
  echo " - $node_name ($node_id): $reason"
done
echo ""

# Anything other than a single y/Y cancels the run.
read -r -p "确认要清理这些节点吗? (y/N): " confirm
if [[ $confirm != [yY] ]]; then
  echo "操作已取消"
  exit 0
fi

echo "开始清理退役节点..."

for node_info in "${RETIRED_NODES[@]}"; do
  IFS=':' read -r node_id node_name reason <<< "$node_info"

  echo "处理节点: $node_name ($node_id)"

  # Only act on nodes the cluster still reports.
  if nomad node status "$node_id" >/dev/null 2>&1; then
    echo " - 节点存在,开始清理..."
    node_ok=1

    # Both steps stay best-effort (a failure does not abort the script),
    # but a failure is reported instead of being silently discarded.
    echo " - 确保节点已 drain..."
    if ! nomad node drain -enable -yes "$node_id"; then
      echo " - 警告: 节点 $node_name drain 失败" >&2
      node_ok=0
    fi

    echo " - 禁用调度资格..."
    if ! nomad node eligibility -disable "$node_id"; then
      echo " - 警告: 节点 $node_name 禁用调度资格失败" >&2
      node_ok=0
    fi

    # Give allocations some time to move off the node.
    echo " - 等待任务迁移完成..."
    sleep 10

    if (( node_ok )); then
      echo " - 节点 $node_name 已成功清理"
    else
      echo " - 节点 $node_name 清理未完全成功,请手动检查" >&2
    fi
  else
    echo " - 节点不存在或已被清理"
  fi
  echo ""
done

echo "=== 清理完成 ==="
echo "请手动验证集群状态:"
echo " nomad node status"
echo " nomad server members"
echo ""
echo "如需彻底删除节点记录,请联系管理员"
|
||||
39
scripts/utilities/purge_stale_nodes.sh
Executable file
39
scripts/utilities/purge_stale_nodes.sh
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/bash
# Purge stale Nomad nodes.
#
# Prints the node list, selects nodes whose Status is "down" and whose Name is
# exactly one of ch3, ch2, ash1d, ash2e or semaphore, POSTs to
# /v1/node/<id>/purge for each of them, then prints the node list again.
#
# Environment:
#   NOMAD_TOKEN  Optional. When set, it is sent as the X-Nomad-Token header
#                on every purge request.
set -euo pipefail

ADDR="http://100.81.26.3:4646"

# Optional curl header arguments. An array keeps "X-Nomad-Token: <token>" as a
# single argument despite the embedded space, and needs no eval to disappear
# cleanly when no token is configured.
HDR=()
if [ -n "${NOMAD_TOKEN:-}" ]; then
  HDR=(-H "X-Nomad-Token: $NOMAD_TOKEN")
fi

echo "--- 节点列表 (Before) ---"
nomad node status -address="$ADDR"

echo
echo "--- 开始查找需要清理的旧节点 ---"

# Use jq on the JSON node list for an exact match.
# Condition: Status is "down" AND Name is in the list.
IDS_TO_PURGE=$(nomad node status -address="$ADDR" -json \
  | jq -r '.[] | select(.Status == "down" and (.Name | test("^(ch3|ch2|ash1d|ash2e|semaphore)$"))) | .ID')

if [[ -z "$IDS_TO_PURGE" ]]; then
  echo "✅ 未找到符合条件的 'down' 状态节点,无需清理。"
else
  echo "以下是待清理的节点 ID:"
  echo "$IDS_TO_PURGE"
  echo

  # One ID per line; call the HTTP API purge endpoint for each.
  while IFS= read -r NODE_ID; do
    [[ -n "$NODE_ID" ]] || continue
    echo "===> 正在清理节点: $NODE_ID"
    # ${HDR[@]+...} expands to nothing for an empty array without tripping
    # `set -u` on older bash versions. stdin is redirected so curl can never
    # consume the ID list feeding this loop.
    curl -sS -XPOST ${HDR[@]+"${HDR[@]}"} \
      -w ' -> HTTP %{http_code}\n' \
      "$ADDR/v1/node/$NODE_ID/purge" </dev/null
  done <<< "$IDS_TO_PURGE"
fi

echo
echo "--- 节点列表 (After) ---"
nomad node status -address="$ADDR"
|
||||
Reference in New Issue
Block a user