feat: 重构项目目录结构并添加多个功能

- 新增脚本和配置文件用于管理Nomad节点和NFS存储
- 添加多个Ansible playbook用于配置和调试Nomad集群
- 新增Nomad job文件用于测试Podman和NFS功能
- 重构playbooks目录结构,按功能分类
- 更新Nomad客户端和服务端配置模板
- 添加SSH密钥分发和配置脚本
- 新增多个调试和修复问题的playbook
This commit is contained in:
2025-09-27 13:05:30 +00:00
parent a06e5e1a00
commit 44b098bd20
98 changed files with 1141 additions and 2 deletions

69
scripts/deploy-nfs-for-nomad.sh Executable file
View File

@@ -0,0 +1,69 @@
#!/bin/bash
# Deployment script for the Nomad cluster NFS configuration.
# Handles nodes case by case, according to container type and location.

# Stop at the first unguarded command that fails.
set -e

echo "🚀 开始部署Nomad集群NFS配置..."

# ANSI escape sequences used to colour the log-level prefixes.
NC='\033[0m' # reset / no colour
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'

# Shared formatter: prints "<colour>[<LEVEL>]<reset> <message>" to stdout.
#   $1 - colour escape sequence
#   $2 - level tag
#   $3 - message text
_log_line() {
  echo -e "${1}[${2}]${NC} ${3}"
}

# Level-specific wrappers; each takes the message as $1.
log_info() {
  _log_line "${GREEN}" "INFO" "$1"
}

log_warn() {
  _log_line "${YELLOW}" "WARN" "$1"
}

log_error() {
  _log_line "${RED}" "ERROR" "$1"
}
# Pre-flight: every inventory below is addressed by a relative path, so make
# sure all of them exist before any node is touched. Checking only the first
# one would let step 1 change nodes and then leave step 2 without an inventory.
for required_inventory in \
  "configuration/inventories/production/inventory.ini" \
  "configuration/inventories/production/nomad-cluster.ini"; do
  if [ ! -f "$required_inventory" ]; then
    log_error "缺少inventory文件: ${required_inventory}"
    log_error "请在mgmt项目根目录运行此脚本"
    exit 1
  fi
done

# 1. Configure the NFS mount on all nodes.
log_info "步骤1: 为所有节点配置NFS挂载 (根据容器类型和地理位置)"
ansible-playbook -i configuration/inventories/production/inventory.ini \
  playbooks/setup-nfs-by-container-type.yml

# 2. Configure NFS volume support on the Nomad clients.
log_info "步骤2: 配置Nomad客户端支持NFS卷"
ansible-playbook -i configuration/inventories/production/nomad-cluster.ini \
  playbooks/setup-nomad-nfs-client.yml

# 3. Verify the NFS mount on every node except the one excluded by --limit.
log_info "步骤3: 验证所有节点的NFS挂载状态"
ansible all -i configuration/inventories/production/inventory.ini \
  -m shell -a "df -h /mnt/fnsync 2>/dev/null || echo 'NFS未挂载'" \
  --limit '!snail'

# 4. Verify the Nomad client configuration.
log_info "步骤4: 验证Nomad客户端配置"
ansible nomad_clients -i configuration/inventories/production/nomad-cluster.ini \
  -m shell -a "nomad node status -self 2>/dev/null || echo 'Nomad未运行'"

# 5. Optionally deploy the sample NFS job.
# -r keeps backslashes in the answer literal. `|| true` matters because the
# script runs under `set -e`: when stdin is closed or hits EOF, read returns
# non-zero and would otherwise abort before the summary below is printed.
deploy_example=""
read -r -p "是否部署示例NFS任务(y/n): " deploy_example || true
case "$deploy_example" in
  y | Y)
    log_info "部署示例NFS任务..."
    nomad run jobs/nomad-nfs-multi-type.nomad

    echo "等待任务启动..."
    sleep 10
    nomad job status nfs-multi-type-example
    ;;
esac

log_info "✅ NFS配置部署完成!"
echo ""
echo "📋 使用说明:"
echo "1. NFS挂载点: /mnt/fnsync"
echo "2. 本地LXC容器: 直接使用挂载目录"
echo "3. 海外PVE容器: 使用优化参数挂载"
echo "4. Nomad作业: 使用host volume 'nfs-shared'"
echo ""
echo "🔧 手动验证命令:"
echo " - 检查NFS挂载: df -h /mnt/fnsync"
echo " - 检查Nomad状态: nomad node status"
echo " - 运行NFS任务: nomad run jobs/nomad-nfs-multi-type.nomad"

View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Distribute an SSH public key to root@<node> on every Nomad node.
#
# Requires an already-working way to log in as root on each node.
# Environment:
#   PUB_KEY_FILE  public key file to distribute
#                 (default: /home/ben/.ssh/id_ed25519.pub)
#
# Per-node failures are reported and the loop continues (best effort); a
# missing or empty key file is fatal because there is nothing to distribute.
set -u

echo "分发SSH公钥到Nomad节点..."

# Target nodes
NODES=(
  "100.81.26.3"     # ash1d.global
  "100.103.147.94"  # ash2e.global
  "100.90.159.68"   # ch2.global
  "100.86.141.112"  # ch3.global
  "100.117.106.136" # master
  "100.116.80.94"   # ash3c
)

PUB_KEY_FILE=${PUB_KEY_FILE:-/home/ben/.ssh/id_ed25519.pub}

# Without this check a missing key file would append an empty line to
# authorized_keys on every node and still be reported as a success.
if [[ ! -r "$PUB_KEY_FILE" ]]; then
  echo "错误: 无法读取公钥文件: $PUB_KEY_FILE" >&2
  exit 1
fi
PUB_KEY=$(<"$PUB_KEY_FILE")
if [[ -z "$PUB_KEY" ]]; then
  echo "错误: 公钥文件为空: $PUB_KEY_FILE" >&2
  exit 1
fi

# Command executed on each node. The key arrives on stdin instead of being
# spliced into the command line, so no quoting of the key text is needed.
# It is appended only when an identical line is not already present, which
# keeps repeated runs from piling up duplicates.
# NOTE(review): assumes root's login shell on the nodes is POSIX-compatible.
REMOTE_INSTALL='umask 077 &&
mkdir -p /root/.ssh &&
key=$(cat) &&
{ grep -qxF -- "$key" /root/.ssh/authorized_keys 2>/dev/null ||
  printf "%s\n" "$key" >> /root/.ssh/authorized_keys; }'

for NODE in "${NODES[@]}"; do
  echo "正在配置节点: $NODE"

  # Record the node's host key only if it is not known yet, so known_hosts
  # does not gain a duplicate entry on every run.
  # NOTE(security): ssh-keyscan accepts whatever key the network returns
  # (trust on first use); verify fingerprints out of band where it matters.
  if ! ssh-keygen -F "$NODE" >/dev/null 2>&1; then
    ssh-keyscan -H "$NODE" >> ~/.ssh/known_hosts 2>/dev/null
  fi

  # Install the key using whatever authentication already works.
  # ssh's stderr is discarded on purpose: only the per-node verdict is shown.
  if ssh "root@${NODE}" "$REMOTE_INSTALL" <<<"$PUB_KEY" 2>/dev/null; then
    echo "$NODE 配置成功"
  else
    echo "$NODE 配置失败"
  fi
done

echo "密钥分发完成"

View File

@@ -0,0 +1,22 @@
---
# Installs one fixed SSH public key for root on every host in the
# nomad_nodes group, then runs the ping module against each host.
- name: 设置Nomad节点SSH密钥认证
  hosts: nomad_nodes
  become: true

  vars:
    # Public key that gets authorised for root.
    ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIddJVPEvFRtzhWwYjr21lKTar+d7R5Kn/6bhd2s231 ben@ch2"

  tasks:
    # /root/.ssh must exist with mode 0700 before the key is added.
    - name: 确保.ssh目录存在
      file:
        state: directory
        path: /root/.ssh
        mode: "0700"

    # Add the key to root's authorized_keys.
    - name: 添加SSH公钥到authorized_keys
      authorized_key:
        key: "{{ ssh_public_key }}"
        user: root
        state: present

    # Final reachability check using the ping module.
    - name: 测试SSH连接
      ping:

86
scripts/verify-nfs-config.sh Executable file
View File

@@ -0,0 +1,86 @@
#!/bin/bash
# Verification script for the NFS configuration.

# Stop at the first unguarded command that fails.
set -e

echo "🔍 验证NFS配置状态..."

# ANSI escape sequences used to colour the log-level prefixes.
NC='\033[0m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'

# Shared formatter: prints "<colour>[<LEVEL>]<reset> <message>" to stdout.
#   $1 - colour escape sequence
#   $2 - level tag
#   $3 - message text
_log_line() {
  echo -e "${1}[${2}]${NC} ${3}"
}

# Level-specific wrappers; each takes the message as $1.
log_info() {
  _log_line "${GREEN}" "INFO" "$1"
}

log_warn() {
  _log_line "${YELLOW}" "WARN" "$1"
}

log_error() {
  _log_line "${RED}" "ERROR" "$1"
}
# 1. Check the local NFS mount.
log_info "1. 检查本地NFS挂载状态"
# Match the mount-point column exactly (last field, anchored at end of line);
# a plain substring match would also accept paths such as /mnt/fnsync-old.
# -P keeps each filesystem on a single line so the anchor is reliable.
# `|| true` keeps `set -e` from aborting when nothing matches.
nfs_mount_line=$(df -hP 2>/dev/null | grep -E '[[:space:]]/mnt/fnsync$' || true)
if [ -n "$nfs_mount_line" ]; then
  log_info "✅ 本地NFS挂载正常"
  printf '%s\n' "$nfs_mount_line"
else
  log_error "❌ 本地NFS未挂载"
fi

# 2. Check that the expected project files exist.
log_info "2. 检查配置文件"
config_files=(
  "playbooks/setup-nfs-by-container-type.yml"
  "playbooks/setup-nomad-nfs-client.yml"
  "jobs/nomad-nfs-multi-type.nomad"
  "scripts/deploy-nfs-for-nomad.sh"
  "docs/nomad-nfs-setup.md"
)
for file in "${config_files[@]}"; do
  if [ -f "$file" ]; then
    log_info "✅ $file 存在"
  else
    log_error "❌ $file 不存在"
  fi
done

# 3. Check the Ansible inventory and list its first group headers.
log_info "3. 检查Ansible配置"
if [ -f "configuration/inventories/production/inventory.ini" ]; then
  log_info "✅ inventory.ini 存在"
  echo "节点分类:"
  grep -E "\[.*\]" configuration/inventories/production/inventory.ini | head -10
else
  log_error "❌ inventory.ini 不存在"
fi

# 4. Check the Nomad service.
log_info "4. 检查Nomad服务"
if command -v nomad &> /dev/null; then
  if nomad node status &> /dev/null; then
    log_info "✅ Nomad服务运行正常"
    # The pipeline's status is grep's. If `-self` prints nothing that matches,
    # grep returns 1 and `set -e` would silently end the whole verification
    # here, so that outcome is downgraded to a warning.
    nomad node status -self | grep -E "(Name|Status|Datacenter)" \
      || log_warn "⚠️ 无法获取本节点状态"
  else
    log_warn "⚠️ Nomad服务未运行或无法连接"
  fi
else
  log_warn "⚠️ Nomad命令未安装"
fi

# 5. Check connectivity to the NFS server.
log_info "5. 检查NFS服务器连通性"
if ping -c 1 -W 3 snail &> /dev/null; then
  log_info "✅ NFS服务器 snail 可达"
  # The export list is optional: shown only when showmount is installed.
  if command -v showmount &> /dev/null; then
    showmount -e snail 2>/dev/null || log_warn "⚠️ 无法获取NFS导出列表"
  fi
else
  log_error "❌ NFS服务器 snail 不可达"
fi

echo ""
echo "📊 验证完成!"
echo ""
echo "🚀 下一步操作:"
echo "1. 运行部署脚本: ./scripts/deploy-nfs-for-nomad.sh"
echo "2. 查看详细文档: cat docs/nomad-nfs-setup.md"
echo "3. 测试NFS功能: nomad run jobs/nomad-nfs-multi-type.nomad"