#!/bin/bash
#
# Nomad node user-data script.
# Automatically provisions a Nomad node; supports server and client modes.
#
# NOTE(review): the lowercase placeholders used below (nomad_version and
# friends) are never assigned in this script, so they are presumably filled in
# by a template renderer before the script runs -- confirm against the caller.

# -e aborts on the first failing command; pipefail makes a pipeline fail when
# any stage fails, so a broken download feeding a pipe is no longer ignored.
set -eo pipefail

# User-data runs without a TTY; keep apt/dpkg from stopping on a prompt.
export DEBIAN_FRONTEND=noninteractive

#######################################
# Print a timestamped log line.
# Arguments: $1 - message text
# Outputs:   "<YYYY-mm-dd HH:MM:SS> - <message>" on stdout
#######################################
log() {
  local message=$1
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$message"
}

log "开始 Nomad 节点配置..."

# Update the system
log "更新系统包..."
apt-get update
apt-get upgrade -y

# Install required tools
log "安装必要工具..."
apt-get install -y curl unzip wget gnupg software-properties-common

# Install Podman (used as the container runtime)
log "安装 Podman..."
# /etc/os-release provides VERSION_ID, which selects the repository path.
. /etc/os-release
KUBIC_REPO="https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_${VERSION_ID}"
echo "deb $KUBIC_REPO/ /" | tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
# -f turns an HTTP error into a non-zero exit instead of piping an error page
# into apt-key; together with pipefail the script then stops here.
# NOTE(review): apt-key is deprecated upstream; consider a signed-by keyring.
curl -fsSL "$KUBIC_REPO/Release.key" | apt-key add -
apt-get update
apt-get install -y podman

# Configure Podman
log "配置 Podman..."
mkdir -p /etc/containers
printf '%s\n%s\n' "[registries.search]" "registries = ['docker.io']" > /etc/containers/registries.conf

# Download and install Nomad
log "安装 Nomad..."
NOMAD_VERSION="${nomad_version}"
NOMAD_ZIP="nomad_${NOMAD_VERSION}_linux_amd64.zip"
NOMAD_URL="https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/${NOMAD_ZIP}"
NOMAD_SHA256_URL="https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS"

cd /tmp
# Download into a fresh private directory: with fixed names in /tmp a re-run
# makes wget save "<name>.1", and the stale first download would be the one
# that gets verified and unpacked.
NOMAD_TMP=$(mktemp -d)
wget -q -P "$NOMAD_TMP" "$NOMAD_URL" "$NOMAD_SHA256_URL"
# The checksum file lists bare file names, so verify from inside the directory.
# The subshell keeps the working directory of the main script at /tmp.
(cd "$NOMAD_TMP" && sha256sum -c "nomad_${NOMAD_VERSION}_SHA256SUMS" --ignore-missing)
# Extract only the nomad binary so other archive members do not land in
# /usr/local/bin.
unzip -o "$NOMAD_TMP/$NOMAD_ZIP" nomad -d /usr/local/bin/
rm -rf -- "$NOMAD_TMP"
chmod +x /usr/local/bin/nomad

# Create the Nomad user and directories
log "创建 Nomad 用户和目录..."
# Create the nomad system account only when it is missing, so re-running the
# script does not abort on "user already exists".
if ! id -u nomad >/dev/null 2>&1; then
  useradd --system --home /etc/nomad.d --shell /bin/false nomad
fi
# /opt/nomad/plugins is the plugin_dir referenced by the agent configuration
# written below, so it is created together with the other directories.
mkdir -p /opt/nomad/data /opt/nomad/plugins /etc/nomad.d /var/log/nomad
chown -R nomad:nomad /opt/nomad /etc/nomad.d /var/log/nomad

#######################################
# Detect this host's IP address.
# Tries, in order: the 169.254.169.254 metadata endpoint, the
# metadata.google.internal endpoint, the source address of the route towards
# 8.8.8.8, and the first address printed by `hostname -I`. A source counts only
# when it yields a non-empty value; each probe is time-limited and uses -f so
# an HTTP error body is never mistaken for an address.
# Outputs: the detected address on stdout (empty when every source fails)
#######################################
detect_bind_addr() {
  local addr
  addr=$(curl -sf --max-time 2 \
    http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null || true)
  if [ -z "$addr" ]; then
    addr=$(curl -sf --max-time 2 -H "Metadata-Flavor: Google" \
      http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/ip \
      2>/dev/null || true)
  fi
  if [ -z "$addr" ]; then
    # Look for the token after "src" rather than a fixed column: the column
    # shifts when the route has no "via" hop.
    addr=$(ip route get 8.8.8.8 2>/dev/null \
      | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}' || true)
  fi
  if [ -z "$addr" ]; then
    addr=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
  fi
  printf '%s\n' "$addr"
}

# Determine the local IP address
if [ "${bind_addr}" = "auto" ]; then
  BIND_ADDR=$(detect_bind_addr)
else
  BIND_ADDR="${bind_addr}"
fi

log "检测到 IP 地址: $BIND_ADDR"
# NOTE(review): BIND_ADDR is only logged; nothing below writes it into
# nomad.hcl. Confirm whether bind_addr/advertise settings were intended.

# Create the Nomad configuration file
log "创建 Nomad 配置文件..."
# The file carries the Consul token and the gossip key, so create it with
# restrictive permissions first; the redirection below keeps that mode.
install -m 640 -o nomad -g nomad /dev/null /etc/nomad.d/nomad.hcl
# Changes relative to the previous layout, per the Nomad agent configuration
# reference: "encrypt" is a parameter of the server block, not a top-level key,
# and there is no top-level "network" block (bridge mode is chosen per job), so
# the former moved into server {} and the latter was dropped.
cat > /etc/nomad.d/nomad.hcl << EOF
# Nomad 配置文件
datacenter = "${datacenter}"
data_dir = "/opt/nomad/data"
log_level = "INFO"

# 客户端配置
client {
  enabled = true
  servers = ["${nomad_servers}"]
  options {
    "driver.raw_exec.enable" = "1"
    "driver.podman.enabled" = "1"
  }
}

# 服务器配置
server {
  enabled = ${server_enabled}
  bootstrap_expect = ${bootstrap_expect}
  # 加密设置
  encrypt = "${nomad_encrypt_key}"
}

# Consul 集成
consul {
  address = "127.0.0.1:8500"
  token = "${consul_token}"
}

# UI 配置
ui {
  enabled = true
}

# 插件目录
plugin_dir = "/opt/nomad/plugins"
EOF

# Create the systemd unit file
log "创建 systemd 服务文件..."
# The heredoc is unquoted, so MAINPID is escaped to reach the unit file
# literally instead of being expanded by this shell.
cat > /etc/systemd/system/nomad.service << EOF
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/
Wants=network-online.target
After=network-online.target

[Service]
ExecReload=/bin/kill -HUP \$MAINPID
ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d
KillMode=process
KillSignal=SIGINT
LimitNOFILE=65536
LimitNPROC=infinity
Restart=on-failure
RestartSec=2
StartLimitBurst=3
StartLimitInterval=10
TasksMax=infinity

[Install]
WantedBy=multi-user.target
EOF

# Start the Nomad service
log "启动 Nomad 服务..."
systemctl daemon-reload
systemctl enable nomad
systemctl start nomad

# Wait for the service to come up
log "等待 Nomad 服务启动..."
sleep 10

# Verify the Nomad service state; on failure dump the unit's journal and abort.
if systemctl is-active --quiet nomad; then
  log "Nomad 服务启动成功"
else
  log "Nomad 服务启动失败"
  journalctl -u nomad --no-pager
  exit 1
fi

# Create the Nomad status check script
log "创建状态检查脚本..."
# The generated script parses JSON with jq, which nothing earlier installs.
# Installation is best-effort: Nomad is already running at this point, so a
# missing helper tool should not fail the whole provisioning run.
if ! command -v jq >/dev/null 2>&1; then
  apt-get install -y jq || log "警告: jq 安装失败,check-nomad.sh 将无法检查节点状态"
fi
# Quoted delimiter: the body is written out literally, with no expansion by
# this shell.
# In the member count, "grep -c" already prints 0 when nothing matches (and
# exits 1); the previous '|| echo "0"' appended a second 0, producing a
# two-line value that broke the numeric comparison. "|| true" only neutralises
# the exit status.
cat > /usr/local/bin/check-nomad.sh << 'EOF'
#!/bin/bash
# Nomad 状态检查脚本

set -e

# 检查 Nomad 服务状态
if systemctl is-active --quiet nomad; then
  echo "Nomad 服务运行正常"
else
  echo "Nomad 服务未运行"
  exit 1
fi

# 检查 Nomad 节点状态
NODE_STATUS=$(nomad node status -self -json | jq -r '.Status')
if [ "$NODE_STATUS" = "ready" ]; then
  echo "Nomad 节点状态: $NODE_STATUS"
else
  echo "Nomad 节点状态异常: $NODE_STATUS"
  exit 1
fi

# 检查 Nomad 集群成员
SERVER_MEMBERS=$(nomad server members 2>/dev/null | grep -c "alive" || true)
if [ "$SERVER_MEMBERS" -gt 0 ]; then
  echo "Nomad 集群服务器成员: $SERVER_MEMBERS"
else
  echo "未找到 Nomad 集群服务器成员"
  exit 1
fi

echo "Nomad 状态检查完成"
EOF

chmod +x /usr/local/bin/check-nomad.sh

# Set up firewall rules
log "设置防火墙规则..."
if command -v ufw >/dev/null 2>&1; then
  # Allow SSH before turning the firewall on; enabling ufw with only the Nomad
  # ports open can cut off remote access to the host.
  # NOTE(review): assumes sshd listens on the default port 22 -- confirm.
  ufw allow 22/tcp    # SSH
  ufw allow 4646/tcp  # Nomad HTTP
  ufw allow 4647/tcp  # Nomad RPC
  ufw allow 4648/tcp  # Nomad Serf
  ufw allow 4648/udp  # Nomad Serf gossip also uses UDP
  ufw --force enable
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --permanent --add-port=4646/tcp
  firewall-cmd --permanent --add-port=4647/tcp
  firewall-cmd --permanent --add-port=4648/tcp
  firewall-cmd --permanent --add-port=4648/udp
  firewall-cmd --reload
fi

# Create a simple example Nomad job
log "创建示例任务..."
mkdir -p /opt/nomad/examples
# NOTE(review): the job lists fixed datacenters dc1/dc2/dc3 while the agent's
# datacenter comes from a placeholder; the job will not place unless they match.
cat > /opt/nomad/examples/redis.nomad << 'EOF'
job "redis" {
  datacenters = ["dc1", "dc2", "dc3"]
  type = "service"
  priority = 50

  update {
    stagger = "10s"
    max_parallel = 1
  }

  group "redis" {
    count = 1

    restart {
      attempts = 3
      delay = "30s"
      interval = "5m"
      mode = "fail"
    }

    task "redis" {
      driver = "podman"

      config {
        image = "redis:alpine"
        ports = ["redis"]
      }

      resources {
        cpu = 200 # MHz
        memory = 128 # MB
        network {
          mbits = 10
          port "redis" {
            static = 6379
          }
        }
      }

      service {
        name = "redis"
        port = "redis"
        check {
          type = "tcp"
          interval = "10s"
          timeout = "2s"
        }
      }
    }
  }
}
EOF

log "Nomad 节点配置完成"
# Resolve the address for the UI hint. The metadata probe is time-limited and
# uses -f so it cannot hang or return an error page on hosts without that
# endpoint; fall back to the first local address when it yields nothing.
PUBLIC_IP=$(curl -sf --max-time 2 \
  http://169.254.169.254/latest/meta-data/public-ipv4 2>/dev/null || true)
if [ -z "$PUBLIC_IP" ]; then
  PUBLIC_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
fi
log "Nomad UI 可通过 http://$PUBLIC_IP:4646 访问"