# mgmt/configuration/playbooks/add-warden-to-nomad-cluster...
#
# 202 lines
# 5.2 KiB
# YAML

---
# Playbook: join the `warden` host to an existing Nomad cluster as a client.
#
# Flow: install the Nomad binary when none is found, create the config/data/
# plugin directories, write the client configuration and a systemd unit,
# (re)start the agent, then report whether `nomad node status -self` succeeds.
- name: Add Warden Server as Nomad Client to Cluster
  hosts: warden
  become: true
  gather_facts: true

  vars:
    # Release that is downloaded when no `nomad` binary is found on the host.
    nomad_version: "1.10.5"
    nomad_release_url: "https://releases.hashicorp.com/nomad/{{ nomad_version }}"
    nomad_plugin_dir: "/opt/nomad/plugins"
    nomad_datacenter: "dc1"
    # NOTE(review): nomad_region is declared but not referenced by any task below.
    nomad_region: "global"
    # RPC endpoints (port 4647) of the Nomad servers this client registers with.
    nomad_servers:
      - "100.117.106.136:4647"
      - "100.116.80.94:4647"
      - "100.97.62.111:4647"
      - "100.116.112.45:4647"
      - "100.84.197.26:4647"

  tasks:
    - name: 显示当前处理的节点
      debug:
        msg: "🔧 将 warden 服务器添加为 Nomad 客户端: {{ inventory_hostname }}"

    # `command -v` is a POSIX shell builtin, so the probe does not depend on the
    # optional `which` utility. stdout is the binary path, or "not_found".
    - name: 检查 Nomad 是否已安装
      shell: command -v nomad || echo "not_found"
      register: nomad_check
      changed_when: false

    - name: 下载并安装 Nomad
      when: nomad_check.stdout == "not_found"
      block:
        # The archive is verified against the SHA256SUMS file published next to
        # the release, so a corrupted or tampered download is rejected.
        - name: "下载 Nomad {{ nomad_version }}"
          get_url:
            url: "{{ nomad_release_url }}/nomad_{{ nomad_version }}_linux_amd64.zip"
            dest: "/tmp/nomad.zip"
            checksum: "sha256:{{ nomad_release_url }}/nomad_{{ nomad_version }}_SHA256SUMS"
            mode: '0644'

        # NOTE: unarchive needs `unzip` on the target host to handle .zip files.
        - name: 解压并安装 Nomad
          unarchive:
            src: "/tmp/nomad.zip"
            dest: "/usr/local/bin/"
            remote_src: true
            owner: root
            group: root
            mode: '0755'

        - name: 清理临时文件
          file:
            path: "/tmp/nomad.zip"
            state: absent

    # Read-only query; its first output line is shown in the final summary.
    - name: 验证 Nomad 安装
      command: nomad version
      register: nomad_version_output
      changed_when: false

    - name: 创建 Nomad 配置目录
      file:
        path: /etc/nomad.d
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The systemd unit written below runs the agent as root and this playbook
    # never creates a `nomad` account, so these directories are owned by root.
    # The previous nomad:nomad ownership could only work on hosts that already
    # had that user, and the resulting failure was hidden by ignore_errors.
    - name: 创建 Nomad 数据目录
      file:
        path: /opt/nomad/data
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: 创建 Nomad 插件目录
      file:
        path: "{{ nomad_plugin_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # Source address the kernel would use for outbound traffic; the task fails
    # (grep exits non-zero) when no such address can be extracted.
    - name: 获取服务器 IP 地址
      shell: |
        ip route get 1.1.1.1 | grep -oP 'src \K\S+'
      register: server_ip_result
      changed_when: false

    - name: 设置服务器 IP 变量
      set_fact:
        server_ip: "{{ server_ip_result.stdout }}"

    # Best-effort stop: errors (for example, the unit does not exist yet on a
    # first run) are deliberately ignored.
    - name: 停止 Nomad 服务(如果正在运行)
      systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    - name: 创建 Nomad 客户端配置文件
      copy:
        content: |
          # Nomad Client Configuration for warden
          datacenter = "{{ nomad_datacenter }}"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "{{ server_ip }}"
          server {
            enabled = false
          }
          client {
            enabled = true
            servers = [
              {% for server in nomad_servers %}"{{ server }}"{% if not loop.last %}, {% endif %}{% endfor %}
            ]
          }
          plugin_dir = "{{ nomad_plugin_dir }}"
          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }
          consul {
            address = "127.0.0.1:8500"
          }
        dest: /etc/nomad.d/nomad.hcl
        owner: root
        group: root
        mode: '0644'

    # Abort before touching the service if the rendered configuration is invalid.
    - name: 验证 Nomad 配置
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: nomad_validate
      changed_when: false
      failed_when: nomad_validate.rc != 0

    - name: 创建 Nomad systemd 服务文件
      copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/docs/
          Wants=network-online.target
          After=network-online.target
          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          KillSignal=SIGINT
          TimeoutStopSec=5
          LimitNOFILE=65536
          LimitNPROC=32768
          Restart=on-failure
          RestartSec=2
          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: '0644'

    - name: 重新加载 systemd 配置
      systemd:
        daemon_reload: true

    - name: 启动并启用 Nomad 服务
      systemd:
        name: nomad
        state: started
        enabled: true

    # The HTTP API listens on bind_addr (server_ip), port 4646.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ server_ip }}"
        delay: 5
        timeout: 60

    # bind_addr pins the HTTP API to server_ip, so the CLI default address
    # (http://127.0.0.1:4646) would not reach the agent; point NOMAD_ADDR at the
    # bound address instead. Failure stays non-fatal: the summary below then
    # reports PENDING rather than aborting the play.
    - name: 检查 Nomad 客户端状态
      command: nomad node status -self
      environment:
        NOMAD_ADDR: "http://{{ server_ip }}:4646"
      register: nomad_node_status
      retries: 5
      delay: 5
      until: nomad_node_status.rc == 0
      changed_when: false
      ignore_errors: true

    - name: 显示 Nomad 客户端配置结果
      debug:
        msg: |
          ✅ warden 服务器已成功配置为 Nomad 客户端
          📦 Nomad 版本: {{ nomad_version_output.stdout.split('\n')[0] }}
          🌐 服务器 IP: {{ server_ip }}
          🏗️ 数据中心: {{ nomad_datacenter }}
          📊 客户端状态: {{ 'SUCCESS' if nomad_node_status.rc == 0 else 'PENDING' }}
          🚀 warden 现在是 Nomad 集群的一部分