mgmt/deployment/ansible/playbooks/configure/configure-nomad-tailscale.yml

226 lines
6.1 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# Configure every host in the `nomad_cluster` group to run a Nomad agent that
# talks to its peers over the node's Tailscale address.
#
# Variables read from inventory / extra-vars:
#   nomad_role             - "server" or "client"; selects which agent config
#                            is written (any other value writes no config).
#   nomad_encrypt_key      - written to the server config's `encrypt` setting.
#   nomad_datacenter       - optional, defaults to "dc1".
#   nomad_bootstrap_expect - optional, defaults to 4.
#   tailscale_ip           - optional fallback when `tailscale ip` does not
#                            return a 100.x address.
#   ansible_host           - last-resort fallback address.
- name: 配置 Nomad 集群使用 Tailscale 网络通讯
  hosts: nomad_cluster
  become: true
  gather_facts: false

  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"
    nomad_data_dir: "/opt/nomad/data"
    # Single source of truth for the server addresses used by both the
    # server `retry_join` list and the client `servers` list.
    nomad_server_ips:
      - "100.116.158.95"  # semaphore
      - "100.103.147.94"  # ash2e
      - "100.81.26.3"  # ash1d
      - "100.90.159.68"  # ch2

  tasks:
    # Read-only probe. `-4` prints only the IPv4 address, so no shell pipe is
    # needed. Hosts without a working Tailscale client are expected and fall
    # back to inventory data in the next task, hence `failed_when: false`.
    - name: 获取当前节点的 Tailscale IP
      command: tailscale ip -4
      register: current_tailscale_ip
      changed_when: false
      failed_when: false
      check_mode: false

    # Use the detected address only when it starts with "100."; otherwise use
    # the host's `tailscale_ip` inventory variable, then `ansible_host`.
    - name: 计算用于 Nomad 的地址(优先 Tailscale回退到 inventory 或 ansible_host
      vars:
        _detected_ip: "{{ current_tailscale_ip.stdout | default('') | trim }}"
        _fallback_addr: "{{ hostvars[inventory_hostname].tailscale_ip | default(ansible_host) }}"
      set_fact:
        node_addr: "{{ _detected_ip if _detected_ip is match('^100\\.') else _fallback_addr }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The server config embeds the gossip encryption key, so the file is
    # root-only and diff output is suppressed to keep the key out of logs.
    # NOTE(review): the bootstrap_expect default of 4 matches the four entries
    # in nomad_server_ips; confirm an even server count is intended.
    - name: 生成 Nomad 服务器配置(使用 Tailscale
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0600'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "{{ nomad_data_dir }}"
          log_level = "INFO"
          bind_addr = "{{ node_addr }}"

          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect | default(4) }}
            retry_join = [{% for ip in nomad_server_ips %}"{{ ip }}"{{ ", " if not loop.last else "" }}{% endfor %}]
            encrypt = "{{ nomad_encrypt_key }}"
          }

          client {
            enabled = false
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "{{ node_addr }}:8500"
          }
      diff: false
      when: nomad_role == "server"
      notify: restart nomad

    # Clients reach the servers on the RPC port (4647).
    - name: 生成 Nomad 客户端配置(使用 Tailscale
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "{{ nomad_data_dir }}"
          log_level = "INFO"
          bind_addr = "{{ node_addr }}"

          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = false
          }

          client {
            enabled = true
            network_interface = "tailscale0"
            cpu_total_compute = 0
            servers = [{% for ip in nomad_server_ips %}"{{ ip }}:4647"{{ ", " if not loop.last else "" }}{% endfor %}]
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "{{ node_addr }}:8500"
          }
      when: nomad_role == "client"
      notify: restart nomad

    # Read-only lookup: try PATH first, then search /usr. `command -v` is the
    # POSIX replacement for `which`; `! -type d` keeps a directory named
    # "nomad" from being picked up as the binary. The pipeline's exit status
    # is not meaningful, so failure is decided on empty output alone.
    - name: 检查 Nomad 二进制文件位置
      shell: command -v nomad || find /usr -name nomad ! -type d 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: (nomad_binary_path.stdout | trim) == ""
      check_mode: false

    # Wants= (rather than Requires=) is the documented way to pull in
    # network-online.target. Ordering after tailscaled.service lets the
    # Tailscale address exist before Nomad tries to bind to it; the ordering
    # is ignored on hosts where that unit is absent.
    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Wants=network-online.target
          After=network-online.target tailscaled.service

          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout | trim }} agent -config={{ nomad_config_file }}
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "{{ nomad_data_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    # Run any pending "restart nomad" handler now, so the checks below
    # observe the agent running with the configuration written above rather
    # than the previous process.
    - meta: flush_handlers

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Best-effort: a timeout here is reported by the summary below instead of
    # aborting the play.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ node_addr }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    # Read-only; a non-zero exit code is surfaced by the summary task.
    - name: 检查 Nomad 服务状态
      command: systemctl status nomad --no-pager -l
      register: nomad_status
      changed_when: false
      ignore_errors: true
      check_mode: false

    - name: 显示配置结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 配置完成
          🌐 使用地址: {{ node_addr }}
          🎯 角色: {{ nomad_role }}
          🔧 Nomad 二进制: {{ nomad_binary_path.stdout }}
          📊 服务状态: {{ 'active' if nomad_status.rc == 0 else 'failed' }}
          {% if nomad_status.rc != 0 %}
          ❌ 错误信息:
          {{ nomad_status.stdout }}
          {{ nomad_status.stderr }}
          {% endif %}

  handlers:
    # Reloads unit files before restarting so a changed nomad.service is
    # picked up.
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true