# mgmt/configuration/playbooks/fix-nomad-systemd.yml
#
# 88 lines
# 2.4 KiB
# YAML

---
# Rewrites the Nomad systemd unit so that ExecStart points at the nomad binary
# found on each host, then brings the cluster back up: servers first, clients
# afterwards, and finally reports the service status per node.
#
# Inventory groups referenced: nomad_cluster (play targets), nomad_servers and
# nomad_clients (start ordering).
- name: Fix Nomad systemd service binary path
  hosts: nomad_cluster
  become: true

  tasks:
    # Read-only lookup: no shell features are required and nothing on the host
    # changes, so use `command` and never report "changed".
    - name: Check Nomad binary location
      command: which nomad
      register: nomad_binary_path
      changed_when: false

    - name: Display binary path
      debug:
        msg: "Nomad binary 位于: {{ nomad_binary_path.stdout }}"

    # Best effort: a failure to stop the service must not prevent the unit
    # file below from being rewritten.
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    # ExecStart is templated from the path discovered by the first task.
    - name: Update Nomad systemd service with correct binary path
      copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl
          [Service]
          Type=notify
          User=nomad
          Group=nomad
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536
          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: '0644'
      notify: reload systemd

    # daemon_reload here picks up the rewritten unit immediately; the notified
    # handler only runs at the end of the play.
    - name: Reload systemd and start Nomad servers first
      systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true
      when: inventory_hostname in groups['nomad_servers']

    # `wait_for` with only a timeout is a plain sleep executed per host.
    # `pause` is not used because it runs once per batch and evaluates `when`
    # against the first host only, so in a mixed server/client batch the wait
    # could be skipped depending on host ordering.
    - name: Wait for servers to be ready
      wait_for:
        timeout: 15
      when: inventory_hostname in groups['nomad_servers']

    - name: Start Nomad clients
      systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true
      when: inventory_hostname in groups['nomad_clients']

    # Same per-host sleep as above, for the same reason.
    - name: Wait for clients to connect
      wait_for:
        timeout: 10
      when: inventory_hostname in groups['nomad_clients']

    # Status probe only: a non-zero exit code is reported in the summary below
    # instead of failing the play, and the task never counts as a change.
    - name: Check final service status
      command: systemctl status nomad --no-pager -l
      register: service_status
      changed_when: false
      failed_when: false

    - name: Display service status
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 服务状态:
          📊 状态: {{ 'SUCCESS' if service_status.rc == 0 else 'FAILED' }}
          💾 二进制路径: {{ nomad_binary_path.stdout }}

  handlers:
    # Triggered when the unit file content changes.
    - name: reload systemd
      systemd:
        daemon_reload: true