---
# Rebuilds the Nomad agent on every host in the `nomad_cluster` group:
# stops the service, wipes the agent data directory, writes a fresh
# /etc/nomad.d/nomad.hcl, then starts `semaphore` first and the other
# nodes afterwards, and finally prints service status, logs and membership.
#
# WARNING: this play deletes everything under /opt/nomad/data on each host.
#
# Requirement: every inventory hostname in `nomad_cluster` must be a key of
# `tailscale_ips` below; the rendered config binds to that address.
- name: Complete Nomad Cluster Fix with Ansible
  hosts: nomad_cluster
  become: true
  gather_facts: true
  vars:
    # NOTE(review): the gossip encryption key is stored here in plain text.
    # Consider moving it to Ansible Vault (or another secret store) and
    # rotating it if this file has been shared.
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    # Inventory hostname -> Tailscale address used for bind/RPC/Serf.
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    # Fail early with a readable message instead of an undefined-variable
    # error in the middle of the config template.
    - name: Verify this host has a Tailscale address configured
      assert:
        that:
          - inventory_hostname in tailscale_ips
        msg: "No entry for '{{ inventory_hostname }}' in tailscale_ips"

    # Best effort: the unit may be missing or already stopped.
    - name: Stop nomad service completely
      systemd:
        name: nomad
        state: stopped
        enabled: true
      ignore_errors: true

    # -x matches the process name exactly. The previous `pkill -f nomad`
    # matched full command lines, including the shell running the task.
    # pkill exits 0 when something was signalled and 1 when nothing matched.
    - name: Kill any remaining nomad processes
      command: pkill -x nomad
      register: nomad_pkill
      changed_when: nomad_pkill.rc == 0
      failed_when: nomad_pkill.rc not in [0, 1]

    # Best effort: a non-zero exit here must not stop the rebuild.
    - name: Reset systemd failure state
      command: systemctl reset-failed nomad
      changed_when: false
      failed_when: false

    - name: Create nomad user if not exists
      user:
        name: nomad
        system: true
        shell: /bin/false
        home: /opt/nomad
        create_home: false

    - name: Create all required directories with correct permissions
      file:
        path: "{{ item }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      loop:
        - /opt/nomad
        - /opt/nomad/data
        - /opt/nomad/alloc_mounts
        - /var/log/nomad
        - /etc/nomad.d

    # Removes every entry below the data directory (dotfiles included) while
    # keeping the directory itself. Avoids the `.*` glob, which also expands
    # to `.` and `..`.
    - name: Completely clean nomad data directory
      command: find /opt/nomad/data -mindepth 1 -delete

    # NOTE(review): `retry_max = 3` with a 15s interval limits how long a
    # server keeps trying to join its peers - confirm that is intended.
    # NOTE(review): the docker plugin allows privileged containers and
    # volumes - confirm that is intended for this cluster.
    - name: Create correct nomad configuration
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}:4647",
                "{{ tailscale_ips.master }}:4647",
                "{{ tailscale_ips.ash3c }}:4647"
              ]
              retry_interval = "15s"
              retry_max = 3
            }
          }

          client {
            enabled = true
            alloc_dir = "/opt/nomad/alloc_mounts"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
          log_rotate_duration = "24h"
          log_rotate_max_files = 5
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Set correct ownership for all nomad files
      file:
        path: "{{ item }}"
        owner: nomad
        group: nomad
        recurse: true
      loop:
        - /opt/nomad
        - /var/log/nomad
        - /etc/nomad.d

    # The result is captured without failing so it can be printed first;
    # the task after the debug output aborts the host if validation failed.
    - name: Validate nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false
      failed_when: false

    - name: Show config validation result
      debug:
        var: config_validation

    # Do not start agents on a wiped data directory with a config that the
    # nomad binary itself rejected.
    - name: Abort if the nomad configuration is invalid
      fail:
        msg: "nomad config validate failed for /etc/nomad.d/nomad.hcl"
      when: config_validation.rc | default(1) != 0

    - name: Start nomad service on first node (semaphore)
      systemd:
        name: nomad
        state: started
        daemon_reload: true
      when: inventory_hostname == 'semaphore'

    # Waits for the RPC port instead of sleeping. `pause` runs once per play
    # rather than per host, so a per-host `when` on it is unreliable.
    # Non-fatal so the status and log tasks below still run on a broken node.
    - name: Wait for first node to start
      wait_for:
        host: "{{ tailscale_ips[inventory_hostname] }}"
        port: 4647
        timeout: 60
      ignore_errors: true
      when: inventory_hostname == 'semaphore'

    - name: Start nomad service on remaining nodes
      systemd:
        name: nomad
        state: started
        daemon_reload: true
      when: inventory_hostname != 'semaphore'

    # Non-fatal for the same reason as the first-node wait.
    - name: Wait for all services to start
      wait_for:
        host: "{{ tailscale_ips[inventory_hostname] }}"
        port: 4647
        timeout: 60
      ignore_errors: true

    # Read-only diagnostics: never "changed", and a non-zero exit (for
    # example from an inactive unit) must not hide the remaining output.
    - name: Check nomad service status
      command: systemctl status nomad --no-pager -l
      register: service_status
      changed_when: false
      failed_when: false

    - name: Show service status
      debug:
        var: service_status.stdout_lines

    - name: Check nomad logs for errors
      command: journalctl -u nomad -n 10 --no-pager
      register: nomad_logs
      changed_when: false
      failed_when: false

    - name: Show recent nomad logs
      debug:
        var: nomad_logs.stdout_lines

    # Retried for up to ~30s to give the agent time to answer; still
    # best effort so the final debug task always runs.
    - name: Test nomad connectivity
      command: nomad server members
      register: nomad_members
      changed_when: false
      until: nomad_members.rc == 0
      retries: 6
      delay: 5
      ignore_errors: true
      when: inventory_hostname == 'semaphore'

    - name: Show cluster members
      debug:
        var: nomad_members.stdout_lines
      when: inventory_hostname == 'semaphore'