---
# Rebuilds a three-node Nomad server/client cluster from scratch:
#   1. stops Nomad and clears any leftover processes and systemd failure state,
#   2. recreates the nomad user, directories and a clean data directory,
#   3. writes /etc/nomad.d/nomad.hcl bound to each host's Tailscale address,
#   4. starts the first node (semaphore), then the remaining nodes,
#   5. prints service status, recent logs and the server member list.
#
# Inventory host names must match the keys of `tailscale_ips`
# (semaphore, master, ash3c); the config template looks the bind address up
# by `inventory_hostname`.
#
# WARNING: this play wipes /opt/nomad/data on every targeted host.
- name: Complete Nomad Cluster Fix with Ansible
  hosts: nomad_cluster
  become: true
  gather_facts: true

  vars:
    # NOTE(review): the gossip encryption key is committed in clear text.
    # Consider moving it to Ansible Vault (or another secret store) and
    # rotating it.
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    # Tailscale address of every cluster member, keyed by inventory hostname.
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    # Best effort: the unit may not exist yet on a fresh host.
    - name: Stop nomad service completely
      systemd:
        name: nomad
        state: stopped
        enabled: true
      ignore_errors: true

    # `pkill -x` matches the exact process name. The previous `pkill -f nomad`
    # matched any command line containing "nomad", including the shell that
    # Ansible spawned to run the pkill command itself.
    # pkill exits 0 when something was killed and 1 when nothing matched.
    - name: Kill any remaining nomad processes
      command: pkill -x nomad
      register: nomad_pkill
      changed_when: nomad_pkill.rc | default(1) == 0
      failed_when: false

    # Best effort: clears a "failed" unit state so the later start is clean.
    - name: Reset systemd failure state
      command: systemctl reset-failed nomad
      changed_when: false
      failed_when: false

    - name: Create nomad user if not exists
      user:
        name: nomad
        system: true
        shell: /bin/false
        home: /opt/nomad
        create_home: false

    # Remove the whole data directory (it is recreated by the next task).
    # This replaces `rm -rf data/* data/.*`, whose `.*` glob also expands to
    # `.` and `..`, making rm always report an error that then had to be
    # ignored. A failure here is real (e.g. a busy mount) and is not ignored,
    # because starting on half-wiped state is worse than stopping.
    - name: Completely clean nomad data directory
      file:
        path: /opt/nomad/data
        state: absent

    - name: Create all required directories with correct permissions
      file:
        path: "{{ item }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      loop:
        - /opt/nomad
        - /opt/nomad/data
        - /opt/nomad/alloc_mounts
        - /var/log/nomad
        - /etc/nomad.d

    - name: Create correct nomad configuration
      copy:
        content: |
          datacenter = "dc1"
          region     = "global"
          data_dir   = "/opt/nomad/data"
          bind_addr  = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled          = true
            bootstrap_expect = 3
            encrypt          = "{{ nomad_encrypt_key }}"

            server_join {
              # Servers join each other over Serf (port 4648, see `ports`
              # below), not over the RPC port 4647.
              retry_join = [
                "{{ tailscale_ips.semaphore }}:4648",
                "{{ tailscale_ips.master }}:4648",
                "{{ tailscale_ips.ash3c }}:4648"
              ]
              retry_interval = "15s"
              # 0 = retry indefinitely, so a node started early does not give
              # up (and exit) before its peers are brought up.
              retry_max = 0
            }
          }

          client {
            enabled   = true
            alloc_dir = "/opt/nomad/alloc_mounts"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc  = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc  = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level            = "INFO"
          log_file             = "/var/log/nomad/nomad.log"
          log_rotate_duration  = "24h"
          log_rotate_max_files = 5
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    # `recurse` is documented as applying only with `state: directory`.
    - name: Set correct ownership for all nomad files
      file:
        path: "{{ item }}"
        state: directory
        owner: nomad
        group: nomad
        recurse: true
      loop:
        - /opt/nomad
        - /var/log/nomad
        - /etc/nomad.d

    # Informational only: the result is printed and the play continues.
    - name: Validate nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false
      ignore_errors: true

    - name: Show config validation result
      debug:
        var: config_validation

    - name: Start nomad service on first node (semaphore)
      systemd:
        name: nomad
        state: started
        daemon_reload: true
      when: inventory_hostname == 'semaphore'

    # `pause` runs once for the whole batch and a `when` on it is evaluated
    # against the first host only, so a hostname condition could skip the
    # wait entirely. Pause unconditionally instead.
    - name: Wait for first node to start
      pause:
        seconds: 30

    - name: Start nomad service on remaining nodes
      systemd:
        name: nomad
        state: started
        daemon_reload: true
      when: inventory_hostname != 'semaphore'

    - name: Wait for all services to start
      pause:
        seconds: 20

    # The remaining tasks are read-only diagnostics and never abort the play.
    - name: Check nomad service status
      command: systemctl status nomad --no-pager -l
      register: service_status
      changed_when: false
      ignore_errors: true

    - name: Show service status
      debug:
        var: service_status.stdout_lines

    - name: Check nomad logs for errors
      command: journalctl -u nomad -n 10 --no-pager
      register: nomad_logs
      changed_when: false
      ignore_errors: true

    - name: Show recent nomad logs
      debug:
        var: nomad_logs.stdout_lines

    - name: Test nomad connectivity
      command: nomad server members
      register: nomad_members
      changed_when: false
      ignore_errors: true
      when: inventory_hostname == 'semaphore'

    - name: Show cluster members
      debug:
        var: nomad_members.stdout_lines
      when: inventory_hostname == 'semaphore'