---
# Wipe and rebuild a three-server Nomad cluster that communicates over Tailscale.
#
# Play 1 (nomad_cluster, one host at a time): stop Nomad, delete its data and
#   logs, write a fresh /etc/nomad.d/nomad.hcl, start the service, and wait for
#   the HTTP port to answer.
# Play 2 (localhost): pause 30 seconds so the servers can form a cluster.
# Play 3 (semaphore): print `nomad server members` and `nomad node status`.
#
# WARNING: destructive. Everything under /opt/nomad/data and /var/log/nomad is
# deleted on every targeted host.

- name: Complete Nomad Cluster Reset and Rebuild
  hosts: nomad_cluster
  become: true
  serial: 1  # process one node at a time
  vars:
    # NOTE(review): this gossip encryption key is committed in plain text.
    # Consider moving it to Ansible Vault (or another secret store) and
    # rotating it.
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    # Inventory hostname -> Tailscale IP. Every host in `nomad_cluster` must
    # have an entry here; the first task enforces that.
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    # Fail before anything destructive happens: without this check a host
    # missing from `tailscale_ips` would be stopped and wiped, and only then
    # fail while rendering the configuration.
    - name: Verify this host has a Tailscale IP mapping
      assert:
        that:
          - inventory_hostname in tailscale_ips
        fail_msg: "No entry for {{ inventory_hostname }} in tailscale_ips"

    # Best-effort: errors (e.g. the service is already stopped or the unit is
    # absent) are reported but do not abort the play.
    - name: Stop nomad service completely
      systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    # `-x` matches the exact process name. The previous `pkill -f nomad` ran
    # through a shell whose own command line contained "nomad", so pkill also
    # signalled its parent shell (and any unrelated process mentioning nomad).
    # pkill exit codes: 0 = something was signalled, 1 = nothing matched.
    - name: Kill any remaining nomad processes
      command: pkill -x nomad
      register: nomad_pkill
      changed_when: nomad_pkill.rc == 0
      failed_when: nomad_pkill.rc not in [0, 1]

    # `find -mindepth 1 -delete` removes regular and hidden entries alike
    # without the `.*` glob, which expands to `.` and `..`.
    # Still best-effort (ignore_errors), but `set -e` makes a failed wipe show
    # up in the run output instead of being masked by the last command.
    - name: Remove all nomad data and state
      shell: |
        set -e
        if [ -d /opt/nomad/data ]; then
          find /opt/nomad/data -mindepth 1 -delete
        fi
        rm -rf /var/log/nomad/*
      ignore_errors: true

    # Every node is configured as both server and client. The HTTP API binds
    # to all interfaces; RPC and Serf bind to the node's Tailscale address.
    # NOTE(review): Nomad's agent configuration reference names the UI block
    # `ui`; `ui_config` is the Consul spelling. Confirm which one the
    # installed Nomad version expects.
    - name: Create fresh nomad configuration with correct Tailscale IPs
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          # 使用 Tailscale IP 地址
          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}",
                "{{ tailscale_ips.master }}",
                "{{ tailscale_ips.ash3c }}"
              ]
            }
          }

          client {
            enabled = true
            network_interface = "tailscale0"
          }

          ui_config {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    # The configuration above logs to /var/log/nomad/nomad.log, so the
    # directory has to exist and be writable by the nomad user before start.
    - name: Ensure log directory exists
      file:
        path: /var/log/nomad
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        enabled: true

    # Wait 5 seconds, then poll the HTTP port on the Tailscale address for up
    # to 30 seconds.
    - name: Wait for nomad to start
      wait_for:
        port: 4646
        host: "{{ tailscale_ips[inventory_hostname] }}"
        delay: 5
        timeout: 30

    # Informational only: read-only command, never reports "changed", and a
    # non-zero exit status does not abort the play.
    - name: Check nomad service status
      command: systemctl status nomad --no-pager -l
      register: nomad_status
      changed_when: false
      ignore_errors: true

    - name: Display nomad status
      debug:
        var: nomad_status.stdout_lines

- name: Wait for cluster to form
  hosts: localhost
  gather_facts: false
  tasks:
    # Fixed 30-second grace period before the verification play runs.
    - name: Wait for cluster formation
      pause:
        seconds: 30
        prompt: "等待集群形成..."

- name: Verify cluster status
  hosts: semaphore
  become: true
  tasks:
    # Both checks are read-only and informational; failures are shown but do
    # not abort the play.
    - name: Check cluster members
      command: nomad server members
      register: cluster_members
      changed_when: false
      ignore_errors: true

    - name: Display cluster members
      debug:
        var: cluster_members.stdout_lines

    - name: Check node status
      command: nomad node status
      register: node_status
      changed_when: false
      ignore_errors: true

    - name: Display node status
      debug:
        var: node_status.stdout_lines