---
# Repairs the Nomad agent configuration on every host in `nomad_cluster`:
# stop the service, back up and re-render /etc/nomad.d/nomad.hcl, bring the
# service back up, then probe the agent HTTP API and report the outcome.
- name: 修复 Nomad 节点配置
  hosts: nomad_cluster
  become: true

  vars:
    # Neither variable is referenced by the tasks below; presumably both are
    # consumed by the nomad-client.hcl.j2 template -- TODO confirm.
    nomad_datacenter: "dc1"
    consul_servers:
      - "ash3c.tailnet-68f9.ts.net:8500"
      - "ch4.tailnet-68f9.ts.net:8500"
      - "warden.tailnet-68f9.ts.net:8500"

  tasks:
    - name: 检查节点当前状态
      debug:
        msg: "正在修复节点: {{ inventory_hostname }}"

    # Best effort: the play continues even if the stop fails.
    - name: 停止 Nomad 服务
      systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    # Keeps a timestamped copy of the current config on the managed host.
    # `ansible_date_time` comes from gathered facts, so fact gathering must
    # stay enabled for this play. Failures are ignored (presumably to
    # tolerate hosts that have no config file yet -- TODO confirm).
    - name: 备份现有配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
      ignore_errors: true

    - name: 创建 Nomad 配置目录
      file:
        path: /etc/nomad.d
        state: directory
        mode: '0755'

    - name: 生成 Nomad 客户端配置
      template:
        src: nomad-client.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        mode: '0644'
      notify: restart nomad

    # Run a pending "restart nomad" handler now instead of at the end of the
    # play; otherwise the restart would happen after the verification tasks
    # below and the reported status would describe a process that is about
    # to be bounced.
    - name: Flush handlers before verification
      meta: flush_handlers

    # Ensures the service is running (covers the case where the template did
    # not change and no handler fired) and enabled at boot.
    - name: 启动 Nomad 服务
      systemd:
        name: nomad
        state: started
        enabled: true

    # Waits up to 30 s (after an initial 5 s delay) for the HTTP port.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ inventory_hostname }}.tailnet-68f9.ts.net"
        delay: 5
        timeout: 30
      ignore_errors: true

    # The result is only used for the summary below, so a failed request
    # must not abort the play.
    - name: 验证 Nomad 节点状态
      uri:
        url: "http://{{ inventory_hostname }}.tailnet-68f9.ts.net:4646/v1/agent/self"
        method: GET
      register: nomad_status
      ignore_errors: true

    # `default(0)` guards against a failed request whose result carries no
    # `status` key, which would otherwise fail this task on an undefined
    # variable instead of printing ERROR.
    - name: 显示修复结果
      debug:
        msg:
          - "节点 {{ inventory_hostname }} 修复完成"
          - "Nomad 状态: {{ 'OK' if (nomad_status.status | default(0)) == 200 else 'ERROR' }}"

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted