mgmt/ansible/fix-ashburn-servers.yml

80 lines
2.1 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# Harden the Nomad server configuration on the US Ashburn server nodes.
#
# Per-host flow (hosts are processed one at a time, see `serial`):
#   1. abort unless the cluster status endpoint currently reports a leader;
#   2. back up the live config on the node and render the secure template
#      (the rendered file is validated before it replaces the live one);
#   3. restart Nomad once, and only if the config actually changed, then wait
#      for the HTTP port to answer;
#   4. query the cluster peers and print them as a final report.
- name: 修复 Ashburn 服务器节点不安全配置
  hosts: ash1d,ash2e
  become: true
  serial: 1  # one node at a time, to stay safe

  tasks:
    - name: 显示当前处理的服务器节点
      debug:
        msg: "⚠️ 正在处理关键服务器节点: {{ inventory_hostname }}"

    # uri fails by itself on any non-200 reply, which would make the explicit
    # leader guard below unreachable. The failure is deferred to that guard so
    # its message is what the operator sees.
    - name: 检查集群状态 - 确保有足够的服务器在线
      uri:
        url: "http://semaphore.tailnet-68f9.ts.net:4646/v1/status/leader"
        method: GET
      register: leader_check
      failed_when: false
      delegate_to: localhost
      become: false  # runs on the controller; no privilege escalation needed

    - name: 确认集群有 leader
      fail:
        msg: "集群没有 leader停止操作"
      when: leader_check.status | default(-1) != 200

    # remote_src is required here: without it, copy looks for `src` on the
    # controller instead of on the managed node.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
        backup: true

    # `validate` checks the rendered file before it is moved into place, so an
    # invalid render never overwrites the live configuration.
    # NOTE(review): assumes `nomad config validate` accepts the temporary file
    # path Ansible substitutes for %s - confirm on the target Nomad version.
    - name: 创建安全的服务器配置
      template:
        src: ../nomad-configs-tofu/server-template-secure.hcl
        dest: /etc/nomad.d/nomad.hcl
        backup: true
        validate: nomad config validate %s
      notify: restart nomad

    # Read-only check of the installed file, kept for the report printed below.
    - name: 验证配置文件语法
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: 显示验证结果
      debug:
        msg: "{{ inventory_hostname }} 配置验证: {{ config_validation.stdout }}"

    # Run the notified handler now rather than at the end of the task list, so
    # Nomad is restarted exactly once (and only when the config changed) before
    # the port check below.
    - name: 重启 Nomad 服务
      meta: flush_handlers

    - name: 等待服务启动
      wait_for:
        port: 4646
        host: "{{ inventory_hostname }}.tailnet-68f9.ts.net"
        delay: 10
        timeout: 60
      delegate_to: localhost
      become: false  # probe is made from the controller

  handlers:
    # The name must match the `notify` value on the template task.
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true

  post_tasks:
    # Fixed grace period before the peers endpoint is queried.
    - name: 等待节点重新加入集群
      pause:
        seconds: 20

    - name: 验证服务器重新加入集群
      uri:
        url: "http://semaphore.tailnet-68f9.ts.net:4646/v1/status/peers"
        method: GET
      register: cluster_peers
      delegate_to: localhost
      become: false  # runs on the controller; no privilege escalation needed

    - name: 显示集群状态
      debug:
        msg: "集群 peers: {{ cluster_peers.json }}"