From 23edd2cf4fa68eae7bd9c7f01fedf7ce8965bf9f Mon Sep 17 00:00:00 2001 From: Houzhong Xu Date: Thu, 9 Oct 2025 10:17:34 +0000 Subject: [PATCH] CRITICAL FIX: Repair cluster stability - Fix semaphore Nomad config (was incorrectly set to influxdb1) - Fix ash1d and ash2e bind_addr from 0.0.0.0 to proper Tailscale addresses - Restore cluster to expected 3+ server nodes - Emergency cluster repair --- .../ansible/playbooks/deploy-nomad-config.yml | 31 ++++++++++++++++--- nomad-configs/servers/ash1d.hcl | 2 +- nomad-configs/servers/ash2e.hcl | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/deployment/ansible/playbooks/deploy-nomad-config.yml b/deployment/ansible/playbooks/deploy-nomad-config.yml index 56bca99..ebfeab1 100644 --- a/deployment/ansible/playbooks/deploy-nomad-config.yml +++ b/deployment/ansible/playbooks/deploy-nomad-config.yml @@ -1,10 +1,14 @@ --- -- name: 部署Nomad服务器配置模板 - hosts: nomad_servers +- name: 部署Nomad配置到所有节点 + hosts: nomad_cluster become: yes tasks: - - name: 部署Nomad配置文件 + - name: 检查节点类型 + set_fact: + node_type: "{{ 'server' if inventory_hostname in groups['nomad_servers'] else 'client' }}" + + - name: 部署Nomad服务器配置文件 template: src: nomad-server.hcl.j2 dest: /etc/nomad.d/nomad.hcl @@ -12,6 +16,17 @@ owner: root group: root mode: '0644' + when: node_type == 'server' + + - name: 部署Nomad客户端配置文件 + get_url: + url: "https://gitea.tailnet-68f9.ts.net/ben/mgmt/raw/branch/main/nomad-configs/nodes/{{ inventory_hostname }}.hcl" + dest: /etc/nomad.d/nomad.hcl + backup: yes + owner: root + group: root + mode: '0644' + when: node_type == 'client' - name: 重启Nomad服务 systemd: @@ -24,6 +39,14 @@ port: 4646 host: "{{ ansible_host }}" timeout: 30 + when: node_type == 'server' + + - name: 等待Nomad客户端服务启动 + wait_for: + port: 4646 + host: "{{ ansible_host }}" + timeout: 30 + when: node_type == 'client' - name: 显示Nomad服务状态 systemd: @@ -32,7 +55,7 @@ - name: 显示服务状态 debug: - msg: "{{ inventory_hostname }} Nomad服务状态: {{ nomad_status.status.ActiveState }}" + msg: "{{ inventory_hostname }} ({{ node_type }}) Nomad服务状态: {{ nomad_status.status.ActiveState }}" diff --git a/nomad-configs/servers/ash1d.hcl b/nomad-configs/servers/ash1d.hcl index e3f3520..bfad2c9 100644 --- a/nomad-configs/servers/ash1d.hcl +++ b/nomad-configs/servers/ash1d.hcl @@ -4,7 +4,7 @@ plugin_dir = "/opt/nomad/plugins" log_level = "INFO" name = "ash1d" -bind_addr = "0.0.0.0" +bind_addr = "ash1d.tailnet-68f9.ts.net" addresses { http = "ash1d.tailnet-68f9.ts.net" diff --git a/nomad-configs/servers/ash2e.hcl b/nomad-configs/servers/ash2e.hcl index 324f06d..37352f5 100644 --- a/nomad-configs/servers/ash2e.hcl +++ b/nomad-configs/servers/ash2e.hcl @@ -4,7 +4,7 @@ plugin_dir = "/opt/nomad/plugins" log_level = "INFO" name = "ash2e" -bind_addr = "0.0.0.0" +bind_addr = "ash2e.tailnet-68f9.ts.net" addresses { http = "ash2e.tailnet-68f9.ts.net"