feat(配置): 新增多个Ansible playbook用于Nomad集群管理
添加用于调试、配置和维护Nomad集群的playbook,包括节点连通性检查、配置读取、Tailscale IP获取等功能;同时修改现有playbook以支持更广泛的节点部署。
This commit is contained in:
12
configuration/inventories/production/nomad-cluster.ini
Normal file
12
configuration/inventories/production/nomad-cluster.ini
Normal file
@@ -0,0 +1,12 @@
|
||||
# Group layout for the Nomad/Consul cluster.
#
# The member hosts of nomad_servers and nomad_clients are not listed in this
# file (presumably they come from another inventory source -- confirm).
# The two groups are declared empty here so that every ":children" reference
# below resolves when this file is parsed on its own; the INI inventory
# plugin rejects a ":children" section that names an undeclared group.
# Declaring a group again in another inventory source simply merges hosts.
[nomad_servers]

[nomad_clients]

# consul_servers contains every host that is in nomad_servers.
[consul_servers:children]
nomad_servers

# Certificate locations applied to every host in consul_servers.
[consul_servers:vars]
consul_cert_dir=/etc/consul.d/certs
consul_ca_src=security/certificates/ca.pem
consul_cert_src=security/certificates/consul-server.pem
consul_key_src=security/certificates/consul-server-key.pem

# nomad_cluster is the union of the server and client groups.
[nomad_cluster:children]
nomad_servers
nomad_clients
|
||||
14
configuration/playbooks/check-security-logs.yml
Normal file
14
configuration/playbooks/check-security-logs.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
---
# Read-only diagnostic: greps the kernel journal on the `germany` hosts for
# AppArmor / SELinux messages and prints the matching lines.
- name: Check for AppArmor or SELinux denials
  hosts: germany
  become: true

  tasks:
    - name: Search journalctl for AppArmor/SELinux messages
      ansible.builtin.shell:
        # A pipe is required, hence the shell module rather than command.
        cmd: >-
          journalctl -k | grep -i -e apparmor -e selinux -e "avc: denied"
      register: security_logs
      # Nothing is modified, and a non-zero exit code is never treated as a
      # task failure here.
      changed_when: false
      failed_when: false

    - name: Display security logs
      ansible.builtin.debug:
        var: security_logs.stdout_lines
|
||||
@@ -116,6 +116,7 @@
|
||||
client {
|
||||
enabled = true
|
||||
network_interface = "tailscale0"
|
||||
cpu_total_compute = 0
|
||||
|
||||
servers = [
|
||||
"100.116.158.95:4647", # semaphore
|
||||
@@ -162,7 +163,7 @@
|
||||
Type=notify
|
||||
User=root
|
||||
Group=root
|
||||
ExecStart=/snap/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
|
||||
ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=process
|
||||
Restart=on-failure
|
||||
|
||||
33
configuration/playbooks/debug-cgroup-permissions.yml
Normal file
33
configuration/playbooks/debug-cgroup-permissions.yml
Normal file
@@ -0,0 +1,33 @@
|
||||
---
# Read-only diagnostic for the cpuset cgroup hierarchy on the `germany` hosts:
# reports stat information for /sys/fs/cgroup/cpuset/ and its `nomad`
# sub-directory, then lists the parent directory.
- name: Debug cgroup permissions
  hosts: germany
  become: true

  tasks:
    - name: Check permissions of /sys/fs/cgroup/cpuset/
      ansible.builtin.stat:
        path: /sys/fs/cgroup/cpuset/
      register: cpuset_dir

    - name: Display cpuset dir stats
      ansible.builtin.debug:
        var: cpuset_dir.stat

    - name: Check for nomad subdir in cpuset
      ansible.builtin.stat:
        path: /sys/fs/cgroup/cpuset/nomad
      register: nomad_cpuset_dir
      ignore_errors: true

    - name: Display nomad cpuset dir stats
      ansible.builtin.debug:
        var: nomad_cpuset_dir.stat
      # default(false) covers the case where the stat task errored and was
      # ignored, leaving no `stat` key in the registered result.
      when: nomad_cpuset_dir.stat.exists | default(false)

    - name: List contents of /sys/fs/cgroup/cpuset/
      ansible.builtin.command: ls -la /sys/fs/cgroup/cpuset/
      register: ls_cpuset
      changed_when: false
      # Skip instead of failing the play when the directory is absent;
      # `ls` would exit non-zero and abort this purely diagnostic run.
      when: cpuset_dir.stat.exists | default(false)

    - name: Display contents of /sys/fs/cgroup/cpuset/
      ansible.builtin.debug:
        var: ls_cpuset.stdout_lines
      # A skipped task registers a result without stdout_lines.
      when: ls_cpuset.stdout_lines is defined
|
||||
14
configuration/playbooks/debug-nomad-cgroup.yml
Normal file
14
configuration/playbooks/debug-nomad-cgroup.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
---
# Read-only diagnostic: lists /sys/fs/cgroup/cpuset/nomad/ on the `germany`
# hosts and prints the listing.
- name: Debug Nomad cgroup subdirectory
  hosts: germany
  become: true

  tasks:
    - name: List contents of /sys/fs/cgroup/cpuset/nomad/
      ansible.builtin.command:
        cmd: ls -la /sys/fs/cgroup/cpuset/nomad/
      register: ls_nomad_cpuset
      # Never reported as changed or failed, whatever `ls` returns.
      changed_when: false
      failed_when: false

    - name: Display contents of /sys/fs/cgroup/cpuset/nomad/
      ansible.builtin.debug:
        var: ls_nomad_cpuset.stdout_lines
|
||||
30
configuration/playbooks/debug-nomad-nodes.yml
Normal file
30
configuration/playbooks/debug-nomad-nodes.yml
Normal file
@@ -0,0 +1,30 @@
|
||||
---
# Read-only diagnostic: collects `systemctl status nomad` and the last 50
# journal lines of the nomad unit from every host, then prints both.
- name: Gather Nomad debug information from multiple nodes
  hosts: all
  become: true

  tasks:
    # Neither command uses pipes, redirects or globbing, so the command
    # module is used instead of shell.
    - name: Get Nomad service status
      ansible.builtin.command: systemctl status nomad --no-pager -l
      register: nomad_status
      changed_when: false
      # A non-zero exit code is not treated as a failure; the output is
      # still wanted for display.
      failed_when: false

    - name: Get last 50 lines of Nomad journal logs
      ansible.builtin.command: journalctl -u nomad -n 50 --no-pager
      register: nomad_journal
      changed_when: false
      failed_when: false

    - name: Display Nomad Status
      ansible.builtin.debug:
        msg: |
          --- Nomad Status for {{ inventory_hostname }} ---
          {{ nomad_status.stdout }}
          {{ nomad_status.stderr }}

    - name: Display Nomad Journal
      ansible.builtin.debug:
        msg: |
          --- Nomad Journal for {{ inventory_hostname }} ---
          {{ nomad_journal.stdout }}
          {{ nomad_journal.stderr }}
|
||||
14
configuration/playbooks/find-nomad-service.yml
Normal file
14
configuration/playbooks/find-nomad-service.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
---
# Read-only diagnostic: prints every systemd service unit file on the
# `germany` hosts whose listing line contains "nomad" (case-insensitive).
- name: Find Nomad service
  hosts: germany
  become: true

  tasks:
    - name: List systemd services and filter for nomad
      ansible.builtin.shell:
        # The pipe into grep is why the shell module is used.
        cmd: systemctl list-unit-files --type=service | grep -i nomad
      register: nomad_services
      # Never reported as changed or failed, whatever the pipeline returns.
      changed_when: false
      failed_when: false

    - name: Display found services
      ansible.builtin.debug:
        var: nomad_services.stdout_lines
|
||||
19
configuration/playbooks/fix-cgroup-permissions.yml
Normal file
19
configuration/playbooks/fix-cgroup-permissions.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
---
# Resets ownership of the cpuset cgroup directories used by Nomad to
# root:root on the `germany` hosts.
#
# The file module with `state: directory` creates the path when it is
# missing. Both paths live under /sys/fs/cgroup, so each one is stat'ed first
# and ownership is only changed on directories that already exist; this
# playbook must never create entries in that tree.
- name: Fix cgroup permissions for Nomad
  hosts: germany
  become: true

  tasks:
    - name: Check whether the nomad cpuset directory exists
      ansible.builtin.stat:
        path: /sys/fs/cgroup/cpuset/nomad
      register: nomad_cpuset_stat

    - name: Recursively change ownership of nomad cgroup directory
      ansible.builtin.file:
        path: /sys/fs/cgroup/cpuset/nomad
        state: directory
        owner: root
        group: root
        recurse: true
      # `isdir` is only present in the stat result when the path exists.
      when: nomad_cpuset_stat.stat.isdir | default(false)

    - name: Check whether the parent cpuset directory exists
      ansible.builtin.stat:
        path: /sys/fs/cgroup/cpuset/
      register: cpuset_stat

    - name: Change ownership of the parent cpuset directory
      ansible.builtin.file:
        path: /sys/fs/cgroup/cpuset/
        state: directory
        owner: root
        group: root
      when: cpuset_stat.stat.isdir | default(false)
|
||||
45
configuration/playbooks/fix-nomad-server-config.yml
Normal file
45
configuration/playbooks/fix-nomad-server-config.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
---
# Rewrites /etc/nomad.d/nomad.hcl on the control node (localhost) with a
# server-only Nomad configuration: server enabled, client disabled.
- name: Fix Nomad server configuration
  hosts: localhost
  gather_facts: false
  become: true

  vars:
    # Value written to the `encrypt` setting of the server block.
    # NOTE(review): this secret is committed in plain text. The default is
    # kept so existing invocations behave as before, but it should be moved
    # to Ansible Vault (or passed with `-e nomad_gossip_key=...`) and rotated.
    nomad_gossip_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="

  tasks:
    - name: Create corrected nomad.hcl
      ansible.builtin.copy:
        dest: /etc/nomad.d/nomad.hcl
        # The file embeds the encryption key, so it must not be
        # world-readable.
        # NOTE(review): assumes the agent can still read the file with this
        # mode (e.g. it runs as root or owns the file) -- confirm.
        mode: "0640"
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"

          bind_addr = "100.116.158.95"

          server {
            enabled = true
            bootstrap_expect = 5
            encrypt = "{{ nomad_gossip_key }}"
            retry_join = [
              "100.116.158.95", # semaphore
              "100.81.26.3", # ash1d
              "100.103.147.94", # ash2e
              "100.90.159.68", # ch2
              "100.86.141.112" # ch3
            ]
          }

          client {
            enabled = false
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "100.116.158.95:8500"
          }
|
||||
12
configuration/playbooks/get-tailscale-ips.yml
Normal file
12
configuration/playbooks/get-tailscale-ips.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
---
# Read-only query: runs `tailscale ip -4` on every host and prints the
# result next to the inventory hostname.
- name: Get Tailscale IP for specified nodes
  hosts: all
  gather_facts: false

  tasks:
    - name: Get tailscale IP
      # No shell features are needed, so command is used instead of shell.
      ansible.builtin.command: tailscale ip -4
      register: tailscale_ip
      # Querying the address modifies nothing; without this the task would
      # report "changed" on every run.
      changed_when: false

    - name: Display Tailscale IP
      ansible.builtin.debug:
        msg: "Node {{ inventory_hostname }} has IP: {{ tailscale_ip.stdout }}"
|
||||
@@ -1,10 +1,8 @@
|
||||
---
|
||||
- name: Install Nomad by direct download from HashiCorp
|
||||
hosts: hcs
|
||||
hosts: all
|
||||
become: yes
|
||||
vars:
|
||||
nomad_version: "1.10.5"
|
||||
nomad_url: "https://releases.hashicorp.com/nomad/{{ nomad_version }}/nomad_{{ nomad_version }}_linux_amd64.zip"
|
||||
nomad_user: "nomad"
|
||||
nomad_group: "nomad"
|
||||
nomad_home: "/opt/nomad"
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
- name: Manually run Nomad agent to capture output
|
||||
---
|
||||
- name: Manually run Nomad agent for debugging
|
||||
hosts: germany
|
||||
gather_facts: false
|
||||
become: yes
|
||||
tasks:
|
||||
- name: Run nomad agent directly
|
||||
command: /snap/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
|
||||
register: nomad_agent_output
|
||||
ignore_errors: true
|
||||
- name: Find Nomad binary path
|
||||
shell: which nomad || find /usr -name nomad 2>/dev/null | head -1
|
||||
register: nomad_binary_path
|
||||
failed_when: nomad_binary_path.stdout == ""
|
||||
|
||||
- name: Display agent output
|
||||
- name: Run nomad agent directly
|
||||
command: "{{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl"
|
||||
register: nomad_run
|
||||
failed_when: false
|
||||
|
||||
- name: Display Nomad output
|
||||
debug:
|
||||
msg: |
|
||||
--- Nomad Agent STDOUT ---
|
||||
{{ nomad_agent_output.stdout }}
|
||||
|
||||
--- Nomad Agent STDERR ---
|
||||
{{ nomad_agent_output.stderr }}
|
||||
var: nomad_run.stdout
|
||||
|
||||
- name: Display Nomad error output
|
||||
debug:
|
||||
var: nomad_run.stderr
|
||||
7
configuration/playbooks/ping-nodes.yml
Normal file
7
configuration/playbooks/ping-nodes.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
# Connectivity check: runs the Ansible ping module against every host.
# Fact gathering is disabled, so the ping is the only task executed.
- name: Ping nodes to check connectivity
  hosts: all
  gather_facts: false

  tasks:
    - name: Ping the host
      ansible.builtin.ping:
|
||||
13
configuration/playbooks/read-nomad-config.yml
Normal file
13
configuration/playbooks/read-nomad-config.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
---
# Read-only helper: fetches /etc/nomad.d/nomad.hcl from localhost and prints
# its decoded contents.
- name: Read Nomad config file
  hosts: localhost
  gather_facts: false

  tasks:
    - name: Read nomad.hcl
      ansible.builtin.slurp:
        src: /etc/nomad.d/nomad.hcl
      register: nomad_config

    - name: Display Nomad config
      ansible.builtin.debug:
        # slurp returns the file base64-encoded under `content`.
        msg: "{{ nomad_config.content | b64decode }}"
|
||||
37
configuration/playbooks/update-nomad-config.yml
Normal file
37
configuration/playbooks/update-nomad-config.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
---
# Rewrites /etc/nomad.d/nomad.hcl on the control node (localhost) with a
# client-only Nomad configuration: server disabled, client enabled and
# pointed at three server addresses on port 4647.
- name: Update Nomad config to run as a client
  hosts: localhost
  gather_facts: false
  become: true

  tasks:
    - name: Create new nomad.hcl
      ansible.builtin.copy:
        dest: /etc/nomad.d/nomad.hcl
        # Explicit mode so the resulting permissions do not depend on the
        # umask in effect when the file is first created.
        mode: "0644"
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"

          bind_addr = "100.116.158.95"

          server {
            enabled = false
          }

          client {
            enabled = true
            servers = ["100.81.26.3:4647", "100.103.147.94:4647", "100.90.159.68:4647"]
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "100.116.158.95:8500"
          }
|
||||
Reference in New Issue
Block a user