feat: 重构项目目录结构并添加多个功能
- 新增脚本和配置文件用于管理Nomad节点和NFS存储
- 添加多个Ansible playbook用于配置和调试Nomad集群
- 新增Nomad job文件用于测试Podman和NFS功能
- 重构playbooks目录结构,按功能分类
- 更新Nomad客户端和服务端配置模板
- 添加SSH密钥分发和配置脚本
- 新增多个调试和修复问题的playbook
This commit is contained in:
16
configuration/playbooks/fix/fix-apt-errors.yml
Normal file
16
configuration/playbooks/fix/fix-apt-errors.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
---
# Diagnostic play: runs `apt-get update` on the beijing hosts and exposes its
# stderr so that broken APT repositories can be identified.
- name: Debug apt repository issues
  # `beijing:children` is INI-inventory section syntax. As a play host pattern
  # the colon is a union operator, so it meant "beijing OR something called
  # children". Target the group directly instead.
  hosts: beijing
  become: true
  ignore_unreachable: true
  tasks:
    - name: Run apt-get update to capture error
      # No shell features are used, so command is sufficient.
      ansible.builtin.command: apt-get update
      register: apt_update_result
      # This task only collects output: never fail, never report a change.
      failed_when: false
      changed_when: false

    - name: Display apt-get update stderr
      ansible.builtin.debug:
        var: apt_update_result.stderr
        # Only printed when the playbook is run with -vv or higher.
        verbosity: 2
|
||||
126
configuration/playbooks/fix/fix-duplicate-podman-config.yml
Normal file
126
configuration/playbooks/fix/fix-duplicate-podman-config.yml
Normal file
@@ -0,0 +1,126 @@
|
||||
---
# Rewrites /etc/nomad.d/nomad.hcl on client nodes with a single Podman plugin
# stanza, makes sure Podman and its socket are available, then starts Nomad
# again and prints the driver status.
#
# Required variables: nomad_role, nomad_datacenter, nomad_region, tailscale_ip.
- name: Fix duplicate Podman configuration in Nomad
  hosts: nomad_cluster
  become: true
  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    - name: Backup current configuration
      ansible.builtin.copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.backup-duplicate-fix
        remote_src: true

    # The former "Read current configuration" slurp task was dropped: its
    # registered result was never used anywhere in this playbook.

    # Only hosts with nomad_role == "client" get the rewritten file; every
    # other host in nomad_cluster is still stopped and restarted below.
    - name: Create clean configuration for clients
      ansible.builtin.copy:
        content: |
          datacenter = "{{ nomad_datacenter }}"
          region = "{{ nomad_region }}"
          data_dir = "/opt/nomad/data"
          bind_addr = "{{ tailscale_ip }}"

          server {
            enabled = false
          }

          client {
            enabled = true
            servers = ["100.116.158.95:4647", "100.117.106.136:4647", "100.86.141.112:4647", "100.81.26.3:4647", "100.103.147.94:4647"]
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ip }}"
            serf = "{{ tailscale_ip }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
              recover_stopped = true
            }
          }

          consul {
            auto_advertise = false
            server_auto_join = false
            client_auto_join = false
          }

          log_level = "INFO"
          enable_syslog = true
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'
      when: nomad_role == "client"

    - name: Ensure Podman is installed
      ansible.builtin.package:
        name: podman
        state: present

    - name: Enable and start Podman socket
      ansible.builtin.systemd:
        name: podman.socket
        enabled: true
        state: started

    # NOTE(review): 0666 lets every local user talk to the root Podman API
    # socket. Kept as-is (best effort) because the nomad user relies on it;
    # consider a dedicated group with 0660 instead.
    - name: Set proper permissions on Podman socket
      ansible.builtin.file:
        path: /run/podman/podman.sock
        mode: '0666'
      ignore_errors: true

    # The binary location differs between hosts, hence the fallback. A
    # non-zero exit code fails the task by default, so no failed_when needed.
    - name: Validate Nomad configuration
      ansible.builtin.shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for drivers to load
      ansible.builtin.pause:
        seconds: 20

    # Purely informational: never fails and never reports a change.
    - name: Check driver status
      ansible.builtin.shell: |
        /usr/local/bin/nomad node status -self | grep -A 10 "Driver Status" || /usr/bin/nomad node status -self | grep -A 10 "Driver Status"
      register: driver_status
      failed_when: false
      changed_when: false

    - name: Display driver status
      ansible.builtin.debug:
        var: driver_status.stdout_lines
|
||||
27
configuration/playbooks/fix/fix-hashicorp-apt-source.yml
Normal file
27
configuration/playbooks/fix/fix-hashicorp-apt-source.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
---
# Installs the HashiCorp APT source on every Nomad node, refreshes the APT
# cache and prints the resulting source line.
- name: 直接复制正确的 HashiCorp APT 源配置
  hosts: nomad_cluster
  become: true

  vars:
    # ASCII-armored key; the .asc extension tells apt to read it as such.
    # NOTE(review): armored keyrings need a reasonably recent apt - confirm
    # on the oldest node in the cluster.
    hashicorp_apt_keyring: /usr/share/keyrings/hashicorp-archive-keyring.asc

  tasks:
    # Replaces the previous `[trusted=yes]` + plain HTTP source, which turned
    # off package authentication entirely.
    - name: Install HashiCorp APT signing key
      ansible.builtin.get_url:
        url: https://apt.releases.hashicorp.com/gpg
        dest: "{{ hashicorp_apt_keyring }}"
        owner: root
        group: root
        mode: '0644'

    - name: 创建正确的 HashiCorp APT 源配置
      ansible.builtin.copy:
        content: "deb [signed-by={{ hashicorp_apt_keyring }}] https://apt.releases.hashicorp.com {{ ansible_distribution_release }} main\n"
        dest: "/etc/apt/sources.list.d/hashicorp.list"
        owner: root
        group: root
        mode: '0644'

    # Best effort: other broken sources on the node must not abort this play.
    - name: 更新 APT 缓存
      ansible.builtin.apt:
        update_cache: true
      ignore_errors: true

    - name: 验证配置
      ansible.builtin.command: cat /etc/apt/sources.list.d/hashicorp.list
      register: config_check
      changed_when: false

    - name: 显示配置内容
      ansible.builtin.debug:
        msg: "HashiCorp APT 源配置: {{ config_check.stdout }}"
|
||||
83
configuration/playbooks/fix/fix-hcp-podman.yml
Normal file
83
configuration/playbooks/fix/fix-hcp-podman.yml
Normal file
@@ -0,0 +1,83 @@
|
||||
---
# Prepares hcp1/hcp2 for the Nomad Podman driver: ensures the nomad user,
# Podman socket and Nomad directories exist, then restarts Nomad and prints
# diagnostic output. The Nomad configuration file itself is not modified.
- name: Fix HCP1 and HCP2 Podman Configuration
  hosts: hcp1,hcp2
  become: true
  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    - name: Ensure nomad user exists
      ansible.builtin.user:
        name: nomad
        system: true
        shell: /bin/false
        home: /home/nomad
        create_home: true

    - name: Ensure Podman socket is running
      ansible.builtin.systemd:
        name: podman.socket
        state: started
        enabled: true

    # NOTE(review): 0666 opens the root Podman API socket to every local
    # user. Kept as a best-effort step; consider a dedicated group instead.
    - name: Set proper permissions on Podman socket
      ansible.builtin.file:
        path: /run/podman/podman.sock
        mode: '0666'
      ignore_errors: true

    - name: Create nomad data directory
      ansible.builtin.file:
        path: /opt/nomad/data
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Create nomad log directory
      ansible.builtin.file:
        path: /var/log/nomad
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    # Diagnostic only: never fails and never reports a change.
    - name: Test Podman access for nomad user
      ansible.builtin.command: sudo -u nomad podman version
      register: podman_test
      failed_when: false
      changed_when: false

    - name: Display Podman test result
      ansible.builtin.debug:
        var: podman_test.stdout_lines

    # Falls back to /usr/bin/nomad like the sibling playbooks do, so a host
    # with the packaged binary is not silently reported as "invalid config".
    - name: Validate Nomad configuration
      ansible.builtin.shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      failed_when: false
      changed_when: false

    - name: Display configuration validation
      ansible.builtin.debug:
        var: config_validation

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        timeout: 60

    - name: Check Nomad node status
      ansible.builtin.shell: /usr/local/bin/nomad node status -self || /usr/bin/nomad node status -self
      register: node_status
      failed_when: false
      changed_when: false

    - name: Display node status
      ansible.builtin.debug:
        var: node_status.stdout_lines
|
||||
56
configuration/playbooks/fix/fix-hcs-dpkg-issue.yml
Normal file
56
configuration/playbooks/fix/fix-hcs-dpkg-issue.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
---
# Repairs a half-configured dpkg state on host hcs that is caused by a failing
# btrfs initramfs hook, then cleans up and upgrades packages.
- name: Fix dpkg and initramfs issues on hcs
  hosts: hcs
  become: true
  tasks:
    - name: Check current dpkg status
      ansible.builtin.command: dpkg --audit
      register: dpkg_status
      changed_when: false
      ignore_errors: true

    - name: Display dpkg status
      ansible.builtin.debug:
        var: dpkg_status.stdout_lines

    # The script now exits with dpkg's own status. Previously the trailing
    # `if` made it exit 0 even when dpkg failed, so fix_result.rc was always
    # 0 and the fallback task below could never run.
    - name: Fix broken btrfs hook
      ansible.builtin.shell: |
        hook=/usr/share/initramfs-tools/hooks/btrfs

        # Remove problematic btrfs hook temporarily
        mv "$hook" "$hook.bak" || true

        # Try to reconfigure the failed package and remember the outcome
        dpkg --configure -a
        rc=$?

        # If that works, restore the hook
        if [ "$rc" -eq 0 ]; then
          mv "$hook.bak" "$hook" || true
        fi

        # NOTE(review): on failure the hook stays at btrfs.bak; restore it
        # manually once the underlying problem is solved.
        exit "$rc"
      register: fix_result
      ignore_errors: true

    - name: Display fix result
      ansible.builtin.debug:
        var: fix_result

    # NOTE(review): `force: true` maps to apt-get --force-yes, which also
    # allows unauthenticated packages and downgrades - confirm it is needed.
    - name: Alternative fix - reinstall initramfs-tools
      ansible.builtin.apt:
        name: initramfs-tools
        state: latest
        force: true
      when: fix_result.rc != 0
      ignore_errors: true

    # apt-get replaces the interactive `apt` front end, whose CLI is not meant
    # for scripts. --with-new-pkgs keeps the semantics of `apt upgrade`.
    - name: Clean up and update
      ansible.builtin.shell: |
        apt-get autoremove -y
        apt-get update
        apt-get upgrade --with-new-pkgs -y
      environment:
        DEBIAN_FRONTEND: noninteractive
      ignore_errors: true

    - name: Check final dpkg status
      ansible.builtin.command: dpkg --audit
      register: final_status
      changed_when: false
      ignore_errors: true

    - name: Display final status
      ansible.builtin.debug:
        var: final_status.stdout_lines
|
||||
98
configuration/playbooks/fix/fix-nomad-cluster.yml
Normal file
98
configuration/playbooks/fix/fix-nomad-cluster.yml
Normal file
@@ -0,0 +1,98 @@
|
||||
---
# Rebuilds the Nomad server installation on every host in nomad_servers:
# user, directories, rendered configuration and systemd unit, then starts it.
- name: Fix Nomad Cluster Configuration
  hosts: nomad_servers
  become: true
  vars:
    # Not referenced in this file; presumably consumed by the
    # nomad-server.hcl.j2 template - verify against the template.
    nomad_servers_list:
      - "100.116.158.95"  # semaphore
      - "100.103.147.94"  # ash2e
      - "100.81.26.3"  # ash1d
      - "100.90.159.68"  # ch2
      - "{{ ansible_default_ipv4.address }}"  # ch3 (will be determined dynamically)

  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    - name: Create nomad user
      ansible.builtin.user:
        name: nomad
        system: true
        shell: /bin/false
        home: /opt/nomad
        create_home: false

    - name: Create Nomad configuration directory
      ansible.builtin.file:
        path: /etc/nomad.d
        state: directory
        mode: '0755'

    - name: Create Nomad data directory
      ansible.builtin.file:
        path: /opt/nomad/data
        state: directory
        mode: '0755'
        owner: nomad
        group: nomad
      ignore_errors: true

    - name: Create Nomad log directory
      ansible.builtin.file:
        path: /var/log/nomad
        state: directory
        mode: '0755'
        owner: nomad
        group: nomad
      ignore_errors: true

    # Owned by nomad and not world-readable, matching the other fix
    # playbooks (the unit below runs as User=nomad, so it can still read it).
    # The file was previously written 0644 with no explicit owner.
    - name: Generate Nomad server configuration
      ansible.builtin.template:
        src: nomad-server.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'
      notify: restart nomad

    - name: Create Nomad systemd service file
      ansible.builtin.copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl

          [Service]
          Type=notify
          User=nomad
          Group=nomad
          ExecStart=/usr/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: '0644'

    - name: Reload systemd daemon
      ansible.builtin.systemd:
        daemon_reload: true

    - name: Enable and start Nomad service
      ansible.builtin.systemd:
        name: nomad
        enabled: true
        state: started

  handlers:
    # Runs at the end of the play when the rendered configuration changed.
    - name: restart nomad
      ansible.builtin.systemd:
        name: nomad
        state: restarted
|
||||
99
configuration/playbooks/fix/fix-nomad-local.yml
Normal file
99
configuration/playbooks/fix/fix-nomad-local.yml
Normal file
@@ -0,0 +1,99 @@
|
||||
---
# Rewrites the local Nomad configuration (single server + client using the
# Podman driver), restarts Nomad and reports the elected leader.
#
# Required variable: nomad_encrypt_key (gossip encryption key). Supply it from
# a vault file or with `-e @secrets.yml`; it must not be committed.
- name: Update Nomad configuration for Podman and fix issues
  hosts: localhost
  become: true
  connection: local

  tasks:
    # Checked before anything is stopped so a missing key cannot leave Nomad
    # down. The key used to be hard-coded here; since it was committed to
    # version control it should be rotated.
    - name: Ensure the gossip encryption key is provided
      ansible.builtin.assert:
        that:
          - nomad_encrypt_key is defined
          - nomad_encrypt_key | length > 0
        fail_msg: "nomad_encrypt_key must be supplied (e.g. from an Ansible Vault file)"
        quiet: true

    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    - name: Update Nomad configuration to use Podman and disable Consul
      ansible.builtin.copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "100.116.158.95"

          server {
            enabled = true
            bootstrap_expect = 1
            encrypt = "{{ nomad_encrypt_key }}"
          }

          client {
            enabled = true
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "100.116.158.95"
            serf = "100.116.158.95"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          # Disable Consul integration for now
          consul {
            address = ""
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'
        backup: true

    - name: Enable Podman socket for systemd
      ansible.builtin.systemd:
        name: podman.socket
        enabled: true
        state: started
      ignore_errors: true

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 5
        timeout: 30

    # `until` makes the retries effective on every Ansible version; without
    # it older releases ignore `retries` altogether.
    - name: Check Nomad status
      ansible.builtin.uri:
        url: http://localhost:4646/v1/status/leader
        method: GET
      register: nomad_status
      until: nomad_status.status | default(0) == 200
      retries: 3
      delay: 5

    - name: Display Nomad status
      ansible.builtin.debug:
        msg: "Nomad leader: {{ nomad_status.json if nomad_status.json is defined else 'No leader elected' }}"
|
||||
72
configuration/playbooks/fix/fix-nomad-podman-config.yml
Normal file
72
configuration/playbooks/fix/fix-nomad-podman-config.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
---
# Renames the Podman plugin stanza in nomad.hcl from "podman" to
# "nomad-driver-podman", restarts Nomad and reports whether the driver loaded.
#
# NOTE(review): `hosts: all` stops Nomad on every inventory host; the sibling
# playbooks target nomad_cluster - confirm this scope is intended.
- name: Fix Nomad Podman Driver Configuration
  hosts: all
  become: true
  vars:
    nomad_user: nomad

  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    # The pattern tolerates any whitespace/indentation between tokens (\s
    # also matches newlines) and escapes the dot in "podman.sock". It still
    # only matches the stanza that uses the user-1001 socket.
    - name: Update Nomad configuration to properly reference Podman plugin
      ansible.builtin.replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'plugin\s+"podman"\s*\{\s*config\s*\{\s*socket_path\s*=\s*"unix:///run/user/1001/podman/podman\.sock"\s*volumes\s*\{\s*enabled\s*=\s*true\s*\}\s*\}\s*\}'
        replace: |
          plugin "nomad-driver-podman" {
            config {
              socket_path = "unix:///run/user/1001/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      ansible.builtin.pause:
        seconds: 15

    # Diagnostic only. Without failed_when a grep that finds nothing aborted
    # the play before the log and final checks below could run.
    - name: Check if Podman driver is now loaded
      ansible.builtin.shell: |
        sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self | grep -A 20 "Driver Status"
      register: driver_status
      failed_when: false
      changed_when: false

    - name: Display driver status
      ansible.builtin.debug:
        var: driver_status.stdout_lines

    - name: Check Nomad logs for successful plugin loading
      ansible.builtin.shell: journalctl -u nomad -n 20 --no-pager | grep -E "(podman|plugin)"
      register: recent_logs
      failed_when: false
      changed_when: false

    - name: Display recent plugin logs
      ansible.builtin.debug:
        var: recent_logs.stdout_lines

    # -self is required: without it `node status -json` prints the list of
    # all nodes, and jq's `.Drivers` cannot index that array.
    - name: Final verification - Test Podman functionality
      ansible.builtin.shell: |
        sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self -json | jq -r '.Drivers | keys[]' | grep -i podman
      register: podman_driver_check
      failed_when: false
      changed_when: false

    - name: Display final result
      ansible.builtin.debug:
        msg: |
          Podman driver status: {{ 'SUCCESS - Driver loaded!' if 'podman' in (podman_driver_check.stdout | default('')) else 'Still checking...' }}
          Available drivers: {{ podman_driver_check.stdout_lines | default(['none']) | join(', ') }}
|
||||
45
configuration/playbooks/fix/fix-nomad-server-config.yml
Normal file
45
configuration/playbooks/fix/fix-nomad-server-config.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
---
# Writes a corrected five-server Nomad configuration on the local machine.
# Nomad is not restarted by this playbook.
#
# Required variable: nomad_encrypt_key (gossip encryption key). Supply it from
# a vault file or with `-e @secrets.yml`; it must not be committed.
- name: Fix Nomad server configuration
  hosts: localhost
  gather_facts: false
  become: true
  tasks:
    # The key used to be hard-coded here; since it was committed to version
    # control it should be rotated.
    - name: Ensure the gossip encryption key is provided
      ansible.builtin.assert:
        that:
          - nomad_encrypt_key is defined
          - nomad_encrypt_key | length > 0
        fail_msg: "nomad_encrypt_key must be supplied (e.g. from an Ansible Vault file)"
        quiet: true

    # Owner, mode and backup mirror the other fix playbooks; the file was
    # previously written with default permissions and no backup.
    - name: Create corrected nomad.hcl
      ansible.builtin.copy:
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'
        backup: true
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"

          bind_addr = "100.116.158.95"

          server {
            enabled = true
            bootstrap_expect = 5
            encrypt = "{{ nomad_encrypt_key }}"
            retry_join = [
              "100.116.158.95", # semaphore
              "100.81.26.3", # ash1d
              "100.103.147.94", # ash2e
              "100.90.159.68", # ch2
              "100.86.141.112" # ch3
            ]
          }

          client {
            enabled = false
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "100.116.158.95:8500"
          }
|
||||
}
|
||||
88
configuration/playbooks/fix/fix-nomad-systemd.yml
Normal file
88
configuration/playbooks/fix/fix-nomad-systemd.yml
Normal file
@@ -0,0 +1,88 @@
|
||||
---
# Regenerates the Nomad systemd unit so ExecStart points at wherever the nomad
# binary actually lives on each host, then starts servers before clients.
- name: Fix Nomad systemd service binary path
  hosts: nomad_cluster
  become: true

  tasks:
    # Lookup only: fails the host if nomad is not on PATH, never "changed".
    - name: Check Nomad binary location
      ansible.builtin.command: which nomad
      register: nomad_binary_path
      changed_when: false

    - name: Display binary path
      ansible.builtin.debug:
        msg: "Nomad binary 位于: {{ nomad_binary_path.stdout }}"

    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped
      ignore_errors: true

    - name: Update Nomad systemd service with correct binary path
      ansible.builtin.copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl

          [Service]
          Type=notify
          User=nomad
          Group=nomad
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: '0644'
      notify: reload systemd

    # daemon_reload here picks up the new unit immediately; the handler only
    # fires at the end of the play.
    - name: Reload systemd and start Nomad servers first
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true
      when: inventory_hostname in groups['nomad_servers']

    - name: Wait for servers to be ready
      ansible.builtin.pause:
        seconds: 15
      when: inventory_hostname in groups['nomad_servers']

    - name: Start Nomad clients
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true
      when: inventory_hostname in groups['nomad_clients']

    - name: Wait for clients to connect
      ansible.builtin.pause:
        seconds: 10
      when: inventory_hostname in groups['nomad_clients']

    # Status query only; a non-zero exit code is reported in the summary
    # below instead of failing the host.
    - name: Check final service status
      ansible.builtin.command: systemctl status nomad --no-pager -l
      register: service_status
      changed_when: false
      ignore_errors: true

    - name: Display service status
      ansible.builtin.debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 服务状态:
          📊 状态: {{ 'SUCCESS' if service_status.rc == 0 else 'FAILED' }}
          💾 二进制路径: {{ nomad_binary_path.stdout }}

  handlers:
    - name: reload systemd
      ansible.builtin.systemd:
        daemon_reload: true
|
||||
79
configuration/playbooks/fix/fix-podman-installation.yml
Normal file
79
configuration/playbooks/fix/fix-podman-installation.yml
Normal file
@@ -0,0 +1,79 @@
|
||||
---
# Installs Podman, buildah, skopeo and podman-compose on the nodes that are
# still missing them, and enables the Podman socket.
- name: Fix Podman installation on remaining nodes
  hosts: semaphore,master,ash3c,hcs
  become: true
  serial: 1  # one node at a time, to avoid affecting several nodes at once

  tasks:
    - name: Current node status
      ansible.builtin.debug:
        msg: "🔧 修复节点: {{ inventory_hostname }}"

    # Probe only: prints NOT_INSTALLED instead of failing.
    - name: Check if Podman is already installed
      ansible.builtin.shell: podman --version 2>/dev/null || echo "NOT_INSTALLED"
      register: podman_check
      changed_when: false

    - name: Install Podman if not present (semaphore special handling)
      ansible.builtin.apt:
        name:
          - podman
          - buildah
          - skopeo
        state: present
        update_cache: true
        force_apt_get: true
      when: inventory_hostname == 'semaphore' and 'NOT_INSTALLED' in podman_check.stdout
      ignore_errors: true

    - name: Install Podman on other nodes
      ansible.builtin.apt:
        name:
          - podman
          - buildah
          - skopeo
        state: present
      when: inventory_hostname != 'semaphore'
      ignore_errors: true

    - name: Install Python dependencies for podman-compose
      ansible.builtin.apt:
        name:
          - python3-pip
          - python3-setuptools
          - python3-yaml
          - python3-dotenv
        state: present
      ignore_errors: true

    - name: Install podman-compose via pip
      ansible.builtin.pip:
        name:
          - podman-compose
        state: present
        executable: pip3
      register: podman_compose_pip
      ignore_errors: true

    # Real fallback now: only attempted when the pip installation failed.
    # Previously both installs always ran, which could leave two copies.
    - name: Alternative podman-compose installation via apt
      ansible.builtin.apt:
        name: podman-compose
        state: present
      when: podman_compose_pip is failed
      ignore_errors: true

    - name: Verify installations
      ansible.builtin.shell: |
        echo "Podman: $(podman --version 2>/dev/null || echo 'FAILED')"
        echo "Podman Compose: $(podman-compose --version 2>/dev/null || echo 'FAILED')"
      register: verify_result
      changed_when: false

    - name: Display verification results
      ansible.builtin.debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 验证结果:
          {{ verify_result.stdout }}

    - name: Enable Podman socket
      ansible.builtin.systemd:
        name: podman.socket
        enabled: true
        state: started
      ignore_errors: true
|
||||
109
configuration/playbooks/fix/fix-server-config.yml
Normal file
109
configuration/playbooks/fix/fix-server-config.yml
Normal file
@@ -0,0 +1,109 @@
|
||||
---
# Replaces the Nomad configuration on every server with a clean
# server+client definition that uses the Podman driver, then restarts Nomad.
#
# Required variables: nomad_datacenter, nomad_region, nomad_bootstrap_expect,
# nomad_encrypt_key.
- name: Fix Nomad server configuration
  hosts: nomad_servers
  become: true
  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    - name: Backup current configuration
      ansible.builtin.copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.backup-server-fix
        remote_src: true

    # Addresses come from the host's default IPv4 interface fact.
    - name: Create clean server configuration
      ansible.builtin.copy:
        content: |
          datacenter = "{{ nomad_datacenter }}"
          region = "{{ nomad_region }}"
          data_dir = "/opt/nomad/data"
          bind_addr = "{{ ansible_default_ipv4.address }}"

          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect }}
            encrypt = "{{ nomad_encrypt_key }}"

            retry_join = [
              "100.116.158.95",
              "100.103.147.94",
              "100.81.26.3",
              "100.90.159.68",
              "100.86.141.112"
            ]
          }

          client {
            enabled = true
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ ansible_default_ipv4.address }}"
            serf = "{{ ansible_default_ipv4.address }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
              recover_stopped = true
            }
          }

          consul {
            auto_advertise = false
            server_auto_join = false
            client_auto_join = false
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Ensure Podman is installed
      ansible.builtin.package:
        name: podman
        state: present

    - name: Enable and start Podman socket
      ansible.builtin.systemd:
        name: podman.socket
        enabled: true
        state: started

    # The binary location differs between hosts, hence the fallback. A
    # non-zero exit code already fails the task, so failed_when is not
    # needed; validation never changes anything on the host.
    - name: Validate Nomad configuration
      ansible.builtin.shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60
|
||||
103
configuration/playbooks/fix/fix-server-network-config.yml
Normal file
103
configuration/playbooks/fix/fix-server-network-config.yml
Normal file
@@ -0,0 +1,103 @@
|
||||
---
# Rewrites the Nomad server configuration so that each server binds to the
# address listed for it in server_ips, then restarts Nomad.
#
# Required variables: nomad_datacenter, nomad_region, nomad_bootstrap_expect,
# nomad_encrypt_key.
- name: Fix Nomad server network configuration
  hosts: nomad_servers
  become: true
  vars:
    # Inventory hostname -> address the server must bind to and advertise.
    server_ips:
      semaphore: "100.116.158.95"
      ash2e: "100.103.147.94"
      ash1d: "100.81.26.3"
      ch2: "100.90.159.68"
      ch3: "100.86.141.112"
  tasks:
    # Checked before Nomad is stopped: a host missing from server_ips used to
    # fail on the lookup after its service had already been shut down.
    - name: Ensure this host has a known server IP
      ansible.builtin.assert:
        that:
          - inventory_hostname in server_ips
        fail_msg: "No entry for {{ inventory_hostname }} in server_ips"
        quiet: true

    - name: Get server IP for this host
      ansible.builtin.set_fact:
        server_ip: "{{ server_ips[inventory_hostname] }}"

    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    # retry_join is rendered from server_ips (same addresses, same order) so
    # the list cannot drift from the mapping above.
    - name: Create corrected server configuration
      ansible.builtin.copy:
        content: |
          datacenter = "{{ nomad_datacenter }}"
          region = "{{ nomad_region }}"
          data_dir = "/opt/nomad/data"
          bind_addr = "{{ server_ip }}"

          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect }}
            encrypt = "{{ nomad_encrypt_key }}"

            retry_join = {{ server_ips.values() | list | to_json }}
          }

          client {
            enabled = true
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ server_ip }}"
            serf = "{{ server_ip }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
              recover_stopped = true
            }
          }

          consul {
            auto_advertise = false
            server_auto_join = false
            client_auto_join = false
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    # A non-zero exit code already fails the task; validation never changes
    # anything on the host.
    - name: Validate Nomad configuration
      ansible.builtin.shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started
        enabled: true

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60
|
||||
39
configuration/playbooks/fix/fix-warden-compose.yml
Normal file
39
configuration/playbooks/fix/fix-warden-compose.yml
Normal file
@@ -0,0 +1,39 @@
|
||||
---
# Ensures /opt/warden exists on the warden host and writes the Vaultwarden
# compose file into it.
- name: Fix Warden docker-compose.yml
  hosts: warden
  gather_facts: false
  become: true

  tasks:
    - name: Ensure /opt/warden directory exists
      ansible.builtin.file:
        state: directory
        path: /opt/warden
        mode: '0755'
        owner: root
        group: root

    # The compose definition is embedded verbatim; only its layout inside the
    # block scalar matters to the consumer.
    - name: Create or update docker-compose.yml with correct indentation
      ansible.builtin.copy:
        owner: root
        group: root
        mode: '0644'
        dest: /opt/warden/docker-compose.yml
        content: |
          services:
            vaultwarden:
              image: hub.git4ta.fun/vaultwarden/server:latest
              security_opt:
                - "seccomp=unconfined"
              env_file:
                - .env
              volumes:
                - ./data:/data
              ports:
                - "980:80"
              restart: always
              networks:
                - vaultwarden_network

          networks:
            vaultwarden_network:
|
||||
Reference in New Issue
Block a user