feat: 重构项目目录结构并添加多个功能

- 新增脚本和配置文件用于管理Nomad节点和NFS存储
- 添加多个Ansible playbook用于配置和调试Nomad集群
- 新增Nomad job文件用于测试Podman和NFS功能
- 重构playbooks目录结构，按功能分类
- 更新Nomad客户端和服务端配置模板
- 添加SSH密钥分发和配置脚本
- 新增多个调试和修复问题的playbook
This commit is contained in:
2025-09-27 13:05:30 +00:00
parent a06e5e1a00
commit 44b098bd20
98 changed files with 1141 additions and 2 deletions

View File

@@ -0,0 +1,69 @@
---
# Renames the Nomad nodes in the `beijing` group by writing a "bj-" prefixed
# `name` into /etc/nomad.d/nomad.hcl, then restarts Nomad and waits for its
# HTTP port (4646) on the host's Tailscale address.
- name: Add Beijing prefix to LXC node names in Nomad configuration
  hosts: beijing
  become: true
  vars:
    # inventory hostname -> node name written into nomad.hcl
    node_prefixes:
      influxdb: "bj-influxdb"
      warden: "bj-warden"
      hcp1: "bj-hcp1"
      hcp2: "bj-hcp2"
    # inventory hostname -> address polled after the restart
    tailscale_ips:
      influxdb: "100.100.7.4"
      warden: "100.122.197.112"
      hcp1: "100.97.62.111"
      hcp2: "100.116.112.45"
  tasks:
    # Resolve everything host-specific BEFORE touching the service, so a host
    # that is missing from either map fails here instead of being left with
    # Nomad stopped.
    - name: Ensure this host has a node name and Tailscale IP mapping
      assert:
        that:
          - inventory_hostname in node_prefixes
          - inventory_hostname in tailscale_ips
        fail_msg: "{{ inventory_hostname }} is missing from node_prefixes or tailscale_ips"

    - name: Get current node name from inventory
      set_fact:
        current_node_name: "{{ inventory_hostname }}"
        new_node_name: "{{ node_prefixes[inventory_hostname] }}"
        tailscale_ip: "{{ tailscale_ips[inventory_hostname] }}"

    - name: Display node name change
      debug:
        msg: "Changing node name from {{ current_node_name }} to {{ new_node_name }}, using Tailscale IP {{ tailscale_ip }}"

    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # Replaces an existing top-level `name = ...` line; if there is none, the
    # line is inserted after the datacenter line.
    - name: Update node name in Nomad configuration
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^name\s*='
        line: 'name = "{{ new_node_name }}"'
        insertafter: 'datacenter = "dc1"'
        state: present

    # Read-only check: a non-zero exit code fails the play before the service
    # is started with a broken configuration.
    - name: Validate Nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false
      failed_when: config_validation.rc != 0

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready on Tailscale IP
      wait_for:
        port: 4646
        host: "{{ tailscale_ip }}"
        delay: 10
        timeout: 60

    - name: Wait for node registration
      pause:
        seconds: 15

    # grep reads the file directly (no `cat |` pipeline, so no shell needed).
    # As before, the task fails when none of the three keys is present.
    - name: Display new configuration
      command:
        argv:
          - grep
          - -E
          - '^(datacenter|name|bind_addr)\s*='
          - /etc/nomad.d/nomad.hcl
      register: nomad_config_check
      changed_when: false

    - name: Show updated configuration
      debug:
        var: nomad_config_check.stdout_lines

View File

@@ -0,0 +1,56 @@
---
# Leaves /etc/nomad.d/nomad.hcl with a plugin_dir that points at
# /opt/nomad/data/plugins, then restarts Nomad and prints the driver status.
- name: Fix duplicate plugin_dir configuration
  hosts: nomadlxc,hcp
  become: true
  tasks:
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # Removes EVERY plugin_dir line whose value is not the wanted path
    # (state=absent drops all matching lines). The pattern tolerates any
    # spacing around "=", so aligned/re-formatted HCL is handled as well as
    # the literal `plugin_dir = "/opt/nomad/plugins"` line.
    - name: Remove duplicate plugin_dir lines
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^\s*plugin_dir\s*=(?!\s*"/opt/nomad/data/plugins"\s*$)'
        state: absent

    # Any plugin_dir line still present now carries the wanted value; this
    # normalises it, or inserts the line after data_dir when none is left.
    - name: Ensure only one plugin_dir configuration exists
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^\s*plugin_dir\s*='
        line: 'plugin_dir = "/opt/nomad/data/plugins"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    # Read-only check; a non-zero exit code aborts before Nomad is started.
    - name: Validate Nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false
      failed_when: config_validation.rc != 0

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      pause:
        seconds: 15

    # Informational only: never fails the play and never reports a change.
    - name: Check driver status
      shell: nomad node status -self | grep -A 10 "Driver Status"
      environment:
        NOMAD_ADDR: http://localhost:4646
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      debug:
        var: driver_status.stdout_lines

View File

@@ -0,0 +1,112 @@
---
# Installs the nomad-driver-podman plugin when it is missing, points
# nomad.hcl at the plugin and at the system-wide Podman socket, restarts
# Nomad and prints driver status plus recent plugin-related log lines.
- name: Fix Nomad Podman Driver Configuration
  hosts: nomadlxc,hcp
  become: true
  vars:
    nomad_user: nomad
  tasks:
    # The plugin is installed BEFORE Nomad is stopped: it is only installed
    # when the file does not exist yet, and a failed download then leaves the
    # running agent untouched.
    - name: Install Podman driver plugin if missing
      block:
        - name: Check if plugin exists
          stat:
            path: /opt/nomad/data/plugins/nomad-driver-podman
          register: plugin_exists

        - name: Download and install Podman driver plugin
          when: not plugin_exists.stat.exists
          block:
            # NOTE(review): no checksum is verified for this download.
            - name: Download Nomad Podman driver
              get_url:
                url: "https://releases.hashicorp.com/nomad-driver-podman/0.6.1/nomad-driver-podman_0.6.1_linux_amd64.zip"
                dest: "/tmp/nomad-driver-podman.zip"
                mode: '0644'

            - name: Extract Podman driver
              unarchive:
                src: "/tmp/nomad-driver-podman.zip"
                dest: "/tmp"
                remote_src: true

            # The copy below fails when its destination directory is absent.
            - name: Ensure plugin directory exists
              file:
                path: /opt/nomad/data/plugins
                state: directory

            - name: Install Podman driver
              copy:
                src: "/tmp/nomad-driver-podman"
                dest: "/opt/nomad/data/plugins/nomad-driver-podman"
                owner: "{{ nomad_user }}"
                group: "{{ nomad_user }}"
                mode: '0755'
                remote_src: true

            - name: Clean up temporary files
              file:
                path: "{{ item }}"
                state: absent
              loop:
                - "/tmp/nomad-driver-podman.zip"
                - "/tmp/nomad-driver-podman"

    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    - name: Update Nomad configuration with correct plugin name and socket path
      replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'plugin "podman" \{'
        replace: 'plugin "nomad-driver-podman" {'

    - name: Update socket path to system socket
      replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'socket_path = "unix:///run/user/1001/podman/podman.sock"'
        replace: 'socket_path = "unix:///run/podman/podman.sock"'

    # The regexp makes this REPLACE an existing plugin_dir line that has a
    # different value or spacing; without it a second, conflicting plugin_dir
    # line would be added next to the old one.
    - name: Add plugin_dir configuration if missing
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^\s*plugin_dir\s*='
        line: 'plugin_dir = "/opt/nomad/data/plugins"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    - name: Ensure Podman socket is enabled and running
      systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      pause:
        seconds: 20

    # Informational only: never fails the play and never reports a change.
    - name: Check driver status
      shell: nomad node status -self | grep -A 10 "Driver Status"
      environment:
        NOMAD_ADDR: http://localhost:4646
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      debug:
        var: driver_status.stdout_lines

    # Informational only: last matching lines from the 30 most recent entries.
    - name: Check for Podman driver in logs
      shell: journalctl -u nomad -n 30 --no-pager | grep -E "(podman|plugin)" | tail -10
      register: plugin_logs
      changed_when: false
      failed_when: false

    - name: Display plugin logs
      debug:
        var: plugin_logs.stdout_lines

View File

@@ -0,0 +1,46 @@
---
# Starts the NFS client prerequisites on `warden` and mounts
# snail:/fs/1000/nfs at /opt/consul-shared (NFSv3), persisted in /etc/fstab.
- name: Fix NFS mounting on warden node
  hosts: warden
  become: true
  tasks:
    - name: Ensure rpcbind is running
      systemd:
        name: rpcbind
        state: started
        enabled: true

    - name: Ensure nfs-client.target is active
      systemd:
        name: nfs-client.target
        state: started
        enabled: true

    - name: Create consul-shared directory
      file:
        path: /opt/consul-shared
        state: directory
        mode: '0755'

    # state=mounted both mounts the share and writes the /etc/fstab entry, so
    # no separate state=present task is needed for persistence.
    - name: Mount NFS share
      mount:
        path: /opt/consul-shared
        src: "snail:/fs/1000/nfs"
        fstype: nfs
        opts: "rw,sync,vers=3"
        state: mounted

    # Read-only check; must not be reported as a change.
    - name: Verify mount
      command: df -h /opt/consul-shared
      register: mount_result
      changed_when: false

    - name: Display mount result
      debug:
        var: mount_result.stdout

View File

@@ -0,0 +1,82 @@
---
# Mounts the shared NFS export on every host, choosing mount options by
# container type (member of the `lxc` group or not) and by location (a fixed
# list of overseas hosts), then creates the per-host Nomad directories on it.
- name: Setup NFS for different container types
  hosts: all
  become: true
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs/Fnsync
    nfs_mount_path: /mnt/fnsync
    nfs_options_local: "rw,sync,vers=4.2"
    nfs_options_overseas: "rw,sync,vers=3,timeo=600,retrans=2"
  tasks:
    - name: Detect container type and location
      set_fact:
        container_type: "{{ 'lxc' if inventory_hostname in groups['lxc'] else 'pve' }}"
        is_overseas: "{{ inventory_hostname in ['ash1d', 'ash2e', 'ash3c', 'ch2', 'ch3'] }}"

    - name: Install NFS client for all nodes
      package:
        name: nfs-common
        state: present

    - name: Create mount directory for all nodes
      file:
        path: "{{ nfs_mount_path }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: Mount NFS for local LXC containers (direct mount)
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_local }}"
        state: mounted
      when: container_type == 'lxc' and not is_overseas

    - name: Mount NFS for overseas PVE containers (with retry options)
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_overseas }}"
        state: mounted
      when: container_type == 'pve' and is_overseas

    # Runs on every host, including the combinations the two tasks above skip
    # (overseas LXC, local PVE): those only get an fstab entry here.
    - name: Ensure NFS mount persists after reboot
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_local if container_type == 'lxc' and not is_overseas else nfs_options_overseas }}"
        state: present

    # `df -h <dir>` succeeds for any existing directory, mounted or not, so it
    # cannot tell success from failure. `mountpoint -q` exits 0 only when the
    # path really is a mount point.
    - name: Verify NFS mount
      command: mountpoint -q "{{ nfs_mount_path }}"
      register: mount_result
      changed_when: false
      failed_when: false

    - name: Display mount status
      debug:
        msg: "{{ inventory_hostname }} - {{ container_type }} - {{ '海外' if is_overseas else '本地' }} - Mount: {{ '成功' if mount_result.rc == 0 else '失败' }}"

    # Only create directories when the share is actually mounted; otherwise
    # they would land on the local disk underneath the mount point.
    - name: Create Nomad directories for LXC containers
      file:
        path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      when: container_type == 'lxc' and mount_result.rc == 0

    - name: Create shared volumes directory for PVE containers
      file:
        path: "{{ nfs_mount_path }}/nomad/volumes/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      when: container_type == 'pve' and mount_result.rc == 0

View File

@@ -0,0 +1,75 @@
---
# Runs on the control node and drives three Consul hosts over plain ssh
# (user `ben`, password sudo): installs the NFS client, mounts the shared
# export, records it in /etc/fstab, then creates one Consul data directory
# per host on the share.
- name: Setup NFS Storage for Consul Cluster
  hosts: localhost
  gather_facts: false
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs/Fnsync
    nfs_mount_path: /mnt/fnsync
    # SECURITY(review): plaintext sudo password committed to the repository.
    # The value is unchanged from the original tasks; move it to Ansible Vault
    # (or switch these hosts to key-based `become`) and rotate it.
    remote_sudo_password: "3131"
    # One entry per remote host; ssh_opts carries that host's ssh flags
    # (only `master` uses a non-default port).
    # NOTE(review): StrictHostKeyChecking=no disables host-key verification.
    consul_nodes:
      - name: master
        ssh_opts: "-o StrictHostKeyChecking=no -p 60022"
      - name: ash3c
        ssh_opts: "-o StrictHostKeyChecking=no"
      - name: warden
        ssh_opts: "-o StrictHostKeyChecking=no"
  tasks:
    # Changes compared with the former three copy-pasted tasks:
    #  * `mount` is skipped when the path is already a mount point, so a
    #    re-run does not fail or stack a second mount.
    #  * The fstab entry is written by a root shell and only when missing. The
    #    old `echo entry | echo pw | sudo -S tee -a /etc/fstab` discarded the
    #    entry (echo ignores stdin) and could append the password instead.
    - name: Install NFS client and mount on cluster nodes
      ansible.builtin.shell: |
        ssh {{ item.ssh_opts }} ben@{{ item.name }} '
          echo "{{ remote_sudo_password }}" | sudo -S apt update &&
          echo "{{ remote_sudo_password }}" | sudo -S apt install -y nfs-common &&
          echo "{{ remote_sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }} &&
          echo "{{ remote_sudo_password }}" | sudo -S sh -c "mountpoint -q {{ nfs_mount_path }} || mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }}" &&
          echo "{{ remote_sudo_password }}" | sudo -S sh -c "grep -qsF \"{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} \" /etc/fstab || echo \"{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0\" >> /etc/fstab"
        '
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: nfs_setup_result

    # One ssh call per loop item, so a failure on any host fails the task
    # (previously only the last of three commands decided the result).
    - name: Test NFS connectivity on all nodes
      ansible.builtin.shell: |
        ssh {{ item.ssh_opts }} ben@{{ item.name }} 'echo "{{ remote_sudo_password }}" | sudo -S touch {{ nfs_mount_path }}/test-{{ item.name }}-$(date +%s) && ls -la {{ nfs_mount_path }}/'
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: nfs_test_result

    - name: Display NFS test results
      ansible.builtin.debug:
        msg: "{{ item.stdout_lines }}"
      loop: "{{ nfs_test_result.results }}"
      loop_control:
        label: "{{ item.item.name }}"

    - name: Create Consul data directories on NFS
      ansible.builtin.shell: |
        ssh {{ item.ssh_opts }} ben@{{ item.name }} 'echo "{{ remote_sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-{{ item.name }}'
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: consul_dirs_result

    - name: Display setup completion
      ansible.builtin.debug:
        msg:
          - "NFS setup completed successfully!"
          - "NFS mount point: {{ nfs_mount_path }}"
          - "Consul data directories created:"
          - " - {{ nfs_mount_path }}/consul-master"
          - " - {{ nfs_mount_path }}/consul-ash3c"
          - " - {{ nfs_mount_path }}/consul-warden"

View File

@@ -0,0 +1,50 @@
---
# Adds a managed block to /etc/nomad.d/nomad.hcl that enables volumes for the
# Podman driver and declares the "nfs-shared" host volume, then restarts
# Nomad when the file changed and reports whether the agent answers.
- name: Configure Nomad client for NFS volumes
  hosts: nomad_clients
  become: true
  vars:
    nfs_mount_path: /mnt/fnsync
  tasks:
    # NOTE(review): the playbooks that set plugin_dir use
    # /opt/nomad/data/plugins, not this path — confirm this directory is
    # still needed.
    - name: Create Nomad plugin directory for NFS
      file:
        path: /opt/nomad/plugins
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    # The marker comments let blockinfile find and update this block on later
    # runs instead of appending a second copy.
    - name: Configure Nomad client to use NFS volumes
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} NFS VOLUME CONFIGURATION"
        block: |
          plugin "nomad-driver-podman" {
            config {
              volumes {
                enabled = true
              }
            }
          }
          client {
            host_volume "nfs-shared" {
              path = "{{ nfs_mount_path }}/nomad/volumes"
              read_only = false
            }
          }
        insertafter: 'data_dir = "/opt/nomad/data"'
      register: nfs_volume_config

    # Same read-only validation the other Nomad playbooks run before
    # (re)starting the agent; fails the play on an invalid configuration.
    - name: Validate Nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      changed_when: false

    # Restart only when the configuration actually changed, so re-running the
    # playbook does not bounce a healthy agent.
    - name: Restart Nomad service to apply changes
      systemd:
        name: nomad
        state: restarted
      when: nfs_volume_config is changed

    # The agent needs a moment after a restart; retry before reporting a
    # failure. Still non-fatal, the outcome is only used for the message below.
    - name: Verify Nomad client configuration
      command: nomad node status -self
      register: nomad_status
      changed_when: false
      retries: 6
      delay: 5
      until: nomad_status.rc == 0
      ignore_errors: true

    - name: Display Nomad status
      debug:
        msg: "{{ inventory_hostname }} - Nomad status: {{ '运行中' if nomad_status.rc == 0 else '异常' }}"

View File

@@ -0,0 +1,63 @@
---
# Installs the NFS client on every host of `nomad_cluster`, mounts the shared
# export (NFS 4.2) at nfs_mount_path and creates the Nomad directories on it.
- name: Setup NFS Storage for Nomad Cluster
  hosts: nomad_cluster
  become: true
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs/Fnsync
    nfs_mount_path: /mnt/fnsync
    nfs_options: "rw,sync,vers=4.2"
  tasks:
    - name: Install NFS client packages
      package:
        name: nfs-common
        state: present

    - name: Create NFS mount directory
      file:
        path: "{{ nfs_mount_path }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # state=mounted both mounts the share and writes the /etc/fstab entry, so
    # no separate state=present task is needed for reboot persistence.
    - name: Mount NFS share
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options }}"
        state: mounted

    # Read-only check; must not be reported as a change.
    - name: Verify NFS mount
      command: df -h "{{ nfs_mount_path }}"
      register: mount_result
      changed_when: false

    - name: Display mount result
      debug:
        var: mount_result.stdout

    # Per-host directory, named after the inventory hostname.
    - name: Create Nomad data directories on NFS
      file:
        path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Create shared volumes directory
      file:
        path: "{{ nfs_mount_path }}/nomad/volumes"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'