feat: 重构项目目录结构并添加多个功能

- 新增脚本和配置文件用于管理Nomad节点和NFS存储
- 添加多个Ansible playbook用于配置和调试Nomad集群
- 新增Nomad job文件用于测试Podman和NFS功能
- 重构playbooks目录结构,按功能分类
- 更新Nomad客户端和服务端配置模板
- 添加SSH密钥分发和配置脚本
- 新增多个调试和修复问题的playbook
This commit is contained in:
2025-09-27 13:05:30 +00:00
parent a06e5e1a00
commit 44b098bd20
98 changed files with 1141 additions and 2 deletions

View File

@@ -1,69 +0,0 @@
---
# Renames the Nomad agent on each host in the "beijing" group to its
# "bj-"-prefixed node name, restarts the agent, and waits for the HTTP port
# to answer on the host's Tailscale address.
- name: Add Beijing prefix to LXC node names in Nomad configuration
  hosts: beijing
  become: true
  vars:
    # inventory hostname -> node name written into nomad.hcl
    node_prefixes:
      influxdb: "bj-influxdb"
      warden: "bj-warden"
      hcp1: "bj-hcp1"
      hcp2: "bj-hcp2"
    # inventory hostname -> Tailscale address polled after the restart
    tailscale_ips:
      influxdb: "100.100.7.4"
      warden: "100.122.197.112"
      hcp1: "100.97.62.111"
      hcp2: "100.116.112.45"
  tasks:
    # Resolve the per-host values first: a host that is missing from either
    # map fails here, while Nomad is still running, instead of after the
    # service has already been stopped.
    - name: Get current node name from inventory
      ansible.builtin.set_fact:
        current_node_name: "{{ inventory_hostname }}"
        new_node_name: "{{ node_prefixes[inventory_hostname] }}"
        tailscale_ip: "{{ tailscale_ips[inventory_hostname] }}"

    - name: Display node name change
      ansible.builtin.debug:
        msg: "Changing node name from {{ current_node_name }} to {{ new_node_name }}, using Tailscale IP {{ tailscale_ip }}"

    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    # Replaces an existing top-level "name = ..." line, or inserts one after
    # the datacenter line when none exists.
    - name: Update node name in Nomad configuration
      ansible.builtin.lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^name\s*='
        line: 'name = "{{ new_node_name }}"'
        insertafter: 'datacenter = "dc1"'
        state: present

    # No shell features are needed, so run the binary directly; a non-zero
    # exit status fails the task by default. Validation never modifies the
    # host, so it is not reported as a change.
    - name: Validate Nomad configuration
      ansible.builtin.command: nomad config validate /etc/nomad.d/nomad.hcl
      changed_when: false

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready on Tailscale IP
      ansible.builtin.wait_for:
        port: 4646
        host: "{{ tailscale_ip }}"
        delay: 10
        timeout: 60

    - name: Wait for node registration
      ansible.builtin.pause:
        seconds: 15

    # grep reads the file itself; no "cat |" pipeline (and so no shell) needed.
    - name: Display new configuration
      ansible.builtin.command:
        argv:
          - grep
          - '-E'
          - '^(datacenter|name|bind_addr)\s*='
          - /etc/nomad.d/nomad.hcl
      register: nomad_config_check
      changed_when: false

    - name: Show updated configuration
      ansible.builtin.debug:
        var: nomad_config_check.stdout_lines

View File

@@ -0,0 +1,72 @@
---
# Writes a client-only Nomad agent configuration to every host matched by
# "nomad_nodes" except "semaphore", restarts the agent, and prints the
# resulting systemd status.
- name: 配置Nomad客户端节点
  hosts: "nomad_nodes:!semaphore"
  become: true
  vars:
    nomad_config_dir: /etc/nomad.d
  tasks:
    - name: 创建Nomad配置目录
      ansible.builtin.file:
        state: directory
        path: "{{ nomad_config_dir }}"
        owner: root
        group: root
        mode: "0755"

    # The literal block below is the full agent configuration; the only
    # templated value is ansible_host, used for the listen and advertise
    # addresses.
    - name: 复制Nomad客户端配置
      ansible.builtin.copy:
        dest: "{{ nomad_config_dir }}/nomad.hcl"
        owner: root
        group: root
        mode: "0644"
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "0.0.0.0"
          server {
            enabled = false
          }
          client {
            enabled = true
            servers = ["100.116.158.95:4647"]
            host_volume "fnsync" {
              path = "/mnt/fnsync"
              read_only = false
            }
          }
          addresses {
            http = "{{ ansible_host }}"
            rpc = "{{ ansible_host }}"
            serf = "{{ ansible_host }}"
          }
          advertise {
            http = "{{ ansible_host }}:4646"
            rpc = "{{ ansible_host }}:4647"
            serf = "{{ ansible_host }}:4648"
          }
          consul {
            address = "100.116.158.95:8500"
          }

    # Restarts on every run so the agent always picks up the file above.
    - name: 启动Nomad服务
      ansible.builtin.systemd:
        name: nomad
        daemon_reload: true
        enabled: true
        state: restarted

    # Read-only probe, hence never reported as a change.
    - name: 检查Nomad服务状态
      ansible.builtin.command: systemctl status nomad
      changed_when: false
      register: nomad_status

    - name: 显示Nomad服务状态
      ansible.builtin.debug:
        var: nomad_status.stdout_lines

View File

@@ -1,56 +0,0 @@
---
# Leaves a single plugin_dir setting, pointing at /opt/nomad/data/plugins, in
# /etc/nomad.d/nomad.hcl, then restarts Nomad and reports the driver status.
- name: Fix duplicate plugin_dir configuration
  hosts: nomadlxc,hcp
  become: true
  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    # Drop every plugin_dir line whose value is NOT the desired directory.
    # The negative lookahead keeps an already-correct line untouched, so the
    # task stays idempotent, and it also catches stray values other than the
    # legacy /opt/nomad/plugins path.
    - name: Remove duplicate plugin_dir lines
      ansible.builtin.lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir\s*=\s*"(?!/opt/nomad/data/plugins")'
        state: absent

    # Whitespace-tolerant match: a correct line with different spacing is
    # normalised in place instead of gaining a second copy.
    - name: Ensure only one plugin_dir configuration exists
      ansible.builtin.lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir\s*=\s*"/opt/nomad/data/plugins"'
        line: 'plugin_dir = "/opt/nomad/data/plugins"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    # No shell features are needed; a non-zero exit status fails the task by
    # default, and validation itself changes nothing on the host.
    - name: Validate Nomad configuration
      ansible.builtin.command: nomad config validate /etc/nomad.d/nomad.hcl
      changed_when: false

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      ansible.builtin.pause:
        seconds: 15

    # Diagnostic only: never fails the play and never counts as a change.
    # The pipe still requires the shell module.
    - name: Check driver status
      ansible.builtin.shell: nomad node status -self | grep -A 10 "Driver Status"
      environment:
        NOMAD_ADDR: "http://localhost:4646"
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      ansible.builtin.debug:
        var: driver_status.stdout_lines

View File

@@ -1,112 +0,0 @@
---
# Installs the Nomad Podman driver plugin when it is missing, points the
# Nomad configuration at the system-wide Podman socket, restarts Nomad, and
# prints driver status plus recent plugin-related log lines.
- name: Fix Nomad Podman Driver Configuration
  hosts: nomadlxc,hcp
  become: true
  vars:
    nomad_user: nomad
  tasks:
    - name: Stop Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: stopped

    - name: Check if plugin exists
      ansible.builtin.stat:
        path: /opt/nomad/data/plugins/nomad-driver-podman
      register: plugin_exists

    # Everything in this block is skipped when the plugin binary is present.
    - name: Download and install Podman driver plugin
      when: not plugin_exists.stat.exists
      block:
        - name: Download Nomad Podman driver
          ansible.builtin.get_url:
            url: "https://releases.hashicorp.com/nomad-driver-podman/0.6.1/nomad-driver-podman_0.6.1_linux_amd64.zip"
            dest: "/tmp/nomad-driver-podman.zip"
            mode: "0644"

        - name: Extract Podman driver
          ansible.builtin.unarchive:
            src: "/tmp/nomad-driver-podman.zip"
            dest: "/tmp"
            remote_src: true

        # The copy below fails if its destination directory is absent, so
        # create it first. Ownership and mode of an existing directory are
        # deliberately left alone.
        - name: Ensure plugin directory exists
          ansible.builtin.file:
            path: /opt/nomad/data/plugins
            state: directory

        - name: Install Podman driver
          ansible.builtin.copy:
            src: "/tmp/nomad-driver-podman"
            dest: "/opt/nomad/data/plugins/nomad-driver-podman"
            owner: "{{ nomad_user }}"
            group: "{{ nomad_user }}"
            mode: "0755"
            remote_src: true

        - name: Clean up temporary files
          ansible.builtin.file:
            path: "{{ item }}"
            state: absent
          loop:
            - "/tmp/nomad-driver-podman.zip"
            - "/tmp/nomad-driver-podman"

    - name: Update Nomad configuration with correct plugin name and socket path
      ansible.builtin.replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'plugin "podman" \{'
        replace: 'plugin "nomad-driver-podman" {'

    # Matches the per-user (rootless) socket of any numeric UID rather than
    # only UID 1001, and escapes the dot so it is matched literally.
    - name: Update socket path to system socket
      ansible.builtin.replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'socket_path = "unix:///run/user/\d+/podman/podman\.sock"'
        replace: 'socket_path = "unix:///run/podman/podman.sock"'

    # With the regexp, an existing plugin_dir line carrying a different value
    # is rewritten in place; without it lineinfile would append a second
    # plugin_dir setting next to the old one.
    - name: Add plugin_dir configuration if missing
      ansible.builtin.lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir\s*='
        line: 'plugin_dir = "/opt/nomad/data/plugins"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    - name: Ensure Podman socket is enabled and running
      ansible.builtin.systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: Start Nomad service
      ansible.builtin.systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      ansible.builtin.wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      ansible.builtin.pause:
        seconds: 20

    # The two checks below are diagnostics: they never fail the play and are
    # never reported as changes. Both need the shell module for their pipes.
    - name: Check driver status
      ansible.builtin.shell: nomad node status -self | grep -A 10 "Driver Status"
      environment:
        NOMAD_ADDR: "http://localhost:4646"
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      ansible.builtin.debug:
        var: driver_status.stdout_lines

    - name: Check for Podman driver in logs
      ansible.builtin.shell: journalctl -u nomad -n 30 --no-pager | grep -E "(podman|plugin)" | tail -10
      register: plugin_logs
      changed_when: false
      failed_when: false

    - name: Display plugin logs
      ansible.builtin.debug:
        var: plugin_logs.stdout_lines

View File

@@ -1,46 +0,0 @@
---
# Starts the NFS client prerequisites on the "warden" host and mounts
# snail:/fs/1000/nfs at /opt/consul-shared.
- name: Fix NFS mounting on warden node
  hosts: warden
  become: true
  tasks:
    - name: Ensure rpcbind is running
      ansible.builtin.systemd:
        name: rpcbind
        state: started
        enabled: true

    - name: Ensure nfs-client.target is active
      ansible.builtin.systemd:
        name: nfs-client.target
        state: started
        enabled: true

    - name: Create consul-shared directory
      ansible.builtin.file:
        path: /opt/consul-shared
        state: directory
        mode: "0755"

    # state=mounted both mounts the share now and records it in /etc/fstab,
    # so a separate state=present task for persistence is not needed.
    - name: Mount NFS share
      ansible.posix.mount:
        path: /opt/consul-shared
        src: snail:/fs/1000/nfs
        fstype: nfs
        opts: rw,sync,vers=3
        state: mounted

    # Read-only check, so it is never reported as a change.
    - name: Verify mount
      ansible.builtin.command: df -h /opt/consul-shared
      register: mount_result
      changed_when: false

    - name: Display mount result
      ansible.builtin.debug:
        var: mount_result.stdout

42
playbooks/nfs-mount.yml Normal file
View File

@@ -0,0 +1,42 @@
---
# Installs the NFS client on every host in "nomad_nodes" and mounts
# {{ nfs_server }}:{{ nfs_share }} at {{ mount_point }}.
- name: 配置Nomad节点NFS挂载
  hosts: nomad_nodes
  become: true
  vars:
    nfs_server: "snail"
    nfs_share: "/fs/1000/nfs/Fnsync"
    mount_point: "/mnt/fnsync"
  tasks:
    - name: 安装NFS客户端
      ansible.builtin.package:
        name: nfs-common
        state: present

    - name: 创建挂载目录
      ansible.builtin.file:
        path: "{{ mount_point }}"
        state: directory
        mode: "0755"

    # state=mounted mounts the share immediately AND writes the matching
    # /etc/fstab entry, so the mount module is the single owner of that
    # entry. No hand-maintained lineinfile copy is kept in parallel.
    - name: 挂载NFS共享并配置开机自动挂载
      ansible.posix.mount:
        path: "{{ mount_point }}"
        src: "{{ nfs_server }}:{{ nfs_share }}"
        fstype: nfs4
        opts: "rw,relatime,vers=4.2"
        state: mounted

    # Read-only check, so it is never reported as a change.
    - name: 验证挂载
      ansible.builtin.command:
        argv:
          - df
          - '-h'
          - "{{ mount_point }}"
      register: mount_check
      changed_when: false

    - name: 显示挂载信息
      ansible.builtin.debug:
        var: mount_check.stdout_lines

View File

@@ -0,0 +1,43 @@
---
# Installs the NFS client on every host in "nomad_nodes" and mounts
# {{ nfs_server }}:{{ nfs_share }} at {{ mount_point }}.
- name: 设置Nomad节点NFS挂载
  hosts: nomad_nodes
  become: true
  vars:
    nfs_server: "snail"
    nfs_share: "/fs/1000/nfs/Fnsync"
    mount_point: "/mnt/fnsync"
  tasks:
    - name: 安装NFS客户端
      ansible.builtin.package:
        name: nfs-common
        state: present

    - name: 创建挂载目录
      ansible.builtin.file:
        path: "{{ mount_point }}"
        state: directory
        mode: "0755"

    # state=mounted mounts the share immediately AND writes the matching
    # /etc/fstab entry, so the mount module is the single owner of that
    # entry. No hand-maintained lineinfile copy is kept in parallel.
    - name: 挂载NFS共享并配置开机自动挂载
      ansible.posix.mount:
        path: "{{ mount_point }}"
        src: "{{ nfs_server }}:{{ nfs_share }}"
        fstype: nfs4
        opts: "rw,relatime,vers=4.2"
        state: mounted

    # Read-only check, so it is never reported as a change.
    - name: 验证挂载
      ansible.builtin.command:
        argv:
          - df
          - '-h'
          - "{{ mount_point }}"
      register: mount_check
      changed_when: false

    - name: 显示挂载信息
      ansible.builtin.debug:
        var: mount_check.stdout_lines

View File

@@ -1,75 +0,0 @@
---
# Mounts the shared NFS export on the three Consul nodes and creates one
# Consul data directory per node. Every task runs on the control machine and
# shells out to the node over ssh, feeding the sudo password on stdin.
- name: Setup NFS Storage for Consul Cluster
  hosts: localhost
  gather_facts: false
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs
    nfs_mount_path: /opt/consul-shared
    # SECURITY: plaintext sudo password, kept as the default only so existing
    # invocations keep working. Override it with -e sudo_password=... or move
    # it into Ansible Vault.
    sudo_password: "3131"
    # Exact /etc/fstab entry that makes the mount persistent.
    nfs_fstab_line: >-
      {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0
    # One entry per node: "name" is used in file and directory names,
    # "ssh_target" holds the ssh arguments that reach the node.
    consul_nodes:
      - name: master
        ssh_target: "-p 60022 ben@master"
      - name: ash3c
        ssh_target: "ben@ash3c"
      - name: warden
        ssh_target: "ben@warden"
  tasks:
    # Idempotent per-node setup:
    #  * mount only when the path is not already a mount point;
    #  * append the fstab entry only when that exact line is absent.
    # The fstab write goes through "sudo sh -c" so that stdin carries nothing
    # but the password. The earlier form piped the fstab line into a second
    # echo, which discards its stdin, so the entry never reached tee.
    - name: Install NFS client and mount on each node
      ansible.builtin.shell: |
        ssh -o StrictHostKeyChecking=no {{ item.ssh_target }} '
          echo "{{ sudo_password }}" | sudo -S apt update &&
          echo "{{ sudo_password }}" | sudo -S apt install -y nfs-common &&
          echo "{{ sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }} &&
          { mountpoint -q {{ nfs_mount_path }} ||
            echo "{{ sudo_password }}" | sudo -S mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }}; } &&
          { grep -qxF "{{ nfs_fstab_line }}" /etc/fstab ||
            echo "{{ sudo_password }}" | sudo -S sh -c "echo \"{{ nfs_fstab_line }}\" >> /etc/fstab"; }
        '
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: nfs_setup_result

    # One ssh call per loop item, so a failure on any node fails the task
    # instead of being masked by a later command's exit status.
    - name: Test NFS connectivity on all nodes
      ansible.builtin.shell: >-
        ssh -o StrictHostKeyChecking=no {{ item.ssh_target }}
        'echo "{{ sudo_password }}" | sudo -S touch {{ nfs_mount_path }}/test-{{ item.name }}-$(date +%s)
        && ls -la {{ nfs_mount_path }}/'
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: nfs_test_result

    - name: Display NFS test results
      ansible.builtin.debug:
        var: item.stdout_lines
      loop: "{{ nfs_test_result.results }}"
      loop_control:
        label: "{{ item.item.name }}"

    - name: Create Consul data directories on NFS
      ansible.builtin.shell: >-
        ssh -o StrictHostKeyChecking=no {{ item.ssh_target }}
        'echo "{{ sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-{{ item.name }}'
      loop: "{{ consul_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: consul_dirs_result

    - name: Display setup completion
      ansible.builtin.debug:
        msg:
          - "NFS setup completed successfully!"
          - "NFS mount point: {{ nfs_mount_path }}"
          - "Consul data directories created:"
          - " - {{ nfs_mount_path }}/consul-master"
          - " - {{ nfs_mount_path }}/consul-ash3c"
          - " - {{ nfs_mount_path }}/consul-warden"