feat: 迁移基础设施到Nomad和Podman并重构配置

refactor: 更新Ansible Playbooks以支持Nomad集群
docs: 更新文档反映从Docker Swarm到Nomad的迁移
ci: 更新Gitea工作流以支持Podman构建
test: 添加Nomad作业测试文件
build: 更新Makefile以支持Podman操作
chore: 清理旧的Docker Swarm相关文件和配置
This commit is contained in:
2025-09-27 08:04:23 +00:00
parent c0d4cf54dc
commit a06e5e1a00
54 changed files with 2010 additions and 329 deletions

View File

@@ -0,0 +1,69 @@
---
# Renames the Beijing LXC Nomad nodes: each host gets a "bj-" prefixed `name`
# in /etc/nomad.d/nomad.hcl, Nomad is restarted, and the play waits for the
# HTTP port (4646) to answer on the host's Tailscale address.
- name: Add Beijing prefix to LXC node names in Nomad configuration
  hosts: beijing
  become: true
  vars:
    # inventory_hostname -> node name written to nomad.hcl
    node_prefixes:
      influxdb: "bj-influxdb"
      warden: "bj-warden"
      hcp1: "bj-hcp1"
      hcp2: "bj-hcp2"
    # inventory_hostname -> address probed after the restart
    tailscale_ips:
      influxdb: "100.100.7.4"
      warden: "100.122.197.112"
      hcp1: "100.97.62.111"
      hcp2: "100.116.112.45"
  tasks:
    # Runs before Nomad is touched, so a host of the `beijing` group that is
    # missing from either map fails here and keeps its agent running.
    - name: Verify host has a node name and Tailscale IP mapping
      assert:
        that:
          - inventory_hostname in node_prefixes
          - inventory_hostname in tailscale_ips
        fail_msg: "{{ inventory_hostname }} is missing from node_prefixes or tailscale_ips"

    - name: Get current node name from inventory
      set_fact:
        current_node_name: "{{ inventory_hostname }}"
        new_node_name: "{{ node_prefixes[inventory_hostname] }}"
        tailscale_ip: "{{ tailscale_ips[inventory_hostname] }}"

    - name: Display node name change
      debug:
        msg: "Changing node name from {{ current_node_name }} to {{ new_node_name }}, using Tailscale IP {{ tailscale_ip }}"

    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # Rewrites an existing top-level `name = ...` line; when there is none the
    # line is inserted after the `datacenter = "dc1"` line.
    - name: Update node name in Nomad configuration
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^name\s*='
        line: 'name = "{{ new_node_name }}"'
        insertafter: 'datacenter = "dc1"'
        state: present

    # A non-zero exit fails the play here, i.e. with Nomad still stopped.
    - name: Validate Nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready on Tailscale IP
      wait_for:
        port: 4646
        host: "{{ tailscale_ip }}"
        delay: 10
        timeout: 60

    - name: Wait for node registration
      pause:
        seconds: 15

    # Read-only check: grep exit status 1 only means "no matching line", so
    # just real errors (status > 1) fail the task.
    - name: Display new configuration
      command:
        argv:
          - grep
          - -E
          - '^(datacenter|name|bind_addr)\s*='
          - /etc/nomad.d/nomad.hcl
      register: nomad_config_check
      changed_when: false
      failed_when: nomad_config_check.rc > 1

    - name: Show updated configuration
      debug:
        var: nomad_config_check.stdout_lines

View File

@@ -0,0 +1,56 @@
---
# Leaves exactly one `plugin_dir` setting in /etc/nomad.d/nomad.hcl, pointing
# at /opt/nomad/data/plugins, then restarts Nomad and prints the driver status.
- name: Fix duplicate plugin_dir configuration
  hosts: nomadlxc,hcp
  become: true
  tasks:
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # `state: absent` drops every line matching the regexp. Matching on the
    # key alone also removes repeated copies of the correct value, which a
    # single `state: present` call cannot collapse (it only rewrites the last
    # match).
    - name: Remove duplicate plugin_dir lines
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir\s*='
        state: absent

    - name: Ensure only one plugin_dir configuration exists
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir = "/opt/nomad/data/plugins"'
        line: 'plugin_dir = "/opt/nomad/data/plugins"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    # A non-zero exit fails the play here, i.e. with Nomad still stopped.
    - name: Validate Nomad configuration
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      pause:
        seconds: 15

    # Informational only: never fails the play and never reports a change.
    - name: Check driver status
      shell: |
        export NOMAD_ADDR=http://localhost:4646
        nomad node status -self | grep -A 10 "Driver Status"
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      debug:
        var: driver_status.stdout_lines

View File

@@ -0,0 +1,112 @@
---
# Repairs the Podman task-driver setup of a Nomad agent: installs the driver
# plugin binary when it is missing, rewrites the plugin stanza name, socket
# path and plugin_dir in /etc/nomad.d/nomad.hcl, enables the system Podman
# socket, restarts Nomad and prints driver status plus recent log lines.
- name: Fix Nomad Podman Driver Configuration
  hosts: nomadlxc,hcp
  become: true
  vars:
    nomad_user: nomad
    nomad_plugin_dir: /opt/nomad/data/plugins
    # Quoted so the version is never parsed as a number.
    podman_driver_version: "0.6.1"
    podman_driver_release_url: "https://releases.hashicorp.com/nomad-driver-podman/{{ podman_driver_version }}"
  tasks:
    # Done while Nomad is still running: a failed download then aborts the
    # play without leaving the agent stopped.
    - name: Install Podman driver plugin if missing
      block:
        - name: Check if plugin exists
          stat:
            path: "{{ nomad_plugin_dir }}/nomad-driver-podman"
          register: plugin_exists

        - name: Download and install Podman driver plugin
          when: not plugin_exists.stat.exists
          block:
            # `copy` does not create missing parent directories.
            - name: Ensure plugin directory exists
              file:
                path: "{{ nomad_plugin_dir }}"
                state: directory

            # The archive is checked against the release's SHA256SUMS file.
            # NOTE(review): the architecture is fixed to linux_amd64.
            - name: Download Nomad Podman driver
              get_url:
                url: "{{ podman_driver_release_url }}/nomad-driver-podman_{{ podman_driver_version }}_linux_amd64.zip"
                dest: "/tmp/nomad-driver-podman.zip"
                mode: '0644'
                checksum: "sha256:{{ podman_driver_release_url }}/nomad-driver-podman_{{ podman_driver_version }}_SHA256SUMS"

            - name: Extract Podman driver
              unarchive:
                src: "/tmp/nomad-driver-podman.zip"
                dest: "/tmp"
                remote_src: true

            - name: Install Podman driver
              copy:
                src: "/tmp/nomad-driver-podman"
                dest: "{{ nomad_plugin_dir }}/nomad-driver-podman"
                owner: "{{ nomad_user }}"
                group: "{{ nomad_user }}"
                mode: '0755'
                remote_src: true

            - name: Clean up temporary files
              file:
                path: "{{ item }}"
                state: absent
              loop:
                - "/tmp/nomad-driver-podman.zip"
                - "/tmp/nomad-driver-podman"

    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # Renames the plugin stanza label from "podman" to "nomad-driver-podman".
    - name: Update Nomad configuration with correct plugin name and socket path
      replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'plugin "podman" \{'
        replace: 'plugin "nomad-driver-podman" {'

    - name: Update socket path to system socket
      replace:
        path: /etc/nomad.d/nomad.hcl
        regexp: 'socket_path = "unix:///run/user/1001/podman/podman.sock"'
        replace: 'socket_path = "unix:///run/podman/podman.sock"'

    # The regexp makes an existing plugin_dir line (whatever its value) get
    # rewritten in place; without it a second, conflicting plugin_dir line
    # would be appended next to the old one.
    - name: Add plugin_dir configuration if missing
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir\s*='
        line: 'plugin_dir = "{{ nomad_plugin_dir }}"'
        insertafter: 'data_dir = "/opt/nomad/data"'
        state: present

    - name: Ensure Podman socket is enabled and running
      systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Wait for plugins to load
      pause:
        seconds: 20

    # The two checks below are informational: they never fail the play and
    # never report a change.
    - name: Check driver status
      shell: |
        export NOMAD_ADDR=http://localhost:4646
        nomad node status -self | grep -A 10 "Driver Status"
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      debug:
        var: driver_status.stdout_lines

    - name: Check for Podman driver in logs
      shell: journalctl -u nomad -n 30 --no-pager | grep -E "(podman|plugin)" | tail -10
      register: plugin_logs
      changed_when: false
      failed_when: false

    - name: Display plugin logs
      debug:
        var: plugin_logs.stdout_lines

View File

@@ -0,0 +1,46 @@
---
# Mounts the NFS export snail:/fs/1000/nfs on /opt/consul-shared on the warden
# host, persists it in /etc/fstab and prints the resulting `df` line.
- name: Fix NFS mounting on warden node
  hosts: warden
  become: true
  tasks:
    - name: Ensure rpcbind is running
      systemd:
        name: rpcbind
        state: started
        enabled: true

    - name: Ensure nfs-client.target is active
      systemd:
        name: nfs-client.target
        state: started
        enabled: true

    - name: Create consul-shared directory
      file:
        path: /opt/consul-shared
        state: directory
        mode: '0755'

    # `state: mounted` mounts the share now AND writes the matching
    # /etc/fstab entry, so no separate `state: present` task is needed to
    # make the mount persistent.
    - name: Mount NFS share
      mount:
        path: /opt/consul-shared
        src: "snail:/fs/1000/nfs"
        fstype: nfs
        opts: "rw,sync,vers=3"
        state: mounted

    # Read-only verification; must not be reported as a change.
    - name: Verify mount
      command: df -h /opt/consul-shared
      register: mount_result
      changed_when: false

    - name: Display mount result
      debug:
        var: mount_result.stdout

View File

@@ -0,0 +1,75 @@
---
# Mounts a shared NFS export on every Consul node and creates one Consul data
# directory per node on it.
#
# The play runs on the control machine only: each node is reached with a plain
# `ssh` call rather than through the inventory, and privileged commands get the
# sudo password on stdin (`sudo -S`).
- name: Setup NFS Storage for Consul Cluster
  hosts: localhost
  gather_facts: false
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs
    nfs_mount_path: /opt/consul-shared
    # Line appended to /etc/fstab on each node.
    nfs_fstab_entry: "{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0"
    nfs_ssh_user: ben
    # NOTE(review): plaintext credential committed to the repository. It is
    # kept as the default only so existing runs keep working; override it with
    # `-e nfs_sudo_password=...` or an Ansible Vault variable, and rotate it.
    nfs_sudo_password: "3131"
    # Nodes to configure; `ssh_args` carries per-node ssh options.
    nfs_nodes:
      - name: master
        ssh_args: "-p 60022"
      - name: ash3c
        ssh_args: ""
      - name: warden
        ssh_args: ""
  tasks:
    # One ssh session per node. The two guarded steps make re-runs safe:
    #  - the export is only mounted when the path is not already a mount point;
    #  - the fstab line is only appended when no uncommented entry for the
    #    mount point exists yet.
    # The append runs as `sudo sh -c "echo ... >> /etc/fstab"` so that sudo's
    # stdin carries nothing but the password.
    - name: Install NFS client and mount the export on every node
      ansible.builtin.shell: |
        ssh -o StrictHostKeyChecking=no {{ item.ssh_args }} {{ nfs_ssh_user }}@{{ item.name }} '
          echo "{{ nfs_sudo_password }}" | sudo -S apt update &&
          echo "{{ nfs_sudo_password }}" | sudo -S apt install -y nfs-common &&
          echo "{{ nfs_sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }} &&
          { mountpoint -q {{ nfs_mount_path }} ||
            echo "{{ nfs_sudo_password }}" | sudo -S mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }}; } &&
          { grep -qE "^[^#]*[[:space:]]{{ nfs_mount_path }}[[:space:]]" /etc/fstab ||
            echo "{{ nfs_sudo_password }}" | sudo -S sh -c "echo \"{{ nfs_fstab_entry }}\" >> /etc/fstab"; }
        '
      loop: "{{ nfs_nodes }}"
      loop_control:
        label: "{{ item.name }}"
      register: nfs_mount_result

    # `set -e` aborts on the first node that fails; otherwise only the exit
    # status of the last ssh call would decide the task result.
    - name: Test NFS connectivity on all nodes
      ansible.builtin.shell: |
        set -e
        {% for node in nfs_nodes %}
        ssh -o StrictHostKeyChecking=no {{ node.ssh_args }} {{ nfs_ssh_user }}@{{ node.name }} 'echo "{{ nfs_sudo_password }}" | sudo -S touch {{ nfs_mount_path }}/test-{{ node.name }}-$(date +%s) && ls -la {{ nfs_mount_path }}/'
        {% endfor %}
      register: nfs_test_result

    - name: Display NFS test results
      ansible.builtin.debug:
        var: nfs_test_result.stdout_lines

    - name: Create Consul data directories on NFS
      ansible.builtin.shell: |
        set -e
        {% for node in nfs_nodes %}
        ssh -o StrictHostKeyChecking=no {{ node.ssh_args }} {{ nfs_ssh_user }}@{{ node.name }} 'echo "{{ nfs_sudo_password }}" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-{{ node.name }}'
        {% endfor %}
      register: consul_dirs_result

    # The message is a list: three fixed lines followed by one line per node.
    - name: Display setup completion
      ansible.builtin.debug:
        msg: >-
          {{
            [
              'NFS setup completed successfully!',
              'NFS mount point: ' ~ nfs_mount_path,
              'Consul data directories created:'
            ]
            + nfs_nodes
              | map(attribute='name')
              | map('regex_replace', '^', '  - ' ~ nfs_mount_path ~ '/consul-')
              | list
          }}