feat: 迁移基础设施到Nomad和Podman并重构配置

refactor: 更新Ansible Playbooks以支持Nomad集群
docs: 更新文档反映从Docker Swarm到Nomad的迁移
ci: 更新Gitea工作流以支持Podman构建
test: 添加Nomad作业测试文件
build: 更新Makefile以支持Podman操作
chore: 清理旧的Docker Swarm相关文件和配置
This commit is contained in:
2025-09-27 08:04:23 +00:00
parent c0d4cf54dc
commit a06e5e1a00
54 changed files with 2010 additions and 329 deletions

View File

@@ -1,14 +0,0 @@
---
# Read-only diagnostic play: scans the kernel journal for AppArmor/SELinux
# messages and prints every matching line.
- name: Check for AppArmor or SELinux denials
  hosts: germany
  become: true
  tasks:
    - name: Search journalctl for AppArmor/SELinux messages
      # grep exits non-zero when nothing matches; failed_when: false keeps
      # that outcome from failing the task.
      shell: 'journalctl -k | grep -i -e apparmor -e selinux -e "avc: denied"'
      register: security_logs
      failed_when: false
      changed_when: false

    - name: Display security logs
      debug:
        var: security_logs.stdout_lines

View File

@@ -0,0 +1,22 @@
---
# Housekeeping play: removes leftover hashicorp.list.backup-* files from the
# APT sources directory on every host in the nomad_cluster group.
- name: 清理 HashiCorp APT 源备份文件
  hosts: nomad_cluster
  become: true
  tasks:
    # Collect the backup files; the result drives both tasks below.
    - name: 查找所有 HashiCorp 备份文件
      find:
        paths: "/etc/apt/sources.list.d/"
        patterns: "hashicorp.list.backup-*"
      register: backup_files

    # Remove each file reported by the find task.
    - name: 删除所有备份文件
      file:
        state: absent
        path: "{{ item.path }}"
      when: backup_files.files | length > 0
      loop: "{{ backup_files.files }}"

    # Report how many files the find task matched.
    - name: 显示清理结果
      debug:
        msg: "已删除 {{ backup_files.files | length }} 个备份文件"

View File

@@ -1,6 +1,6 @@
---
- name: Configure Podman driver for all Nomad client nodes
hosts: nomad_clients,nomad_servers
hosts: target_nodes
become: yes
tasks:

View File

@@ -1,33 +0,0 @@
---
# Diagnostic play: reports ownership/permission details of the cpuset cgroup
# directory and its nomad subdirectory, then lists the directory contents.
- name: Debug cgroup permissions
  hosts: germany
  become: true
  tasks:
    - name: Check permissions of /sys/fs/cgroup/cpuset/
      stat:
        path: /sys/fs/cgroup/cpuset/
      register: cpuset_dir

    - name: Display cpuset dir stats
      debug:
        var: cpuset_dir.stat

    - name: Check for nomad subdir in cpuset
      stat:
        path: /sys/fs/cgroup/cpuset/nomad
      register: nomad_cpuset_dir
      ignore_errors: true

    # Only shown when the stat above actually found the directory.
    - name: Display nomad cpuset dir stats
      debug:
        var: nomad_cpuset_dir.stat
      when: nomad_cpuset_dir.stat.exists is defined and nomad_cpuset_dir.stat.exists

    - name: List contents of /sys/fs/cgroup/cpuset/
      command: ls -la /sys/fs/cgroup/cpuset/
      register: ls_cpuset
      changed_when: false
      # ls exits non-zero when the directory is missing (e.g. a host without
      # a separate cpuset hierarchy). This play only gathers information, so
      # report an empty listing instead of aborting.
      failed_when: false

    - name: Display contents of /sys/fs/cgroup/cpuset/
      debug:
        var: ls_cpuset.stdout_lines

View File

@@ -1,14 +0,0 @@
---
# Diagnostic play: prints a long listing of the nomad cpuset cgroup directory.
- name: Debug Nomad cgroup subdirectory
  hosts: germany
  become: true
  tasks:
    - name: List contents of /sys/fs/cgroup/cpuset/nomad/
      command: ls -la /sys/fs/cgroup/cpuset/nomad/
      register: ls_nomad_cpuset
      # Never marks the host changed or failed, whatever ls returns.
      failed_when: false
      changed_when: false

    - name: Display contents of /sys/fs/cgroup/cpuset/nomad/
      debug:
        var: ls_nomad_cpuset.stdout_lines

View File

@@ -1,24 +0,0 @@
---
# Diagnostic play: captures the systemd status and the last 100 journal
# entries of nomad.service and prints both in a single message.
- name: Debug Nomad service on germany
  hosts: germany
  gather_facts: false
  tasks:
    - name: Get Nomad service status
      command: systemctl status nomad.service --no-pager -l
      register: nomad_status
      # Read-only query: never report the host as changed.
      changed_when: false
      # A non-zero exit must not stop the play before the output is shown.
      ignore_errors: true

    - name: Get Nomad service journal
      command: journalctl -xeu nomad.service --no-pager -n 100
      register: nomad_journal
      # Read-only query: never report the host as changed.
      changed_when: false
      ignore_errors: true

    - name: Display debug information
      debug:
        msg: |
          --- Nomad Service Status ---
          {{ nomad_status.stdout }}
          {{ nomad_status.stderr }}
          --- Nomad Service Journal ---
          {{ nomad_journal.stdout }}
          {{ nomad_journal.stderr }}

View File

@@ -1,12 +0,0 @@
---
# Installs the static podman-remote build as /usr/local/bin/podman on the
# germany host, owned by root and executable by everyone.
- name: Distribute new podman binary to germany
  hosts: germany
  gather_facts: false
  tasks:
    - name: Copy new podman binary to /usr/local/bin
      # Privilege escalation is requested for this task only.
      become: true
      copy:
        dest: /usr/local/bin/podman
        src: /root/mgmt/configuration/podman-remote-static-linux_amd64
        mode: '0755'
        owner: root
        group: root

View File

@@ -1,14 +0,0 @@
---
# Diagnostic play: lists installed systemd service unit files whose name
# contains "nomad" (case-insensitive) and prints the matches.
- name: Find Nomad service
  hosts: germany
  become: true
  tasks:
    - name: List systemd services and filter for nomad
      shell: systemctl list-unit-files --type=service | grep -i nomad
      register: nomad_services
      # grep exits non-zero when nothing matches; that is not treated as
      # a failure here.
      failed_when: false
      changed_when: false

    - name: Display found services
      debug:
        var: nomad_services.stdout_lines

View File

@@ -1,19 +0,0 @@
---
# Sets root:root ownership on the cpuset cgroup directory and, recursively,
# on its nomad subdirectory.
- name: Fix cgroup permissions for Nomad
  hosts: germany
  become: true
  tasks:
    - name: Recursively change ownership of nomad cgroup directory
      file:
        state: directory
        path: /sys/fs/cgroup/cpuset/nomad
        # recurse applies the owner/group to the directory contents as well.
        recurse: true
        owner: root
        group: root

    - name: Change ownership of the parent cpuset directory
      file:
        state: directory
        path: /sys/fs/cgroup/cpuset/
        owner: root
        group: root

View File

@@ -4,16 +4,9 @@
become: yes
tasks:
- name: 备份现有的 HashiCorp APT 源配置(如果存在)
copy:
src: "/etc/apt/sources.list.d/hashicorp.list"
dest: "/etc/apt/sources.list.d/hashicorp.list.backup-{{ ansible_date_time.epoch }}"
remote_src: yes
ignore_errors: yes
- name: 创建正确的 HashiCorp APT 源配置
copy:
content: "deb [trusted=yes] http://apt.releases.hashicorp.com bookworm main\n"
content: "deb [trusted=yes] http://apt.releases.hashicorp.com {{ ansible_distribution_release }} main\n"
dest: "/etc/apt/sources.list.d/hashicorp.list"
owner: root
group: root

View File

@@ -0,0 +1,68 @@
---
# Installs a pinned Consul release from APT on the master and ash3c nodes,
# removes any manually placed binary from /usr/local/bin first, and prepares
# the consul system user and its data directory.
- name: 在 master 和 ash3c 节点安装 Consul
  hosts: master,ash3c
  become: true
  vars:
    consul_version: "1.21.5"
    # Both target nodes are aarch64. NOTE(review): no task in this play
    # references consul_arch - confirm whether it is still needed.
    consul_arch: "arm64"
  tasks:
    - name: 检查节点架构
      command: uname -m
      register: node_arch
      changed_when: false

    - name: 显示节点架构
      debug:
        msg: "节点 {{ inventory_hostname }} 架构: {{ node_arch.stdout }}"

    # Informational only: the result feeds the status message below.
    - name: 检查是否已安装 consul
      command: which consul
      register: consul_check
      failed_when: false
      changed_when: false

    - name: 显示当前 consul 状态
      debug:
        msg: "Consul 状态: {{ 'already installed' if consul_check.rc == 0 else 'not installed' }}"

    # Runs unconditionally: state=absent is already a no-op when the file is
    # missing, whereas gating on `which consul` would skip the cleanup when
    # the stale binary is not executable and therefore not found on PATH.
    - name: 删除错误的 consul 二进制文件(如果存在)
      file:
        path: /usr/local/bin/consul
        state: absent

    # Best effort: a failing cache refresh does not stop the play.
    - name: 更新 APT 缓存
      apt:
        update_cache: true
      ignore_errors: true

    - name: 安装 consul 通过 APT
      apt:
        # Quoted so the templated, version-pinned spec is always a string.
        name: "consul={{ consul_version }}-1"
        state: present

    - name: 验证 consul 安装
      command: consul version
      register: consul_version_check
      changed_when: false

    - name: 显示安装的 consul 版本
      debug:
        msg: "安装的 Consul 版本: {{ consul_version_check.stdout_lines[0] }}"

    - name: 确保 consul 用户存在
      user:
        name: consul
        system: true
        shell: /bin/false
        home: /opt/consul
        # The home path is created by the directory task below instead.
        create_home: false

    - name: 创建 consul 数据目录
      file:
        path: /opt/consul
        state: directory
        owner: consul
        group: consul
        mode: '0755'

View File

@@ -1,6 +1,6 @@
---
- name: Install Nomad Podman Driver Plugin
hosts: all
hosts: target_nodes
become: yes
vars:
nomad_user: nomad

View File

@@ -1,22 +0,0 @@
---
# Diagnostic play: locates the nomad binary, runs the agent in the foreground
# for a bounded time with the production config, and prints what it wrote to
# stdout and stderr.
- name: Manually run Nomad agent for debugging
  hosts: germany
  become: true
  vars:
    # Seconds the foreground agent may run before `timeout` stops it.
    nomad_debug_run_seconds: 30
  tasks:
    - name: Find Nomad binary path
      shell: which nomad || find /usr -name nomad 2>/dev/null | head -1
      register: nomad_binary_path
      # Pure lookup: never report the host as changed.
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    # `nomad agent` stays in the foreground until it is terminated, so an
    # unbounded run would block this task forever whenever the agent starts
    # successfully. Wrapping it in `timeout` guarantees the task returns and
    # the captured output reaches the display tasks below.
    - name: Run nomad agent directly
      command: >-
        timeout {{ nomad_debug_run_seconds }}
        {{ nomad_binary_path.stdout }} agent
        -config=/etc/nomad.d/nomad.hcl
      register: nomad_run
      failed_when: false

    - name: Display Nomad output
      debug:
        var: nomad_run.stdout

    - name: Display Nomad error output
      debug:
        var: nomad_run.stderr

View File

@@ -1,12 +0,0 @@
---
# Diagnostic play: prints the contents of /etc/nomad.d/nomad.hcl.
- name: Read Nomad config on germany
  hosts: germany
  gather_facts: false
  tasks:
    - name: Read nomad.hcl
      command: cat /etc/nomad.d/nomad.hcl
      register: nomad_config
      # Read-only: never report the host as changed.
      changed_when: false
      # A failing cat must not stop the play before the result is shown.
      ignore_errors: true

    - name: Display config
      debug:
        msg: "{{ nomad_config.stdout }}"