feat: 重构项目目录结构并添加多个功能

- 新增脚本和配置文件用于管理Nomad节点和NFS存储
- 添加多个Ansible playbook用于配置和调试Nomad集群
- 新增Nomad job文件用于测试Podman和NFS功能
- 重构playbooks目录结构,按功能分类
- 更新Nomad客户端和服务端配置模板
- 添加SSH密钥分发和配置脚本
- 新增多个调试和修复问题的playbook
This commit is contained in:
2025-09-27 13:05:30 +00:00
parent a06e5e1a00
commit 44b098bd20
98 changed files with 1141 additions and 2 deletions

View File

@@ -0,0 +1,57 @@
---
# Switch the Nomad agents in `target_nodes` to the Podman task driver.
#
# Flow: stop Nomad, make sure Podman and its API socket are available, edit
# /etc/nomad.d/nomad.hcl, start Nomad again and wait for its HTTP port.
- name: Configure Podman driver for all Nomad client nodes
  hosts: target_nodes
  become: true
  tasks:
    # Nomad is stopped first so the configuration is not edited under a
    # running agent.
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # Best effort: a failure here is ignored and the play continues.
    - name: Install Podman if not present
      package:
        name: podman
        state: present
      ignore_errors: true

    # Best effort: a failure here is ignored and the play continues.
    - name: Enable Podman socket
      systemd:
        name: podman.socket
        enabled: true
        state: started
      ignore_errors: true

    # Rewrites an existing `plugin "docker"` header line to `plugin "podman" {`.
    # `backrefs: true` makes this a no-op when no such line exists; without it
    # lineinfile would append a dangling, unterminated `plugin "podman" {`
    # line to the end of the file and leave the HCL unparsable.
    # NOTE(review): when a docker plugin block did exist, the file ends up
    # with this renamed block plus the managed block below — confirm that is
    # the desired outcome.
    - name: Update Nomad configuration to use Podman
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin "docker"'
        line: 'plugin "podman" {'
        backrefs: true
        state: present

    # `plugin` is a top-level agent stanza, so the managed block is appended
    # at the end of the file. The previous `insertafter: 'client {'` placed it
    # on the line right after `client {`, i.e. inside the client block.
    # NOTE(review): if the markers already exist in the file from an earlier
    # run, blockinfile updates the block where it is; hosts configured by the
    # old version may need the misplaced block removed by hand.
    - name: Add Podman plugin configuration
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} PODMAN PLUGIN CONFIG"
        insertafter: EOF
        block: |
          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    # Give the agent 5 seconds, then wait up to 30 seconds for the HTTP port.
    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 5
        timeout: 30

View File

@@ -0,0 +1,22 @@
---
# Grants the `nomad` account passwordless sudo for a fixed list of commands
# on every host in `nomad_clients`.
- name: Configure NOPASSWD sudo for nomad user
  hosts: nomad_clients
  become: true
  tasks:
    # The drop-in directory must exist before the rule file is written.
    - name: Ensure sudoers.d directory exists
      file:
        state: directory
        path: /etc/sudoers.d
        mode: "0750"
        owner: root
        group: root

    # The rule file is checked with `visudo -cf` before it is put in place,
    # so a syntax error cannot break sudo on the host.
    # NOTE(review): the list includes tee, sed, mv, chown and chmod, which
    # together allow arbitrary file changes as root — confirm this breadth is
    # intended.
    - name: Allow nomad user passwordless sudo for required commands
      copy:
        dest: /etc/sudoers.d/nomad
        validate: "visudo -cf %s"
        mode: "0440"
        owner: root
        group: root
        content: |
          nomad ALL=(ALL) NOPASSWD: /usr/bin/apt, /usr/bin/systemctl, /bin/mkdir, /bin/chown, /bin/chmod, /bin/mv, /bin/sed, /usr/bin/tee, /usr/sbin/usermod, /usr/bin/unzip, /usr/bin/wget

View File

@@ -0,0 +1,226 @@
---
# Configure every host in `nomad_cluster` to run Nomad over its Tailscale
# address.
#
# Inputs read from inventory / extra vars:
#   nomad_role             - "server" or "client"; selects which config is written
#   nomad_encrypt_key      - gossip encryption key (server config only)
#   nomad_datacenter       - optional, defaults to "dc1"
#   nomad_bootstrap_expect - optional, defaults to 4
#   tailscale_ip           - optional per-host fallback address
- name: 配置 Nomad 集群使用 Tailscale 网络通讯
  hosts: nomad_cluster
  become: true
  gather_facts: false
  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"
  tasks:
    # Read-only probe; failures are ignored so the fallback below can apply.
    - name: 获取当前节点的 Tailscale IP
      shell: tailscale ip | head -1
      register: current_tailscale_ip
      changed_when: false
      ignore_errors: true

    # Use the probed address when it starts with "100."; otherwise fall back
    # to the host's `tailscale_ip` var, then `ansible_host`, and finally
    # `inventory_hostname` so an inventory without `ansible_host` does not
    # fail with an undefined variable.
    - name: 计算用于 Nomad 的地址(优先 Tailscale回退到 inventory 或 ansible_host
      set_fact:
        node_addr: "{{ (current_tailscale_ip.stdout | default('')) is match('^100\\.') | ternary((current_tailscale_ip.stdout | trim), (hostvars[inventory_hostname].tailscale_ip | default(ansible_host | default(inventory_hostname)))) }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The server config embeds the gossip encryption key, so it is written
    # 0640 instead of world-readable 0644. The unit below runs Nomad as root,
    # which can still read it.
    - name: 生成 Nomad 服务器配置(使用 Tailscale
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0640'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "{{ node_addr }}"
          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }
          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }
          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect | default(4) }}
            retry_join = [
              "100.116.158.95", # semaphore
              "100.103.147.94", # ash2e
              "100.81.26.3", # ash1d
              "100.90.159.68" # ch2
            ]
            encrypt = "{{ nomad_encrypt_key }}"
          }
          client {
            enabled = false
          }
          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }
          consul {
            address = "{{ node_addr }}:8500"
          }
      when: nomad_role == "server"
      notify: restart nomad

    - name: 生成 Nomad 客户端配置(使用 Tailscale
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "{{ node_addr }}"
          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }
          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }
          server {
            enabled = false
          }
          client {
            enabled = true
            network_interface = "tailscale0"
            cpu_total_compute = 0
            servers = [
              "100.116.158.95:4647", # semaphore
              "100.103.147.94:4647", # ash2e
              "100.81.26.3:4647", # ash1d
              "100.90.159.68:4647" # ch2
            ]
          }
          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }
          consul {
            address = "{{ node_addr }}:8500"
          }
      when: nomad_role == "client"
      notify: restart nomad

    # Read-only lookup. The `find` fallback skips directories so a path such
    # as a data or share directory named "nomad" is never used as ExecStart.
    - name: 检查 Nomad 二进制文件位置
      shell: which nomad || find /usr -name nomad ! -type d 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout }} agent -config={{ nomad_config_file }}
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536
          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "/opt/nomad/data"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    # Run any pending "restart nomad" handler now, so that the wait and the
    # status report below describe the agent running the new configuration
    # rather than the one from before this play.
    - name: Apply pending Nomad restart before verification
      meta: flush_handlers

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Best effort: a timeout is ignored so the status report still runs.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ node_addr }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    # Read-only; a non-zero exit is ignored and reported in the summary.
    - name: 检查 Nomad 服务状态
      command: systemctl status nomad --no-pager -l
      register: nomad_status
      changed_when: false
      ignore_errors: true

    - name: 显示配置结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 配置完成
          🌐 使用地址: {{ node_addr }}
          🎯 角色: {{ nomad_role }}
          🔧 Nomad 二进制: {{ nomad_binary_path.stdout }}
          📊 服务状态: {{ 'active' if nomad_status.rc == 0 else 'failed' }}
          {% if nomad_status.rc != 0 %}
          ❌ 错误信息:
          {{ nomad_status.stdout }}
          {{ nomad_status.stderr }}
          {% endif %}

  handlers:
    # Reloads unit files and restarts the agent after a config or unit change.
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true

View File

@@ -0,0 +1,115 @@
---
# Prepare Podman on every host so Nomad can use the system-level Podman
# socket at /run/podman/podman.sock.
#
# Steps: install Podman, enable its socket, write the system-wide
# containers.conf, and, when a `nomad` account exists, give that account a
# matching per-user config plus group access to the socket.
- name: Configure Podman for Nomad Integration
  hosts: all
  become: true
  gather_facts: true
  tasks:
    - name: 显示当前处理的节点
      debug:
        msg: "🔧 正在为 Nomad 配置 Podman: {{ inventory_hostname }}"

    - name: 确保 Podman 已安装
      package:
        name: podman
        state: present

    - name: 启用并启动 Podman socket 服务
      systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: 创建 Podman 系统配置目录
      file:
        path: /etc/containers
        state: directory
        mode: '0755'

    # Replaces /etc/containers/containers.conf with a config that points the
    # default service destination at the system socket.
    - name: 配置 Podman 使用系统 socket
      copy:
        content: |
          [engine]
          # Use the system-level socket instead of a per-user socket
          active_service = "system"
          [engine.service_destinations]
          [engine.service_destinations.system]
          uri = "unix:///run/podman/podman.sock"
        dest: /etc/containers/containers.conf
        mode: '0644'

    # The lookup fails when the account is missing; the failure is ignored
    # and the result gates the nomad-specific tasks below.
    - name: 检查是否存在 nomad 用户
      getent:
        database: passwd
        key: nomad
      register: nomad_user_check
      ignore_errors: true

    - name: 为 nomad 用户创建配置目录
      file:
        path: "/home/nomad/.config/containers"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      when: nomad_user_check is succeeded

    - name: 为 nomad 用户配置 Podman
      copy:
        content: |
          [engine]
          active_service = "system"
          [engine.service_destinations]
          [engine.service_destinations.system]
          uri = "unix:///run/podman/podman.sock"
        dest: /home/nomad/.config/containers/containers.conf
        owner: nomad
        group: nomad
        mode: '0644'
      when: nomad_user_check is succeeded

    # The group is created BEFORE the membership task. In the previous order
    # the membership task ran first, failed on a missing group, and the
    # failure was hidden by ignore_errors, so the first run never added the
    # user to the group.
    - name: 创建 podman 组(如果不存在)
      group:
        name: podman
        state: present
      ignore_errors: true

    - name: 将 nomad 用户添加到 podman 组
      user:
        name: nomad
        groups: podman
        append: true
      when: nomad_user_check is succeeded
      ignore_errors: true

    - name: 设置 podman socket 目录权限
      file:
        path: /run/podman
        state: directory
        mode: '0755'
        group: podman
      ignore_errors: true

    # Owner and `podman` group get read/write, others get nothing. The
    # previous mode '066' is octal 066: no owner bits and read/write for
    # every local user on the root Podman socket.
    # NOTE(review): this only changes the live socket file; confirm whether
    # the podman.socket unit should also be configured so the setting
    # survives a socket restart.
    - name: 验证 Podman socket 权限
      file:
        path: /run/podman/podman.sock
        group: podman
        mode: '0660'
      when: nomad_user_check is succeeded
      ignore_errors: true

    # Read-only checks: they never change the host, so they never report
    # "changed".
    - name: 验证 Podman 安装
      command: podman --version
      register: podman_version
      changed_when: false

    - name: 测试 Podman 功能
      command: podman info
      register: podman_info
      changed_when: false
      ignore_errors: true

    - name: 显示配置结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} Podman 配置完成
          📦 Podman 版本: {{ podman_version.stdout }}
          🐳 Podman 状态: {{ 'SUCCESS' if podman_info.rc == 0 else 'WARNING' }}
          👤 Nomad 用户: {{ 'FOUND' if nomad_user_check is succeeded else 'NOT FOUND' }}