feat: 重构项目目录结构并添加多个功能
- 新增脚本和配置文件用于管理 Nomad 节点和 NFS 存储
- 添加多个 Ansible playbook 用于配置和调试 Nomad 集群
- 新增 Nomad job 文件用于测试 Podman 和 NFS 功能
- 重构 playbooks 目录结构,按功能分类
- 更新 Nomad 客户端和服务端配置模板
- 添加 SSH 密钥分发和配置脚本
- 新增多个调试和修复问题的 playbook
This commit is contained in:
@@ -0,0 +1,57 @@
|
||||
---
# Enables the Podman task driver on every host in `target_nodes`.
#
# Flow: install Podman and its API socket, stop Nomad, add a managed
# `plugin "podman"` stanza to /etc/nomad.d/nomad.hcl, start Nomad again and
# wait for its HTTP port.
#
# Differences from the previous revision of this play:
#   * Podman is installed and its socket enabled BEFORE Nomad is stopped, and
#     failures there are no longer ignored, so a host without a working Podman
#     keeps its running Nomad agent untouched.
#   * The `lineinfile` task that rewrote `plugin "docker"` into
#     `plugin "podman" {` was removed. When no docker line existed it appended
#     an unterminated `plugin "podman" {` at the end of the file, and when one
#     did exist it renamed the docker stanza (keeping its docker-specific body)
#     next to the managed block below.
#   * The managed block is appended at end of file. `insertafter: 'client {'`
#     placed it on the line right after `client {`, i.e. inside the client
#     stanza.
#
# NOTE(review): an existing `plugin "docker"` stanza is now left in place;
# remove it separately if the Docker driver must be disabled.
# NOTE(review): if nomad.hcl already contains an unmanaged `plugin "podman"`
# stanza, this play will add a second one — confirm against the target hosts.
- name: Configure Podman driver for all Nomad client nodes
  hosts: target_nodes
  become: true

  tasks:
    - name: Install Podman if not present
      package:
        name: podman
        state: present

    - name: Enable Podman socket
      systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    # blockinfile owns everything between the BEGIN/END markers, so re-running
    # the play updates this stanza in place instead of appending another copy.
    - name: Add Podman plugin configuration
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} PODMAN PLUGIN CONFIG"
        insertafter: EOF
        block: |
          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started

    # NOTE(review): this probes localhost:4646; on agents whose HTTP address is
    # bound to a non-loopback IP only, the check will time out — confirm.
    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 5
        timeout: 30
|
||||
22
configuration/playbooks/configure/configure-nomad-sudo.yml
Normal file
22
configuration/playbooks/configure/configure-nomad-sudo.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
---
# Installs a sudoers drop-in that lets the `nomad` account run a fixed list of
# commands as any user without a password, on every host in `nomad_clients`.
#
# NOTE(review): the list includes tee, sed, mv, chmod, chown and systemctl,
# which together allow rewriting arbitrary root-owned files — in practice this
# is close to unrestricted root. Confirm that this breadth is intended.
- name: Configure NOPASSWD sudo for nomad user
  hosts: nomad_clients
  become: true

  tasks:
    # The drop-in directory must exist before the rule file can be copied in.
    - name: Ensure sudoers.d directory exists
      file:
        state: directory
        path: /etc/sudoers.d
        mode: '0750'
        owner: root
        group: root

    # `validate` runs visudo against the temporary copy, so a syntactically
    # broken rule is rejected before it replaces the live file.
    - name: Allow nomad user passwordless sudo for required commands
      copy:
        dest: /etc/sudoers.d/nomad
        mode: '0440'
        owner: root
        group: root
        validate: 'visudo -cf %s'
        content: |
          nomad ALL=(ALL) NOPASSWD: /usr/bin/apt, /usr/bin/systemctl, /bin/mkdir, /bin/chown, /bin/chmod, /bin/mv, /bin/sed, /usr/bin/tee, /usr/sbin/usermod, /usr/bin/unzip, /usr/bin/wget
|
||||
226
configuration/playbooks/configure/configure-nomad-tailscale.yml
Normal file
226
configuration/playbooks/configure/configure-nomad-tailscale.yml
Normal file
@@ -0,0 +1,226 @@
|
||||
---
# Configures every host in `nomad_cluster` to run Nomad over Tailscale.
#
# Inputs (inventory / extra vars):
#   nomad_role             "server" or "client"; selects which config is written
#   nomad_encrypt_key      gossip encryption key, used by the server config
#   nomad_datacenter       optional, defaults to "dc1"
#   nomad_bootstrap_expect optional, defaults to 4
#   tailscale_ip           optional per-host fallback address
#
# What it does: picks the node address (live Tailscale IP if it starts with
# "100.", otherwise `tailscale_ip` or `ansible_host`), writes nomad.hcl for the
# role, writes a systemd unit pointing at the discovered nomad binary, starts
# the service and prints a status summary.
#
# Differences from the previous revision of this play:
#   * The server nomad.hcl is written 0640 instead of 0644 because it embeds
#     the gossip encryption key; the unit below runs Nomad as root, which can
#     still read it.
#   * Handlers are flushed before the readiness/status checks, so the summary
#     describes the agent running the NEW configuration rather than the
#     process that was about to be restarted at end of play.
#   * Read-only lookups are marked `changed_when: false`.
#   * The `find` fallback skips directories named `nomad`, which could
#     otherwise end up in ExecStart.
#
# NOTE(review): the unit uses Type=notify; confirm the installed Nomad version
# signals readiness to systemd, otherwise start-up will time out.
- name: 配置 Nomad 集群使用 Tailscale 网络通讯
  hosts: nomad_cluster
  become: true
  gather_facts: false
  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"

  tasks:
    # Best effort: hosts without Tailscale fall through to the inventory
    # address in the next task.
    - name: 获取当前节点的 Tailscale IP
      shell: tailscale ip | head -1
      register: current_tailscale_ip
      changed_when: false
      ignore_errors: true

    - name: 计算用于 Nomad 的地址(优先 Tailscale,回退到 inventory 或 ansible_host)
      set_fact:
        node_addr: "{{ (current_tailscale_ip.stdout | default('')) is match('^100\\.') | ternary((current_tailscale_ip.stdout | trim), (hostvars[inventory_hostname].tailscale_ip | default(ansible_host))) }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # Contains the gossip key, hence not world-readable.
    - name: 生成 Nomad 服务器配置(使用 Tailscale)
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0640'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"

          bind_addr = "{{ node_addr }}"

          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect | default(4) }}

            retry_join = [
              "100.116.158.95", # semaphore
              "100.103.147.94", # ash2e
              "100.81.26.3", # ash1d
              "100.90.159.68" # ch2
            ]

            encrypt = "{{ nomad_encrypt_key }}"
          }

          client {
            enabled = false
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "{{ node_addr }}:8500"
          }
      when: nomad_role == "server"
      notify: restart nomad

    - name: 生成 Nomad 客户端配置(使用 Tailscale)
      copy:
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        content: |
          datacenter = "{{ nomad_datacenter | default('dc1') }}"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"

          bind_addr = "{{ node_addr }}"

          addresses {
            http = "{{ node_addr }}"
            rpc = "{{ node_addr }}"
            serf = "{{ node_addr }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = false
          }

          client {
            enabled = true
            network_interface = "tailscale0"
            cpu_total_compute = 0

            servers = [
              "100.116.158.95:4647", # semaphore
              "100.103.147.94:4647", # ash2e
              "100.81.26.3:4647", # ash1d
              "100.90.159.68:4647" # ch2
            ]
          }

          plugin "podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }

          consul {
            address = "{{ node_addr }}:8500"
          }
      when: nomad_role == "client"
      notify: restart nomad

    # Prefer whatever is on PATH; otherwise search /usr. `! -type d` keeps a
    # directory that happens to be called "nomad" out of ExecStart.
    - name: 检查 Nomad 二进制文件位置
      shell: which nomad || find /usr -name nomad ! -type d 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target

          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "/opt/nomad/data"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Run the pending "restart nomad" handler now, so the checks below observe
    # the agent with the configuration written above.
    - name: 立即应用待处理的 Nomad 重启
      meta: flush_handlers

    # Readiness and status are reported, not enforced: failures are surfaced
    # in the summary task instead of aborting the play.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ node_addr }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    - name: 检查 Nomad 服务状态
      command: systemctl status nomad --no-pager -l
      register: nomad_status
      changed_when: false
      ignore_errors: true

    - name: 显示配置结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 配置完成
          🌐 使用地址: {{ node_addr }}
          🎯 角色: {{ nomad_role }}
          🔧 Nomad 二进制: {{ nomad_binary_path.stdout }}
          📊 服务状态: {{ 'active' if nomad_status.rc == 0 else 'failed' }}
          {% if nomad_status.rc != 0 %}
          ❌ 错误信息:
          {{ nomad_status.stdout }}
          {{ nomad_status.stderr }}
          {% endif %}

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true
|
||||
115
configuration/playbooks/configure/configure-podman-for-nomad.yml
Normal file
115
configuration/playbooks/configure/configure-podman-for-nomad.yml
Normal file
@@ -0,0 +1,115 @@
|
||||
---
# Prepares Podman on every inventory host so a Nomad agent can use the
# system-level Podman API socket at /run/podman/podman.sock.
#
# Steps: install Podman, enable podman.socket, write a system-wide
# containers.conf pointing at the system socket, and — when a `nomad` account
# exists — write the same config for that user, put the user in group
# `podman`, and open the socket to that group. Finishes with a summary.
#
# Differences from the previous revision of this play:
#   * Group `podman` is created BEFORE `nomad` is added to it. Previously the
#     membership task ran first, failed on a missing group, and the failure
#     was hidden by `ignore_errors`.
#   * The socket mode was '066' (no access for the owner, read/write for group
#     and everyone else). It is now 0660 with group `podman`, matching the
#     group membership set up above.
#   * Read-only probes use `command` and are marked `changed_when: false`.
#
# NOTE(review): /run is usually a tmpfs, so ownership/mode set on
# /run/podman and the socket will presumably be reset when podman.socket is
# restarted or the host reboots; a systemd drop-in for podman.socket
# (SocketGroup/SocketMode) would make this persistent — confirm and follow up.
# NOTE(review): the per-user config assumes the nomad home is /home/nomad.
- name: Configure Podman for Nomad Integration
  hosts: all
  become: true
  gather_facts: true

  tasks:
    - name: 显示当前处理的节点
      debug:
        msg: "🔧 正在为 Nomad 配置 Podman: {{ inventory_hostname }}"

    - name: 确保 Podman 已安装
      package:
        name: podman
        state: present

    - name: 启用并启动 Podman socket 服务
      systemd:
        name: podman.socket
        enabled: true
        state: started

    - name: 创建 Podman 系统配置目录
      file:
        path: /etc/containers
        state: directory
        mode: '0755'

    # Replaces the whole file; any existing engine settings are overwritten.
    - name: 配置 Podman 使用系统 socket
      copy:
        content: |
          [engine]
          # 使用系统级 socket 而不是用户级 socket
          active_service = "system"
          [engine.service_destinations]
          [engine.service_destinations.system]
          uri = "unix:///run/podman/podman.sock"
        dest: /etc/containers/containers.conf
        mode: '0644'

    # getent fails when the key is missing; the result drives the
    # `nomad_user_check is succeeded` conditions below.
    - name: 检查是否存在 nomad 用户
      getent:
        database: passwd
        key: nomad
      register: nomad_user_check
      ignore_errors: true

    - name: 为 nomad 用户创建配置目录
      file:
        path: "/home/nomad/.config/containers"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      when: nomad_user_check is succeeded

    - name: 为 nomad 用户配置 Podman
      copy:
        content: |
          [engine]
          active_service = "system"
          [engine.service_destinations]
          [engine.service_destinations.system]
          uri = "unix:///run/podman/podman.sock"
        dest: /home/nomad/.config/containers/containers.conf
        owner: nomad
        group: nomad
        mode: '0644'
      when: nomad_user_check is succeeded

    # Must precede every task that references group `podman`.
    - name: 创建 podman 组(如果不存在)
      group:
        name: podman
        state: present

    - name: 将 nomad 用户添加到 podman 组
      user:
        name: nomad
        groups: podman
        append: true
      when: nomad_user_check is succeeded

    - name: 设置 podman socket 目录权限
      file:
        path: /run/podman
        state: directory
        mode: '0755'
        group: podman
      ignore_errors: true

    # Owner and group `podman` get read/write; everyone else gets nothing.
    - name: 验证 Podman socket 权限
      file:
        path: /run/podman/podman.sock
        group: podman
        mode: '0660'
      when: nomad_user_check is succeeded
      ignore_errors: true

    - name: 验证 Podman 安装
      command: podman --version
      register: podman_version
      changed_when: false

    # Diagnostic only: a failure is reported as WARNING in the summary.
    - name: 测试 Podman 功能
      command: podman info
      register: podman_info
      changed_when: false
      ignore_errors: true

    - name: 显示配置结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} Podman 配置完成
          📦 Podman 版本: {{ podman_version.stdout }}
          🐳 Podman 状态: {{ 'SUCCESS' if podman_info.rc == 0 else 'WARNING' }}
          👤 Nomad 用户: {{ 'FOUND' if nomad_user_check is succeeded else 'NOT FOUND' }}
|
||||
Reference in New Issue
Block a user