feat: 重构项目目录结构并添加多个功能
- 新增脚本和配置文件用于管理Nomad节点和NFS存储 - 添加多个Ansible playbook用于配置和调试Nomad集群 - 新增Nomad job文件用于测试Podman和NFS功能 - 重构playbooks目录结构,按功能分类 - 更新Nomad客户端和服务端配置模板 - 添加SSH密钥分发和配置脚本 - 新增多个调试和修复问题的playbook
This commit is contained in:
parent
a06e5e1a00
commit
44b098bd20
|
|
@ -0,0 +1,20 @@
|
|||
# Ansible inventory for the Nomad nodes and their shared NFS settings.
#
# SECURITY: the SSH/sudo password is no longer stored in this file. Export it
# before running Ansible, e.g.:
#   export NOMAD_NODE_PASSWORD='...'
# (or define nomad_node_password in an ansible-vault encrypted vars file).

[nomad_nodes]
# Local node (NFS is already mounted through PVE)
semaphore ansible_host=100.116.158.95 ansible_user=root

# Cloud server nodes (NFS mount still has to be configured)
ash1d.global ansible_host=100.81.26.3 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}"
ash2e.global ansible_host=100.103.147.94 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}"
ch2.global ansible_host=100.90.159.68 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}"
ch3.global ansible_host=100.86.141.112 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}"
master ansible_host=100.117.106.136 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}" ansible_port=60022
ash3c ansible_host=100.116.80.94 ansible_user=ben ansible_password="{{ nomad_node_password }}" ansible_become_password="{{ nomad_node_password }}"

[nomad_nodes:vars]
# Password shared by the cloud nodes, resolved lazily from the environment
nomad_node_password="{{ lookup('env', 'NOMAD_NODE_PASSWORD') }}"

# NFS settings
nfs_server=snail
nfs_share=/fs/1000/nfs/Fnsync
mount_point=/mnt/fnsync

# Ansible settings
# NOTE(review): host key checking is disabled for every node in this group;
# confirm this is acceptable for the network these hosts are reached over.
ansible_ssh_common_args='-o StrictHostKeyChecking=no'
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
# NFS CSI volume specification for the "nfs-fnsync" volume.
# Consumed by `nomad volume create` / `nomad volume register`.
type        = "csi"
id          = "nfs-fnsync"
name        = "nfs-fnsync"
external_id = "nfs-fnsync"

# Plugin settings
plugin_id    = "nfs"
capacity_min = "1GiB"
capacity_max = "100GiB"

# Access mode: since Nomad 1.1 the access/attachment modes of a volume
# specification are declared inside a (repeatable) capability block rather
# than as top-level attributes.
capability {
  access_mode     = "single-node-writer"
  attachment_mode = "file-system"
}

# Mount options
mount_options {
  fs_type     = "nfs4"
  mount_flags = ["rw", "relatime", "vers=4.2"]
}

# Topology constraints
topology_request {
  preferred {
    topology {
      segments = {
        "rack" = "rack-1"
      }
    }
  }

  required {
    topology {
      segments = {
        "datacenter" = "dc1"
      }
    }
  }
}

# Plugin parameters: NFS server and exported share
parameters {
  server = "snail"
  share  = "/fs/1000/nfs/Fnsync"
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
---
# Mounts the shared NFS export on every host, choosing mount options by
# container type (membership of the "lxc" inventory group) and by location
# (a fixed list of overseas hostnames), then prepares per-host Nomad
# directories on the share.
- name: Setup NFS for different container types
  hosts: all
  become: true
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs/Fnsync
    nfs_mount_path: /mnt/fnsync
    nfs_options_local: "rw,sync,vers=4.2"
    nfs_options_overseas: "rw,sync,vers=3,timeo=600,retrans=2"

  tasks:
    - name: Detect container type and location
      set_fact:
        # default([]) keeps the play working when the inventory has no
        # "lxc" group at all.
        container_type: "{{ 'lxc' if inventory_hostname in (groups['lxc'] | default([])) else 'pve' }}"
        is_overseas: "{{ inventory_hostname in ['ash1d', 'ash2e', 'ash3c', 'ch2', 'ch3'] }}"

    - name: Install NFS client for all nodes
      package:
        name: nfs-common
        state: present

    - name: Create mount directory for all nodes
      file:
        path: "{{ nfs_mount_path }}"
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Mount NFS for local LXC containers (direct mount)
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_local }}"
        state: mounted
      when: container_type == 'lxc' and not (is_overseas | bool)

    - name: Mount NFS for overseas PVE containers (with retry options)
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_overseas }}"
        state: mounted
      when: container_type == 'pve' and (is_overseas | bool)

    - name: Ensure NFS mount persists after reboot
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options_local if container_type == 'lxc' and not (is_overseas | bool) else nfs_options_overseas }}"
        state: present

    # `df -h <dir>` exits 0 for any existing directory, mounted or not, so it
    # cannot detect a missing mount; `mountpoint -q` only succeeds when the
    # path really is a mount point.
    - name: Verify NFS mount
      command: mountpoint -q "{{ nfs_mount_path }}"
      register: mount_result
      changed_when: false
      failed_when: false

    - name: Display mount status
      debug:
        msg: "{{ inventory_hostname }} - {{ container_type }} - {{ '海外' if (is_overseas | bool) else '本地' }} - Mount: {{ '成功' if mount_result.rc == 0 else '失败' }}"

    # The directory tasks are skipped when nothing is mounted; otherwise the
    # directories would be created on the local disk underneath the mount
    # point.
    - name: Create Nomad directories for LXC containers
      file:
        path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"
      when:
        - container_type == 'lxc'
        - mount_result.rc == 0

    - name: Create shared volumes directory for PVE containers
      file:
        path: "{{ nfs_mount_path }}/nomad/volumes/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"
      when:
        - container_type == 'pve'
        - mount_result.rc == 0
|
||||
|
|
@ -4,8 +4,8 @@
|
|||
gather_facts: false
|
||||
vars:
|
||||
nfs_server: snail
|
||||
nfs_export_path: /fs/1000/nfs
|
||||
nfs_mount_path: /opt/consul-shared
|
||||
nfs_export_path: /fs/1000/nfs/Fnsync
|
||||
nfs_mount_path: /mnt/fnsync
|
||||
|
||||
tasks:
|
||||
- name: Install NFS client and mount on master
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
---
# Adds the podman volume setting and the "nfs-shared" host volume to the
# Nomad client configuration, restarting Nomad only when the file changed.
- name: Configure Nomad client for NFS volumes
  hosts: nomad_clients
  become: true
  vars:
    nfs_mount_path: /mnt/fnsync

  tasks:
    - name: Create Nomad plugin directory for NFS
      file:
        path: /opt/nomad/plugins
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"

    - name: Configure Nomad client to use NFS volumes
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} NFS VOLUME CONFIGURATION"
        block: |
          plugin "nomad-driver-podman" {
            config {
              volumes {
                enabled = true
              }
            }
          }

          client {
            host_volume "nfs-shared" {
              path      = "{{ nfs_mount_path }}/nomad/volumes"
              read_only = false
            }
          }
        insertafter: 'data_dir = "/opt/nomad/data"'
      notify: Restart Nomad service to apply changes

    # Run the restart (if one was notified) before the status check below,
    # so the check sees the new configuration.
    - name: Apply pending Nomad restart
      meta: flush_handlers

    - name: Verify Nomad client configuration
      command: nomad node status -self
      register: nomad_status
      changed_when: false
      ignore_errors: true

    - name: Display Nomad status
      debug:
        msg: "{{ inventory_hostname }} - Nomad status: {{ '运行中' if nomad_status.rc == 0 else '异常' }}"

  handlers:
    # Only triggered when the configuration block actually changed, so
    # repeated runs no longer bounce the Nomad agent.
    - name: Restart Nomad service to apply changes
      systemd:
        name: nomad
        state: restarted
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
---
# Installs the NFS client, mounts the shared export on every host of the
# nomad_cluster group and creates the Nomad directories on the share.
- name: Setup NFS Storage for Nomad Cluster
  hosts: nomad_cluster
  become: true
  vars:
    nfs_server: snail
    nfs_export_path: /fs/1000/nfs/Fnsync
    nfs_mount_path: /mnt/fnsync
    nfs_options: "rw,sync,vers=4.2"

  tasks:
    - name: Install NFS client packages
      package:
        name: nfs-common
        state: present

    - name: Create NFS mount directory
      file:
        path: "{{ nfs_mount_path }}"
        state: directory
        owner: root
        group: root
        mode: "0755"

    # state=mounted both mounts the share now and records it in /etc/fstab,
    # so a separate state=present task for reboot persistence is not needed.
    - name: Mount NFS share
      mount:
        path: "{{ nfs_mount_path }}"
        src: "{{ nfs_server }}:{{ nfs_export_path }}"
        fstype: nfs
        opts: "{{ nfs_options }}"
        state: mounted

    - name: Verify NFS mount
      command: df -h "{{ nfs_mount_path }}"
      register: mount_result
      # Read-only check: never report it as a change.
      changed_when: false

    - name: Display mount result
      debug:
        var: mount_result.stdout

    - name: Create Nomad data directories on NFS
      file:
        path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}"
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"

    - name: Create shared volumes directory
      file:
        path: "{{ nfs_mount_path }}/nomad/volumes"
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Nomad client agent configuration (rendered by Ansible; the ansible_host
# variable of each node is substituted into the address settings below).
log_level  = "INFO"
data_dir   = "/opt/nomad/data"
datacenter = "dc1"

# Traffic goes over the Tailscale network, but the agent binds to all
# local interfaces.
bind_addr = "0.0.0.0"

# Tailscale address of this node, used for communication
addresses {
  http = "{{ ansible_host }}"
  rpc  = "{{ ansible_host }}"
  serf = "{{ ansible_host }}"
}

advertise {
  http = "{{ ansible_host }}:4646"
  rpc  = "{{ ansible_host }}:4647"
  serf = "{{ ansible_host }}:4648"
}

# This node is not a server ...
server {
  enabled = false
}

# ... it runs in client mode with support for hybrid storage.
client {
  enabled = true
  servers = ["100.116.158.95:4647"]

  # Host volumes exposed to jobs
  host_volume "fnsync" {
    path      = "/mnt/fnsync"
    read_only = false
  }
}

consul {
  address = "100.116.158.95:8500"
}
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
# Nomad集群NFS配置指南
|
||||
|
||||
## 概述
|
||||
|
||||
本文档介绍如何为Nomad集群配置NFS存储,支持不同类型的容器和地理位置。
|
||||
|
||||
## 容器类型分类
|
||||
|
||||
### 1. 本地LXC容器
|
||||
- **位置**: 本地网络环境
|
||||
- **节点示例**: influxdb, warden, hcp1, hcp2
|
||||
- **特点**: 直接使用已映射的NFS目录
|
||||
- **NFS参数**: `rw,sync,vers=4.2`
|
||||
|
||||
### 2. 海外PVE容器
|
||||
- **位置**: 海外云服务器
|
||||
- **节点示例**: ash1d, ash2e, ash3c, ch2, ch3
|
||||
- **特点**: 需要网络优化参数
|
||||
- **NFS参数**: `rw,sync,vers=3,timeo=600,retrans=2`
|
||||
|
||||
## NFS配置详情
|
||||
|
||||
### NFS服务器信息
|
||||
- **服务器**: snail
|
||||
- **导出路径**: `/fs/1000/nfs/Fnsync`
|
||||
- **挂载点**: `/mnt/fnsync`
|
||||
|
||||
### 当前挂载状态
|
||||
```bash
|
||||
# 检查当前挂载
|
||||
df -h | grep fnsync
|
||||
# 输出: snail:/fs/1000/nfs/Fnsync 8.2T 2.2T 6.0T 27% /mnt/fnsync
|
||||
```
|
||||
|
||||
## 部署步骤
|
||||
|
||||
### 1. 自动部署
|
||||
```bash
|
||||
chmod +x scripts/deploy-nfs-for-nomad.sh
|
||||
./scripts/deploy-nfs-for-nomad.sh
|
||||
```
|
||||
|
||||
### 2. 手动分步部署
|
||||
```bash
|
||||
# 步骤1: 配置NFS挂载
|
||||
ansible-playbook -i configuration/inventories/production/inventory.ini \
|
||||
playbooks/setup-nfs-by-container-type.yml
|
||||
|
||||
# 步骤2: 配置Nomad客户端
|
||||
ansible-playbook -i configuration/inventories/production/nomad-cluster.ini \
|
||||
playbooks/setup-nomad-nfs-client.yml
|
||||
```
|
||||
|
||||
## Nomad作业配置
|
||||
|
||||
### 使用NFS卷的Nomad作业示例
|
||||
|
||||
```hcl
|
||||
job "nfs-example" {
  datacenters = ["dc1"]

  group "app" {
    volume "nfs-shared" {
      type      = "host"
      source    = "nfs-shared"
      read_only = false
    }

    task "app" {
      driver = "podman"

      config {
        image = "alpine:latest"
        args  = ["tail", "-f", "/dev/null"]
      }

      volume_mount {
        volume      = "nfs-shared"
        destination = "/shared"
        read_only   = false
      }
    }
  }
}
|
||||
```
|
||||
|
||||
### 针对不同容器类型的约束
|
||||
|
||||
```hcl
|
||||
# 本地LXC容器约束
|
||||
constraint {
|
||||
attribute = "${attr.unique.hostname}"
|
||||
operator = "regexp"
|
||||
value = "(influxdb|warden|hcp1|hcp2)"
|
||||
}
|
||||
|
||||
# 海外PVE容器约束
|
||||
constraint {
|
||||
attribute = "${attr.unique.hostname}"
|
||||
operator = "regexp"
|
||||
value = "(ash1d|ash2e|ash3c|ch2|ch3)"
|
||||
}
|
||||
```
|
||||
|
||||
## 验证和监控
|
||||
|
||||
### 验证命令
|
||||
```bash
|
||||
# 检查NFS挂载
|
||||
ansible all -i configuration/inventories/production/inventory.ini \
|
||||
-m shell -a "df -h /mnt/fnsync"
|
||||
|
||||
# 检查Nomad状态
|
||||
nomad node status
|
||||
|
||||
# 检查NFS任务状态
|
||||
nomad job status nfs-multi-type-example
|
||||
```
|
||||
|
||||
### 监控指标
|
||||
- NFS挂载状态
|
||||
- 网络延迟(海外节点)
|
||||
- 存储使用情况
|
||||
- Nomad任务运行状态
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 常见问题
|
||||
|
||||
1. **NFS挂载失败**
|
||||
- 检查网络连通性: `ping snail`
|
||||
- 验证NFS服务: `showmount -e snail`
|
||||
- 检查防火墙设置
|
||||
|
||||
2. **海外节点连接慢**
|
||||
- 使用NFSv3协议
|
||||
- 增加超时参数
|
||||
- 考虑使用缓存方案
|
||||
|
||||
3. **Nomad卷无法挂载**
|
||||
- 检查Nomad客户端配置
|
||||
- 验证目录权限
|
||||
- 检查Nomad服务状态
|
||||
|
||||
## 最佳实践
|
||||
|
||||
1. **数据备份**: 定期备份NFS上的重要数据
|
||||
2. **监控告警**: 设置NFS挂载状态监控
|
||||
3. **容量规划**: 监控存储使用情况
|
||||
4. **网络优化**: 为海外节点配置合适的网络参数
|
||||
|
||||
## 相关文件
|
||||
|
||||
- `playbooks/setup-nfs-by-container-type.yml` - NFS挂载配置
|
||||
- `playbooks/setup-nomad-nfs-client.yml` - Nomad客户端配置
|
||||
- `jobs/nomad-nfs-multi-type.nomad` - 示例Nomad作业
|
||||
- `scripts/deploy-nfs-for-nomad.sh` - 部署脚本
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
#!/bin/bash

# Checks and repairs apt temporary-file permission problems on remote nodes.
# For each node it: tests SSH connectivity, shows /tmp permissions and disk
# usage, tries to create a temp file (resetting /tmp to mode 1777 on
# failure), cleans the apt cache and runs a short `apt update`.

# Nodes to check
NODES=('ash2e' 'ash1d' 'ch2')

# Check every node in turn
for NODE in "${NODES[@]}"; do
    # printf is used wherever a "\n" is wanted: plain `echo` in bash prints
    # the backslash sequence literally.
    printf '\n===== 检查节点: %s =====\n' "$NODE"

    # Make sure SSH is reachable (non-interactive, 5 s timeout)
    if ! ssh -q -o BatchMode=yes -o ConnectTimeout=5 "root@$NODE" "echo Connected > /dev/null"; then
        echo "错误: 无法连接到节点 $NODE"
        continue
    fi

    echo "成功连接到节点 $NODE"

    # 1. Show /tmp permissions
    echo -n "检查/tmp目录权限: "
    ssh "root@$NODE" "ls -ld /tmp"

    # 2. Show disk usage
    echo "检查磁盘空间:"
    ssh "root@$NODE" "df -h"

    # 3. Try to create a temp file ($$ is the local PID, expanded before the
    #    command is sent, so create and remove use the same file name)
    echo -n "测试创建临时文件: "
    if ssh "root@$NODE" "touch /tmp/test-apt-temp-$$ 2>/dev/null && echo 成功 && rm -f /tmp/test-apt-temp-$$"; then
        echo "临时文件创建成功"
    else
        echo "错误: 无法创建临时文件"
        # Attempt to repair /tmp permissions
        echo "尝试修复/tmp目录权限..."
        ssh "root@$NODE" "chmod 1777 /tmp"
    fi

    # 4. Clean the apt cache
    echo "清理apt缓存..."
    ssh "root@$NODE" "apt clean"

    # 5. Run apt update as a smoke test
    echo "测试apt update (仅输出前10行)..."
    ssh "root@$NODE" "apt update 2>&1 | head -n 10"

    printf '\n节点 %s 检查完成\n\n' "$NODE"
done

# Additional repair suggestions
printf '\n===== 修复建议 =====\n'
echo "1. 如果问题仍然存在,请检查以下内容:"
echo "   - /etc/apt/apt.conf文件中的配置"
echo "   - apt-key命令的完整性 (dpkg -l apt)"
echo "   - 系统的临时文件清理服务状态"
echo "2. 可以尝试的命令:"
echo "   - dpkg --configure -a"
echo "   - apt-get install --reinstall apt apt-utils"
echo "3. 对于特定节点的持久问题,建议登录到该节点进行更详细的故障排除。"
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# Service job that serves a small generated HTML page with Python's built-in
# HTTP server, pinned to the "semaphore" node.
job "hybrid-nfs-app" {
  datacenters = ["dc1"]
  type        = "service"

  # Constraint used to tell the storage types apart
  constraint {
    attribute = "${attr.unique.hostname}"
    operator  = "regexp"
    value     = "semaphore"
  }

  group "app" {
    count = 1

    network {
      port "http" {
        static = 8080
      }
    }

    # On the local machine (semaphore) a host volume is used.
    # NOTE(review): this volume is requested but no task mounts it; the page
    # is served from the task-local "local/fnsync" directory. Confirm whether
    # a volume_mount was intended.
    volume "local-storage" {
      type      = "host"
      read_only = false
      source    = "local-fnsync"
    }

    task "web-app" {
      driver = "exec"

      config {
        command = "python3"
        args    = ["-m", "http.server", "8080", "--directory", "local/fnsync"]
      }

      # Nomad templates are rendered by consul-template, which provides
      # `timestamp` for formatted times; `now` / `date` are not defined
      # there and make the template fail to render.
      template {
        data = <<-EOH
          <h1>Hybrid NFS App - Running on {{ env "attr.unique.hostname" }}</h1>
          <p>Storage Type: {{ if eq (env "attr.unique.hostname") "semaphore" }}PVE Mount{{ else }}NFS{{ end }}</p>
          <p>Timestamp: {{ timestamp "2006-01-02 15:04:05" }}</p>
        EOH

        destination = "local/fnsync/index.html"
      }

      resources {
        cpu    = 100
        memory = 128
      }

      service {
        name = "hybrid-nfs-app"
        port = "http"

        tags = ["hybrid", "nfs", "web"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# Example service job: nginx serving its document root from the NFS-backed
# volume.
job "nfs-app-example" {
  datacenters = ["dc1"]
  type        = "service"

  group "app" {
    count = 1

    # The "http" label used by the task's `ports` and by the service must be
    # declared here; it maps a dynamic host port to nginx's port 80.
    network {
      port "http" {
        to = 80
      }
    }

    # NFS storage volume
    # NOTE(review): a host volume named "nfs-fnsync" must exist in the client
    # configuration of the target nodes — confirm the name.
    volume "nfs-storage" {
      type      = "host"
      read_only = false
      source    = "nfs-fnsync"
    }

    task "web-app" {
      driver = "docker"

      config {
        image = "nginx:alpine"
        ports = ["http"]
      }

      # Mount the group volume into the container. A docker `mount` block of
      # type "volume" would create a separate Docker-managed volume named
      # "nfs-storage" instead of using the Nomad volume declared above.
      volume_mount {
        volume      = "nfs-storage"
        destination = "/usr/share/nginx/html"
        read_only   = false
      }

      resources {
        cpu    = 100
        memory = 128
      }

      service {
        name = "nfs-web-app"
        port = "http"

        tags = ["nfs", "web"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Batch job that writes a test file to the CSI-backed NFS volume and lists
# the volume's contents.
job "nfs-storage-test" {
  datacenters = ["dc1"]
  type        = "batch"

  group "test" {
    count = 1

    volume "nfs-storage" {
      type      = "csi"
      read_only = false
      source    = "nfs-fnsync"

      # Both modes are required for CSI volumes; the values match the
      # "nfs-fnsync" volume specification.
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"
    }

    task "storage-test" {
      driver = "exec"

      volume_mount {
        volume      = "nfs-storage"
        destination = "/mnt/nfs"
        read_only   = false
      }

      config {
        command = "/bin/sh"
        # The message is double-quoted for the shell so $(hostname) and
        # $(date) are expanded; inside single quotes they were written to
        # the file literally.
        args = ["-c", "echo \"NFS Storage Test - $(hostname) - $(date)\" > /mnt/nfs/test-$(hostname).txt && ls -la /mnt/nfs/"]
      }

      resources {
        cpu    = 50
        memory = 64
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
# Example job with one task group per node class, both using the
# "nfs-shared" host volume.
job "nfs-multi-type-example" {
  datacenters = ["dc1"]
  type        = "service"

  # Task group for the local LXC containers
  group "lxc-apps" {
    count = 2

    constraint {
      attribute = "${attr.unique.hostname}"
      operator  = "regexp"
      value     = "(influxdb|hcp)"
    }

    volume "lxc-nfs" {
      type      = "host"
      source    = "nfs-shared"
      read_only = false
    }

    task "lxc-app" {
      driver = "podman"

      config {
        image = "alpine:latest"
        args  = ["tail", "-f", "/dev/null"]
      }

      volume_mount {
        volume      = "lxc-nfs"
        destination = "/shared/lxc"
        read_only   = false
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }

  # Task group for the overseas PVE containers
  group "pve-apps" {
    count = 3

    constraint {
      attribute = "${attr.unique.hostname}"
      operator  = "regexp"
      value     = "(ash1d|ash2e|ash3c|ch2|ch3)"
    }

    volume "pve-nfs" {
      type      = "host"
      source    = "nfs-shared"
      read_only = false
    }

    task "pve-app" {
      driver = "podman"

      config {
        image = "alpine:latest"
        args  = ["tail", "-f", "/dev/null"]

        # Overseas nodes share the host's network namespace
        network_mode = "host"
      }

      volume_mount {
        volume      = "pve-nfs"
        destination = "/shared/pve"
        read_only   = false
      }

      # The task-level `network { mbits = 5 }` block was dropped: network
      # blocks inside `resources` and the `mbits` setting are deprecated
      # (Nomad 0.12).
      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Minimal example: one podman task with the "nfs-shared" host volume
# mounted read-write at /shared.
job "nfs-volume-example" {
  type        = "service"
  datacenters = ["dc1"]

  group "nfs-app" {
    count = 1

    # Host volume made available to the tasks of this group
    volume "nfs-shared" {
      source    = "nfs-shared"
      type      = "host"
      read_only = false
    }

    task "app" {
      driver = "podman"

      resources {
        memory = 64
        cpu    = 100
      }

      # Keeps the container alive without doing any work
      config {
        args  = ["tail", "-f", "/dev/null"]
        image = "alpine:latest"
      }

      volume_mount {
        destination = "/shared"
        volume      = "nfs-shared"
        read_only   = false
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash

# Print every playbook (*.yml) in the playbooks directory, one per line.
PLAYBOOK_DIR=/root/mgmt/configuration/playbooks
ls -1 "$PLAYBOOK_DIR"/*.yml
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
---
# Writes the Nomad client configuration to every nomad_nodes host except
# semaphore, makes sure the service is enabled and running, and restarts it
# only when the configuration file changed.
- name: 配置Nomad客户端节点
  hosts: nomad_nodes:!semaphore
  become: true
  vars:
    nomad_config_dir: /etc/nomad.d

  tasks:
    - name: 创建Nomad配置目录
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: 复制Nomad客户端配置
      copy:
        content: |
          datacenter = "dc1"
          data_dir   = "/opt/nomad/data"
          log_level  = "INFO"
          bind_addr  = "0.0.0.0"

          server {
            enabled = false
          }

          client {
            enabled = true
            servers = ["100.116.158.95:4647"]
            host_volume "fnsync" {
              path      = "/mnt/fnsync"
              read_only = false
            }
          }

          addresses {
            http = "{{ ansible_host }}"
            rpc  = "{{ ansible_host }}"
            serf = "{{ ansible_host }}"
          }

          advertise {
            http = "{{ ansible_host }}:4646"
            rpc  = "{{ ansible_host }}:4647"
            serf = "{{ ansible_host }}:4648"
          }

          consul {
            address = "100.116.158.95:8500"
          }
        dest: "{{ nomad_config_dir }}/nomad.hcl"
        owner: root
        group: root
        mode: "0644"
      notify: 重启Nomad服务

    # Ensures the service is enabled and running without bouncing it on
    # every play; restarts are left to the handler.
    - name: 启动Nomad服务
      systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true

    # Run a pending restart now so the status check sees the new config.
    - name: 应用待处理的Nomad重启
      meta: flush_handlers

    - name: 检查Nomad服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示Nomad服务状态
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: 重启Nomad服务
      systemd:
        name: nomad
        state: restarted
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
---
# Installs the NFS client and mounts the shared export on all nomad_nodes.
- name: 配置Nomad节点NFS挂载
  hosts: nomad_nodes
  become: true
  vars:
    nfs_server: "snail"
    nfs_share: "/fs/1000/nfs/Fnsync"
    mount_point: "/mnt/fnsync"
    nfs_mount_opts: "rw,relatime,vers=4.2"

  tasks:
    - name: 安装NFS客户端
      package:
        name: nfs-common
        state: present

    - name: 创建挂载目录
      file:
        path: "{{ mount_point }}"
        state: directory
        mode: "0755"

    # state=mounted mounts the share now and also maintains the matching
    # /etc/fstab entry, so boot-time mounting is covered by this one task.
    # A separate lineinfile edit of /etc/fstab would add a second entry for
    # the same mount point as soon as the options differ.
    - name: 挂载NFS共享并配置开机自动挂载
      mount:
        path: "{{ mount_point }}"
        src: "{{ nfs_server }}:{{ nfs_share }}"
        fstype: nfs4
        opts: "{{ nfs_mount_opts }}"
        state: mounted

    - name: 验证挂载
      command: df -h {{ mount_point }}
      register: mount_check
      # Read-only check: never report it as a change.
      changed_when: false

    - name: 显示挂载信息
      debug:
        var: mount_check.stdout_lines
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
---
# Installs the NFS client and mounts the shared export on all nomad_nodes.
- name: 设置Nomad节点NFS挂载
  hosts: nomad_nodes
  become: true
  vars:
    nfs_server: "snail"
    nfs_share: "/fs/1000/nfs/Fnsync"
    mount_point: "/mnt/fnsync"
    nfs_mount_opts: "rw,relatime,vers=4.2"

  tasks:
    - name: 安装NFS客户端
      package:
        name: nfs-common
        state: present

    - name: 创建挂载目录
      file:
        path: "{{ mount_point }}"
        state: directory
        mode: "0755"

    # state=mounted mounts the share now and also maintains the matching
    # /etc/fstab entry, so boot-time mounting is covered by this one task.
    # A separate lineinfile edit of /etc/fstab would add a second entry for
    # the same mount point as soon as the options differ.
    - name: 挂载NFS共享并配置开机自动挂载
      mount:
        path: "{{ mount_point }}"
        src: "{{ nfs_server }}:{{ nfs_share }}"
        fstype: nfs4
        opts: "{{ nfs_mount_opts }}"
        state: mounted

    - name: 验证挂载
      command: df -h {{ mount_point }}
      register: mount_check
      # Read-only check: never report it as a change.
      changed_when: false

    - name: 显示挂载信息
      debug:
        var: mount_check.stdout_lines
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
#!/bin/bash

# Deployment script for the Nomad cluster NFS configuration.
# Runs the NFS and Nomad-client playbooks, verifies the result on all nodes
# and optionally submits the example NFS job.
# Must be started from the mgmt project root.

set -e

echo "🚀 开始部署Nomad集群NFS配置..."

# Colour definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Helpers: print a coloured, level-tagged message
log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }

# Check the current directory
if [ ! -f "configuration/inventories/production/inventory.ini" ]; then
    log_error "请在mgmt项目根目录运行此脚本"
    exit 1
fi

# 1. Configure the NFS mount on all nodes
log_info "步骤1: 为所有节点配置NFS挂载 (根据容器类型和地理位置)"
ansible-playbook -i configuration/inventories/production/inventory.ini \
    playbooks/setup-nfs-by-container-type.yml

# 2. Configure NFS volume support on the Nomad clients
log_info "步骤2: 配置Nomad客户端支持NFS卷"
ansible-playbook -i configuration/inventories/production/nomad-cluster.ini \
    playbooks/setup-nomad-nfs-client.yml

# 3. Verify the NFS mount on all nodes
log_info "步骤3: 验证所有节点的NFS挂载状态"
ansible all -i configuration/inventories/production/inventory.ini \
    -m shell -a "df -h /mnt/fnsync 2>/dev/null || echo 'NFS未挂载'" \
    --limit '!snail'

# 4. Verify the Nomad client configuration
log_info "步骤4: 验证Nomad客户端配置"
ansible nomad_clients -i configuration/inventories/production/nomad-cluster.ini \
    -m shell -a "nomad node status -self 2>/dev/null || echo 'Nomad未运行'"

# 5. Deploy the example NFS job (optional)
# `read` returns non-zero at end of input (e.g. when run without a terminal),
# which would abort the script under `set -e`; treat that case as "n".
read -r -p "是否部署示例NFS任务?(y/n): " deploy_example || deploy_example="n"
if [ "$deploy_example" = "y" ] || [ "$deploy_example" = "Y" ]; then
    log_info "部署示例NFS任务..."
    nomad run jobs/nomad-nfs-multi-type.nomad
    echo "等待任务启动..."
    sleep 10
    nomad job status nfs-multi-type-example
fi

log_info "✅ NFS配置部署完成!"
echo ""
echo "📋 使用说明:"
echo "1. NFS挂载点: /mnt/fnsync"
echo "2. 本地LXC容器: 直接使用挂载目录"
echo "3. 海外PVE容器: 使用优化参数挂载"
echo "4. Nomad作业: 使用host volume 'nfs-shared'"
echo ""
echo "🔧 手动验证命令:"
echo "   - 检查NFS挂载: df -h /mnt/fnsync"
echo "   - 检查Nomad状态: nomad node status"
echo "   - 运行NFS任务: nomad run jobs/nomad-nfs-multi-type.nomad"
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash

# Distributes the local ed25519 public key to root's authorized_keys on all
# Nomad nodes, using whatever authentication currently works for root.
echo "分发SSH公钥到Nomad节点..."

# Node list
NODES=(
    "100.81.26.3"      # ash1d.global
    "100.103.147.94"   # ash2e.global
    "100.90.159.68"    # ch2.global
    "100.86.141.112"   # ch3.global
    "100.117.106.136"  # master
    "100.116.80.94"    # ash3c
)

PUB_KEY_FILE=/home/ben/.ssh/id_ed25519.pub

# Stop early when the key is missing; otherwise an empty line would be
# appended to authorized_keys on every node.
if [ ! -r "$PUB_KEY_FILE" ]; then
    echo "错误: 无法读取公钥文件 $PUB_KEY_FILE" >&2
    exit 1
fi

PUB_KEY=$(cat "$PUB_KEY_FILE")

for NODE in "${NODES[@]}"; do
    echo "正在配置节点: $NODE"

    # Record the node's host key so the ssh call below does not prompt
    ssh-keyscan -H "$NODE" >> ~/.ssh/known_hosts 2>/dev/null

    # Add the key through the existing authentication method. The key is
    # only appended when that exact line is not present yet, so re-running
    # the script does not pile up duplicate entries.
    ssh "root@$NODE" "mkdir -p /root/.ssh && chmod 700 /root/.ssh && touch /root/.ssh/authorized_keys && { grep -qxF '$PUB_KEY' /root/.ssh/authorized_keys || echo '$PUB_KEY' >> /root/.ssh/authorized_keys; }" 2>/dev/null && \
        echo "✓ $NODE 配置成功" || echo "✗ $NODE 配置失败"
done

echo "密钥分发完成"
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
---
# Authorizes one SSH public key for root on every nomad_nodes host and then
# checks that Ansible can still reach the host.
- name: 设置Nomad节点SSH密钥认证
  become: true
  hosts: nomad_nodes
  vars:
    # Public key that is added to root's authorized_keys
    ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIddJVPEvFRtzhWwYjr21lKTar+d7R5Kn/6bhd2s231 ben@ch2"

  tasks:
    - name: 确保.ssh目录存在
      file:
        state: directory
        path: /root/.ssh
        mode: "0700"

    - name: 添加SSH公钥到authorized_keys
      authorized_key:
        key: "{{ ssh_public_key }}"
        user: root
        state: present

    - name: 测试SSH连接
      ping:
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
#!/bin/bash

# NFS configuration verification script.
# Checks the local NFS mount, the presence of the project files, the Ansible
# inventory, the Nomad agent and the reachability of the NFS server, and
# prints one status line per check.

set -e

echo "🔍 验证NFS配置状态..."

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }

# 1. Check the local NFS mount
log_info "1. 检查本地NFS挂载状态"
if df -h | grep -q "/mnt/fnsync"; then
    log_info "✅ 本地NFS挂载正常"
    df -h | grep "/mnt/fnsync"
else
    log_error "❌ 本地NFS未挂载"
fi

# 2. Check that the project files exist
log_info "2. 检查配置文件"
config_files=(
    "playbooks/setup-nfs-by-container-type.yml"
    "playbooks/setup-nomad-nfs-client.yml"
    "jobs/nomad-nfs-multi-type.nomad"
    "scripts/deploy-nfs-for-nomad.sh"
    "docs/nomad-nfs-setup.md"
)

for file in "${config_files[@]}"; do
    if [ -f "$file" ]; then
        log_info "✅ $file 存在"
    else
        log_error "❌ $file 不存在"
    fi
done

# 3. Check the Ansible inventory
log_info "3. 检查Ansible配置"
if [ -f "configuration/inventories/production/inventory.ini" ]; then
    log_info "✅ inventory.ini 存在"
    echo "节点分类:"
    grep -E "\[.*\]" configuration/inventories/production/inventory.ini | head -10
else
    log_error "❌ inventory.ini 不存在"
fi

# 4. Check the Nomad service
log_info "4. 检查Nomad服务"
if command -v nomad &> /dev/null; then
    if nomad node status &> /dev/null; then
        log_info "✅ Nomad服务运行正常"
        # `|| true`: when this machine is not a Nomad client, or grep finds
        # no matching line, the pipeline fails and `set -e` would abort the
        # remaining checks.
        nomad node status -self | grep -E "(Name|Status|Datacenter)" || true
    else
        log_warn "⚠️ Nomad服务未运行或无法连接"
    fi
else
    log_warn "⚠️ Nomad命令未安装"
fi

# 5. Check connectivity to the NFS server
log_info "5. 检查NFS服务器连通性"
if ping -c 1 -W 3 snail &> /dev/null; then
    log_info "✅ NFS服务器 snail 可达"
    if command -v showmount &> /dev/null; then
        showmount -e snail 2>/dev/null || log_warn "⚠️ 无法获取NFS导出列表"
    fi
else
    log_error "❌ NFS服务器 snail 不可达"
fi

echo ""
echo "📊 验证完成!"
echo ""
echo "🚀 下一步操作:"
echo "1. 运行部署脚本: ./scripts/deploy-nfs-for-nomad.sh"
echo "2. 查看详细文档: cat docs/nomad-nfs-setup.md"
echo "3. 测试NFS功能: nomad run jobs/nomad-nfs-multi-type.nomad"
|
||||
Loading…
Reference in New Issue