feat: 迁移基础设施到Nomad和Podman并重构配置

refactor: 更新Ansible Playbooks以支持Nomad集群
docs: 更新文档反映从Docker Swarm到Nomad的迁移
ci: 更新Gitea工作流以支持Podman构建
test: 添加Nomad作业测试文件
build: 更新Makefile以支持Podman操作
chore: 清理旧的Docker Swarm相关文件和配置
This commit is contained in:
2025-09-27 08:04:23 +00:00
parent c0d4cf54dc
commit a06e5e1a00
54 changed files with 2010 additions and 329 deletions

View File

@@ -0,0 +1,87 @@
# Two-server Consul cluster pinned to the ARM64 nodes (master and ash3c),
# running the release binary fetched by the artifact stanza.
job "consul-cluster-arm64" {
  datacenters = ["dc1"]
  type        = "service"

  # Only run on the ARM64 nodes: master and ash3c.
  constraint {
    attribute = "${attr.unique.hostname}"
    operator  = "regexp"
    value     = "(master|ash3c)"
  }

  group "consul" {
    count = 2

    # Ensure at most one instance per node.
    constraint {
      operator = "distinct_hosts"
      value    = "true"
    }

    network {
      port "http" {
        static = 8500
      }
      port "rpc" {
        static = 8400
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
      port "server" {
        static = 8300
      }
      port "dns" {
        static = 8600
      }
    }

    task "consul" {
      driver = "exec"

      config {
        # Run the binary unpacked by the artifact stanza below. The bare
        # name "consul" would resolve from the host PATH and silently
        # ignore the downloaded release.
        command = "local/consul"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=2",
          "-data-dir=/tmp/consul-cluster-data",
          "-bind=${NOMAD_IP_serf_lan}",
          "-client=0.0.0.0",
          "-retry-join=100.117.106.136", # master Tailscale IP
          "-retry-join=100.116.80.94",   # ash3c Tailscale IP
          # "-ui-config-enabled" is a config-file option, not a CLI flag;
          # the CLI equivalent for enabling the web UI is "-ui".
          "-ui",
          "-log-level=INFO",
          "-node=${node.unique.name}-consul",
          "-datacenter=dc1"
        ]
      }

      artifact {
        source      = "https://releases.hashicorp.com/consul/1.17.0/consul_1.17.0_linux_arm64.zip"
        destination = "local/"
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "consul-cluster-arm64"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,88 @@
# Three-server Consul cluster across bj-warden, master and ash3c, running
# the release binary fetched by the artifact stanza.
job "consul-cluster" {
  datacenters = ["dc1"]
  type        = "service"

  # Run on three nodes: bj-warden, master and ash3c.
  constraint {
    attribute = "${node.unique.name}"
    operator  = "regexp"
    value     = "(bj-warden|master|ash3c)"
  }

  group "consul" {
    count = 3

    # Ensure at most one instance per node.
    constraint {
      operator = "distinct_hosts"
      value    = "true"
    }

    network {
      port "http" {
        static = 8500
      }
      port "rpc" {
        static = 8400
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
      port "server" {
        static = 8300
      }
      port "dns" {
        static = 8600
      }
    }

    task "consul" {
      driver = "exec"

      config {
        # Run the binary unpacked by the artifact stanza below. The bare
        # name "consul" would resolve from the host PATH and silently
        # ignore the downloaded release.
        command = "local/consul"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-data-dir=/tmp/consul-cluster-data",
          "-bind=${NOMAD_IP_serf_lan}",
          "-client=0.0.0.0",
          "-retry-join=100.122.197.112", # bj-warden Tailscale IP
          "-retry-join=100.117.106.136", # master Tailscale IP
          "-retry-join=100.116.80.94",   # ash3c Tailscale IP
          # "-ui-config-enabled" is a config-file option, not a CLI flag;
          # the CLI equivalent for enabling the web UI is "-ui".
          "-ui",
          "-log-level=INFO",
          "-node=${node.unique.name}-consul",
          "-datacenter=dc1"
        ]
      }

      # NOTE(review): this fetches the linux_arm64 build on every matched
      # node, including bj-warden — confirm bj-warden is ARM64, otherwise
      # the binary will not run there.
      artifact {
        source      = "https://releases.hashicorp.com/consul/1.17.0/consul_1.17.0_linux_arm64.zip"
        destination = "local/"
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "consul-cluster"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,81 @@
# Three-server Consul cluster running the container image under the Podman
# driver with host networking.
job "consul-cluster" {
  datacenters = ["dc1"]
  type        = "service"

  constraint {
    attribute = "${node.unique.name}"
    operator  = "regexp"
    value     = "^(master|ash3c|semaphore)$"
  }

  group "consul" {
    count = 3

    network {
      port "http" {
        static = 8500
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
      port "server" {
        static = 8300
      }
      port "dns" {
        static = 8600
      }
    }

    service {
      name = "consul"
      port = "http"

      check {
        type     = "http"
        path     = "/v1/status/leader"
        interval = "10s"
        timeout  = "2s"
      }
    }

    task "consul" {
      driver = "podman"

      config {
        image        = "consul:1.15.4"
        network_mode = "host"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-ui",
          "-data-dir=/consul/data",
          "-config-dir=/consul/config",
          # The original used consul-template syntax ({{ env ... }}), which
          # is only rendered inside template stanzas, never in driver args;
          # use Nomad's runtime attribute interpolation instead.
          # NOTE(review): this yields the node's default fingerprinted IP,
          # which may not be the tailscale0 address — confirm which
          # interface the cluster should bind to.
          "-bind=${attr.unique.network.ip-address}",
          "-client=0.0.0.0",
          "-retry-join=100.117.106.136",
          "-retry-join=100.116.80.94",
          "-retry-join=100.116.158.95"
        ]
        volumes = [
          "consul-data:/consul/data",
          "consul-config:/consul/config"
        ]
      }

      resources {
        cpu    = 500
        memory = 512
      }

      env {
        # Used by the consul image entrypoint; an explicit -bind above
        # takes precedence when both are set.
        CONSUL_BIND_INTERFACE = "tailscale0"
      }
    }
  }
}

View File

@@ -0,0 +1,157 @@
# Legacy three-group variant of the Consul cluster job: one group per node,
# each pinned by a hostname constraint with a hard-coded Tailscale bind and
# advertise address. No service stanza is registered, so these servers are
# not discoverable via Nomad service discovery.
job "consul-cluster-simple" {
datacenters = ["dc1"]
type = "service"
# Server pinned to node "master" (Tailscale IP 100.117.106.136).
group "consul-master" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "master"
}
network {
port "http" {
static = 8500
}
port "rpc" {
static = 8300
}
port "serf_lan" {
static = 8301
}
port "serf_wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
# Runs the "consul" binary resolved from the host PATH (no artifact
# stanza here; the binary must be pre-installed on the node).
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
# NOTE(review): Consul's -client flag takes bind addresses or
# go-sockaddr templates; a bare CIDR such as 100.64.0.0/10 is
# likely rejected at startup — confirm, or use 0.0.0.0 / a
# sockaddr template instead.
"-client=100.64.0.0/10",
"-bind=100.117.106.136",
"-advertise=100.117.106.136",
"-retry-join=100.116.80.94",
"-retry-join=100.122.197.112",
"-ui"
]
}
resources {
cpu = 300
memory = 512
}
}
}
# Server pinned to node "ash3c" (Tailscale IP 100.116.80.94).
group "consul-ash3c" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "ash3c"
}
network {
port "http" {
static = 8500
}
port "rpc" {
static = 8300
}
port "serf_lan" {
static = 8301
}
port "serf_wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
# NOTE(review): same bare-CIDR -client concern as above.
"-client=100.64.0.0/10",
"-bind=100.116.80.94",
"-advertise=100.116.80.94",
"-retry-join=100.117.106.136",
"-retry-join=100.122.197.112",
"-ui"
]
}
resources {
cpu = 300
memory = 512
}
}
}
# Server pinned to node "bj-warden" (Tailscale IP 100.122.197.112).
group "consul-warden" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "bj-warden"
}
network {
port "http" {
static = 8500
}
port "rpc" {
static = 8300
}
port "serf_lan" {
static = 8301
}
port "serf_wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
# NOTE(review): same bare-CIDR -client concern as above.
"-client=100.64.0.0/10",
"-bind=100.122.197.112",
"-advertise=100.122.197.112",
"-retry-join=100.117.106.136",
"-retry-join=100.116.80.94",
"-ui"
]
}
resources {
cpu = 300
memory = 512
}
}
}
}

View File

@@ -0,0 +1,190 @@
# Three-server Consul cluster: one group per node, each pinned by hostname
# with hard-coded Tailscale bind/advertise addresses, and a per-node
# Nomad service registration with an HTTP leader check.
job "consul-cluster-three-nodes" {
  datacenters = ["dc1"]
  type        = "service"

  # Server pinned to node "master" (Tailscale IP 100.117.106.136).
  group "consul-master" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      value     = "master"
    }

    network {
      port "http" {
        static = 8500
      }
      port "rpc" {
        static = 8300
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-data-dir=/opt/nomad/data/consul",
          "-client=0.0.0.0",
          "-bind=100.117.106.136",
          "-advertise=100.117.106.136",
          "-retry-join=100.116.80.94",
          "-retry-join=100.122.197.112",
          # "-ui-config-enabled" is a config-file option, not a CLI flag;
          # the CLI equivalent for enabling the web UI is "-ui".
          "-ui"
        ]
      }

      resources {
        cpu    = 300
        memory = 512
      }

      service {
        name = "consul-master"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }

  # Server pinned to node "ash3c" (Tailscale IP 100.116.80.94).
  group "consul-ash3c" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      value     = "ash3c"
    }

    network {
      port "http" {
        static = 8500
      }
      port "rpc" {
        static = 8300
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-data-dir=/opt/nomad/data/consul",
          "-client=0.0.0.0",
          "-bind=100.116.80.94",
          "-advertise=100.116.80.94",
          "-retry-join=100.117.106.136",
          "-retry-join=100.122.197.112",
          # CLI flag for the web UI (see note in consul-master group).
          "-ui"
        ]
      }

      resources {
        cpu    = 300
        memory = 512
      }

      service {
        name = "consul-ash3c"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }

  # Server pinned to node "bj-warden" (Tailscale IP 100.122.197.112).
  group "consul-warden" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      value     = "bj-warden"
    }

    network {
      port "http" {
        static = 8500
      }
      port "rpc" {
        static = 8300
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-data-dir=/opt/nomad/data/consul",
          "-client=0.0.0.0",
          "-bind=100.122.197.112",
          "-advertise=100.122.197.112",
          "-retry-join=100.117.106.136",
          "-retry-join=100.116.80.94",
          # CLI flag for the web UI (see note in consul-master group).
          "-ui"
        ]
      }

      resources {
        cpu    = 300
        memory = 512
      }

      service {
        name = "consul-warden"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }
}

57
jobs/consul-cluster.nomad Normal file
View File

@@ -0,0 +1,57 @@
# Three-server Consul cluster under the Podman driver, with a host volume
# for persistent data.
job "consul-cluster" {
  datacenters = ["dc1"]
  type        = "service"

  group "consul-servers" {
    count = 3

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    # Host volume; must be declared in the client config of each node.
    volume "consul-data" {
      type      = "host"
      read_only = false
      source    = "consul-data"
    }

    # Ports moved from the deprecated task-level resources.network stanza
    # (removed in modern Nomad) to the group network stanza; the "ports"
    # list in the driver config references these labels.
    network {
      port "server" {
        static = 8300
      }
      port "serf_lan" {
        static = 8301
      }
      port "serf_wan" {
        static = 8302
      }
      port "ui" {
        static = 8500
      }
    }

    task "consul" {
      driver = "podman"

      config {
        image = "hashicorp/consul:latest"
        ports = ["server", "serf_lan", "serf_wan", "ui"]
        args = [
          "agent",
          "-server",
          "-bootstrap-expect=3",
          "-data-dir=/consul/data",
          "-ui",
          "-client=0.0.0.0",
          # The original used consul-template syntax ({{ env `...` }}),
          # which is never rendered in driver args; use Nomad's runtime
          # environment interpolation instead.
          "-bind=${NOMAD_IP_server}",
          "-retry-join=100.117.106.136",
          "-retry-join=100.116.80.94",
          "-retry-join=100.76.13.187"
        ]
      }

      volume_mount {
        volume      = "consul-data"
        destination = "/consul/data"
        read_only   = false
      }

      # No explicit cpu/memory in the original either; Nomad defaults apply.
    }
  }
}

View File

@@ -0,0 +1,47 @@
# Single dev-mode Consul agent pinned to the "warden" node.
job "consul-single-member" {
  datacenters = ["dc1"]
  type        = "service"
  priority    = 50

  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "consul" {
    count = 1

    # Port moved from the deprecated task-level resources.network stanza
    # (removed in modern Nomad) to the group network stanza; this also
    # makes the service's port label resolvable.
    network {
      port "http" {
        static = 8500
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args    = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "consul"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,47 @@
# Single dev-mode Consul agent pinned to the "warden" node.
# NOTE(review): this file is byte-identical to another consul-single-member
# job in the same commit — consider removing one copy.
job "consul-single-member" {
  datacenters = ["dc1"]
  type        = "service"
  priority    = 50

  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "consul" {
    count = 1

    # Port moved from the deprecated task-level resources.network stanza
    # (removed in modern Nomad) to the group network stanza; this also
    # makes the service's port label resolvable.
    network {
      port "http" {
        static = 8500
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args    = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "consul"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,46 @@
# Throwaway dev-mode Consul agent for testing on the bj-warden node.
job "consul-test-warden" {
  datacenters = ["dc1"]
  type        = "service"

  # Pin the whole job to the bj-warden node.
  constraint {
    attribute = "${node.unique.name}"
    value     = "bj-warden"
  }

  group "consul" {
    count = 1

    network {
      # Consul HTTP API / UI.
      port "http" {
        static = 8500
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args    = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-test"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      # Register the agent and probe leader election over HTTP.
      service {
        name = "consul-test"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,46 @@
# Single dev-mode Consul agent pinned to the "warden" node.
job "consul-warden" {
  datacenters = ["dc1"]
  type        = "service"
  priority    = 50

  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "consul" {
    count = 1

    # Port moved from the deprecated task-level resources.network stanza
    # (removed in modern Nomad) to the group network stanza; this also
    # makes the service's port label resolvable.
    network {
      port "http" {
        static = 8500
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args    = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "consul"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,110 @@
# System job that provisions Podman and the nomad-driver-podman plugin on
# every client node, then restarts Nomad so the plugin is detected.
job "install-podman-driver" {
  datacenters = ["dc1"]
  type        = "system" # run on every node

  group "install" {
    task "install-podman" {
      driver = "exec"

      config {
        command = "bash"
        args = [
          "-c",
          <<-EOF
          set -euo pipefail
          export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
          # Tooling required by the steps below.
          if ! command -v jq >/dev/null 2>&1 || ! command -v unzip >/dev/null 2>&1 || ! command -v wget >/dev/null 2>&1; then
          echo "Installing dependencies (jq unzip wget)..."
          sudo -n apt update -y || true
          sudo -n apt install -y jq unzip wget || true
          fi
          # Install Podman if not already present.
          if ! command -v podman >/dev/null 2>&1; then
          echo "Installing Podman..."
          sudo -n apt update -y || true
          sudo -n apt install -y podman || true
          sudo -n systemctl enable podman || true
          else
          echo "Podman already installed"
          fi
          # Enable and start podman.socket so Nomad can reach the API.
          sudo -n systemctl enable --now podman.socket || true
          if getent group podman >/dev/null 2>&1; then
          sudo -n usermod -aG podman nomad || true
          fi
          # Install the Nomad Podman driver plugin (always ensure present).
          # NOTE(review): shell $\{VAR} expansions inside this heredoc are
          # not $$-escaped — confirm the jobspec parser passes them through
          # to bash unchanged.
          PODMAN_DRIVER_VERSION="0.6.1"
          PLUGIN_DIR="/opt/nomad/data/plugins"
          # Pick the release matching this node's CPU architecture: the
          # cluster mixes amd64 and ARM64 nodes, so a hard-coded amd64 zip
          # would install an unrunnable binary on the ARM nodes.
          case "$(uname -m)" in
          aarch64|arm64) DRIVER_ARCH="arm64" ;;
          x86_64|amd64) DRIVER_ARCH="amd64" ;;
          *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;;
          esac
          DRIVER_ZIP="nomad-driver-podman_${PODMAN_DRIVER_VERSION}_linux_${DRIVER_ARCH}.zip"
          sudo -n mkdir -p "${PLUGIN_DIR}" || true
          cd /tmp
          if [ ! -x "${PLUGIN_DIR}/nomad-driver-podman" ]; then
          echo "Installing nomad-driver-podman ${PODMAN_DRIVER_VERSION} (${DRIVER_ARCH})..."
          wget -q "https://releases.hashicorp.com/nomad-driver-podman/${PODMAN_DRIVER_VERSION}/${DRIVER_ZIP}"
          unzip -o "${DRIVER_ZIP}"
          sudo -n mv -f nomad-driver-podman "${PLUGIN_DIR}/"
          sudo -n chmod +x "${PLUGIN_DIR}/nomad-driver-podman"
          sudo -n chown -R nomad:nomad "${PLUGIN_DIR}"
          rm -f "${DRIVER_ZIP}"
          else
          echo "nomad-driver-podman already present in ${PLUGIN_DIR}"
          fi
          # Point plugin_dir in /etc/nomad.d/nomad.hcl at the plugin dir.
          if [ -f /etc/nomad.d/nomad.hcl ]; then
          if grep -q "^plugin_dir\s*=\s*\"" /etc/nomad.d/nomad.hcl; then
          sudo -n sed -i 's#^plugin_dir\s*=\s*\".*\"#plugin_dir = "/opt/nomad/data/plugins"#' /etc/nomad.d/nomad.hcl || true
          else
          echo 'plugin_dir = "/opt/nomad/data/plugins"' | sudo -n tee -a /etc/nomad.d/nomad.hcl >/dev/null || true
          fi
          fi
          # Restart Nomad so the plugin is loaded.
          sudo -n systemctl restart nomad || true
          echo "Waiting for Nomad to restart..."
          sleep 15
          # Verify Nomad fingerprinted the Podman driver.
          if /usr/local/bin/nomad node status -self -json 2>/dev/null | jq -r '.Drivers.podman.Detected' | grep -q "true"; then
          echo "Podman driver successfully loaded"
          exit 0
          fi
          echo "Podman driver not detected yet, retrying once after socket restart..."
          sudo -n systemctl restart podman.socket || true
          sleep 5
          if /usr/local/bin/nomad node status -self -json 2>/dev/null | jq -r '.Drivers.podman.Detected' | grep -q "true"; then
          echo "Podman driver successfully loaded after socket restart"
          exit 0
          else
          echo "Podman driver still not detected; manual investigation may be required"
          exit 1
          fi
          EOF
        ]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      // Previously ran as root:
      // user = "root"
      # Run as the nomad user to satisfy client policies that forbid root;
      # privileged steps go through passwordless (-n) sudo.
      user = "nomad"

      # Fail fast: one attempt per day, no retry storm.
      restart {
        attempts = 1
        interval = "24h"
        delay    = "60s"
        mode     = "fail"
      }
    }
  }
}

View File

@@ -0,0 +1,46 @@
# Dev-mode Consul agent used as a service-discovery endpoint on the
# "warden" node.
job "service-discovery-warden" {
  datacenters = ["dc1"]
  type        = "service"

  # Pin the whole job to the warden node.
  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "discovery" {
    count = 1

    network {
      # Consul HTTP API / UI.
      port "http" {
        static = 8500
      }
    }

    task "discovery" {
      driver = "exec"

      config {
        command = "consul"
        args    = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/discovery-data"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      # Register the endpoint and probe leader election over HTTP.
      service {
        name = "discovery-service"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,52 @@
# Dev-mode Consul agent on the "warden" node, bound to a fixed Tailscale IP,
# for connectivity testing.
job "simple-consul-test" {
  datacenters = ["dc1"]
  type        = "service"

  # Pin the whole job to the warden node.
  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "consul" {
    count = 1

    network {
      # Consul HTTP API / UI.
      port "http" {
        static = 8500
      }
    }

    task "consul" {
      driver = "exec"

      config {
        command = "consul"
        args = [
          "agent",
          "-dev",
          "-client=0.0.0.0",
          "-bind=100.122.197.112",
          "-data-dir=/tmp/consul-test-data"
        ]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      # Register the test agent and probe leader election over HTTP.
      service {
        name = "consul-test"
        port = "http"

        check {
          type     = "http"
          path     = "/v1/status/leader"
          port     = "http"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

40
jobs/test-job.nomad Normal file
View File

@@ -0,0 +1,40 @@
# Smoke-test job: serve nginx:alpine on port 8080 through the Podman driver.
job "test-nginx" {
  datacenters = ["dc1"]
  type        = "service"

  group "web" {
    count = 1

    network {
      port "http" {
        static = 8080
      }
    }

    task "nginx" {
      driver = "podman"

      config {
        image = "nginx:alpine"
        ports = ["http"]
      }

      resources {
        cpu    = 100
        memory = 128
      }

      # Register the server and probe the root path over HTTP.
      service {
        name = "nginx-test"
        port = "http"

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "3s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,24 @@
# One-shot batch job: run hello-world under the Podman driver with journald
# logging to verify the driver works end to end.
job "test-podman" {
  datacenters = ["dc1"]
  type        = "batch"

  group "test" {
    count = 1

    task "hello" {
      driver = "podman"

      config {
        image = "docker.io/library/hello-world:latest"

        logging = {
          driver = "journald"
        }
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

View File

@@ -0,0 +1,23 @@
# One-shot batch job: print a greeting from an Alpine container to verify
# command/args overriding under the Podman driver.
job "test-podman-simple" {
  datacenters = ["dc1"]
  type        = "batch"

  group "test" {
    count = 1

    task "hello" {
      driver = "podman"

      config {
        image   = "alpine:latest"
        command = "echo"
        args    = ["Hello from Podman!"]
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}

View File

@@ -0,0 +1,31 @@
# One-shot batch job: pull and run hello-world on the Beijing nodes to
# verify registry access from there.
job "test-private-registry" {
  datacenters = ["dc1"]
  type        = "batch"

  group "test" {
    count = 1

    # Run only on Beijing nodes (names prefixed "bj-").
    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "bj-.*"
    }

    task "hello" {
      driver = "podman"

      config {
        # Fully qualify the image: Podman rejects short names unless
        # unqualified-search registries are configured on the node, and the
        # sibling test-podman job already uses the qualified form.
        # NOTE(review): despite the job name, this pulls from Docker Hub,
        # not a private registry — confirm the intended image.
        image = "docker.io/library/hello-world:latest"

        logging = {
          driver = "journald"
        }
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}

27
jobs/test-simple.nomad Normal file
View File

@@ -0,0 +1,27 @@
# One-shot test: print a greeting on the "warden" node via the exec driver.
job "test-simple" {
  datacenters = ["dc1"]
  # A "service" job with an immediately-exiting echo task would be
  # restarted forever by the scheduler; one-shot tasks belong to "batch"
  # (consistent with the other test jobs in this commit).
  type = "batch"

  constraint {
    attribute = "${node.unique.name}"
    value     = "warden"
  }

  group "test" {
    count = 1

    task "hello" {
      driver = "exec"

      config {
        command = "echo"
        args    = ["Hello from warden node!"]
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}