Remove backup directory and improve gitignore

- Delete backups/ directory (use git for version control)
- Add backup file patterns to .gitignore
- Git is the best backup strategy
This commit is contained in:
Houzhong Xu 2025-10-09 06:19:17 +00:00
parent f8532b8306
commit cef3ab7534
No known key found for this signature in database
GPG Key ID: B44BEB1438F1B46F
33 changed files with 3 additions and 3267 deletions

3
.gitignore vendored
View File

@@ -100,3 +100,6 @@ scripts/deploy-*-webhook.sh
.kilocode/
# Symbolic links
mcp_shared_config.json
# Backup files (use git for version control)
backup-*/
*.backup
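
Since the removed files remain in git history, they can be recovered on demand. A minimal sketch of the git-based workflow the commit message refers to (paths and placeholders are illustrative; f8532b8306 is the parent of this commit):

```bash
# Show the commits that touched a file, then restore it from any of them.
git log --oneline -- components/traefik/jobs/traefik-cloudflare.nomad
git checkout <commit-sha> -- components/traefik/jobs/traefik-cloudflare.nomad

# Recover the whole removed backups/ directory from the parent of this commit.
git checkout f8532b8306 -- backups/
```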

View File

@@ -1,99 +0,0 @@
# Nomad Jobs Backup
**Backup time**: 2025-10-04 07:44:11
**Backup reason**: all services running normally; SSL certificates fully configured
## Current Running State
### ✅ Deployed and working services
1. **Traefik** (`traefik-cloudflare-v1`)
- File: `components/traefik/jobs/traefik-cloudflare.nomad`
- Status: running, SSL certificates OK
- Domain: `*.git4ta.me`
- Certificates: Let's Encrypt (Cloudflare DNS Challenge)
2. **Vault** (`vault-cluster`)
- File: `nomad-jobs/vault-cluster.nomad`
- Status: three-node cluster running
- Nodes: ch4, ash3c, warden
- Configuration: stored in Consul KV `vault/config`
3. **Waypoint** (`waypoint-server`)
- File: `waypoint-server.nomad`
- Status: running
- Node: hcp1
- Web UI: `https://waypoint.git4ta.me/auth/token`
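A minimal status check for the three jobs listed above, assuming the nomad CLI points at this cluster:

```bash
# Confirm the three deployed jobs are still running.
nomad job status traefik-cloudflare-v1
nomad job status vault-cluster
nomad job status waypoint-server
```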
### 🔧 Key configuration
#### Traefik highlights
- Uses the Cloudflare DNS Challenge to obtain SSL certificates
- Certificate storage: `/local/acme.json` (local storage)
- Domain: `git4ta.me`
- Service routes: consul, nomad, vault, waypoint
#### Vault highlights
- Three-node high-availability cluster
- Configuration stored centrally in Consul KV
- Uses the `exec` driver
- Services registered in Consul
#### Waypoint highlights
- Uses the `raw_exec` driver
- HTTPS API: 9701, gRPC: 9702
- Bootstrapped; auth token obtained
### 📋 Service endpoints
- `https://consul.git4ta.me` → Consul UI
- `https://traefik.git4ta.me` → Traefik Dashboard
- `https://nomad.git4ta.me` → Nomad UI
- `https://vault.git4ta.me` → Vault UI
- `https://waypoint.git4ta.me/auth/token` → Waypoint UI
### 🔑 Important credentials
#### Vault
- Unseal Keys: stored in Consul KV `vault/unseal-keys`
- Root Token: stored in Consul KV `vault/root-token`
- Details: `/root/mgmt/README-Vault.md`
#### Waypoint
- Auth Token: stored in Consul KV `waypoint/auth-token`
- Details: `/root/mgmt/README-Waypoint.md`
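A minimal sketch of reading those credentials back out of Consul KV (assumes the consul CLI can reach the cluster; key names as listed above):

```bash
# Read the stored Vault and Waypoint credentials from Consul KV.
consul kv get vault/unseal-keys
consul kv get vault/root-token
consul kv get waypoint/auth-token
```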
### 🚀 Deployment commands
```bash
# Deploy Traefik
nomad job run components/traefik/jobs/traefik-cloudflare.nomad
# Deploy Vault
nomad job run nomad-jobs/vault-cluster.nomad
# Deploy Waypoint
nomad job run waypoint-server.nomad
```
### 📝 Notes
1. **Certificate management**: certificates live in `/local/acme.json` inside the Traefik task and are lost when it restarts
2. **Vault configuration**: all config is loaded dynamically from Consul KV; restart the job after changes
3. **Networking**: all services use Tailscale network addresses
4. **Backup strategy**: periodically back up the configuration and credentials held in Consul KV (a sketch follows)
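A rough sketch of such a periodic export (prefixes follow the keys mentioned in this document; file names are illustrative):

```bash
# Export the Consul KV trees that hold configuration and credentials.
consul kv export vault/    > vault-kv-$(date +%F).json
consul kv export waypoint/ > waypoint-kv-$(date +%F).json
consul kv export config/   > config-kv-$(date +%F).json
```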
### 🔄 Recovery steps
To restore to this state (a sketch follows the list):
1. Restore the Consul KV configuration
2. Deploy in order: Traefik → Vault → Waypoint
3. Verify all service endpoints are reachable
4. Check the SSL certificate status
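A rough end-to-end sketch of these steps (the KV backup file name is illustrative):

```bash
# 1. Restore the Consul KV data from an earlier export.
consul kv import @consul-kv-backup.json

# 2. Redeploy with the `nomad job run` commands listed above (Traefik -> Vault -> Waypoint).

# 3. Verify the service endpoints respond.
for url in https://consul.git4ta.me https://traefik.git4ta.me \
           https://nomad.git4ta.me https://vault.git4ta.me; do
  curl -sk -o /dev/null -w "%{http_code} $url\n" "$url"
done

# 4. Check the certificate issuer and validity dates for one host.
echo | openssl s_client -connect vault.git4ta.me:443 -servername vault.git4ta.me 2>/dev/null \
  | openssl x509 -noout -issuer -dates
```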
---
**Backup completed**: 2025-10-04 07:44:11
**Backed up by**: AI Assistant
**Status**: all services running normally ✅

View File

@@ -1,19 +0,0 @@
# Consul Configuration
## Deployment
```bash
nomad job run components/consul/jobs/consul-cluster.nomad
```
## Job details
- **Job name**: `consul-cluster-nomad`
- **Type**: service
- **Nodes**: master, ash3c, warden
## Access
- Master: `http://master.tailnet-68f9.ts.net:8500`
- Ash3c: `http://ash3c.tailnet-68f9.ts.net:8500`
- Warden: `http://warden.tailnet-68f9.ts.net:8500`
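A quick verification sketch against the Master endpoint (any of the three listed above works):

```bash
# List cluster members and the current leader via the Master endpoint.
consul members -http-addr=http://master.tailnet-68f9.ts.net:8500
curl -s http://master.tailnet-68f9.ts.net:8500/v1/status/leader
```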

View File

@@ -1,88 +0,0 @@
# Consul configuration file
# This file contains the complete Consul configuration, including variable and storage related settings
# Base configuration
data_dir = "/opt/consul/data"
raft_dir = "/opt/consul/raft"
# Enable the UI
ui_config {
enabled = true
}
# Datacenter configuration
datacenter = "dc1"
# Server configuration
server = true
bootstrap_expect = 3
# Network configuration
client_addr = "0.0.0.0"
bind_addr = "{{ GetInterfaceIP `eth0` }}"
advertise_addr = "{{ GetInterfaceIP `eth0` }}"
# Port configuration
ports {
dns = 8600
http = 8500
https = -1
grpc = 8502
grpc_tls = 8503
serf_lan = 8301
serf_wan = 8302
server = 8300
}
# Cluster join
retry_join = ["100.117.106.136", "100.116.80.94", "100.122.197.112"]
# Service discovery
enable_service_script = true
enable_script_checks = true
enable_local_script_checks = true
# Performance tuning
performance {
raft_multiplier = 1
}
# Logging configuration
log_level = "INFO"
enable_syslog = false
log_file = "/var/log/consul/consul.log"
# Security configuration
encrypt = "YourEncryptionKeyHere"
# Connection configuration
reconnect_timeout = "30s"
reconnect_timeout_wan = "30s"
session_ttl_min = "10s"
# Autopilot configuration
autopilot {
cleanup_dead_servers = true
last_contact_threshold = "200ms"
max_trailing_logs = 250
server_stabilization_time = "10s"
redundancy_zone_tag = ""
disable_upgrade_migration = false
upgrade_version_tag = ""
}
# Snapshot configuration
snapshot {
enabled = true
interval = "24h"
retain = 30
name = "consul-snapshot-{{.Timestamp}}"
}
# Backup configuration
backup {
enabled = true
interval = "6h"
retain = 7
name = "consul-backup-{{.Timestamp}}"
}

View File

@@ -1,93 +0,0 @@
# Consul configuration template
# This file uses Consul template syntax to pull configuration dynamically from the KV store
# It follows the config/{environment}/{provider}/{region_or_service}/{key} layout
# Base configuration
data_dir = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/data_dir` `/opt/consul/data` }}"
raft_dir = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/raft_dir` `/opt/consul/raft` }}"
# Enable the UI
ui_config {
enabled = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ui/enabled` `true` }}
}
# Datacenter configuration
datacenter = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/datacenter` `dc1` }}"
# Server configuration
server = true
bootstrap_expect = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/bootstrap_expect` `3` }}
# Network configuration
client_addr = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/network/client_addr` `0.0.0.0` }}"
bind_addr = "{{ GetInterfaceIP (keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/network/bind_interface` `ens160`) }}"
advertise_addr = "{{ GetInterfaceIP (keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/network/advertise_interface` `ens160`) }}"
# Port configuration
ports {
dns = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/dns` `8600` }}
http = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/http` `8500` }}
https = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/https` `-1` }}
grpc = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/grpc` `8502` }}
grpc_tls = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/grpc_tls` `8503` }}
serf_lan = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/serf_lan` `8301` }}
serf_wan = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/serf_wan` `8302` }}
server = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/ports/server` `8300` }}
}
# Cluster join - node IPs are looked up dynamically
retry_join = [
"{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/nodes/master/ip` `100.117.106.136` }}",
"{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/nodes/ash3c/ip` `100.116.80.94` }}",
"{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/nodes/warden/ip` `100.122.197.112` }}"
]
# Service discovery
enable_service_script = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/service/enable_service_script` `true` }}
enable_script_checks = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/service/enable_script_checks` `true` }}
enable_local_script_checks = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/service/enable_local_script_checks` `true` }}
# Performance tuning
performance {
raft_multiplier = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/performance/raft_multiplier` `1` }}
}
# Logging configuration
log_level = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/log_level` `INFO` }}"
enable_syslog = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/log/enable_syslog` `false` }}
log_file = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/log/log_file` `/var/log/consul/consul.log` }}"
# Security configuration
encrypt = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/cluster/encrypt_key` `YourEncryptionKeyHere` }}"
# Connection configuration
reconnect_timeout = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/connection/reconnect_timeout` `30s` }}"
reconnect_timeout_wan = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/connection/reconnect_timeout_wan` `30s` }}"
session_ttl_min = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/connection/session_ttl_min` `10s` }}"
# Autopilot configuration
autopilot {
cleanup_dead_servers = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/autopilot/cleanup_dead_servers` `true` }}
last_contact_threshold = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/autopilot/last_contact_threshold` `200ms` }}"
max_trailing_logs = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/autopilot/max_trailing_logs` `250` }}
server_stabilization_time = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/autopilot/server_stabilization_time` `10s` }}"
redundancy_zone_tag = ""
disable_upgrade_migration = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/autopilot/disable_upgrade_migration` `false` }}
upgrade_version_tag = ""
}
# Snapshot configuration
snapshot {
enabled = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/snapshot/enabled` `true` }}
interval = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/snapshot/interval` `24h` }}"
retain = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/snapshot/retain` `30` }}
name = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/snapshot/name` `consul-snapshot-{{.Timestamp}}` }}"
}
# Backup configuration
backup {
enabled = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/backup/enabled` `true` }}
interval = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/backup/interval` `6h` }}"
retain = {{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/backup/retain` `7` }}
name = "{{ keyOrDefault `config/` + env "ENVIRONMENT" + `/consul/backup/name` `consul-backup-{{.Timestamp}}` }}"
}

View File

@@ -1,50 +0,0 @@
job "consul-clients-additional" {
datacenters = ["dc1"]
type = "service"
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "ch2|ch3|de"
}
group "consul-client" {
count = 3
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-config-dir=/etc/consul.d",
"-data-dir=/opt/consul",
"-node=${node.unique.name}",
"-bind=${attr.unique.network.ip-address}",
"-retry-join=warden.tailnet-68f9.ts.net:8301",
"-retry-join=ch4.tailnet-68f9.ts.net:8301",
"-retry-join=ash3c.tailnet-68f9.ts.net:8301",
"-client=0.0.0.0"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "http"
path = "/v1/status/leader"
interval = "30s"
timeout = "5s"
}
}
}
}
}

View File

@@ -1,154 +0,0 @@
job "consul-clients-dedicated" {
datacenters = ["dc1"]
type = "service"
group "consul-client-hcp1" {
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
port "http" {
static = 8500
}
}
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-data-dir=/opt/consul",
"-node=hcp1",
"-bind=100.97.62.111",
"-advertise=100.97.62.111",
"-retry-join=hcp1.tailnet-68f9.ts.net:80",
"-client=0.0.0.0",
"-http-port=8500",
"-datacenter=dc1"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "script"
command = "consul"
args = ["members"]
interval = "10s"
timeout = "3s"
}
}
}
}
group "consul-client-influxdb1" {
constraint {
attribute = "${node.unique.name}"
value = "influxdb1"
}
network {
port "http" {
static = 8500
}
}
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-data-dir=/opt/consul",
"-node=influxdb1",
"-bind=100.100.7.4",
"-advertise=100.100.7.4",
"-retry-join=hcp1.tailnet-68f9.ts.net:80",
"-client=0.0.0.0",
"-http-port=8500",
"-datacenter=dc1"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "script"
command = "consul"
args = ["members"]
interval = "10s"
timeout = "3s"
}
}
}
}
group "consul-client-browser" {
constraint {
attribute = "${node.unique.name}"
value = "browser"
}
network {
port "http" {
static = 8500
}
}
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-data-dir=/opt/consul",
"-node=browser",
"-bind=100.116.112.45",
"-advertise=100.116.112.45",
"-retry-join=hcp1.tailnet-68f9.ts.net:80",
"-client=0.0.0.0",
"-http-port=8500",
"-datacenter=dc1"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "script"
command = "consul"
args = ["members"]
interval = "10s"
timeout = "3s"
}
}
}
}
}

View File

@@ -1,66 +0,0 @@
job "consul-clients-dedicated" {
datacenters = ["dc1"]
type = "service"
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "hcp1|influxdb1|browser"
}
group "consul-client" {
count = 3
update {
max_parallel = 3
min_healthy_time = "5s"
healthy_deadline = "2m"
progress_deadline = "5m"
auto_revert = false
}
network {
port "http" {
static = 8500
}
}
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-data-dir=/opt/consul",
"-node=${node.unique.name}",
"-bind=${attr.unique.network.ip-address}",
"-advertise=${attr.unique.network.ip-address}",
"-retry-join=warden.tailnet-68f9.ts.net:8301",
"-retry-join=ch4.tailnet-68f9.ts.net:8301",
"-retry-join=ash3c.tailnet-68f9.ts.net:8301",
"-client=0.0.0.0",
"-http-port=${NOMAD_PORT_http}",
"-datacenter=dc1"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "http"
path = "/v1/status/leader"
interval = "10s"
timeout = "3s"
}
}
}
}
}

View File

@@ -1,43 +0,0 @@
job "consul-clients" {
datacenters = ["dc1"]
type = "system"
group "consul-client" {
count = 0 # system job, runs on all nodes
task "consul-client" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-config-dir=/etc/consul.d",
"-data-dir=/opt/consul",
"-node=${node.unique.name}",
"-bind=${attr.unique.network.ip-address}",
"-retry-join=warden.tailnet-68f9.ts.net:8301",
"-retry-join=ch4.tailnet-68f9.ts.net:8301",
"-retry-join=ash3c.tailnet-68f9.ts.net:8301"
]
}
resources {
cpu = 100
memory = 128
}
service {
name = "consul-client"
port = "http"
check {
type = "http"
path = "/v1/status/leader"
interval = "30s"
timeout = "5s"
}
}
}
}
}

View File

@@ -1,115 +0,0 @@
job "consul-cluster-nomad" {
datacenters = ["dc1"]
type = "service"
group "consul-ch4" {
constraint {
attribute = "${node.unique.name}"
value = "ch4"
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
"-client=0.0.0.0",
"-bind=100.117.106.136",
"-advertise=100.117.106.136",
"-retry-join=100.116.80.94",
"-retry-join=100.122.197.112",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
group "consul-ash3c" {
constraint {
attribute = "${node.unique.name}"
value = "ash3c"
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
"-client=0.0.0.0",
"-bind=100.116.80.94",
"-advertise=100.116.80.94",
"-retry-join=100.117.106.136",
"-retry-join=100.122.197.112",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
group "consul-warden" {
constraint {
attribute = "${node.unique.name}"
value = "warden"
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
"-client=0.0.0.0",
"-bind=100.122.197.112",
"-advertise=100.122.197.112",
"-retry-join=100.117.106.136",
"-retry-join=100.116.80.94",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
}

View File

@@ -1,66 +0,0 @@
job "consul-ui-service" {
datacenters = ["dc1"]
type = "service"
group "consul-ui" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "warden"
}
network {
mode = "host"
port "http" {
static = 8500
host_network = "tailscale0"
}
}
service {
name = "consul-ui"
port = "http"
tags = [
"traefik.enable=true",
"traefik.http.routers.consul-ui.rule=PathPrefix(`/consul`)",
"traefik.http.routers.consul-ui.priority=100"
]
check {
type = "http"
path = "/v1/status/leader"
interval = "10s"
timeout = "2s"
}
}
task "consul-ui" {
driver = "exec"
config {
command = "/usr/bin/consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
"-client=0.0.0.0",
"-bind=100.122.197.112",
"-advertise=100.122.197.112",
"-retry-join=100.117.106.136",
"-retry-join=100.116.80.94",
"-ui",
"-http-port=8500"
]
}
resources {
cpu = 300
memory = 512
}
}
}
}

View File

@@ -1,8 +0,0 @@
# Nomad Configuration
## Jobs
- `install-podman-driver.nomad` - installs the Podman driver
- `nomad-consul-config.nomad` - Nomad-Consul configuration
- `nomad-consul-setup.nomad` - Nomad-Consul setup
- `nomad-nfs-volume.nomad` - NFS volume configuration
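Assuming these job files sit next to this README, each can be submitted the usual way (paths are illustrative):

```bash
# The driver installer is a system job and runs on every node.
nomad job run install-podman-driver.nomad
nomad job run nomad-consul-config.nomad
nomad job run nomad-consul-setup.nomad
nomad job run nomad-nfs-volume.nomad
```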

View File

@@ -1,110 +0,0 @@
job "install-podman-driver" {
datacenters = ["dc1"]
type = "system" # run on every node
group "install" {
task "install-podman" {
driver = "exec"
config {
command = "bash"
args = [
"-c",
<<-EOF
set -euo pipefail
export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
# Required tooling
if ! command -v jq >/dev/null 2>&1 || ! command -v unzip >/dev/null 2>&1 || ! command -v wget >/dev/null 2>&1; then
echo "Installing dependencies (jq unzip wget)..."
sudo -n apt update -y || true
sudo -n apt install -y jq unzip wget || true
fi
# Install Podman (if not already installed)
if ! command -v podman >/dev/null 2>&1; then
echo "Installing Podman..."
sudo -n apt update -y || true
sudo -n apt install -y podman || true
sudo -n systemctl enable podman || true
else
echo "Podman already installed"
fi
# Enable and start podman.socket so Nomad can reach it
sudo -n systemctl enable --now podman.socket || true
if getent group podman >/dev/null 2>&1; then
sudo -n usermod -aG podman nomad || true
fi
# Install the Nomad Podman driver plugin (always make sure it is present)
PODMAN_DRIVER_VERSION="0.6.1"
PLUGIN_DIR="/opt/nomad/data/plugins"
sudo -n mkdir -p "${PLUGIN_DIR}" || true
cd /tmp
if [ ! -x "${PLUGIN_DIR}/nomad-driver-podman" ]; then
echo "Installing nomad-driver-podman ${PODMAN_DRIVER_VERSION}..."
wget -q "https://releases.hashicorp.com/nomad-driver-podman/${PODMAN_DRIVER_VERSION}/nomad-driver-podman_${PODMAN_DRIVER_VERSION}_linux_amd64.zip"
unzip -o "nomad-driver-podman_${PODMAN_DRIVER_VERSION}_linux_amd64.zip"
sudo -n mv -f nomad-driver-podman "${PLUGIN_DIR}/"
sudo -n chmod +x "${PLUGIN_DIR}/nomad-driver-podman"
sudo -n chown -R nomad:nomad "${PLUGIN_DIR}"
rm -f "nomad-driver-podman_${PODMAN_DRIVER_VERSION}_linux_amd64.zip"
else
echo "nomad-driver-podman already present in ${PLUGIN_DIR}"
fi
# Update the plugin_dir setting in /etc/nomad.d/nomad.hcl
if [ -f /etc/nomad.d/nomad.hcl ]; then
if grep -q "^plugin_dir\s*=\s*\"" /etc/nomad.d/nomad.hcl; then
sudo -n sed -i 's#^plugin_dir\s*=\s*\".*\"#plugin_dir = "/opt/nomad/data/plugins"#' /etc/nomad.d/nomad.hcl || true
else
echo 'plugin_dir = "/opt/nomad/data/plugins"' | sudo -n tee -a /etc/nomad.d/nomad.hcl >/dev/null || true
fi
fi
# Restart the Nomad service so it loads the plugin
sudo -n systemctl restart nomad || true
echo "Waiting for Nomad to restart..."
sleep 15
# Check whether Nomad has detected the Podman driver
if /usr/local/bin/nomad node status -self -json 2>/dev/null | jq -r '.Drivers.podman.Detected' | grep -q "true"; then
echo "Podman driver successfully loaded"
exit 0
fi
echo "Podman driver not detected yet, retrying once after socket restart..."
sudo -n systemctl restart podman.socket || true
sleep 5
if /usr/local/bin/nomad node status -self -json 2>/dev/null | jq -r '.Drivers.podman.Detected' | grep -q "true"; then
echo "Podman driver successfully loaded after socket restart"
exit 0
else
echo "Podman driver still not detected; manual investigation may be required"
exit 1
fi
EOF
]
}
resources {
cpu = 200
memory = 256
}
// Run with root privileges
// user = "root"
# Run the task as the nomad user instead, to avoid client policies that forbid root
user = "nomad"
# Ensure the task completes successfully
restart {
attempts = 1
interval = "24h"
delay = "60s"
mode = "fail"
}
}
}
}

View File

@@ -1,55 +0,0 @@
job "nomad-consul-config" {
datacenters = ["dc1"]
type = "system"
group "nomad-server-config" {
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "semaphore|ash1d|ash2e|ch2|ch3|onecloud1|de"
}
task "update-nomad-config" {
driver = "exec"
config {
command = "sh"
args = [
"-c",
"sed -i '/^consul {/,/^}/c\\consul {\\n address = \"ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500\"\\n server_service_name = \"nomad\"\\n client_service_name = \"nomad-client\"\\n auto_advertise = true\\n server_auto_join = true\\n client_auto_join = false\\n}' /etc/nomad.d/nomad.hcl && systemctl restart nomad"
]
}
resources {
cpu = 100
memory = 128
}
}
}
group "nomad-client-config" {
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "ch4|ash3c|browser|influxdb1|hcp1|warden"
}
task "update-nomad-config" {
driver = "exec"
config {
command = "sh"
args = [
"-c",
"sed -i '/^consul {/,/^}/c\\consul {\\n address = \"ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500\"\\n server_service_name = \"nomad\"\\n client_service_name = \"nomad-client\"\\n auto_advertise = true\\n server_auto_join = false\\n client_auto_join = true\\n}' /etc/nomad.d/nomad.hcl && systemctl restart nomad"
]
}
resources {
cpu = 100
memory = 128
}
}
}
}

View File

@@ -1,23 +0,0 @@
job "nomad-consul-setup" {
datacenters = ["dc1"]
type = "system"
group "nomad-config" {
task "setup-consul" {
driver = "exec"
config {
command = "sh"
args = [
"-c",
"if grep -q 'server.*enabled.*true' /etc/nomad.d/nomad.hcl; then sed -i '/^consul {/,/^}/c\\consul {\\n address = \"ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500\"\\n server_service_name = \"nomad\"\\n client_service_name = \"nomad-client\"\\n auto_advertise = true\\n server_auto_join = true\\n client_auto_join = false\\n}' /etc/nomad.d/nomad.hcl; else sed -i '/^consul {/,/^}/c\\consul {\\n address = \"ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500\"\\n server_service_name = \"nomad\"\\n client_service_name = \"nomad-client\"\\n auto_advertise = true\\n server_auto_join = false\\n client_auto_join = true\\n}' /etc/nomad.d/nomad.hcl; fi && systemctl restart nomad"
]
}
resources {
cpu = 100
memory = 128
}
}
}
}

View File

@@ -1,34 +0,0 @@
job "nfs-volume-example" {
datacenters = ["dc1"]
type = "service"
group "nfs-app" {
count = 1
volume "nfs-shared" {
type = "host"
source = "nfs-shared"
read_only = false
}
task "app" {
driver = "podman"
config {
image = "alpine:latest"
args = ["tail", "-f", "/dev/null"]
}
volume_mount {
volume = "nfs-shared"
destination = "/shared"
read_only = false
}
resources {
cpu = 100
memory = 64
}
}
}
}

View File

@@ -1,28 +0,0 @@
# Traefik Configuration
## Deployment
```bash
nomad job run components/traefik/jobs/traefik.nomad
```
## Configuration highlights
- Binds explicitly to the Tailscale IP (100.97.62.111)
- Consul cluster ordered for geography (Beijing → Korea → US)
- Relaxed health checks suited to trans-Pacific links
- No service health check, to avoid flapping
## Access
- Dashboard: `http://hcp1.tailnet-68f9.ts.net:8080/dashboard/`
- 直接 IP: `http://100.97.62.111:8080/dashboard/`
- Consul LB: `http://hcp1.tailnet-68f9.ts.net:80`
## Troubleshooting
If services appear to flap (a quick check sketch follows the list):
1. Check whether RFC1918 private addresses are being used
2. Confirm Tailscale network connectivity
3. Lengthen the health check interval
4. Account for the network latency implied by geography
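A quick check sketch for those steps, using the endpoints listed above (assumes curl and the tailscale CLI are available on the host):

```bash
# Confirm the dashboard and the Consul LB answer over the Tailscale network.
curl -s -o /dev/null -w "%{http_code}\n" http://hcp1.tailnet-68f9.ts.net:8080/dashboard/
curl -s http://hcp1.tailnet-68f9.ts.net:80/consul/v1/status/leader

# Verify Tailscale connectivity to the node that binds 100.97.62.111.
tailscale ping hcp1
ping -c 3 100.97.62.111
```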

View File

@@ -1,28 +0,0 @@
job "test-simple" {
datacenters = ["dc1"]
type = "service"
group "test" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "warden"
}
task "test" {
driver = "exec"
config {
command = "sleep"
args = ["3600"]
}
resources {
cpu = 100
memory = 64
}
}
}
}

View File

@@ -1,213 +0,0 @@
job "traefik-cloudflare-v1" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "https" {
static = 443
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "0.0.0.0:80"
http:
redirections:
entrypoint:
to: websecure
scheme: https
permanent: true
websecure:
address: "0.0.0.0:443"
traefik:
address: "0.0.0.0:8080"
providers:
consulCatalog:
endpoint:
address: "warden.tailnet-68f9.ts.net:8500"
scheme: "http"
watch: true
exposedByDefault: false
prefix: "traefik"
defaultRule: "Host(`{{ .Name }}.git4ta.me`)"
file:
filename: /local/dynamic.yml
watch: true
certificatesResolvers:
cloudflare:
acme:
email: houzhongxu.houzhongxu@gmail.com
storage: /local/acme.json
dnsChallenge:
provider: cloudflare
delayBeforeCheck: 30s
resolvers:
- "1.1.1.1:53"
- "1.0.0.1:53"
log:
level: DEBUG
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
serversTransports:
waypoint-insecure:
insecureSkipVerify: true
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
waypoint-auth:
replacePathRegex:
regex: "^/auth/token(.*)$"
replacement: "/auth/token$1"
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
nomad-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:4646" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:4646" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:4646" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
waypoint-cluster:
loadBalancer:
servers:
- url: "https://hcp1.tailnet-68f9.ts.net:9701" # hcp1 节点 HTTPS API
serversTransport: waypoint-insecure
vault-cluster:
loadBalancer:
servers:
- url: "http://ch4.tailnet-68f9.ts.net:8200" # 韩国,活跃节点
- url: "http://ash3c.tailnet-68f9.ts.net:8200" # 美国,备用节点
- url: "http://warden.tailnet-68f9.ts.net:8200" # 北京,备用节点
healthCheck:
path: "/v1/sys/health"
interval: "30s"
timeout: "15s"
routers:
consul-api:
rule: "Host(`consul.git4ta.me`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- websecure
tls:
certResolver: cloudflare
traefik-dashboard:
rule: "Host(`traefik.git4ta.me`)"
service: dashboard@internal
middlewares:
- dashboard_redirect@internal
- dashboard_stripprefix@internal
entryPoints:
- websecure
tls:
certResolver: cloudflare
nomad-ui:
rule: "Host(`nomad.git4ta.me`)"
service: nomad-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
waypoint-ui:
rule: "Host(`waypoint.git4ta.me`)"
service: waypoint-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
vault-ui:
rule: "Host(`vault.git4ta.me`)"
service: vault-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
EOF
destination = "local/dynamic.yml"
}
template {
data = <<EOF
CLOUDFLARE_EMAIL=houzhongxu.houzhongxu@gmail.com
CLOUDFLARE_DNS_API_TOKEN=HYT-cfZTP_jq6Xd9g3tpFMwxopOyIrf8LZpmGAI3
CLOUDFLARE_ZONE_API_TOKEN=HYT-cfZTP_jq6Xd9g3tpFMwxopOyIrf8LZpmGAI3
EOF
destination = "local/cloudflare.env"
env = true
}
resources {
cpu = 500
memory = 512
}
}
}
}

View File

@@ -1,217 +0,0 @@
job "traefik-consul-kv" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "0.0.0.0:80"
traefik:
address: "0.0.0.0:8080"
providers:
consulCatalog:
endpoint:
address: "warden.tailnet-68f9.ts.net:8500"
scheme: "http"
watch: true
file:
filename: /local/dynamic.yml
watch: true
metrics:
prometheus:
addEntryPointsLabels: true
addServicesLabels: true
addRoutersLabels: true
log:
level: INFO
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
traefik-stripprefix:
stripPrefix:
prefixes:
- "/traefik"
nomad-stripprefix:
stripPrefix:
prefixes:
- "/nomad"
consul-redirect:
redirectRegex:
regex: "^/consul/?$"
replacement: "/consul/ui/"
permanent: false
nomad-redirect:
redirectRegex:
regex: "^/nomad/?$"
replacement: "/nomad/ui/"
permanent: false
traefik-redirect:
redirectRegex:
regex: "^/traefik/?$"
replacement: "/traefik/dashboard/"
permanent: false
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
nomad-cluster:
loadBalancer:
servers:
- url: "http://ch2.tailnet-68f9.ts.net:4646" # Nomad server leader
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
routers:
consul-redirect:
rule: "Path(`/consul`) || Path(`/consul/`)"
service: consul-cluster
middlewares:
- consul-redirect
entryPoints:
- web
priority: 100
consul-ui:
rule: "PathPrefix(`/consul/ui`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- web
priority: 5
consul-api:
rule: "PathPrefix(`/consul/v1`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- web
priority: 5
traefik-api:
rule: "PathPrefix(`/traefik/api`)"
service: api@internal
middlewares:
- traefik-stripprefix
entryPoints:
- web
priority: 6
traefik-dashboard:
rule: "PathPrefix(`/traefik/dashboard`)"
service: dashboard@internal
middlewares:
- traefik-stripprefix
entryPoints:
- web
priority: 5
traefik-redirect:
rule: "Path(`/traefik`) || Path(`/traefik/`)"
middlewares:
- "traefik-redirect"
entryPoints:
- web
priority: 100
nomad-redirect:
rule: "Path(`/nomad`) || Path(`/nomad/`)"
service: nomad-cluster
middlewares:
- nomad-redirect
entryPoints:
- web
priority: 100
nomad-ui:
rule: "PathPrefix(`/nomad/ui`)"
service: nomad-cluster
middlewares:
- nomad-stripprefix
entryPoints:
- web
priority: 5
nomad-api:
rule: "PathPrefix(`/nomad/v1`)"
service: nomad-cluster
middlewares:
- nomad-stripprefix
entryPoints:
- web
priority: 5
EOF
destination = "local/dynamic.yml"
}
resources {
cpu = 500
memory = 512
}
}
}
}

View File

@@ -1,150 +0,0 @@
job "traefik-consul-lb" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "warden"
}
update {
min_healthy_time = "60s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = false
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "hcp1.tailnet-68f9.ts.net:80"
traefik:
address: "100.97.62.111:8080"
providers:
file:
filename: /local/dynamic.yml
watch: true
metrics:
prometheus:
addEntryPointsLabels: true
addServicesLabels: true
addRoutersLabels: true
log:
level: INFO
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
traefik-stripprefix:
stripPrefix:
prefixes:
- "/traefik"
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
routers:
consul-api:
rule: "PathPrefix(`/consul`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- web
traefik-dashboard:
rule: "PathPrefix(`/traefik`)"
service: dashboard@internal
middlewares:
- traefik-stripprefix
entryPoints:
- web
EOF
destination = "local/dynamic.yml"
}
resources {
cpu = 500
memory = 512
}
service {
name = "consul-lb"
port = "http"
check {
name = "consul-lb-health"
type = "http"
path = "/consul/v1/status/leader"
interval = "30s"
timeout = "5s"
}
}
service {
name = "traefik-dashboard"
port = "traefik"
check {
name = "traefik-dashboard-health"
type = "http"
path = "/api/rawdata"
interval = "30s"
timeout = "5s"
}
}
}
}
}

View File

@@ -1,40 +0,0 @@
job "traefik-no-service" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--api.dashboard=true",
"--api.insecure=true",
"--providers.file.directory=/tmp",
"--entrypoints.web.address=:80"
]
}
resources {
cpu = 200
memory = 128
}
}
}
}

View File

@@ -1,68 +0,0 @@
job "traefik-simple" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "0.0.0.0:80"
traefik:
address: "0.0.0.0:8080"
providers:
consulCatalog:
endpoint:
address: "warden.tailnet-68f9.ts.net:8500"
scheme: "http"
watch: true
exposedByDefault: false
prefix: "traefik"
log:
level: INFO
EOF
destination = "local/traefik.yml"
}
resources {
cpu = 500
memory = 512
}
}
}
}

View File

@@ -1,150 +0,0 @@
job "traefik-consul-lb" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
update {
min_healthy_time = "60s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = false
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "100.97.62.111:80"
traefik:
address: "100.97.62.111:8080"
providers:
file:
filename: /local/dynamic.yml
watch: true
metrics:
prometheus:
addEntryPointsLabels: true
addServicesLabels: true
addRoutersLabels: true
log:
level: INFO
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
traefik-stripprefix:
stripPrefix:
prefixes:
- "/traefik"
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
routers:
consul-api:
rule: "PathPrefix(`/consul`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- web
traefik-dashboard:
rule: "PathPrefix(`/traefik`)"
service: dashboard@internal
middlewares:
- traefik-stripprefix
entryPoints:
- web
EOF
destination = "local/dynamic.yml"
}
resources {
cpu = 500
memory = 512
}
service {
name = "consul-lb"
port = "http"
check {
name = "consul-lb-health"
type = "http"
path = "/consul/v1/status/leader"
interval = "30s"
timeout = "5s"
}
}
service {
name = "traefik-dashboard"
port = "traefik"
check {
name = "traefik-dashboard-health"
type = "http"
path = "/api/rawdata"
interval = "30s"
timeout = "5s"
}
}
}
}
}

View File

@@ -1,7 +0,0 @@
# Vault Configuration
## Jobs
- `vault-cluster-exec.nomad` - Vault cluster (exec driver)
- `vault-cluster-podman.nomad` - Vault cluster (podman driver)
- `vault-dev-warden.nomad` - Vault development environment
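Which variant to run depends on the drivers a node exposes; a small sketch for checking that (same jq query style as the Podman installer job):

```bash
# Check whether the exec and podman drivers are detected on this node,
# which decides between vault-cluster-exec.nomad and vault-cluster-podman.nomad.
nomad node status -self -json | jq '{exec: .Drivers.exec.Detected, podman: .Drivers.podman.Detected}'
```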

View File

@@ -1,283 +0,0 @@
job "vault-cluster-exec" {
datacenters = ["dc1"]
type = "service"
group "vault-ch4" {
count = 1
# Use an existing attribute instead of a consul version check
constraint {
attribute = "${driver.exec}"
operator = "="
value = "1"
}
constraint {
attribute = "${node.unique.name}"
value = "ch4"
}
network {
port "api" {
static = 8200
}
port "cluster" {
static = 8201
}
}
task "vault" {
driver = "exec"
config {
command = "vault"
args = [
"server",
"-config=/opt/nomad/data/vault/config/vault.hcl"
]
}
template {
data = <<EOH
storage "consul" {
address = "{{ with nomadService "consul" }}{{ range . }}{{ if contains .Tags "http" }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}{{ end }}"
path = "vault/"
# Consul service discovery configuration
service {
name = "vault"
tags = ["vault"]
}
}
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1 # enable TLS in production
}
api_addr = "http://{{ env "NOMAD_IP_api" }}:8200"
cluster_addr = "http://{{ env "NOMAD_IP_cluster" }}:8201"
ui = true
disable_mlock = true
# Extra settings to work around permission issues
disable_sealwrap = true
disable_cache = false
# Enable raw logging
enable_raw_log = true
# Integrate with Nomad service discovery
service_registration {
enabled = true
}
EOH
destination = "/opt/nomad/data/vault/config/vault.hcl"
}
resources {
cpu = 100
memory = 256
}
service {
name = "vault"
port = "api"
check {
name = "vault-health"
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "2s"
}
}
}
}
group "vault-ash3c" {
count = 1
# Drop the consul version constraint and use a driver constraint instead
constraint {
attribute = "${driver.exec}"
operator = "="
value = "1"
}
constraint {
attribute = "${node.unique.name}"
value = "us-ash3c"
}
network {
port "api" {
static = 8200
}
port "cluster" {
static = 8201
}
}
task "vault" {
driver = "exec"
config {
command = "vault"
args = [
"server",
"-config=/opt/nomad/data/vault/config/vault.hcl"
]
}
template {
data = <<EOH
storage "consul" {
address = "{{ with nomadService "consul" }}{{ range . }}{{ if contains .Tags "http" }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}{{ end }}"
path = "vault/"
# Consul service discovery configuration
service {
name = "vault"
tags = ["vault"]
}
}
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1 # enable TLS in production
}
api_addr = "http://{{ env "NOMAD_IP_api" }}:8200"
cluster_addr = "http://{{ env "NOMAD_IP_cluster" }}:8201"
ui = true
disable_mlock = true
# Extra settings to work around permission issues
disable_sealwrap = true
disable_cache = false
# Enable raw logging
enable_raw_log = true
# Integrate with Nomad service discovery
service_registration {
enabled = true
}
EOH
destination = "/opt/nomad/data/vault/config/vault.hcl"
}
resources {
cpu = 100
memory = 256
}
service {
name = "vault"
port = "api"
check {
name = "vault-health"
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "2s"
}
}
}
}
group "vault-warden" {
count = 1
# Drop the consul version constraint and use a driver constraint instead
constraint {
attribute = "${driver.exec}"
operator = "="
value = "1"
}
constraint {
attribute = "${node.unique.name}"
value = "bj-warden"
}
network {
port "api" {
static = 8200
}
port "cluster" {
static = 8201
}
}
task "vault" {
driver = "exec"
config {
command = "vault"
args = [
"server",
"-config=/opt/nomad/data/vault/config/vault.hcl"
]
}
template {
data = <<EOH
storage "consul" {
address = "{{ with nomadService "consul" }}{{ range . }}{{ if contains .Tags "http" }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}{{ end }}"
path = "vault/"
# Consul service discovery configuration
service {
name = "vault"
tags = ["vault"]
}
}
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1 # enable TLS in production
}
api_addr = "http://{{ env "NOMAD_IP_api" }}:8200"
cluster_addr = "http://{{ env "NOMAD_IP_cluster" }}:8201"
ui = true
disable_mlock = true
# Extra settings to work around permission issues
disable_sealwrap = true
disable_cache = false
# Enable raw logging
enable_raw_log = true
# Integrate with Nomad service discovery
service_registration {
enabled = true
}
EOH
destination = "/opt/nomad/data/vault/config/vault.hcl"
}
resources {
cpu = 100
memory = 256
}
service {
name = "vault"
port = "api"
check {
name = "vault-health"
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "2s"
}
}
}
}
}

View File

@@ -1,94 +0,0 @@
job "vault-cluster" {
datacenters = ["dc1"]
type = "service"
group "vault-servers" {
count = 3
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "(warden|ash3c|master)"
}
task "vault" {
driver = "podman"
config {
image = "hashicorp/vault:latest"
ports = ["api", "cluster"]
# Make sure the container does not restart automatically on exit
command = "vault"
args = [
"server",
"-config=/vault/config/vault.hcl"
]
# Container network settings
network_mode = "host"
# Security settings
cap_add = ["IPC_LOCK"]
}
template {
data = <<EOH
storage "consul" {
address = "localhost:8500"
path = "vault/"
token = "{{ with secret "consul/creds/vault" }}{{ .Data.token }}{{ end }}"
}
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1 # enable TLS in production
}
api_addr = "http://{{ env "NOMAD_IP_api" }}:8200"
cluster_addr = "http://{{ env "NOMAD_IP_cluster" }}:8201"
ui = true
disable_mlock = true
EOH
destination = "vault/config/vault.hcl"
}
volume_mount {
volume = "vault-data"
destination = "/vault/data"
read_only = false
}
resources {
cpu = 500
memory = 1024
network {
mbits = 10
port "api" { static = 8200 }
port "cluster" { static = 8201 }
}
}
service {
name = "vault"
port = "api"
check {
name = "vault-health"
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "2s"
}
}
}
volume "vault-data" {
type = "host"
read_only = false
source = "vault-data"
}
}
}

View File

@@ -1,65 +0,0 @@
job "vault-dev-warden" {
datacenters = ["dc1"]
type = "service"
group "vault-dev" {
count = 1
# Constrain to nodes that have consul
constraint {
attribute = "${meta.consul}"
operator = "="
value = "true"
}
network {
port "http" {
to = 8200
}
port "cluster" {
to = 8201
}
}
service {
name = "vault-dev"
port = "http"
check {
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "5s"
}
}
task "vault-dev" {
driver = "raw_exec"
config {
command = "vault"
args = [
"server",
"-dev",
"-dev-listen-address=0.0.0.0:8200",
"-dev-root-token-id=root"
]
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
VAULT_TOKEN = "root"
}
resources {
cpu = 500
memory = 512
}
logs {
max_files = 10
max_file_size = 10
}
}
}
}

View File

@@ -1,241 +0,0 @@
job "vault-cluster-nomad" {
datacenters = ["dc1"]
type = "service"
group "vault-ch4" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ch4"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-ash3c" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ash3c"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-warden" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "warden"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
}

View File

@@ -1,157 +0,0 @@
job "vault" {
datacenters = ["dc1"]
type = "service"
# Constrain to the warden, ch4 and ash3c nodes only
constraint {
attribute = "${node.unique.name}"
operator = "regexp"
value = "^(warden|ch4|ash3c)$"
}
group "vault" {
count = 3
# Make sure each node runs only one instance
constraint {
operator = "distinct_hosts"
value = "true"
}
# Network configuration
network {
port "http" {
static = 8200
to = 8200
}
}
# Service discovery configuration - includes version information
service {
name = "vault"
port = "http"
# Add a version tag so the check is not rejected
tags = [
"vault",
"secrets",
"version:1.20.3"
]
check {
name = "vault-health"
type = "http"
path = "/v1/sys/health"
interval = "10s"
timeout = "3s"
method = "GET"
}
# Health check configuration
check {
name = "vault-sealed-check"
type = "script"
command = "/bin/sh"
args = ["-c", "vault status -format=json | jq -r '.sealed' | grep -q 'false'"]
interval = "30s"
timeout = "5s"
task = "vault"
}
}
# Task configuration
task "vault" {
driver = "raw_exec"
# Resources
resources {
cpu = 500
memory = 1024
}
# Environment variables
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Template - Vault configuration file
template {
data = <<EOF
ui = true
storage "consul" {
address = "127.0.0.1:8500"
path = "vault"
}
# HTTP listener (no TLS; nomad handles the load balancing)
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1
}
# Disable mlock to avoid permission issues
disable_mlock = true
# Logging configuration
log_level = "INFO"
log_format = "json"
# Performance tuning
max_lease_ttl = "168h"
default_lease_ttl = "24h"
# HA configuration
ha_storage "consul" {
address = "127.0.0.1:8500"
path = "vault"
}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
# Start command
config {
command = "/usr/bin/vault"
args = [
"agent",
"-config=/local/vault.hcl"
]
}
# Restart policy
restart {
attempts = 3
interval = "30m"
delay = "15s"
mode = "fail"
}
}
# Update policy
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
# Migration policy
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
}

View File

@@ -1,213 +0,0 @@
job "traefik-cloudflare-v1" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
mode = "host"
port "http" {
static = 80
host_network = "tailscale0"
}
port "https" {
static = 443
host_network = "tailscale0"
}
port "traefik" {
static = 8080
host_network = "tailscale0"
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "0.0.0.0:80"
http:
redirections:
entrypoint:
to: websecure
scheme: https
permanent: true
websecure:
address: "0.0.0.0:443"
traefik:
address: "0.0.0.0:8080"
providers:
consulCatalog:
endpoint:
address: "warden.tailnet-68f9.ts.net:8500"
scheme: "http"
watch: true
exposedByDefault: false
prefix: "traefik"
defaultRule: "Host(`{{ .Name }}.git4ta.me`)"
file:
filename: /local/dynamic.yml
watch: true
certificatesResolvers:
cloudflare:
acme:
email: houzhongxu.houzhongxu@gmail.com
storage: /local/acme.json
dnsChallenge:
provider: cloudflare
delayBeforeCheck: 30s
resolvers:
- "1.1.1.1:53"
- "1.0.0.1:53"
log:
level: DEBUG
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
serversTransports:
waypoint-insecure:
insecureSkipVerify: true
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
waypoint-auth:
replacePathRegex:
regex: "^/auth/token(.*)$"
replacement: "/auth/token$1"
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
nomad-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:4646" # 北京,优先
- url: "http://ch4.tailnet-68f9.ts.net:4646" # 韩国,备用
- url: "http://ash3c.tailnet-68f9.ts.net:4646" # 美国,备用
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
waypoint-cluster:
loadBalancer:
servers:
- url: "https://hcp1.tailnet-68f9.ts.net:9701" # hcp1 节点 HTTPS API
serversTransport: waypoint-insecure
vault-cluster:
loadBalancer:
servers:
- url: "http://ch4.tailnet-68f9.ts.net:8200" # 韩国,活跃节点
- url: "http://ash3c.tailnet-68f9.ts.net:8200" # 美国,备用节点
- url: "http://warden.tailnet-68f9.ts.net:8200" # 北京,备用节点
healthCheck:
path: "/v1/sys/health"
interval: "30s"
timeout: "15s"
routers:
consul-api:
rule: "Host(`consul.git4ta.me`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- websecure
tls:
certResolver: cloudflare
traefik-dashboard:
rule: "Host(`traefik.git4ta.me`)"
service: dashboard@internal
middlewares:
- dashboard_redirect@internal
- dashboard_stripprefix@internal
entryPoints:
- websecure
tls:
certResolver: cloudflare
nomad-ui:
rule: "Host(`nomad.git4ta.me`)"
service: nomad-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
waypoint-ui:
rule: "Host(`waypoint.git4ta.me`)"
service: waypoint-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
vault-ui:
rule: "Host(`vault.git4ta.me`)"
service: vault-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
EOF
destination = "local/dynamic.yml"
}
template {
data = <<EOF
CLOUDFLARE_EMAIL=houzhongxu.houzhongxu@gmail.com
CLOUDFLARE_DNS_API_TOKEN=HYT-cfZTP_jq6Xd9g3tpFMwxopOyIrf8LZpmGAI3
CLOUDFLARE_ZONE_API_TOKEN=HYT-cfZTP_jq6Xd9g3tpFMwxopOyIrf8LZpmGAI3
EOF
destination = "local/cloudflare.env"
env = true
}
resources {
cpu = 500
memory = 512
}
}
}
}

View File

@@ -1,241 +0,0 @@
job "vault-cluster-nomad" {
datacenters = ["dc1"]
type = "service"
group "vault-ch4" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ch4"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-ash3c" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ash3c"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-warden" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "warden"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
consul {
namespace = "default"
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
# Read the configuration from consul
template {
data = <<EOF
{{ key "vault/config" }}
EOF
destination = "local/vault.hcl"
perms = "644"
wait {
min = "2s"
max = "10s"
}
}
config {
command = "vault"
args = [
"server",
"-config=/local/vault.hcl"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 3
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
}

View File

@@ -1,49 +0,0 @@
job "waypoint-server" {
datacenters = ["dc1"]
type = "service"
group "waypoint" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
network {
port "http" {
static = 9701
}
port "grpc" {
static = 9702
}
}
task "waypoint" {
driver = "raw_exec"
config {
command = "/usr/local/bin/waypoint"
args = [
"server", "run",
"-accept-tos",
"-vvv",
"-db=/opt/waypoint/waypoint.db",
"-listen-grpc=0.0.0.0:9702",
"-listen-http=0.0.0.0:9701"
]
}
resources {
cpu = 500
memory = 512
}
env {
WAYPOINT_LOG_LEVEL = "DEBUG"
}
}
}
}