1 feat: 重构基础设施架构并完善Consul集群配置

2
     3	主要变更:
     4	- 重构Terraform/OpenTofu目录结构,统一迁移至infrastructure/opentofu
     5	- 添加"7天创造世界"文档,记录基础设施建设演进逻辑
     6	- 更新Consul集群配置管理经验,添加实际案例和解决方案
     7	- 修正README中的Sticky Note,反映Consul集群健康状态
     8	- 添加Ansible部署配置和inventory文件
     9	- 完善项目文档结构,添加各组件配置指南
    10
    11	技术架构演进:
    12	- 第1天: Tailscale网络连接基础 
    13	- 第2天: Ansible分布式控制 
    14	- 第3天: Nomad服务感知与任务调度 
    15	- 第4天: Consul配置集中管理 
    16	- 第5天: OpenTofu状态一致性 
    17	- 第6天: Vault密钥管理 
    18	- 第7天: Waypoint应用部署 
This commit is contained in:
2025-09-30 03:46:33 +00:00
parent c0064b2cad
commit e8bfc76038
119 changed files with 1772 additions and 631 deletions

View File

@@ -0,0 +1,58 @@
# Traefik dynamic configuration.
# Dynamic routers, services and middlewares can be added here.

# HTTP routing example
http:
  routers:
    # Test router
    test-router:
      rule: "Host(`test.service.consul`)"
      service: "test-service"
      entryPoints:
        - "https"
      # TLS is enabled without a certResolver: the static configuration
      # defines no certificatesResolvers, so the previous reference to a
      # resolver named "default" pointed at nothing. Add
      # `certResolver: <name>` here once a resolver exists.
      tls: {}

  services:
    # Test service
    test-service:
      loadBalancer:
        servers:
          - url: "http://127.0.0.1:8080"
        passHostHeader: true

  middlewares:
    # Basic-auth middleware
    # NOTE(review): this htpasswd hash is committed to the repository;
    # rotate it before using the middleware outside of testing.
    basic-auth:
      basicAuth:
        users:
          - "test:$apr1$H6uskkkW$IgXLP6ewTrSuBkTrqE8wj/"

    # Security-headers middleware
    # `sslRedirect` was dropped: the option was removed in Traefik v3, and
    # the HTTP -> HTTPS redirect is already done on the `http` entryPoint
    # in the static configuration.
    security-headers:
      headers:
        stsSeconds: 31536000
        stsIncludeSubdomains: true
        stsPreload: true
        forceSTSHeader: true
        customFrameOptionsValue: "SAMEORIGIN"
        contentTypeNosniff: true
        browserXssFilter: true

# TCP routing example
tcp:
  routers:
    # TCP test router
    # NOTE(review): HostSNI(`*`) without a `tls` section matches every
    # non-TLS connection arriving on the `https` entryPoint - confirm this
    # catch-all is intended next to the HTTP router above.
    tcp-test-router:
      rule: "HostSNI(`*`)"
      service: "tcp-test-service"
      entryPoints:
        - "https"

  services:
    # TCP test service
    tcp-test-service:
      loadBalancer:
        servers:
          - address: "127.0.0.1:8080"

View File

@@ -0,0 +1,47 @@
# Nomad agent configuration: client-only node at 100.116.80.94.
datacenter = "dc1"
log_level  = "INFO"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"

# Every listener uses the same address.
bind_addr = "100.116.80.94"

addresses {
  http = "100.116.80.94"
  rpc  = "100.116.80.94"
  serf = "100.116.80.94"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

# This node does not take part in the server quorum.
server {
  enabled = false
}

client {
  enabled           = true
  network_interface = "tailscale0"

  # RPC endpoints of the Nomad servers this client registers with.
  servers = [
    "100.116.158.95:4647", # semaphore
    "100.103.147.94:4647", # ash2e
    "100.81.26.3:4647",    # ash1d
    "100.90.159.68:4647"   # ch2
  ]
}

# Podman task driver; host volume mounts are allowed for tasks.
plugin "nomad-driver-podman" {
  config {
    socket_path = "unix:///run/podman/podman.sock"

    volumes {
      enabled = true
    }
  }
}

# Consul agent used by this Nomad agent.
consul {
  address = "100.116.80.94:8500"
}

View File

@@ -0,0 +1,47 @@
# Nomad agent configuration: client-only node at 100.117.106.136.
datacenter = "dc1"
log_level  = "INFO"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"

# Every listener uses the same address.
bind_addr = "100.117.106.136"

addresses {
  http = "100.117.106.136"
  rpc  = "100.117.106.136"
  serf = "100.117.106.136"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

# This node does not take part in the server quorum.
server {
  enabled = false
}

client {
  enabled           = true
  network_interface = "tailscale0"

  # RPC endpoints of the Nomad servers this client registers with.
  servers = [
    "100.116.158.95:4647", # semaphore
    "100.103.147.94:4647", # ash2e
    "100.81.26.3:4647",    # ash1d
    "100.90.159.68:4647"   # ch2
  ]
}

# Podman task driver; host volume mounts are allowed for tasks.
plugin "nomad-driver-podman" {
  config {
    socket_path = "unix:///run/podman/podman.sock"

    volumes {
      enabled = true
    }
  }
}

# Consul agent used by this Nomad agent.
consul {
  address = "100.117.106.136:8500"
}

View File

@@ -0,0 +1,38 @@
# Prometheus configuration: global timings plus one static scrape job per
# monitored component.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# No rule files are loaded yet; list them here when needed, e.g.
#   - "first_rules.yml"
#   - "second_rules.yml"
rule_files: []

scrape_configs:
  # Prometheus scraping itself.
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # OpenFaaS gateway metrics.
  - job_name: "openfaas"
    metrics_path: /metrics
    scrape_interval: 15s
    scrape_timeout: 10s
    static_configs:
      - targets: ["gateway:8080"]

  # NATS monitoring endpoint.
  - job_name: "nats"
    metrics_path: /metrics
    scrape_interval: 15s
    scrape_timeout: 10s
    static_configs:
      - targets: ["nats:8222"]

  # Host-level metrics.
  - job_name: "node-exporter"
    scrape_interval: 15s
    scrape_timeout: 10s
    static_configs:
      - targets: ["node-exporter:9100"]

  # Container-level metrics.
  - job_name: "cadvisor"
    scrape_interval: 15s
    scrape_timeout: 10s
    static_configs:
      - targets: ["cadvisor:8080"]

View File

@@ -0,0 +1,63 @@
# Traefik static configuration
global:
  sendAnonymousUsage: false

# API and dashboard
api:
  dashboard: true
  # Insecure mode is off. When it is on, Traefik serves the API on an
  # entryPoint named `traefik`, which it auto-creates on :8080 when it is
  # not declared - that collides with the `api` entryPoint below.
  # Expose the dashboard with a router that targets `api@internal` on the
  # `api` entryPoint instead.
  insecure: false

# Health endpoint (/ping), served on the `http` entryPoint so a health
# check against port 80 can reach it.
ping:
  entryPoint: "http"

# EntryPoints
entryPoints:
  http:
    address: ":80"
    # Redirect HTTP to HTTPS
    http:
      redirections:
        entryPoint:
          to: https
          scheme: https
  https:
    address: ":443"
  api:
    address: ":8080"

# Providers
# NOTE(review): no `file` provider is configured here, so files in a
# dynamic-configuration directory are not loaded - confirm whether one
# should be added.
providers:
  # Consul Catalog provider
  consulCatalog:
    exposedByDefault: false
    prefix: "traefik"
    refreshInterval: 15s
    requireConsistent: true
    stale: false
    watch: true
    endpoint:
      # host:port only; the protocol is given by `scheme`.
      address: "127.0.0.1:8500"
      scheme: "http"
    connectAware: true
    connectByDefault: false

  # Nomad provider
  nomad:
    exposedByDefault: false
    prefix: "traefik"
    refreshInterval: 15s
    stale: false
    watch: true
    endpoint:
      # The Nomad endpoint takes a full URL and has no `scheme` option.
      address: "http://127.0.0.1:4646"
    allowEmptyServices: true

# Logging
log:
  level: "INFO"
  format: "json"

accessLog:
  format: "json"
  fields:
    defaultMode: "keep"
    headers:
      defaultMode: "keep"

View File

@@ -0,0 +1 @@
components/consul/jobs/

View File

@@ -0,0 +1,37 @@
# DigitalOcean token store job.
#
# Writes the DigitalOcean API token to Consul KV at
# config/dev/digitalocean/token, the path the OpenTofu `consul_keys` data
# source reads.
#
# SECURITY: an earlier revision of this file embedded a real API token.
# That token is in version-control history and must be revoked. Secrets
# are now passed at submit time, e.g.
#   nomad job run -var "digitalocean_token=$DO_TOKEN" <this file>
variable "digitalocean_token" {
  type        = string
  description = "DigitalOcean API token to store in Consul KV"
}

variable "consul_http_token" {
  type        = string
  description = "Consul ACL token allowed to write the KV path"
  default     = "root" # adjust to the actual Consul ACL setup
}

job "digitalocean-key-store" {
  datacenters = ["dc1"]
  type        = "batch"

  group "key-store" {
    task "store-key" {
      driver = "exec"

      config {
        command = "/bin/sh"
        args = [
          "-c",
          <<EOT
set -eu
# Store the DigitalOcean token in Consul KV. Values come from the task
# environment, so nothing secret is written in this script.
curl -fsS -X PUT -H "X-Consul-Token: $CONSUL_HTTP_TOKEN" \
  --data-binary "$DIGITALOCEAN_TOKEN" \
  "$CONSUL_HTTP_ADDR/v1/kv/config/dev/digitalocean/token"
# Verify the key exists without printing the secret to the task logs.
curl -fsS -o /dev/null -H "X-Consul-Token: $CONSUL_HTTP_TOKEN" \
  "$CONSUL_HTTP_ADDR/v1/kv/config/dev/digitalocean/token"
EOT
        ]
      }

      env {
        CONSUL_HTTP_ADDR   = "http://127.0.0.1:8500"
        CONSUL_HTTP_TOKEN  = var.consul_http_token
        DIGITALOCEAN_TOKEN = var.digitalocean_token
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}

View File

@@ -0,0 +1,65 @@
# Serves a small status page with Python's built-in HTTP server on the
# node whose hostname matches "semaphore".
job "hybrid-nfs-app" {
  datacenters = ["dc1"]
  type        = "service"

  # Placement constraint used to distinguish storage types.
  constraint {
    attribute = "${attr.unique.hostname}"
    operator  = "regexp"
    value     = "semaphore"
  }

  group "app" {
    count = 1

    network {
      port "http" {
        static = 8080
      }
    }

    # Host volume for the local semaphore node.
    # NOTE(review): no task has a volume_mount for this volume, so it is
    # requested but never mounted - confirm whether web-app should mount it.
    volume "local-storage" {
      type      = "host"
      read_only = false
      source    = "local-fnsync"
    }

    task "web-app" {
      driver = "exec"

      config {
        command = "python3"
        args    = ["-m", "http.server", "8080", "--directory", "local/fnsync"]
      }

      # index.html rendered into the directory served above.
      # `timestamp` is the consul-template function for the current time;
      # the previous `now | date` pipeline is not defined in Nomad templates.
      template {
        data        = <<EOH
<h1>Hybrid NFS App - Running on {{ env "attr.unique.hostname" }}</h1>
<p>Storage Type: {{ if eq (env "attr.unique.hostname") "semaphore" }}PVE Mount{{ else }}NFS{{ end }}</p>
<p>Timestamp: {{ timestamp "2006-01-02 15:04:05" }}</p>
EOH
        destination = "local/fnsync/index.html"
      }

      resources {
        cpu    = 100
        memory = 128
      }

      service {
        name = "hybrid-nfs-app"
        port = "http"
        tags = ["hybrid", "nfs", "web"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,51 @@
# nginx serving content from the "nfs-fnsync" host volume.
job "nfs-app-example" {
  datacenters = ["dc1"]
  type        = "service"

  group "app" {
    count = 1

    # The "http" label used by the task's `ports` list and by the service
    # must be declared here; it maps to nginx's port 80 in the container.
    network {
      port "http" {
        to = 80
      }
    }

    # NFS-backed storage volume.
    volume "nfs-storage" {
      type      = "host"
      read_only = false
      source    = "nfs-fnsync"
    }

    task "web-app" {
      driver = "docker"

      config {
        image = "nginx:alpine"
        ports = ["http"]
      }

      # Mount the group volume into the container. The previous Docker
      # `mount` of type "volume" would have created a separate Docker
      # named volume rather than using the Nomad volume declared above.
      volume_mount {
        volume      = "nfs-storage"
        destination = "/usr/share/nginx/html"
        read_only   = false
      }

      resources {
        cpu    = 100
        memory = 128
      }

      service {
        name = "nfs-web-app"
        port = "http"
        tags = ["nfs", "web"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,34 @@
# Batch job that writes a marker file to the NFS volume and lists it.
job "nfs-storage-test" {
  datacenters = ["dc1"]
  type        = "batch"

  group "test" {
    count = 1

    # Declared as a host volume, like the other jobs that use the
    # "nfs-fnsync" source. The previous `type = "csi"` lacked the
    # access_mode / attachment_mode settings a CSI volume request needs.
    volume "nfs-storage" {
      type      = "host"
      read_only = false
      source    = "nfs-fnsync"
    }

    task "storage-test" {
      driver = "exec"

      volume_mount {
        volume      = "nfs-storage"
        destination = "/mnt/nfs"
        read_only   = false
      }

      config {
        command = "/bin/sh"
        # The message is double-quoted for the shell so that $(hostname)
        # and $(date) are expanded; single quotes wrote them literally.
        args = ["-c", "echo \"NFS Storage Test - $(hostname) - $(date)\" > /mnt/nfs/test-$(hostname).txt && ls -la /mnt/nfs/"]
      }

      resources {
        cpu    = 50
        memory = 64
      }
    }
  }
}

1
infrastructure/jobs/nomad Symbolic link
View File

@@ -0,0 +1 @@
components/nomad/jobs/

View File

@@ -0,0 +1,84 @@
# Two task groups that mount the same "nfs-shared" host volume on
# different sets of nodes.
job "nfs-multi-type-example" {
  datacenters = ["dc1"]
  type        = "service"

  # Task group for the local LXC containers.
  group "lxc-apps" {
    count = 2

    constraint {
      attribute = "${attr.unique.hostname}"
      operator  = "regexp"
      value     = "(influxdb|hcp)"
    }

    volume "lxc-nfs" {
      type      = "host"
      source    = "nfs-shared"
      read_only = false
    }

    task "lxc-app" {
      driver = "podman"

      config {
        image = "alpine:latest"
        # Keeps the container running without doing any work.
        args = ["tail", "-f", "/dev/null"]
      }

      volume_mount {
        volume      = "lxc-nfs"
        destination = "/shared/lxc"
        read_only   = false
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }

  # Task group for the overseas PVE containers.
  group "pve-apps" {
    count = 3

    constraint {
      attribute = "${attr.unique.hostname}"
      operator  = "regexp"
      value     = "(ash1d|ash2e|ash3c|ch2|ch3)"
    }

    volume "pve-nfs" {
      type      = "host"
      source    = "nfs-shared"
      read_only = false
    }

    task "pve-app" {
      driver = "podman"

      config {
        image = "alpine:latest"
        args  = ["tail", "-f", "/dev/null"]
        # Host networking for the overseas nodes.
        network_mode = "host"
      }

      volume_mount {
        volume      = "pve-nfs"
        destination = "/shared/pve"
        read_only   = false
      }

      # The task-level `network { mbits = 5 }` block was removed:
      # bandwidth (mbits) reservations inside `resources` are deprecated.
      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}

View File

@@ -0,0 +1,86 @@
# Example OpenFaaS function containers, one task group per function.
#
# Compared with the previous revision:
#   - ports are declared in a group-level `network` block, which is where
#     the `ports` list in the driver config looks labels up (the
#     task-level `resources.network` block and `mbits` are deprecated);
#   - environment variables are set in a task-level `env` block instead of
#     inside the podman driver `config`.
job "openfaas-functions" {
  datacenters = ["dc1"]
  type        = "service"

  group "hello-world" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    network {
      port "http" {
        static = 8080
      }
    }

    task "hello-world" {
      driver = "podman"

      config {
        image = "functions/hello-world:latest"
        ports = ["http"]
      }

      env {
        fprocess = "node index.js"
      }

      service {
        name = "hello-world"
        port = "http"
        tags = ["openfaas-function"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }

  # NOTE(review): both groups reserve static port 8080, so they cannot be
  # placed on the same node - confirm that is intended.
  group "figlet" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    network {
      port "http" {
        static = 8080
      }
    }

    task "figlet" {
      driver = "podman"

      config {
        image = "functions/figlet:latest"
        ports = ["http"]
      }

      env {
        fprocess = "figlet"
      }

      service {
        name = "figlet"
        port = "http"
        tags = ["openfaas-function"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,176 @@
# OpenFaaS core stack: gateway, NATS Streaming, queue worker, Prometheus.
#
# Compared with the previous revision:
#   - ports are declared in group-level `network` blocks, which is where
#     the `ports` list in the driver config looks labels up (the
#     task-level `resources.network` block and `mbits` are deprecated);
#   - environment variables are set in task-level `env` blocks instead of
#     inside the podman driver `config`.
# All addresses and values are unchanged.
job "openfaas" {
  datacenters = ["dc1"]
  type        = "service"

  group "openfaas-gateway" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    network {
      port "http" {
        static = 8080
      }
      port "ui" {
        static = 8081
      }
    }

    task "openfaas-gateway" {
      driver = "podman"

      config {
        image = "ghcr.io/openfaas/gateway:0.2.35"
        ports = ["http", "ui"]
      }

      # NOTE(review): faas_nats_address uses localhost, but NATS runs in a
      # different task group and may land on another node - confirm how
      # the gateway is expected to reach it.
      env {
        functions_provider_url = "http://${NOMAD_IP_http}:8080"
        read_timeout           = "60s"
        write_timeout          = "60s"
        upstream_timeout       = "60s"
        direct_functions       = "true"
        faas_nats_address      = "nats://localhost:4222"
        faas_nats_streaming    = "true"
        basic_auth             = "true"
        secret_mount_path      = "/run/secrets"
        scale_from_zero        = "true"
      }

      service {
        name = "openfaas-gateway"
        port = "http"

        check {
          type     = "http"
          path     = "/healthz"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }

  group "nats" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    network {
      port "nats" {
        static = 4222
      }
    }

    task "nats" {
      driver = "podman"

      config {
        image = "nats-streaming:0.25.3"
        ports = ["nats"]
        args = [
          "-p",
          "4222",
          "-m",
          "8222",
          "-hbi",
          "5s",
          "-hbt",
          "5s",
          "-hbf",
          "2",
          "-SD",
          "-cid",
          "openfaas"
        ]
      }

      service {
        name = "nats"
        port = "nats"

        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }

  group "queue-worker" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    task "queue-worker" {
      driver = "podman"

      config {
        image = "ghcr.io/openfaas/queue-worker:0.12.2"
      }

      # NOTE(review): this group declares no port labelled "http", so
      # ${NOMAD_IP_http} has nothing to resolve to here; the localhost
      # NATS address has the same cross-group concern as in the gateway.
      env {
        gateway_url         = "http://${NOMAD_IP_http}:8080"
        faas_nats_address   = "nats://localhost:4222"
        faas_nats_streaming = "true"
        ack_wait            = "5m"
        write_debug         = "true"
      }
    }
  }

  group "prometheus" {
    count = 1

    constraint {
      attribute = "${node.unique.name}"
      operator  = "regexp"
      value     = "(master|ash3c|hcp)"
    }

    network {
      port "prometheus" {
        static = 9090
      }
    }

    task "prometheus" {
      driver = "podman"

      config {
        image = "prom/prometheus:v2.35.0"
        ports = ["prometheus"]
        # Prometheus configuration comes from the host file system.
        volumes = [
          "/opt/openfaas/prometheus.yml:/etc/prometheus/prometheus.yml"
        ]
      }

      service {
        name = "prometheus"
        port = "prometheus"

        check {
          type     = "http"
          path     = "/-/healthy"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

View File

@@ -0,0 +1,78 @@
# Traefik reverse proxy, three instances bound to static ports 80/443/8080.
job "traefik" {
  datacenters = ["dc1"]
  type        = "service"

  # Rolling update, one allocation at a time, reverting automatically on
  # failure. The previous `strategy = "canary"` line was removed because
  # the update block has no `strategy` attribute; canaries are requested
  # with `canary = <count>` (plus promotion) if they are wanted.
  update {
    max_parallel     = 1
    min_healthy_time = "10s"
    healthy_deadline = "3m"
    auto_revert      = true
  }

  group "traefik" {
    count = 3

    restart {
      attempts = 3
      interval = "30m"
      delay    = "15s"
      mode     = "fail"
    }

    network {
      port "http" {
        static = 80
      }
      port "https" {
        static = 443
      }
      port "api" {
        static = 8080
      }
    }

    task "traefik" {
      driver = "podman"

      config {
        # NOTE(review): `latest` is unpinned; consider a fixed version tag
        # so that redeployments are reproducible.
        image = "traefik:latest"
        ports = ["http", "https", "api"]
        # Static and dynamic configuration are mounted read-only from the
        # host. The Docker socket mount was dropped: the task runs under
        # podman and the static configuration enables no Docker provider.
        volumes = [
          "/root/mgmt/configs/traefik.yml:/etc/traefik/traefik.yml:ro",
          "/root/mgmt/configs/dynamic:/etc/traefik/dynamic:ro"
        ]
      }

      env {
        NOMAD_ADDR       = "http://${attr.unique.network.ip-address}:4646"
        CONSUL_HTTP_ADDR = "http://${attr.unique.network.ip-address}:8500"
      }

      resources {
        cpu    = 200
        memory = 256
      }

      service {
        name = "traefik"
        port = "http"
        # Router that publishes the dashboard (api@internal) on the `api`
        # entryPoint.
        tags = [
          "traefik.enable=true",
          "traefik.http.routers.api.rule=Host(`traefik.service.consul`)",
          "traefik.http.routers.api.service=api@internal",
          "traefik.http.routers.api.entrypoints=api",
          "traefik.http.services.api.loadbalancer.server.port=8080"
        ]

        # NOTE(review): /ping only answers when `ping` is enabled in the
        # Traefik static configuration - confirm it is.
        check {
          type     = "http"
          path     = "/ping"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

1
infrastructure/jobs/vault Symbolic link
View File

@@ -0,0 +1 @@
components/vault/jobs/

View File

@@ -0,0 +1,123 @@
# Consul KV 命名规范
本文档描述了在Consul KV中存储配置信息的统一命名规范,以确保所有配置管理的一致性和可维护性。
## 命名规范
### 基本格式
```
config/{environment}/{provider}/{region_or_service}/{key}
```
### 各部分说明
- **config**: 固定前缀,表示这是一个配置项
- **environment**: 环境名称,如 `dev`、`staging`、`prod`
- **provider**: 云服务提供商,如 `oracle`、`digitalocean`、`aws`、`gcp`
- **region_or_service**: 区域或服务名称,如 `kr`、`us`、`sgp`
- **key**: 具体的配置键名,如 `token`、`tenancy_ocid`、`user_ocid`
### 示例
#### Oracle Cloud 配置
```
config/dev/oracle/kr/tenancy_ocid
config/dev/oracle/kr/user_ocid
config/dev/oracle/kr/fingerprint
config/dev/oracle/kr/private_key
config/dev/oracle/kr/region
config/dev/oracle/us/tenancy_ocid
config/dev/oracle/us/user_ocid
config/dev/oracle/us/fingerprint
config/dev/oracle/us/private_key
config/dev/oracle/us/region
```
#### DigitalOcean 配置
```
config/dev/digitalocean/token
```
#### 其他云服务商配置(示例)
```
config/dev/aws/access_key
config/dev/aws/secret_key
config/dev/aws/region
config/dev/gcp/project_id
config/dev/gcp/credentials_file
config/dev/gcp/region
```
## 使用说明
### 添加新配置
当需要为新的云服务商或环境添加配置时,请遵循上述命名规范:
1. 确定环境名称(如 `dev`)
2. 确定云服务提供商(如 `aws`)
3. 确定区域或服务(如 `ap-northeast-2`)
4. 确定具体的配置键名(如 `access_key`)
例如:
```
consul kv put config/dev/aws/ap-northeast-2/access_key your_access_key
```
### 在Terraform中使用
在Terraform配置中使用 `consul_keys` 数据源获取配置:
```hcl
data "consul_keys" "aws_config" {
key {
name = "access_key"
path = "config/dev/aws/ap-northeast-2/access_key"
}
key {
name = "secret_key"
path = "config/dev/aws/ap-northeast-2/secret_key"
}
}
provider "aws" {
access_key = data.consul_keys.aws_config.var.access_key
secret_key = data.consul_keys.aws_config.var.secret_key
region = "ap-northeast-2"
}
```
### 与Vault集成
当需要与Vault集成时,可以使用相同的命名规范,确保Consul和Vault中的配置路径保持一致。
## 维护说明
- 所有Agent在添加新的Consul KV键时,必须遵循此命名规范
- 定期检查Consul KV中的键,确保符合规范
- 如需修改命名规范,请更新此文档并通知所有相关Agent
## 常见问题
### Q: 为什么不使用服务名称作为前缀(如 `oracle/config/dev/...`)?
A: 使用 `config` 作为统一前缀可以更容易地区分配置项和其他类型的键值对,便于管理和筛选。
### Q: 如何处理敏感信息?
A: 敏感信息(如API密钥、私钥等)应存储在Vault中,Consul主要用于非敏感配置。如果必须在Consul中存储敏感信息,请确保Consul集群的安全性。
### Q: 如何处理多环境配置?
A: 通过修改 `environment` 部分来区分不同环境,如 `config/dev/...`、`config/staging/...`、`config/prod/...`。
## 更新历史
- 2024-01-01: 初始版本,定义了基本的命名规范
- 2024-01-02: 统一DigitalOcean配置路径,从 `consul/digitalocean/token` 改为 `config/dev/digitalocean/token`

View File

@@ -0,0 +1,13 @@
# DigitalOcean API token, looked up in Consul KV.
# An empty string is used when the key is absent.
data "consul_keys" "do_token" {
  key {
    path    = "config/dev/digitalocean/token"
    name    = "token"
    default = ""
  }
}

# DigitalOcean provider, authenticated with the token read above.
provider "digitalocean" {
  token = data.consul_keys.do_token.var.token
}

View File

@@ -0,0 +1,162 @@
# Development environment - root configuration.
# Tool version, provider requirements and state backend.
terraform {
  required_version = ">= 1.6"

  required_providers {
    # Oracle Cloud Infrastructure
    oci = {
      version = "~> 7.20"
      source  = "oracle/oci"
    }

    # General-purpose utility providers
    random = {
      version = "~> 3.1"
      source  = "hashicorp/random"
    }

    tls = {
      version = "~> 4.0"
      source  = "hashicorp/tls"
    }

    local = {
      version = "~> 2.1"
      source  = "hashicorp/local"
    }

    # Consul
    consul = {
      version = "~> 2.22.0"
      source  = "hashicorp/consul"
    }

    # HashiCorp Vault
    vault = {
      version = "~> 4.0"
      source  = "hashicorp/vault"
    }

    # DigitalOcean
    digitalocean = {
      version = "~> 2.0"
      source  = "digitalocean/digitalocean"
    }
  }

  # State is kept in a local file.
  backend "local" {
    path = "terraform.tfstate"
  }
}
# Consul provider: plain-HTTP agent on localhost, datacenter dc1.
provider "consul" {
  datacenter = "dc1"
  scheme     = "http"
  address    = "localhost:8500"
}

# Vault provider: address and token both come from input variables.
provider "vault" {
  token   = var.vault_token
  address = var.vault_config.address
}
# Oracle Cloud credentials read from Consul KV under config/dev/oracle/kr/.
data "consul_keys" "oracle_config" {
  key {
    path = "config/dev/oracle/kr/tenancy_ocid"
    name = "tenancy_ocid"
  }

  key {
    path = "config/dev/oracle/kr/user_ocid"
    name = "user_ocid"
  }

  key {
    path = "config/dev/oracle/kr/fingerprint"
    name = "fingerprint"
  }

  key {
    path = "config/dev/oracle/kr/private_key"
    name = "private_key"
  }
}
# Oracle Cloud credentials for the US region, read from Consul KV under
# config/dev/oracle/us/.
data "consul_keys" "oracle_config_us" {
  key {
    path = "config/dev/oracle/us/tenancy_ocid"
    name = "tenancy_ocid"
  }

  key {
    path = "config/dev/oracle/us/user_ocid"
    name = "user_ocid"
  }

  key {
    path = "config/dev/oracle/us/fingerprint"
    name = "fingerprint"
  }

  key {
    path = "config/dev/oracle/us/private_key"
    name = "private_key"
  }
}
# Default OCI provider (region ap-chuncheon-1), configured from the values
# read out of Consul.
provider "oci" {
  region       = "ap-chuncheon-1"
  tenancy_ocid = data.consul_keys.oracle_config.var.tenancy_ocid
  user_ocid    = data.consul_keys.oracle_config.var.user_ocid
  fingerprint  = data.consul_keys.oracle_config.var.fingerprint
  private_key  = data.consul_keys.oracle_config.var.private_key
}

# Aliased OCI provider for the US region (us-ashburn-1).
provider "oci" {
  alias        = "us"
  region       = "us-ashburn-1"
  tenancy_ocid = data.consul_keys.oracle_config_us.var.tenancy_ocid
  user_ocid    = data.consul_keys.oracle_config_us.var.user_ocid
  fingerprint  = data.consul_keys.oracle_config_us.var.fingerprint
  private_key  = data.consul_keys.oracle_config_us.var.private_key
}
# Oracle Cloud 基础设施 - 暂时注释掉以避免VCN数量限制问题
# module "oracle_cloud" {
# source = "../../providers/oracle-cloud"
#
# # 传递变量
# environment = var.environment
# project_name = var.project_name
# owner = var.owner
# vpc_cidr = var.vpc_cidr
# availability_zones = var.availability_zones
# common_tags = var.common_tags
#
# # 使用从Consul获取的配置
# oci_config = {
# tenancy_ocid = data.consul_keys.oracle_config.var.tenancy_ocid
# user_ocid = data.consul_keys.oracle_config.var.user_ocid
# fingerprint = data.consul_keys.oracle_config.var.fingerprint
# private_key = data.consul_keys.oracle_config.var.private_key
# region = "ap-chuncheon-1"
# compartment_ocid = data.consul_keys.oracle_config.var.tenancy_ocid # 使用tenancy_ocid作为compartment_ocid
# }
#
# # 开发环境特定配置
# instance_count = 1
# instance_size = "VM.Standard.E2.1.Micro" # 免费层
#
# providers = {
# oci = oci
# }
# }
# 输出
# output "oracle_cloud_outputs" {
# description = "Oracle Cloud 基础设施输出"
# value = module.oracle_cloud
# }

View File

@@ -0,0 +1,61 @@
# Example variable values for the development environment.
# Copy this file to terraform.tfvars and fill in real values.

# --- Basics ---
environment  = "dev"
project_name = "mgmt"
owner        = "ben"

# Cloud providers to enable
cloud_providers = ["oracle", "huawei"]

# --- Networking ---
vpc_cidr           = "10.0.0.0/16"
availability_zones = ["a", "b"]

# --- Tags applied to every resource ---
common_tags = {
  Environment = "dev"
  Project     = "mgmt"
  Owner       = "ben"
  ManagedBy   = "opentofu"
}

# --- Oracle Cloud ---
oci_config = {
  tenancy_ocid     = "ocid1.tenancy.oc1..your-tenancy-id"
  user_ocid        = "ocid1.user.oc1..your-user-id"
  fingerprint      = "your-key-fingerprint"
  private_key_path = "~/.oci/oci_api_key.pem"
  region           = "ap-seoul-1"
  compartment_ocid = "ocid1.compartment.oc1..your-compartment-id"
}

# --- Huawei Cloud ---
huawei_config = {
  access_key = "your-access-key"
  secret_key = "your-secret-key"
  region     = "cn-north-4"
  project_id = "your-project-id"
}

# --- Google Cloud (optional) ---
gcp_config = {
  project_id       = "your-project-id"
  region           = "asia-northeast3"
  zone             = "asia-northeast3-a"
  credentials_file = "~/.gcp/service-account.json"
}

# --- AWS (optional) ---
aws_config = {
  region     = "ap-northeast-2"
  access_key = "your-access-key"
  secret_key = "your-secret-key"
}

# --- DigitalOcean (optional) ---
do_config = {
  token  = "your-do-token"
  region = "sgp1"
}

View File

@@ -0,0 +1,154 @@
# Input variables for the development environment: general settings.

variable "environment" {
  type        = string
  default     = "dev"
  description = "环境名称"
}

variable "project_name" {
  type        = string
  default     = "mgmt"
  description = "项目名称"
}

variable "owner" {
  type        = string
  default     = "ben"
  description = "项目所有者"
}

# Cloud providers to enable.
variable "cloud_providers" {
  type        = list(string)
  default     = ["oracle"]
  description = "要启用的云服务商列表"
}

variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

variable "availability_zones" {
  type        = list(string)
  default     = ["a", "b"]
  description = "可用区列表"
}

# Tags shared by all resources.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"

  default = {
    Environment = "dev"
    Project     = "mgmt"
    ManagedBy   = "opentofu"
  }
}
# Oracle Cloud settings
variable "oci_config" {
  description = "Oracle Cloud 配置"

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
    compartment_ocid = optional(string)
  })

  default = {
    tenancy_ocid     = ""
    user_ocid        = ""
    fingerprint      = ""
    private_key_path = ""
    region           = "ap-seoul-1"
    compartment_ocid = ""
  }
}

# Huawei Cloud settings (hidden from plan output)
variable "huawei_config" {
  description = "华为云配置"
  sensitive   = true

  type = object({
    access_key = string
    secret_key = string
    region     = string
    project_id = optional(string)
  })

  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
    project_id = ""
  }
}

# Google Cloud settings
variable "gcp_config" {
  description = "Google Cloud 配置"

  type = object({
    project_id       = string
    region           = string
    zone             = string
    credentials_file = string
  })

  default = {
    project_id       = ""
    region           = "asia-northeast3"
    zone             = "asia-northeast3-a"
    credentials_file = ""
  }
}

# AWS settings (hidden from plan output)
variable "aws_config" {
  description = "AWS 配置"
  sensitive   = true

  type = object({
    region     = string
    access_key = string
    secret_key = string
  })

  default = {
    region     = "ap-northeast-2"
    access_key = ""
    secret_key = ""
  }
}

# DigitalOcean settings (hidden from plan output)
variable "do_config" {
  description = "DigitalOcean 配置"
  sensitive   = true

  type = object({
    token  = string
    region = string
  })

  default = {
    token  = ""
    region = "sgp1"
  }
}
# HashiCorp Vault settings (hidden from plan output)
variable "vault_config" {
  description = "HashiCorp Vault 配置"
  sensitive   = true

  type = object({
    address = string
    token   = string
  })

  default = {
    address = "http://localhost:8200"
    token   = ""
  }
}

# Vault access token (hidden from plan output)
variable "vault_token" {
  type        = string
  default     = ""
  sensitive   = true
  description = "Vault 访问令牌"
}

View File

@@ -0,0 +1,169 @@
# Nomad multi-datacenter production environment.
# Layout: CN (dc1) + KR (dc2) + US (dc3).
terraform {
  required_version = ">= 1.0"

  required_providers {
    oci = {
      version = "~> 7.20"
      source  = "oracle/oci"
    }

    huaweicloud = {
      version = "~> 1.60"
      source  = "huaweicloud/huaweicloud"
    }
  }
}
# Oracle Cloud provider for Korea (Seoul).
provider "oci" {
  alias            = "korea"
  region           = "ap-seoul-1"
  tenancy_ocid     = var.oracle_tenancy_ocid
  user_ocid        = var.oracle_user_ocid
  fingerprint      = var.oracle_fingerprint
  private_key_path = var.oracle_private_key_path
}

# Huawei Cloud provider for the US (us-east-1).
provider "huaweicloud" {
  alias      = "us"
  region     = "us-east-1"
  access_key = var.huawei_access_key
  secret_key = var.huawei_secret_key
}
# Values shared by the modules below.
locals {
  environment  = "production"
  project_name = "nomad-multi-dc"

  common_tags = {
    Project     = local.project_name
    Environment = local.environment
    ManagedBy   = "opentofu"
    Owner       = "devops-team"
  }
}

# SSH public key read from the operator's home directory.
data "local_file" "ssh_public_key" {
  filename = pathexpand("~/.ssh/id_rsa.pub")
}
# Oracle Cloud infrastructure (Korea - dc2)
module "oracle_infrastructure" {
  source = "../../providers/oracle-cloud"

  providers = {
    oci = oci.korea
  }

  environment  = local.environment
  project_name = local.project_name
  common_tags  = local.common_tags
  vpc_cidr     = "10.1.0.0/16"

  oci_config = {
    region           = "ap-seoul-1"
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
  }
}

# Huawei Cloud infrastructure (US - dc3)
module "huawei_infrastructure" {
  source = "../../providers/huawei-cloud"

  providers = {
    huaweicloud = huaweicloud.us
  }

  environment        = local.environment
  project_name       = local.project_name
  common_tags        = local.common_tags
  vpc_cidr           = "10.2.0.0/16"
  availability_zones = ["us-east-1a", "us-east-1b"]
}
# Nomad multi-datacenter cluster
module "nomad_cluster" {
  source = "../../modules/nomad-cluster"

  # Which nodes to deploy
  deploy_korea_node = var.deploy_korea_node
  deploy_us_node    = var.deploy_us_node

  # Oracle Cloud: credentials plus the network created above
  oracle_subnet_id         = module.oracle_infrastructure.public_subnet_ids[0]
  oracle_security_group_id = module.oracle_infrastructure.security_group_id
  oracle_config = {
    region           = "ap-seoul-1"
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
  }

  # Huawei Cloud: credentials plus the network created above
  huawei_subnet_id         = module.huawei_infrastructure.public_subnet_ids[0]
  huawei_security_group_id = module.huawei_infrastructure.security_group_id
  huawei_config = {
    region     = "us-east-1"
    access_key = var.huawei_access_key
    secret_key = var.huawei_secret_key
  }

  # Shared settings
  common_tags    = local.common_tags
  ssh_public_key = data.local_file.ssh_public_key.content

  # Nomad settings
  nomad_version     = "1.10.5"
  nomad_encrypt_key = var.nomad_encrypt_key
}
# Writes an Ansible inventory (YAML) for the Nomad servers.
# NOTE(review): StrictHostKeyChecking=no turns off SSH host-key
# verification for every host in the inventory - confirm this is acceptable.
resource "local_file" "ansible_inventory" {
  filename = "${path.module}/generated/nomad-cluster-inventory.yml"

  content = yamlencode({
    all = {
      vars = {
        ansible_user                 = "ubuntu"
        ansible_ssh_private_key_file = "~/.ssh/id_rsa"
        ansible_ssh_common_args      = "-o StrictHostKeyChecking=no"
      }
      children = {
        nomad_servers = {
          hosts = module.nomad_cluster.ansible_inventory.all.children.nomad_servers.hosts
        }
      }
    }
  })
}
# Renders the post-deployment script from its template, mode 0755.
resource "local_file" "post_deploy_script" {
  filename        = "${path.module}/generated/post-deploy.sh"
  file_permission = "0755"

  content = templatefile("${path.module}/templates/post-deploy.sh", {
    endpoints        = module.nomad_cluster.cluster_endpoints
    cluster_overview = module.nomad_cluster.cluster_overview
  })
}

# Renders the cross-datacenter test job for dc1, dc2 and dc3.
resource "local_file" "cross_dc_test_job" {
  filename = "${path.module}/generated/cross-dc-test.nomad"

  content = templatefile("${path.module}/templates/cross-dc-test.nomad", {
    datacenters = ["dc1", "dc2", "dc3"]
  })
}

View File

@@ -0,0 +1,46 @@
# Outputs of the Nomad multi-datacenter production environment.
# These four re-export values of the nomad_cluster module unchanged.

output "cluster_overview" {
  value       = module.nomad_cluster.cluster_overview
  description = "Nomad 多数据中心集群概览"
}

output "cluster_endpoints" {
  value       = module.nomad_cluster.cluster_endpoints
  description = "集群连接端点"
}

output "oracle_korea_node" {
  value       = module.nomad_cluster.oracle_korea_node
  description = "Oracle Cloud 韩国节点信息"
}

output "huawei_us_node" {
  value       = module.nomad_cluster.huawei_us_node
  description = "华为云美国节点信息"
}
# Deployment summary: node count, datacenters, follow-up steps and
# connection details.
output "deployment_summary" {
  description = "部署摘要"

  value = {
    total_nodes = module.nomad_cluster.cluster_overview.total_nodes

    # Only datacenters with an entry are listed. The module sets the entry
    # of a datacenter that is not deployed to null, and keys() would still
    # have returned its name.
    datacenters = [
      for dc_name, dc in module.nomad_cluster.cluster_overview.datacenters :
      dc_name if dc != null
    ]

    next_steps = [
      "1. 等待所有节点启动完成 (约 5-10 分钟)",
      "2. 运行: ./generated/post-deploy.sh",
      "3. 验证集群: nomad server members",
      "4. 测试跨 DC 调度: nomad job run generated/cross-dc-test.nomad",
      "5. 访问 Web UI 查看集群状态"
    ]

    web_ui_urls  = module.nomad_cluster.cluster_endpoints.nomad_ui_urls
    ssh_commands = module.nomad_cluster.cluster_endpoints.ssh_commands
  }
}
# Verification commands, re-exported from the nomad_cluster module.
output "verification_commands" {
  value       = module.nomad_cluster.verification_commands
  description = "验证命令"
}

View File

@@ -0,0 +1,22 @@
# Example values for the Nomad multi-datacenter production environment.
# Copy this file to terraform.tfvars and fill in real values.

# Deployment switches
deploy_korea_node = true # deploy the Korea node
deploy_us_node    = true # deploy the US node

# Oracle Cloud (Korea - dc2)
# How to obtain: https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm
oracle_tenancy_ocid     = "ocid1.tenancy.oc1..aaaaaaaa..."
oracle_user_ocid        = "ocid1.user.oc1..aaaaaaaa..."
oracle_fingerprint      = "aa:bb:cc:dd:ee:ff:..."
oracle_private_key_path = "~/.oci/oci_api_key.pem"

# Huawei Cloud (US - dc3)
# How to obtain: https://console.huaweicloud.com/iam/#/mine/accessKey
huawei_access_key = "YOUR_HUAWEI_ACCESS_KEY"
huawei_secret_key = "YOUR_HUAWEI_SECRET_KEY"

# Nomad gossip encryption key.
# Generate your own with: nomad operator keygen
# Never commit a real key; the value below is a placeholder.
nomad_encrypt_key = "REPLACE_WITH_OUTPUT_OF_nomad_operator_keygen"

View File

@@ -0,0 +1,60 @@
# Input variables for the Nomad multi-datacenter production environment.

# --- Deployment switches ---
# Both default to false so that no compute resources are created.

variable "deploy_korea_node" {
  type        = bool
  default     = false
  description = "是否部署韩国节点 (Oracle Cloud)"
}

variable "deploy_us_node" {
  type        = bool
  default     = false
  description = "是否部署美国节点 (华为云)"
}

# --- Oracle Cloud ---

variable "oracle_tenancy_ocid" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 租户 OCID"
}

variable "oracle_user_ocid" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 用户 OCID"
}

variable "oracle_fingerprint" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud API 密钥指纹"
}

variable "oracle_private_key_path" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 私钥文件路径"
}

# --- Huawei Cloud ---

variable "huawei_access_key" {
  type        = string
  sensitive   = true
  description = "华为云访问密钥"
}

variable "huawei_secret_key" {
  type        = string
  sensitive   = true
  description = "华为云秘密密钥"
}
# Nomad gossip encryption key.
# There is deliberately no default: the key committed in an earlier
# revision is in version-control history and should be rotated. Generate a
# key with `nomad operator keygen` and supply it through terraform.tfvars
# or the TF_VAR_nomad_encrypt_key environment variable.
variable "nomad_encrypt_key" {
  description = "Nomad 集群加密密钥"
  type        = string
  sensitive   = true

  validation {
    condition     = length(var.nomad_encrypt_key) > 0 && can(base64decode(var.nomad_encrypt_key))
    error_message = "The nomad_encrypt_key value must be a non-empty base64 string (see `nomad operator keygen`)."
  }
}

View File

@@ -0,0 +1,159 @@
# Nomad multi-datacenter cluster module.
# Supports a cross-region layout: CN (dc1) + KR (dc2) + US (dc3).
terraform {
  required_providers {
    oci = {
      version = "~> 7.20"
      source  = "oracle/oci"
    }

    huaweicloud = {
      version = "~> 1.60"
      source  = "huaweicloud/huaweicloud"
    }

    aws = {
      version = "~> 5.0"
      source  = "hashicorp/aws"
    }
  }
}
# Local values
locals {
  # Taken from the module inputs. The production root module passes
  # `nomad_version` and `nomad_encrypt_key`; the previous hard-coded
  # literals ignored both and kept a gossip key in version control.
  nomad_version     = var.nomad_version
  nomad_encrypt_key = var.nomad_encrypt_key

  # Datacenter catalogue
  datacenters = {
    dc1 = {
      name     = "dc1"
      region   = "cn"
      location = "China"
      provider = "existing" # the existing semaphore node
    }
    dc2 = {
      name     = "dc2"
      region   = "kr"
      location = "Korea"
      provider = "oracle"
    }
    dc3 = {
      name     = "dc3"
      region   = "us"
      location = "US"
      provider = "huawei" # or aws
    }
  }

  # User-data template
  # NOTE(review): the node modules in this file render the same template
  # with more variables (datacenter, bootstrap_expect, bind_addr, ...);
  # confirm this two-variable rendering is still used and still renders.
  user_data_template = templatefile("${path.module}/templates/nomad-userdata.sh", {
    nomad_version     = local.nomad_version
    nomad_encrypt_key = local.nomad_encrypt_key
  })
}
# Facts about the existing semaphore node, exposed as a data source.
# The program only echoes a fixed JSON object.
data "external" "semaphore_info" {
  program = [
    "bash",
    "-c",
    <<-EOF
    echo '{
    "ip": "100.116.158.95",
    "datacenter": "dc1",
    "status": "existing"
    }'
    EOF
  ]
}
# Oracle Cloud node in Korea (dc2); created only when deploy_korea_node is true.
module "oracle_korea_node" {
  source = "../compute"
  count  = var.deploy_korea_node ? 1 : 0

  # Provider selection
  provider_type = "oracle"

  # Instance definition
  instance_config = {
    name          = "nomad-master-kr"
    datacenter    = "dc2"
    instance_type = "VM.Standard.E2.1.Micro" # free tier
    image_id      = var.oracle_ubuntu_image_id
    subnet_id     = var.oracle_subnet_id

    # Nomad settings
    nomad_role       = "server"
    bootstrap_expect = 1
    bind_addr        = "auto" # auto-detect

    # Networking
    security_groups = [var.oracle_security_group_id]

    # Tags
    tags = merge(var.common_tags, {
      Name       = "nomad-master-kr"
      Datacenter = "dc2"
      Role       = "nomad-server"
      Provider   = "oracle"
    })
  }

  # Boot script rendered from the shared user-data template
  user_data = templatefile("${path.module}/templates/nomad-userdata.sh", {
    datacenter        = "dc2"
    nomad_version     = local.nomad_version
    nomad_encrypt_key = local.nomad_encrypt_key
    bootstrap_expect  = 1
    bind_addr         = "auto"
    server_enabled    = true
    client_enabled    = true
  })
}
# Huawei Cloud node in the US (dc3); created only when deploy_us_node is true.
module "huawei_us_node" {
  source = "../compute"
  count  = var.deploy_us_node ? 1 : 0

  # Provider selection
  provider_type = "huawei"

  # Instance definition
  instance_config = {
    name          = "nomad-ash3c-us"
    datacenter    = "dc3"
    instance_type = "s6.small.1" # 1 vCPU, 1 GB
    image_id      = var.huawei_ubuntu_image_id
    subnet_id     = var.huawei_subnet_id

    # Nomad settings
    nomad_role       = "server"
    bootstrap_expect = 1
    bind_addr        = "auto"

    # Networking
    security_groups = [var.huawei_security_group_id]

    # Tags
    tags = merge(var.common_tags, {
      Name       = "nomad-ash3c-us"
      Datacenter = "dc3"
      Role       = "nomad-server"
      Provider   = "huawei"
    })
  }

  # Boot script rendered from the shared user-data template
  user_data = templatefile("${path.module}/templates/nomad-userdata.sh", {
    datacenter        = "dc3"
    nomad_version     = local.nomad_version
    nomad_encrypt_key = local.nomad_encrypt_key
    bootstrap_expect  = 1
    bind_addr         = "auto"
    server_enabled    = true
    client_enabled    = true
  })
}

View File

@@ -0,0 +1,145 @@
# Outputs for the Nomad multi-datacenter cluster.

# Cluster overview: one entry per datacenter plus the total node count.
# dc1 is the fixed, pre-existing node; dc2 and dc3 are null when their
# deploy flag is off.
output "cluster_overview" {
  description = "Nomad 多数据中心集群概览"

  value = {
    datacenters = {
      dc1 = {
        name     = "dc1"
        location = "China (CN)"
        provider = "existing"
        node     = "semaphore"
        ip       = "100.116.158.95"
        status   = "existing"
      }

      dc2 = var.deploy_korea_node ? {
        name     = "dc2"
        location = "Korea (KR)"
        provider = "oracle"
        node     = "master"
        ip       = try(module.oracle_korea_node[0].public_ip, "pending")
        status   = "deployed"
      } : null

      dc3 = var.deploy_us_node ? {
        name     = "dc3"
        location = "US"
        provider = "huawei"
        node     = "ash3c"
        ip       = try(module.huawei_us_node[0].public_ip, "pending")
        status   = "deployed"
      } : null
    }

    # The existing dc1 node always counts as one.
    total_nodes = 1 + (var.deploy_korea_node ? 1 : 0) + (var.deploy_us_node ? 1 : 0)
  }
}
# Oracle Cloud Korea node (dc2): identifiers, addresses and ready-made
# connection strings. Null when var.deploy_korea_node is false.
output "oracle_korea_node" {
  description = "Oracle Cloud 韩国节点信息"

  value = var.deploy_korea_node ? {
    instance_id = try(module.oracle_korea_node[0].instance_id, null)
    public_ip   = try(module.oracle_korea_node[0].public_ip, null)
    private_ip  = try(module.oracle_korea_node[0].private_ip, null)
    datacenter  = "dc2"
    provider    = "oracle"

    # var.oracle_config is declared sensitive, so every attribute read from
    # it carries the sensitive mark and a non-sensitive output that includes
    # it is rejected ("Output refers to sensitive values"). The region name
    # is not a secret, so the mark is removed explicitly rather than hiding
    # the whole output.
    region = nonsensitive(var.oracle_config.region)

    # Connection info
    ssh_command = try("ssh ubuntu@${module.oracle_korea_node[0].public_ip}", null)
    nomad_ui    = try("http://${module.oracle_korea_node[0].public_ip}:4646", null)
  } : null
}
# Huawei Cloud US node (dc3): identifiers, addresses and ready-made
# connection strings. Null when var.deploy_us_node is false.
output "huawei_us_node" {
  description = "华为云美国节点信息"

  value = var.deploy_us_node ? {
    instance_id = try(module.huawei_us_node[0].instance_id, null)
    public_ip   = try(module.huawei_us_node[0].public_ip, null)
    private_ip  = try(module.huawei_us_node[0].private_ip, null)
    datacenter  = "dc3"
    provider    = "huawei"

    # var.huawei_config is declared sensitive, so every attribute read from
    # it carries the sensitive mark and a non-sensitive output that includes
    # it is rejected ("Output refers to sensitive values"). The region name
    # is not a secret, so the mark is removed explicitly rather than hiding
    # the whole output.
    region = nonsensitive(var.huawei_config.region)

    # Connection info
    ssh_command = try("ssh ubuntu@${module.huawei_us_node[0].public_ip}", null)
    nomad_ui    = try("http://${module.huawei_us_node[0].public_ip}:4646", null)
  } : null
}
# Cluster connection endpoints.
# Each list starts with the fixed dc1 entry; entries for dc2/dc3 are null
# when the node is not deployed (or its IP cannot be read) and compact()
# drops them.
output "cluster_endpoints" {
  description = "集群连接端点"

  value = {
    nomad_ui_urls = compact([
      # dc1 - semaphore
      "http://100.116.158.95:4646",
      # dc2
      var.deploy_korea_node ? try("http://${module.oracle_korea_node[0].public_ip}:4646", null) : null,
      # dc3
      var.deploy_us_node ? try("http://${module.huawei_us_node[0].public_ip}:4646", null) : null,
    ])

    ssh_commands = compact([
      # dc1 - semaphore
      "ssh root@100.116.158.95",
      # dc2
      var.deploy_korea_node ? try("ssh ubuntu@${module.oracle_korea_node[0].public_ip}", null) : null,
      # dc3
      var.deploy_us_node ? try("ssh ubuntu@${module.huawei_us_node[0].public_ip}", null) : null,
    ])
  }
}
# Generated Ansible inventory.
# The nomad_servers group always contains the fixed "semaphore" host; the
# "master" (dc2) and "ash3c" (dc3) hosts are merged in only when their
# deploy flag is set.
output "ansible_inventory" {
  description = "生成的 Ansible inventory"

  value = {
    all = {
      children = {
        nomad_servers = {
          hosts = merge(
            {
              semaphore = {
                ansible_host = "100.116.158.95"
                datacenter   = "dc1"
                provider     = "existing"
              }
            },
            var.deploy_korea_node ? {
              master = {
                ansible_host = try(module.oracle_korea_node[0].public_ip, "pending")
                datacenter   = "dc2"
                provider     = "oracle"
              }
            } : {},
            var.deploy_us_node ? {
              ash3c = {
                ansible_host = try(module.huawei_us_node[0].public_ip, "pending")
                datacenter   = "dc3"
                provider     = "huawei"
              }
            } : {}
          )
        }
      }
    }
  }
}
# Post-deployment verification commands.
# A list of shell lines; the last element is a newline-joined block with
# one leader-status curl per reachable Nomad HTTP endpoint.
output "verification_commands" {
  description = "部署后验证命令"

  value = [
    "# 检查集群状态",
    "nomad server members",
    "",
    "# 检查各数据中心节点",
    "nomad node status -verbose",
    "",
    "# 跨数据中心任务调度测试",
    "nomad job run examples/cross-dc-test.nomad",
    "",
    "# 访问 UI",
    join("\n", [
      for url in compact([
        "http://100.116.158.95:4646",
        var.deploy_korea_node ? try("http://${module.oracle_korea_node[0].public_ip}:4646", null) : null,
        var.deploy_us_node ? try("http://${module.huawei_us_node[0].public_ip}:4646", null) : null,
      ]) : "curl -s ${url}/v1/status/leader"
    ]),
  ]
}

View File

@@ -0,0 +1,228 @@
#!/bin/bash
# Automatic bootstrap script for a Nomad multi-datacenter node.
# Datacenter: ${datacenter}
set -e

# log MESSAGE
# Print MESSAGE prefixed with a timestamp and append the same line to
# /var/log/nomad-setup.log.
log() {
    local message="$1"
    local stamp
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    echo "[$stamp] $message" | tee -a /var/log/nomad-setup.log
}
log "开始配置 Nomad 节点 - 数据中心: ${datacenter}"

# The script is passed to the instance as user data and runs without a
# terminal, so apt must never stop to ask a question.
export DEBIAN_FRONTEND=noninteractive

# Update the system
log "更新系统包..."
apt-get update -y
apt-get upgrade -y

# Install required packages
log "安装必要的包..."
apt-get install -y \
    curl \
    wget \
    unzip \
    jq \
    podman \
    htop \
    net-tools \
    vim

# Start Podman
log "启动 Podman 服务..."
systemctl enable podman
systemctl start podman

# usermod exits non-zero when the group or the user is missing, which would
# abort the whole script under `set -e`. Create the group when it does not
# exist yet and skip the membership change on images without an "ubuntu"
# account.
getent group podman >/dev/null || groupadd --system podman
if id ubuntu >/dev/null 2>&1; then
    usermod -aG podman ubuntu
fi
# Install Nomad
log "安装 Nomad ${nomad_version}..."
cd /tmp
# -O pins the file name so a re-run overwrites the archive instead of
# saving a numbered copy next to a stale one.
wget -q -O nomad_${nomad_version}_linux_amd64.zip \
    https://releases.hashicorp.com/nomad/${nomad_version}/nomad_${nomad_version}_linux_amd64.zip
# -o: overwrite existing files without prompting; there is no terminal to
# answer unzip's question on a re-run.
unzip -o nomad_${nomad_version}_linux_amd64.zip
mv nomad /usr/local/bin/
chmod +x /usr/local/bin/nomad

# Create the Nomad user and directories
log "创建 Nomad 用户和目录..."
# useradd fails when the account already exists, which would abort the
# script under `set -e`; only create it when it is missing.
if ! id nomad >/dev/null 2>&1; then
    useradd --system --home /etc/nomad.d --shell /bin/false nomad
fi
mkdir -p /opt/nomad/data
mkdir -p /etc/nomad.d
mkdir -p /var/log/nomad
chown -R nomad:nomad /opt/nomad /etc/nomad.d /var/log/nomad
# Determine this host's IP address
if [ "${bind_addr}" = "auto" ]; then
    # Ask the cloud metadata endpoints first. -f makes curl fail on an HTTP
    # error instead of printing the error page (which would otherwise be
    # captured as the "address"), and --max-time keeps an unreachable
    # endpoint from stalling the boot. The trailing `|| true` keeps a miss
    # on both endpoints from aborting the script under `set -e`.
    BIND_ADDR=$(curl -sf --max-time 3 http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null || \
        curl -sf --max-time 3 http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/ip -H "Metadata-Flavor: Google" 2>/dev/null || \
        true)

    # Fall back to the source address of the default route. The value after
    # the "src" keyword is used because its field position changes with the
    # shape of the route.
    if [ -z "$BIND_ADDR" ]; then
        BIND_ADDR=$(ip -4 route get 8.8.8.8 2>/dev/null | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
    fi

    # Last resort: first address reported by hostname.
    if [ -z "$BIND_ADDR" ]; then
        BIND_ADDR=$(hostname -I | awk '{print $1}')
    fi
else
    BIND_ADDR="${bind_addr}"
fi

# An empty address would produce an unusable Nomad configuration; stop here
# with a clear message instead.
if [ -z "$BIND_ADDR" ]; then
    log "❌ 无法检测到本机 IP 地址"
    exit 1
fi
log "检测到 IP 地址: $BIND_ADDR"
# Write the Nomad agent configuration to /etc/nomad.d/nomad.hcl.
# The heredoc delimiter is unquoted, so the shell substitutes $BIND_ADDR
# while writing the file. The dollar-brace placeholders and the
# percent-brace if/endif directives are presumably filled in by the
# template renderer before the script ever runs (TODO confirm); the server
# and client stanzas are emitted only when the matching flag is true.
log "创建 Nomad 配置文件..."
cat > /etc/nomad.d/nomad.hcl << EOF
datacenter = "${datacenter}"
region = "global"
data_dir = "/opt/nomad/data"
bind_addr = "$BIND_ADDR"
%{ if server_enabled }
server {
enabled = true
bootstrap_expect = ${bootstrap_expect}
encrypt = "${nomad_encrypt_key}"
}
%{ endif }
%{ if client_enabled }
client {
enabled = true
host_volume "podman-sock" {
path = "/run/podman/podman.sock"
read_only = false
}
}
%{ endif }
ui {
enabled = true
}
addresses {
http = "0.0.0.0"
rpc = "$BIND_ADDR"
serf = "$BIND_ADDR"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
plugin "podman" {
config {
volumes {
enabled = true
}
}
}
telemetry {
collection_interval = "10s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
log_level = "INFO"
log_file = "/var/log/nomad/nomad.log"
EOF
# Create the systemd unit file
log "创建 systemd 服务文件..."
# Nomad's deployment guidance is to run servers as the unprivileged nomad
# user but clients as root, because client task drivers need root
# privileges. The account is therefore chosen from client_enabled:
# root when client mode is on, nomad for server-only nodes.
# \$MAINPID is escaped so the shell writes it literally for systemd.
cat > /etc/systemd/system/nomad.service << EOF
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl
[Service]
Type=notify
User=%{ if client_enabled }root%{ else }nomad%{ endif }
Group=%{ if client_enabled }root%{ else }nomad%{ endif }
ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
# Start the Nomad service
log "启动 Nomad 服务..."
systemctl daemon-reload
systemctl enable nomad
systemctl start nomad

# Give the agent a moment to come up
log "等待 Nomad 服务启动..."
sleep 10

# Verify the installation. Diagnostics are best-effort (`|| true`) so a
# failing status command cannot abort the script.
log "验证 Nomad 安装..."
if ! systemctl is-active --quiet nomad; then
    log "❌ Nomad 服务启动失败"
    systemctl status nomad --no-pager || true
    journalctl -u nomad --no-pager -n 20 || true
else
    log "✅ Nomad 服务运行正常"
    log "📊 节点信息:"
    /usr/local/bin/nomad node status -self || true
fi
# Configure the firewall (only when ufw is installed)
log "配置防火墙规则..."
if command -v ufw >/dev/null 2>&1; then
    ufw allow 4646/tcp # HTTP API
    ufw allow 4647/tcp # RPC
    ufw allow 4648/tcp # Serf
    # Serf gossip uses UDP as well as TCP on the same port; without this
    # rule servers cannot gossip reliably across datacenters.
    ufw allow 4648/udp # Serf
    ufw allow 22/tcp   # SSH
fi
# Create a helper script and shell aliases
log "创建管理脚本..."
# The delimiter is quoted, so the shell writes the script body verbatim.
cat > /usr/local/bin/nomad-status << 'EOF'
#!/bin/bash
echo "=== Nomad 服务状态 ==="
systemctl status nomad --no-pager
echo -e "\n=== Nomad 集群成员 ==="
nomad server members 2>/dev/null || echo "无法连接到集群"
echo -e "\n=== Nomad 节点状态 ==="
nomad node status 2>/dev/null || echo "无法获取节点状态"
echo -e "\n=== 最近日志 ==="
journalctl -u nomad --no-pager -n 5
EOF
chmod +x /usr/local/bin/nomad-status

# Add the aliases to the ubuntu user's bashrc. Appending to a file in a
# missing directory fails and would abort the script under `set -e`, so the
# step is skipped on images that have no /home/ubuntu.
if [ -d /home/ubuntu ]; then
    echo 'alias ns="nomad-status"' >> /home/ubuntu/.bashrc
    echo 'alias nomad-logs="journalctl -u nomad -f"' >> /home/ubuntu/.bashrc
fi
# Final summary of what was configured.
log "🎉 Nomad 节点配置完成!"
log "📍 数据中心: ${datacenter}"
log "🌐 IP 地址: $BIND_ADDR"
log "🔗 Web UI: http://$BIND_ADDR:4646"
log "📝 使用 'nomad-status' 或 'ns' 命令查看状态"
# Write the key facts into a motd script.
# The heredoc delimiter is unquoted, so $BIND_ADDR is expanded now and the
# generated script contains the literal address detected during setup.
cat > /etc/update-motd.d/99-nomad << EOF
#!/bin/bash
echo ""
echo "🚀 Nomad 节点信息:"
echo " 数据中心: ${datacenter}"
echo " IP 地址: $BIND_ADDR"
echo " Web UI: http://$BIND_ADDR:4646"
echo " 状态检查: nomad-status"
echo ""
EOF
chmod +x /etc/update-motd.d/99-nomad
log "节点配置脚本执行完成"

View File

@@ -0,0 +1,118 @@
# Variable definitions for the Nomad multi-datacenter cluster.

# Toggle: deploy the Korea node (Oracle Cloud).
variable "deploy_korea_node" {
  type        = bool
  default     = true
  description = "是否部署韩国节点 (Oracle Cloud)"
}

# Toggle: deploy the US node (Huawei Cloud).
variable "deploy_us_node" {
  type        = bool
  default     = true
  description = "是否部署美国节点 (华为云)"
}
# Oracle Cloud settings.
# The whole object is sensitive, so every attribute read from it inherits
# the sensitive mark.
variable "oracle_config" {
  description = "Oracle Cloud 配置"
  sensitive   = true

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
  })
}

# Ubuntu image ID on Oracle Cloud.
variable "oracle_ubuntu_image_id" {
  type        = string
  default     = "" # to be resolved automatically via a data source
  description = "Oracle Cloud Ubuntu 镜像 ID"
}

# Subnet ID on Oracle Cloud (required).
variable "oracle_subnet_id" {
  type        = string
  description = "Oracle Cloud 子网 ID"
}

# Security group ID on Oracle Cloud (required).
variable "oracle_security_group_id" {
  type        = string
  description = "Oracle Cloud 安全组 ID"
}
# Huawei Cloud settings.
# The whole object is sensitive, so every attribute read from it inherits
# the sensitive mark.
variable "huawei_config" {
  description = "华为云配置"
  sensitive   = true

  type = object({
    access_key = string
    secret_key = string
    region     = string
  })
}

# Ubuntu image ID on Huawei Cloud.
variable "huawei_ubuntu_image_id" {
  type        = string
  default     = "" # to be resolved automatically via a data source
  description = "华为云 Ubuntu 镜像 ID"
}

# Subnet ID on Huawei Cloud (required).
variable "huawei_subnet_id" {
  type        = string
  description = "华为云子网 ID"
}

# Security group ID on Huawei Cloud (required).
variable "huawei_security_group_id" {
  type        = string
  description = "华为云安全组 ID"
}
# Shared settings.

# Tags applied to every resource.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"

  default = {
    Project     = "nomad-multi-dc"
    Environment = "production"
    ManagedBy   = "opentofu"
  }
}

# SSH public key (required).
variable "ssh_public_key" {
  type        = string
  description = "SSH 公钥"
}

# CIDR blocks allowed to reach the nodes.
variable "allowed_cidr_blocks" {
  type        = list(string)
  default     = ["0.0.0.0/0"] # should be restricted in production
  description = "允许访问的 CIDR 块"
}
# Nomad-specific settings.

# Nomad release to install.
variable "nomad_version" {
  type        = string
  default     = "1.10.5"
  description = "Nomad 版本"
}

# Gossip encryption key for the Nomad cluster.
# NOTE(review): the default is a real-looking key committed to version
# control. Consider rotating it and supplying the value from outside the
# repository (tfvars, environment, or a secret store).
variable "nomad_encrypt_key" {
  type        = string
  sensitive   = true
  default     = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
  description = "Nomad 集群加密密钥"
}

# Network settings.

# CIDR block of the VPC.
variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

# Availability zone suffixes.
variable "availability_zones" {
  type        = list(string)
  default     = ["a", "b"]
  description = "可用区列表"
}

View File

@@ -0,0 +1,25 @@
# DigitalOcean provider requirements.
terraform {
  required_providers {
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }
  }
}

# DigitalOcean provider configuration; the API token comes from
# var.do_config.
provider "digitalocean" {
  token = var.do_config.token
}

# Example Droplet.
resource "digitalocean_droplet" "web" {
  name   = "web-1"
  image  = "ubuntu-22-04-x64"
  size   = "s-1vcpu-1gb"
  region = var.do_config.region
  tags   = ["web", "mgmt"]
}

View File

@@ -0,0 +1,137 @@
# Huawei Cloud module.
terraform {
  required_providers {
    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }
  }
}

# Look up the availability zones.
data "huaweicloud_availability_zones" "zones" {}

# Look up the most recent image with the given name.
data "huaweicloud_images_image" "ubuntu" {
  name        = "Ubuntu 22.04 server 64bit"
  most_recent = true
}
# VPC named "<project>-<environment>-vpc".
resource "huaweicloud_vpc" "main" {
  name = "${var.project_name}-${var.environment}-vpc"
  cidr = var.vpc_cidr

  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-vpc"
  })
}
# Public subnets: one per entry in var.availability_zones.
# Each subnet takes 8 additional prefix bits from the VPC CIDR, indexed by
# its position in the list, and uses the first host of that range as
# gateway.
resource "huaweicloud_vpc_subnet" "public" {
  count = length(var.availability_zones)

  vpc_id     = huaweicloud_vpc.main.id
  name       = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
  cidr       = cidrsubnet(var.vpc_cidr, 8, count.index)
  gateway_ip = cidrhost(cidrsubnet(var.vpc_cidr, 8, count.index), 1)

  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
    Type = "public"
  })
}
# Security group.
resource "huaweicloud_networking_secgroup" "main" {
  name        = "${var.project_name}-${var.environment}-sg"
  description = "Security group for ${var.project_name} ${var.environment}"

  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-sg"
  })
}

# Ingress rule: SSH (tcp/22) from any IPv4 address.
resource "huaweicloud_networking_secgroup_rule" "ssh" {
  security_group_id = huaweicloud_networking_secgroup.main.id
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 22
  port_range_max    = 22
  remote_ip_prefix  = "0.0.0.0/0"
}

# Ingress rule: HTTP (tcp/80) from any IPv4 address.
resource "huaweicloud_networking_secgroup_rule" "http" {
  security_group_id = huaweicloud_networking_secgroup.main.id
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 80
  port_range_max    = 80
  remote_ip_prefix  = "0.0.0.0/0"
}

# Ingress rule: HTTPS (tcp/443) from any IPv4 address.
resource "huaweicloud_networking_secgroup_rule" "https" {
  security_group_id = huaweicloud_networking_secgroup.main.id
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 443
  port_range_max    = 443
  remote_ip_prefix  = "0.0.0.0/0"
}
# Elastic IPs: two in production, one in every other environment.
# Bandwidth size is 10 in production and 5 otherwise.
resource "huaweicloud_vpc_eip" "main" {
  count = var.environment == "production" ? 2 : 1

  publicip {
    type = "5_bgp"
  }

  bandwidth {
    name        = "${var.project_name}-${var.environment}-bandwidth-${count.index}"
    size        = var.environment == "production" ? 10 : 5
    share_type  = "PER"
    charge_mode = "traffic"
  }

  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-eip-${count.index}"
  })
}
# Outputs.

# ID of the VPC.
output "vpc_id" {
  value       = huaweicloud_vpc.main.id
  description = "VPC ID"
}

# IDs of all public subnets.
output "subnet_ids" {
  value       = huaweicloud_vpc_subnet.public[*].id
  description = "子网 ID 列表"
}

# ID of the security group.
output "security_group_id" {
  value       = huaweicloud_networking_secgroup.main.id
  description = "安全组 ID"
}

# Availability zone names returned by the data source.
output "availability_zones" {
  value       = data.huaweicloud_availability_zones.zones.names
  description = "可用区列表"
}

# ID of the Ubuntu image returned by the data source.
output "ubuntu_image_id" {
  value       = data.huaweicloud_images_image.ubuntu.id
  description = "Ubuntu 镜像 ID"
}

# Addresses of all elastic IPs.
output "eip_addresses" {
  value       = huaweicloud_vpc_eip.main[*].address
  description = "弹性IP地址列表"
}

View File

@@ -0,0 +1,54 @@
# Variable definitions for the Huawei Cloud provider module.

# Environment name.
variable "environment" {
  type        = string
  description = "环境名称"
}

# Project name.
variable "project_name" {
  type        = string
  description = "项目名称"
}

# Project owner.
variable "owner" {
  type        = string
  description = "项目所有者"
}

# CIDR block of the VPC.
variable "vpc_cidr" {
  type        = string
  description = "VPC CIDR 块"
}

# Availability zones.
variable "availability_zones" {
  type        = list(string)
  description = "可用区列表"
}

# Tags applied to every resource.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"
}

# Huawei Cloud credentials and location; sensitive as a whole.
variable "huawei_config" {
  description = "华为云配置"
  sensitive   = true

  type = object({
    access_key = string
    secret_key = string
    region     = string
    project_id = string
  })
}

# Number of instances.
variable "instance_count" {
  type        = number
  default     = 1
  description = "实例数量"
}

# Instance flavor.
variable "instance_size" {
  type        = string
  default     = "s6.small.1"
  description = "实例规格"
}

View File

@@ -0,0 +1,151 @@
# Oracle Cloud Infrastructure module.
terraform {
  required_providers {
    oci = {
      source  = "oracle/oci"
      version = "~> 7.20"
    }
  }
}

# Look up the availability domains of the tenancy.
data "oci_identity_availability_domains" "ads" {
  compartment_id = var.oci_config.tenancy_ocid
}

# Look up Canonical Ubuntu 22.04 images for the E2.1.Micro shape, sorted by
# creation time in descending order.
data "oci_core_images" "ubuntu_images" {
  compartment_id = var.oci_config.tenancy_ocid

  operating_system         = "Canonical Ubuntu"
  operating_system_version = "22.04"
  shape                    = "VM.Standard.E2.1.Micro"

  sort_by    = "TIMECREATED"
  sort_order = "DESC"
}
# VCN (virtual cloud network).
# NOTE(review): the VCN is created in the tenancy (root) compartment even
# though var.oci_config also carries compartment_ocid; confirm that is
# intended.
resource "oci_core_vcn" "main" {
  compartment_id = var.oci_config.tenancy_ocid
  cidr_blocks    = [var.vpc_cidr]
  display_name   = "${var.project_name}-${var.environment}-vcn"

  # OCI only accepts DNS labels that are alphanumeric and at most 15
  # characters long. Strip every other character (for example a hyphen in
  # the project name) and truncate, so such inputs no longer fail at apply
  # time. Inputs that were already valid produce the same label as before.
  dns_label = substr(
    replace("${var.project_name}${var.environment}", "/[^A-Za-z0-9]/", ""),
    0,
    15
  )

  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-vcn"
  })
}
# Internet gateway attached to the VCN.
resource "oci_core_internet_gateway" "main" {
  vcn_id         = oci_core_vcn.main.id
  compartment_id = var.oci_config.tenancy_ocid
  display_name   = "${var.project_name}-${var.environment}-igw"
  enabled        = true

  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-igw"
  })
}

# Route table: send all traffic (0.0.0.0/0) through the internet gateway.
resource "oci_core_route_table" "main" {
  vcn_id         = oci_core_vcn.main.id
  compartment_id = var.oci_config.tenancy_ocid
  display_name   = "${var.project_name}-${var.environment}-rt"

  route_rules {
    network_entity_id = oci_core_internet_gateway.main.id
    destination       = "0.0.0.0/0"
    destination_type  = "CIDR_BLOCK"
  }

  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-rt"
  })
}
# Security list: unrestricted egress; ingress for SSH, HTTP and HTTPS from
# any address.
resource "oci_core_security_list" "main" {
  vcn_id         = oci_core_vcn.main.id
  compartment_id = var.oci_config.tenancy_ocid
  display_name   = "${var.project_name}-${var.environment}-sl"

  # Egress: everything, anywhere.
  egress_security_rules {
    protocol    = "all"
    destination = "0.0.0.0/0"
  }

  # Ingress: SSH
  ingress_security_rules {
    source   = "0.0.0.0/0"
    protocol = "6" # TCP

    tcp_options {
      min = 22
      max = 22
    }
  }

  # Ingress: HTTP
  ingress_security_rules {
    source   = "0.0.0.0/0"
    protocol = "6" # TCP

    tcp_options {
      min = 80
      max = 80
    }
  }

  # Ingress: HTTPS
  ingress_security_rules {
    source   = "0.0.0.0/0"
    protocol = "6" # TCP

    tcp_options {
      min = 443
      max = 443
    }
  }

  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-sl"
  })
}
# Public subnets: one per entry in var.availability_zones, each taking 8
# additional prefix bits from the VCN CIDR indexed by its list position.
resource "oci_core_subnet" "public" {
  count = length(var.availability_zones)

  compartment_id = var.oci_config.tenancy_ocid
  vcn_id         = oci_core_vcn.main.id
  cidr_block     = cidrsubnet(var.vpc_cidr, 8, count.index)
  display_name   = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"

  # OCI only accepts DNS labels that are alphanumeric and at most 15
  # characters long. Strip every other character from the zone name and
  # truncate, so zone names containing hyphens or longer text no longer
  # fail at apply time. Short alphanumeric zone names such as "a" produce
  # the same label as before.
  dns_label = substr(
    replace("public${var.availability_zones[count.index]}", "/[^A-Za-z0-9]/", ""),
    0,
    15
  )

  route_table_id    = oci_core_route_table.main.id
  security_list_ids = [oci_core_security_list.main.id]

  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
    Type = "public"
  })
}
# Outputs.

# ID of the VCN.
output "vcn_id" {
  value       = oci_core_vcn.main.id
  description = "VCN ID"
}

# IDs of all public subnets.
output "subnet_ids" {
  value       = oci_core_subnet.public[*].id
  description = "子网 ID 列表"
}

# Names of the availability domains returned by the data source.
output "availability_domains" {
  value       = data.oci_identity_availability_domains.ads.availability_domains[*].name
  description = "可用域列表"
}

# ID of the first image in the lookup result (the lookup sorts by creation
# time, descending).
output "ubuntu_image_id" {
  value       = data.oci_core_images.ubuntu_images.images[0].id
  description = "Ubuntu 镜像 ID"
}

View File

@@ -0,0 +1,55 @@
# Variable definitions for the Oracle Cloud provider module.

# Environment name.
variable "environment" {
  type        = string
  description = "环境名称"
}

# Project name.
variable "project_name" {
  type        = string
  description = "项目名称"
}

# Project owner.
variable "owner" {
  type        = string
  description = "项目所有者"
}

# CIDR block of the VPC.
variable "vpc_cidr" {
  type        = string
  description = "VPC CIDR 块"
}

# Availability zones.
variable "availability_zones" {
  type        = list(string)
  description = "可用区列表"
}

# Tags applied to every resource.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"
}
# Oracle Cloud credentials and location.
# The object carries the API private key, so it is marked sensitive to keep
# its values out of plan and apply output.
variable "oci_config" {
  description = "Oracle Cloud 配置"
  sensitive   = true

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key      = string
    region           = string
    compartment_ocid = string
  })
}
# Number of instances.
variable "instance_count" {
  type        = number
  default     = 1
  description = "实例数量"
}

# Instance shape.
variable "instance_size" {
  type        = string
  default     = "VM.Standard.E2.1.Micro"
  description = "实例规格"
}

View File

@@ -0,0 +1,39 @@
# Global output definitions.

# Environment information.
output "environment" {
  value       = var.environment
  description = "当前部署环境"
}

output "project_name" {
  value       = var.project_name
  description = "项目名称"
}

# Network information.
output "vpc_cidr" {
  value       = var.vpc_cidr
  description = "VPC CIDR 块"
}

# Common tags, extended with the environment and an evaluation timestamp.
# NOTE(review): timestamp() yields a new value on every run, so this output
# shows as changed in every plan; confirm that is wanted.
output "common_tags" {
  description = "通用资源标签"

  value = merge(var.common_tags, {
    Environment = var.environment
    Timestamp   = timestamp()
  })
}

# Enabled cloud providers.
output "enabled_providers" {
  value       = var.cloud_providers
  description = "启用的云服务商列表"
}

# Instance types configured for the current environment.
output "instance_types" {
  value       = var.instance_types[var.environment]
  description = "当前环境的实例类型配置"
}

View File

@@ -0,0 +1,169 @@
# Global variable definitions.

# Environment settings.

# Deployment environment; must be one of dev, staging or production.
variable "environment" {
  type        = string
  description = "部署环境 (dev, staging, production)"

  validation {
    condition     = contains(["dev", "staging", "production"], var.environment)
    error_message = "环境必须是 dev, staging, 或 production 之一。"
  }
}

# Project name.
variable "project_name" {
  type        = string
  default     = "mgmt"
  description = "项目名称"
}

# Resource owner.
variable "owner" {
  type        = string
  default     = "ben"
  description = "资源所有者"
}

# Network settings.

# CIDR block of the VPC.
variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

# Availability zone suffixes.
variable "availability_zones" {
  type        = list(string)
  default     = ["a", "b", "c"]
  description = "可用区列表"
}
# Compute settings.

# Instance type per role (web, app, db, cache), keyed by environment.
variable "instance_types" {
  description = "不同环境的实例类型"

  type = map(object({
    web   = string
    app   = string
    db    = string
    cache = string
  }))

  default = {
    dev = {
      web   = "t3.micro"
      app   = "t3.small"
      db    = "t3.micro"
      cache = "t3.micro"
    }

    staging = {
      web   = "t3.small"
      app   = "t3.medium"
      db    = "t3.small"
      cache = "t3.small"
    }

    production = {
      web   = "t3.medium"
      app   = "t3.large"
      db    = "t3.medium"
      cache = "t3.medium"
    }
  }
}

# Tag settings.

# Tags applied to every resource.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"

  default = {
    Project   = "mgmt"
    ManagedBy = "opentofu"
    Owner     = "ben"
  }
}

# Provider-specific settings.

# Cloud providers that are enabled.
variable "cloud_providers" {
  type        = list(string)
  default     = ["oracle", "huawei", "google", "digitalocean", "aws"]
  description = "启用的云服务商"
}
# Oracle Cloud settings; sensitive as a whole.
# Identifiers default to empty strings; the default region is ap-seoul-1.
variable "oci_config" {
  description = "Oracle Cloud 配置"
  sensitive   = true

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
  })

  default = {
    tenancy_ocid     = ""
    user_ocid        = ""
    fingerprint      = ""
    private_key_path = "~/.oci/oci_api_key.pem"
    region           = "ap-seoul-1"
  }
}

# Huawei Cloud settings; sensitive as a whole.
# Keys default to empty strings; the default region is cn-north-4.
variable "huawei_config" {
  description = "华为云配置"
  sensitive   = true

  type = object({
    access_key = string
    secret_key = string
    region     = string
  })

  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
  }
}
# Google Cloud settings; sensitive as a whole.
variable "gcp_config" {
  description = "Google Cloud 配置"
  sensitive   = true

  type = object({
    project_id  = string
    region      = string
    zone        = string
    credentials = string
  })

  default = {
    project_id  = ""
    region      = "asia-northeast3"
    zone        = "asia-northeast3-a"
    credentials = ""
  }
}

# DigitalOcean settings; sensitive as a whole.
variable "do_config" {
  description = "DigitalOcean 配置"
  sensitive   = true

  type = object({
    token  = string
    region = string
  })

  default = {
    token  = ""
    region = "sgp1"
  }
}

# AWS settings; sensitive as a whole.
variable "aws_config" {
  description = "AWS 配置"
  sensitive   = true

  type = object({
    access_key = string
    secret_key = string
    region     = string
  })

  default = {
    access_key = ""
    secret_key = ""
    region     = "ap-northeast-1"
  }
}

View File

@@ -0,0 +1,63 @@
# OpenTofu version and provider requirements.
terraform {
  required_version = ">= 1.6"

  required_providers {
    # Oracle Cloud Infrastructure
    oci = {
      source  = "oracle/oci"
      version = "7.20.0"
    }

    # Huawei Cloud
    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }

    # Google Cloud Platform
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }

    # DigitalOcean
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }

    # Amazon Web Services
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }

    # Other commonly used providers
    random = {
      source  = "hashicorp/random"
      version = "3.7.2"
    }

    tls = {
      source  = "hashicorp/tls"
      version = "4.1.0"
    }

    local = {
      source  = "hashicorp/local"
      version = "2.5.3"
    }

    # HashiCorp Vault
    vault = {
      source  = "hashicorp/vault"
      version = "~> 4.0"
    }
  }

  # Backend: S3, GCS or local can be used; state is kept in a local file
  # here.
  backend "local" {
    path = "terraform.tfstate"
  }
}