updated
This commit is contained in:
169
tofu/environments/production/nomad-multi-dc.tf
Normal file
169
tofu/environments/production/nomad-multi-dc.tf
Normal file
@@ -0,0 +1,169 @@
|
||||
# Nomad 多数据中心生产环境配置
|
||||
# 部署架构: CN(dc1) + KR(dc2) + US(dc3)
|
||||
|
||||
# Toolchain and provider requirements for the production multi-datacenter stack.
terraform {
  required_version = ">= 1.0"

  required_providers {
    # Oracle Cloud Infrastructure: hosts the Korea datacenter (dc2).
    oci = {
      source  = "oracle/oci"
      version = "~> 5.0"
    }

    # Huawei Cloud: hosts the US datacenter (dc3).
    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }

    # Backs the local_file data source and resources declared in this
    # configuration. Declared explicitly so its source and version are pinned
    # instead of being resolved implicitly.
    local = {
      source  = "hashicorp/local"
      version = "~> 2.0"
    }
  }
}
|
||||
|
||||
# Oracle Cloud provider instance for Korea, addressed as `oci.korea`.
# All credentials are taken from the oracle_* input variables.
provider "oci" {
  alias  = "korea"
  region = "ap-seoul-1" # Seoul, Korea

  tenancy_ocid     = var.oracle_tenancy_ocid
  user_ocid        = var.oracle_user_ocid
  fingerprint      = var.oracle_fingerprint
  private_key_path = var.oracle_private_key_path
}
|
||||
|
||||
# Huawei Cloud provider instance for the US, addressed as `huaweicloud.us`.
# NOTE(review): "us-east-1" reads like an AWS-style region name; confirm it is
# a region identifier that Huawei Cloud accepts before applying.
provider "huaweicloud" {
  alias  = "us"
  region = "us-east-1" # US East

  access_key = var.huawei_access_key
  secret_key = var.huawei_secret_key
}
|
||||
|
||||
# Values shared by every module call in this environment.
locals {
  environment  = "production"
  project_name = "nomad-multi-dc"

  # Tag set handed to each module through its common_tags input.
  common_tags = {
    Project     = local.project_name
    Environment = local.environment
    ManagedBy   = "opentofu"
    Owner       = "devops-team"
  }
}
|
||||
|
||||
# Reads the SSH public key from the home directory of the user running the
# plan; its content is passed to the nomad_cluster module as ssh_public_key.
data "local_file" "ssh_public_key" {
  filename = pathexpand("~/.ssh/id_rsa.pub")
}
|
||||
|
||||
# Oracle Cloud base infrastructure for the Korea datacenter (dc2).
module "oracle_infrastructure" {
  source = "../../providers/oracle-cloud"

  # Bind the module's oci provider to the Seoul-region instance.
  providers = {
    oci = oci.korea
  }

  environment  = local.environment
  project_name = local.project_name
  common_tags  = local.common_tags
  vpc_cidr     = "10.1.0.0/16"

  # The same credentials the provider block uses, passed again as a module input.
  oci_config = {
    region           = "ap-seoul-1"
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
  }
}
|
||||
|
||||
# Huawei Cloud base infrastructure for the US datacenter (dc3).
module "huawei_infrastructure" {
  source = "../../providers/huawei-cloud"

  # Bind the module's huaweicloud provider to the US-region instance.
  providers = {
    huaweicloud = huaweicloud.us
  }

  environment  = local.environment
  project_name = local.project_name
  common_tags  = local.common_tags

  vpc_cidr = "10.2.0.0/16"
  # NOTE(review): zone names follow the provider's region string; confirm they
  # exist in the target Huawei Cloud region.
  availability_zones = ["us-east-1a", "us-east-1b"]
}
|
||||
|
||||
# Nomad multi-datacenter cluster: creates the Korea (dc2) and US (dc3) nodes on
# top of the networks built by the two infrastructure modules.
module "nomad_cluster" {
  source = "../../modules/nomad-cluster"

  # This configuration defines only aliased provider instances, so they must be
  # handed to the module explicitly. Without this map the module (and the
  # compute module it calls) would fall back to unconfigured default providers.
  providers = {
    oci         = oci.korea
    huaweicloud = huaweicloud.us
  }

  # Deployment toggles
  deploy_korea_node = var.deploy_korea_node
  deploy_us_node    = var.deploy_us_node

  # Oracle Cloud settings
  oracle_config = {
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
    region           = "ap-seoul-1"
  }

  # The node is placed in the first public subnet of the Oracle network.
  oracle_subnet_id         = module.oracle_infrastructure.public_subnet_ids[0]
  oracle_security_group_id = module.oracle_infrastructure.security_group_id

  # Huawei Cloud settings
  huawei_config = {
    access_key = var.huawei_access_key
    secret_key = var.huawei_secret_key
    region     = "us-east-1"
  }

  # The node is placed in the first public subnet of the Huawei network.
  huawei_subnet_id         = module.huawei_infrastructure.public_subnet_ids[0]
  huawei_security_group_id = module.huawei_infrastructure.security_group_id

  # Shared settings
  ssh_public_key = data.local_file.ssh_public_key.content
  common_tags    = local.common_tags

  # Nomad settings
  nomad_version     = "1.10.5"
  nomad_encrypt_key = var.nomad_encrypt_key
}
|
||||
|
||||
# Writes an Ansible inventory (YAML) for the Nomad servers. The host map comes
# from the nomad_cluster module; connection defaults are added here.
resource "local_file" "ansible_inventory" {
  filename = "${path.module}/generated/nomad-cluster-inventory.yml"

  content = yamlencode({
    all = {
      children = {
        nomad_servers = {
          hosts = module.nomad_cluster.ansible_inventory.all.children.nomad_servers.hosts
        }
      }

      vars = {
        ansible_user                 = "ubuntu"
        ansible_ssh_private_key_file = "~/.ssh/id_rsa"
        # accept-new trusts a host key the first time it is seen but refuses a
        # changed key, so fresh nodes still work without disabling host-key
        # verification altogether (as StrictHostKeyChecking=no did).
        ansible_ssh_common_args = "-o StrictHostKeyChecking=accept-new"
      }
    }
  })
}
|
||||
|
||||
# Renders templates/post-deploy.sh with the cluster overview and endpoints and
# writes the result as an executable script under generated/.
resource "local_file" "post_deploy_script" {
  filename        = "${path.module}/generated/post-deploy.sh"
  file_permission = "0755"

  content = templatefile("${path.module}/templates/post-deploy.sh", {
    endpoints        = module.nomad_cluster.cluster_endpoints
    cluster_overview = module.nomad_cluster.cluster_overview
  })
}
|
||||
|
||||
# Renders the cross-datacenter test job from templates/cross-dc-test.nomad.
resource "local_file" "cross_dc_test_job" {
  filename = "${path.module}/generated/cross-dc-test.nomad"

  content = templatefile("${path.module}/templates/cross-dc-test.nomad", {
    # Only datacenters that are part of this deployment are listed. The
    # module reports a datacenter as null when its deploy flag is off, so the
    # test job no longer targets dc2/dc3 when they were not created.
    datacenters = [
      for name, dc in module.nomad_cluster.cluster_overview.datacenters : name
      if dc != null
    ]
  })
}
|
||||
46
tofu/environments/production/outputs.tf
Normal file
46
tofu/environments/production/outputs.tf
Normal file
@@ -0,0 +1,46 @@
|
||||
# Nomad 多数据中心生产环境输出
|
||||
|
||||
# Re-exports the module's per-datacenter overview.
output "cluster_overview" {
  value       = module.nomad_cluster.cluster_overview
  description = "Nomad 多数据中心集群概览"
}
|
||||
|
||||
# Re-exports the module's UI URLs and SSH commands.
output "cluster_endpoints" {
  value       = module.nomad_cluster.cluster_endpoints
  description = "集群连接端点"
}
|
||||
|
||||
# Re-exports the Korea (Oracle Cloud) node details from the module.
output "oracle_korea_node" {
  value       = module.nomad_cluster.oracle_korea_node
  description = "Oracle Cloud 韩国节点信息"
}
|
||||
|
||||
# Re-exports the US (Huawei Cloud) node details from the module.
output "huawei_us_node" {
  value       = module.nomad_cluster.huawei_us_node
  description = "华为云美国节点信息"
}
|
||||
|
||||
# Human-oriented summary printed after apply: node count, the datacenters in
# this deployment, follow-up steps, and connection details.
output "deployment_summary" {
  description = "部署摘要"

  value = {
    total_nodes = module.nomad_cluster.cluster_overview.total_nodes

    # keys() would also return datacenters whose entry is null (deploy flag
    # off), so filter those out to list only datacenters that really exist.
    datacenters = [
      for name, dc in module.nomad_cluster.cluster_overview.datacenters : name
      if dc != null
    ]

    next_steps = [
      "1. 等待所有节点启动完成 (约 5-10 分钟)",
      "2. 运行: ./generated/post-deploy.sh",
      "3. 验证集群: nomad server members",
      "4. 测试跨 DC 调度: nomad job run generated/cross-dc-test.nomad",
      "5. 访问 Web UI 查看集群状态"
    ]

    web_ui_urls  = module.nomad_cluster.cluster_endpoints.nomad_ui_urls
    ssh_commands = module.nomad_cluster.cluster_endpoints.ssh_commands
  }
}
|
||||
|
||||
# Re-exports the module's list of post-deployment verification commands.
output "verification_commands" {
  value       = module.nomad_cluster.verification_commands
  description = "验证命令"
}
|
||||
22
tofu/environments/production/terraform.tfvars.example
Normal file
22
tofu/environments/production/terraform.tfvars.example
Normal file
@@ -0,0 +1,22 @@
|
||||
# Example variable values for the production Nomad multi-datacenter environment.
# Copy this file to terraform.tfvars and fill in the real values.
# The resulting terraform.tfvars holds credentials and should not be committed.

# Deployment toggles
deploy_korea_node = true # deploy the Korea node
deploy_us_node    = true # deploy the US node

# Oracle Cloud settings (Korea - dc2)
# How to obtain: https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm
oracle_tenancy_ocid     = "ocid1.tenancy.oc1..aaaaaaaa..."
oracle_user_ocid        = "ocid1.user.oc1..aaaaaaaa..."
oracle_fingerprint      = "aa:bb:cc:dd:ee:ff:..."
oracle_private_key_path = "~/.oci/oci_api_key.pem"

# Huawei Cloud settings (US - dc3)
# How to obtain: https://console.huaweicloud.com/iam/#/mine/accessKey
huawei_access_key = "YOUR_HUAWEI_ACCESS_KEY"
huawei_secret_key = "YOUR_HUAWEI_SECRET_KEY"

# Nomad gossip encryption key.
# Generate a fresh key for every cluster with `nomad operator gossip keyring generate`
# (older CLIs: `nomad operator keygen`). Do not reuse a key that appears in
# version control.
nomad_encrypt_key = "REPLACE_WITH_GENERATED_GOSSIP_KEY"
|
||||
60
tofu/environments/production/variables.tf
Normal file
60
tofu/environments/production/variables.tf
Normal file
@@ -0,0 +1,60 @@
|
||||
# Nomad 多数据中心生产环境变量
|
||||
|
||||
# Deployment toggles: each flag switches one remote datacenter node on or off.
variable "deploy_korea_node" {
  type        = bool
  default     = true
  description = "是否部署韩国节点 (Oracle Cloud)"
}

variable "deploy_us_node" {
  type        = bool
  default     = true
  description = "是否部署美国节点 (华为云)"
}
|
||||
|
||||
# Oracle Cloud API credentials. All four are required (no default) and are
# marked sensitive so their values are redacted in plan/apply output.
variable "oracle_tenancy_ocid" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 租户 OCID"
}

variable "oracle_user_ocid" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 用户 OCID"
}

variable "oracle_fingerprint" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud API 密钥指纹"
}

variable "oracle_private_key_path" {
  type        = string
  sensitive   = true
  description = "Oracle Cloud 私钥文件路径"
}
|
||||
|
||||
# Huawei Cloud access credentials. Both are required (no default) and are
# marked sensitive so their values are redacted in plan/apply output.
variable "huawei_access_key" {
  type        = string
  sensitive   = true
  description = "华为云访问密钥"
}

variable "huawei_secret_key" {
  type        = string
  sensitive   = true
  description = "华为云秘密密钥"
}
|
||||
|
||||
# Nomad gossip encryption key handed to the nomad_cluster module.
# NOTE(security): the default below is a literal key stored in version control.
# Supply a freshly generated key through terraform.tfvars or TF_VAR_nomad_encrypt_key
# for any real cluster, and treat the committed value as compromised.
variable "nomad_encrypt_key" {
  type        = string
  sensitive   = true
  default     = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
  description = "Nomad 集群加密密钥"
}
|
||||
159
tofu/modules/nomad-cluster/main.tf
Normal file
159
tofu/modules/nomad-cluster/main.tf
Normal file
@@ -0,0 +1,159 @@
|
||||
# Nomad 多数据中心集群模块
|
||||
# 支持跨地域部署:CN(dc1) + KR(dc2) + US(dc3)
|
||||
|
||||
# Providers this module expects its caller to supply.
terraform {
  required_providers {
    # Korea node (dc2)
    oci = {
      version = "~> 5.0"
      source  = "oracle/oci"
    }

    # US node (dc3)
    huaweicloud = {
      version = "~> 1.60"
      source  = "huaweicloud/huaweicloud"
    }

    # NOTE(review): no aws resource appears in this module's main.tf;
    # presumably reserved for an AWS variant of dc3. Confirm it is still needed.
    aws = {
      version = "~> 5.0"
      source  = "hashicorp/aws"
    }
  }
}
|
||||
|
||||
# Module-wide values.
locals {
  # Taken from the module inputs so that callers can actually override them.
  # Previously both were hard-coded here, which silently ignored
  # var.nomad_version and var.nomad_encrypt_key. The variable defaults equal
  # the old literals, so behaviour is unchanged when nothing is overridden.
  nomad_version     = var.nomad_version
  nomad_encrypt_key = var.nomad_encrypt_key

  # Static description of the three datacenters.
  datacenters = {
    dc1 = {
      name     = "dc1"
      region   = "cn"
      location = "China"
      provider = "existing" # the pre-existing semaphore node
    }
    dc2 = {
      name     = "dc2"
      region   = "kr"
      location = "Korea"
      provider = "oracle"
    }
    dc3 = {
      name     = "dc3"
      region   = "us"
      location = "US"
      provider = "huawei" # or aws
    }
  }

  # The former `user_data_template` local was removed: nothing referenced it,
  # and it rendered nomad-userdata.sh without the datacenter, bind_addr,
  # bootstrap_expect, server_enabled and client_enabled values the template
  # requires. Each node module renders its own user data with the full set.
}
|
||||
|
||||
# Static facts about the existing dc1 (semaphore) node, exposed through the
# external data source: the program just echoes a fixed JSON object.
data "external" "semaphore_info" {
  program = [
    "bash",
    "-c",
    "echo '${jsonencode({ ip = "100.116.158.95", datacenter = "dc1", status = "existing" })}'",
  ]
}
|
||||
|
||||
# Korea node on Oracle Cloud (dc2). Instantiated only when deploy_korea_node is true.
module "oracle_korea_node" {
  count  = var.deploy_korea_node ? 1 : 0
  source = "../compute"

  # Presumably selects the Oracle implementation inside the shared compute
  # module; verify against ../compute.
  provider_type = "oracle"

  instance_config = {
    # Identity and placement
    name          = "nomad-master-kr"
    datacenter    = "dc2"
    instance_type = "VM.Standard.E2.1.Micro" # free tier
    image_id      = var.oracle_ubuntu_image_id
    subnet_id     = var.oracle_subnet_id

    # Networking
    security_groups = [var.oracle_security_group_id]

    # Nomad agent settings
    nomad_role       = "server"
    bootstrap_expect = 1
    bind_addr        = "auto" # resolved on the instance at boot

    # Tags: the shared set plus node-specific entries
    tags = merge(var.common_tags, {
      Name       = "nomad-master-kr"
      Datacenter = "dc2"
      Role       = "nomad-server"
      Provider   = "oracle"
    })
  }

  # Boot script rendered from the shared template; the node runs as both
  # server and client.
  user_data = templatefile("${path.module}/templates/nomad-userdata.sh", {
    datacenter        = "dc2"
    bind_addr         = "auto"
    bootstrap_expect  = 1
    server_enabled    = true
    client_enabled    = true
    nomad_version     = local.nomad_version
    nomad_encrypt_key = local.nomad_encrypt_key
  })
}
|
||||
|
||||
# US node on Huawei Cloud (dc3). Instantiated only when deploy_us_node is true.
module "huawei_us_node" {
  count  = var.deploy_us_node ? 1 : 0
  source = "../compute"

  # Presumably selects the Huawei implementation inside the shared compute
  # module; verify against ../compute.
  provider_type = "huawei"

  instance_config = {
    # Identity and placement
    name          = "nomad-ash3c-us"
    datacenter    = "dc3"
    instance_type = "s6.small.1" # 1 vCPU, 1 GB
    image_id      = var.huawei_ubuntu_image_id
    subnet_id     = var.huawei_subnet_id

    # Networking
    security_groups = [var.huawei_security_group_id]

    # Nomad agent settings
    nomad_role       = "server"
    bootstrap_expect = 1
    bind_addr        = "auto"

    # Tags: the shared set plus node-specific entries
    tags = merge(var.common_tags, {
      Name       = "nomad-ash3c-us"
      Datacenter = "dc3"
      Role       = "nomad-server"
      Provider   = "huawei"
    })
  }

  # Boot script rendered from the shared template; the node runs as both
  # server and client.
  user_data = templatefile("${path.module}/templates/nomad-userdata.sh", {
    datacenter        = "dc3"
    bind_addr         = "auto"
    bootstrap_expect  = 1
    server_enabled    = true
    client_enabled    = true
    nomad_version     = local.nomad_version
    nomad_encrypt_key = local.nomad_encrypt_key
  })
}
|
||||
145
tofu/modules/nomad-cluster/outputs.tf
Normal file
145
tofu/modules/nomad-cluster/outputs.tf
Normal file
@@ -0,0 +1,145 @@
|
||||
# Nomad 多数据中心集群输出
|
||||
|
||||
# Cluster overview: one entry per datacenter plus the node count.
# dc1 is always present; dc2 and dc3 are null when their deploy flag is off.
output "cluster_overview" {
  description = "Nomad 多数据中心集群概览"

  value = {
    # One fixed node (dc1) plus one for each enabled remote datacenter.
    total_nodes = 1 + (var.deploy_korea_node ? 1 : 0) + (var.deploy_us_node ? 1 : 0)

    datacenters = {
      dc1 = {
        name     = "dc1"
        location = "China (CN)"
        provider = "existing"
        node     = "semaphore"
        ip       = "100.116.158.95"
        status   = "existing"
      }

      dc2 = var.deploy_korea_node ? {
        name     = "dc2"
        location = "Korea (KR)"
        provider = "oracle"
        node     = "master"
        # Falls back to "pending" when the address cannot be read.
        ip     = try(module.oracle_korea_node[0].public_ip, "pending")
        status = "deployed"
      } : null

      dc3 = var.deploy_us_node ? {
        name     = "dc3"
        location = "US"
        provider = "huawei"
        node     = "ash3c"
        # Falls back to "pending" when the address cannot be read.
        ip     = try(module.huawei_us_node[0].public_ip, "pending")
        status = "deployed"
      } : null
    }
  }
}
|
||||
|
||||
# Details of the Korea (Oracle Cloud) node, or null when it is not deployed.
# Each instance attribute falls back to null if it cannot be read.
output "oracle_korea_node" {
  description = "Oracle Cloud 韩国节点信息"

  value = var.deploy_korea_node ? {
    instance_id = try(module.oracle_korea_node[0].instance_id, null)
    public_ip   = try(module.oracle_korea_node[0].public_ip, null)
    private_ip  = try(module.oracle_korea_node[0].private_ip, null)
    datacenter  = "dc2"
    provider    = "oracle"

    # var.oracle_config is declared sensitive as a whole, which makes every
    # attribute read from it sensitive and would force this output to be
    # marked sensitive. The region name is not a secret, so the marking is
    # removed for this one attribute only.
    region = nonsensitive(var.oracle_config.region)

    # Connection helpers
    ssh_command = try("ssh ubuntu@${module.oracle_korea_node[0].public_ip}", null)
    nomad_ui    = try("http://${module.oracle_korea_node[0].public_ip}:4646", null)
  } : null
}
|
||||
|
||||
# Details of the US (Huawei Cloud) node, or null when it is not deployed.
# Each instance attribute falls back to null if it cannot be read.
output "huawei_us_node" {
  description = "华为云美国节点信息"

  value = var.deploy_us_node ? {
    instance_id = try(module.huawei_us_node[0].instance_id, null)
    public_ip   = try(module.huawei_us_node[0].public_ip, null)
    private_ip  = try(module.huawei_us_node[0].private_ip, null)
    datacenter  = "dc3"
    provider    = "huawei"

    # var.huawei_config is declared sensitive as a whole, which makes every
    # attribute read from it sensitive and would force this output to be
    # marked sensitive. The region name is not a secret, so the marking is
    # removed for this one attribute only.
    region = nonsensitive(var.huawei_config.region)

    # Connection helpers
    ssh_command = try("ssh ubuntu@${module.huawei_us_node[0].public_ip}", null)
    nomad_ui    = try("http://${module.huawei_us_node[0].public_ip}:4646", null)
  } : null
}
|
||||
|
||||
# Connection endpoints for every node in the deployment.
# compact() removes the null placeholders left by disabled or unreadable nodes.
output "cluster_endpoints" {
  description = "集群连接端点"

  value = {
    ssh_commands = compact([
      # dc1 - semaphore
      "ssh root@100.116.158.95",
      # dc2
      var.deploy_korea_node ? try("ssh ubuntu@${module.oracle_korea_node[0].public_ip}", null) : null,
      # dc3
      var.deploy_us_node ? try("ssh ubuntu@${module.huawei_us_node[0].public_ip}", null) : null
    ])

    nomad_ui_urls = compact([
      # dc1 - semaphore
      "http://100.116.158.95:4646",
      # dc2
      var.deploy_korea_node ? try("http://${module.oracle_korea_node[0].public_ip}:4646", null) : null,
      # dc3
      var.deploy_us_node ? try("http://${module.huawei_us_node[0].public_ip}:4646", null) : null
    ])
  }
}
|
||||
|
||||
# Ansible inventory structure for the Nomad servers. The dc1 host is always
# present; dc2 and dc3 hosts are merged in only when their deploy flag is on.
output "ansible_inventory" {
  description = "生成的 Ansible inventory"

  value = {
    all = {
      children = {
        nomad_servers = {
          hosts = merge(
            {
              semaphore = {
                ansible_host = "100.116.158.95"
                datacenter   = "dc1"
                provider     = "existing"
                # This module's ssh_commands output reaches dc1 as root while
                # the cloud nodes use ubuntu. Setting the user per host keeps a
                # group-wide ansible_user from being applied to this node.
                ansible_user = "root"
              }
            },
            var.deploy_korea_node ? {
              master = {
                # "pending" is used when the address cannot be read.
                ansible_host = try(module.oracle_korea_node[0].public_ip, "pending")
                datacenter   = "dc2"
                provider     = "oracle"
              }
            } : {},
            var.deploy_us_node ? {
              ash3c = {
                # "pending" is used when the address cannot be read.
                ansible_host = try(module.huawei_us_node[0].public_ip, "pending")
                datacenter   = "dc3"
                provider     = "huawei"
              }
            } : {}
          )
        }
      }
    }
  }
}
|
||||
|
||||
# Commands an operator can run after deployment. The last element is a
# newline-joined block with one leader check (curl) per reachable UI endpoint.
output "verification_commands" {
  description = "部署后验证命令"

  value = [
    "# 检查集群状态",
    "nomad server members",
    "",
    "# 检查各数据中心节点",
    "nomad node status -verbose",
    "",
    "# 跨数据中心任务调度测试",
    "nomad job run examples/cross-dc-test.nomad",
    "",
    "# 访问 UI",
    join("\n", [
      for url in compact([
        "http://100.116.158.95:4646",
        var.deploy_korea_node ? try("http://${module.oracle_korea_node[0].public_ip}:4646", null) : null,
        var.deploy_us_node ? try("http://${module.huawei_us_node[0].public_ip}:4646", null) : null
      ]) :
      "curl -s ${url}/v1/status/leader"
    ])
  ]
}
|
||||
230
tofu/modules/nomad-cluster/templates/nomad-userdata.sh
Normal file
230
tofu/modules/nomad-cluster/templates/nomad-userdata.sh
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/bin/bash
# Automatic setup script for a node of the Nomad multi-datacenter cluster.
# This file is a template: placeholders are substituted before it runs.
# Datacenter: ${datacenter}

# Stop at the first failing command.
set -e

# Print a timestamped message and append the same line to the setup log.
#   $1 - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[$stamp] $1" | tee -a /var/log/nomad-setup.log
}
|
||||
|
||||
log "开始配置 Nomad 节点 - 数据中心: ${datacenter}"

# Refresh the package index and apply pending upgrades.
log "更新系统包..."
apt-get update -y
apt-get upgrade -y

# Install the base tooling together with the Docker engine.
log "安装必要的包..."
apt-get install -y curl wget unzip jq docker.io docker-compose htop net-tools vim

# Enable Docker at boot, start it now, and add the ubuntu user to its group.
log "启动 Docker 服务..."
systemctl enable docker
systemctl start docker
usermod -aG docker ubuntu
|
||||
|
||||
# Download the Nomad release archive, verify it against the published
# SHA256SUMS file, and install the binary to /usr/local/bin.
log "安装 Nomad ${nomad_version}..."
cd /tmp
NOMAD_ZIP="nomad_${nomad_version}_linux_amd64.zip"
NOMAD_SUMS="nomad_${nomad_version}_SHA256SUMS"
# -O gives a fixed output name so a re-run overwrites instead of creating "*.1" copies.
wget -q -O "$NOMAD_ZIP" "https://releases.hashicorp.com/nomad/${nomad_version}/$NOMAD_ZIP"
wget -q -O "$NOMAD_SUMS" "https://releases.hashicorp.com/nomad/${nomad_version}/$NOMAD_SUMS"
# The binary will run as a system service, so refuse to install an archive
# whose checksum does not match; with set -e a mismatch aborts the script.
grep -F "$NOMAD_ZIP" "$NOMAD_SUMS" | sha256sum -c -
# -o overwrites files left by a previous run instead of prompting.
unzip -o "$NOMAD_ZIP"
mv nomad /usr/local/bin/
chmod +x /usr/local/bin/nomad
|
||||
|
||||
# Create the nomad system user and the data, config and log directories.
log "创建 Nomad 用户和目录..."
# useradd fails when the account already exists, which would abort the whole
# script under set -e on a re-run; create the user only if it is missing.
if ! id nomad >/dev/null 2>&1; then
  useradd --system --home /etc/nomad.d --shell /bin/false nomad
fi
mkdir -p /opt/nomad/data
mkdir -p /etc/nomad.d
mkdir -p /var/log/nomad
chown -R nomad:nomad /opt/nomad /etc/nomad.d /var/log/nomad
|
||||
|
||||
# Work out the address Nomad binds to.
# Note for maintainers: this file is a template, so shell variables here are
# written as $NAME (no braces) to keep them apart from template placeholders.

# Succeeds when $1 has the shape of a dotted-quad IPv4 address.
is_ipv4() {
  echo "$1" | grep -Eq '^([0-9]{1,3}\.){3}[0-9]{1,3}$'
}

# Print the first candidate that looks like an IPv4 address; fail if none does.
# Order: EC2-style metadata, GCE-style metadata, routing table, hostname -I.
detect_bind_addr() {
  local candidate

  # -f makes curl fail on HTTP errors instead of printing the error page, and
  # --max-time stops an unreachable metadata service from stalling the boot.
  candidate=$(curl -sf --max-time 3 http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null || true)
  if is_ipv4 "$candidate"; then echo "$candidate"; return 0; fi

  candidate=$(curl -sf --max-time 3 -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/ip 2>/dev/null || true)
  if is_ipv4 "$candidate"; then echo "$candidate"; return 0; fi

  # Take the value after the "src" keyword rather than a fixed field number,
  # which shifts depending on whether the route goes through a gateway.
  candidate=$(ip route get 8.8.8.8 2>/dev/null | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}' || true)
  if is_ipv4 "$candidate"; then echo "$candidate"; return 0; fi

  candidate=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
  if is_ipv4 "$candidate"; then echo "$candidate"; return 0; fi

  return 1
}

if [ "${bind_addr}" = "auto" ]; then
  # Abort rather than write an empty or garbage address into the Nomad config.
  BIND_ADDR=$(detect_bind_addr) || { log "无法检测到有效的 IPv4 地址"; exit 1; }
else
  BIND_ADDR="${bind_addr}"
fi

log "检测到 IP 地址: $BIND_ADDR"
|
||||
|
||||
# Write the Nomad agent configuration.
# The heredoc delimiter is unquoted, so $BIND_ADDR is expanded by the shell
# when the file is written; the server and client sections are included or
# left out by the template conditionals around them.
log "创建 Nomad 配置文件..."
cat > /etc/nomad.d/nomad.hcl << EOF
datacenter = "${datacenter}"
region     = "global"
data_dir   = "/opt/nomad/data"

bind_addr = "$BIND_ADDR"

%{ if server_enabled }
server {
  enabled          = true
  bootstrap_expect = ${bootstrap_expect}
  encrypt          = "${nomad_encrypt_key}"
}
%{ endif }

%{ if client_enabled }
client {
  enabled = true

  host_volume "docker-sock" {
    path      = "/var/run/docker.sock"
    read_only = false
  }
}
%{ endif }

ui {
  enabled = true
}

addresses {
  http = "0.0.0.0"
  rpc  = "$BIND_ADDR"
  serf = "$BIND_ADDR"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

plugin "docker" {
  config {
    allow_privileged = true
    volumes {
      enabled = true
    }
  }
}

telemetry {
  collection_interval        = "10s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}

log_level = "INFO"
log_file  = "/var/log/nomad/nomad.log"
EOF
|
||||
|
||||
# Write the systemd unit for the Nomad agent.
# The heredoc delimiter is unquoted, so MAINPID is escaped to reach the unit
# file literally. The service account is chosen when the template is rendered:
# a client agent runs as root, because the docker driver and the docker.sock
# host volume configured for it are not usable by the unprivileged nomad
# account (that account is not in the docker group); a server-only agent
# keeps the unprivileged account.
log "创建 systemd 服务文件..."
cat > /etc/systemd/system/nomad.service << EOF
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl

[Service]
Type=notify
%{ if client_enabled ~}
User=root
Group=root
%{ else ~}
User=nomad
Group=nomad
%{ endif ~}
ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF
|
||||
|
||||
# Register the new unit, enable it at boot and start it.
log "启动 Nomad 服务..."
systemctl daemon-reload
systemctl enable nomad
systemctl start nomad

# Give the agent a moment to come up before checking on it.
log "等待 Nomad 服务启动..."
sleep 10

# Report the outcome. The diagnostic commands are best-effort ("|| true") so
# that they never abort the script under set -e.
log "验证 Nomad 安装..."
if ! systemctl is-active --quiet nomad; then
  log "❌ Nomad 服务启动失败"
  systemctl status nomad --no-pager || true
  journalctl -u nomad --no-pager -n 20 || true
else
  log "✅ Nomad 服务运行正常"
  log "📊 节点信息:"
  /usr/local/bin/nomad node status -self || true
fi
|
||||
|
||||
# Open the Nomad and SSH ports when ufw is installed.
log "配置防火墙规则..."
if command -v ufw >/dev/null 2>&1; then
  ufw allow 4646/tcp  # HTTP API
  ufw allow 4647/tcp  # RPC
  ufw allow 4648/tcp  # Serf
  # Serf gossip uses UDP as well as TCP on the same port; without this rule
  # servers behind ufw cannot exchange gossip reliably.
  ufw allow 4648/udp  # Serf
  ufw allow 22/tcp    # SSH
fi
|
||||
|
||||
# Install a small status helper and shell aliases for the ubuntu user.
log "创建管理脚本..."
# Quoted delimiter: the helper is written exactly as shown, without shell expansion.
cat > /usr/local/bin/nomad-status << 'EOF'
#!/bin/bash
echo "=== Nomad 服务状态 ==="
systemctl status nomad --no-pager

echo -e "\n=== Nomad 集群成员 ==="
nomad server members 2>/dev/null || echo "无法连接到集群"

echo -e "\n=== Nomad 节点状态 ==="
nomad node status 2>/dev/null || echo "无法获取节点状态"

echo -e "\n=== 最近日志 ==="
journalctl -u nomad --no-pager -n 5
EOF

chmod +x /usr/local/bin/nomad-status

# Append the aliases to the ubuntu user's bashrc.
cat >> /home/ubuntu/.bashrc << 'EOF'
alias ns="nomad-status"
alias nomad-logs="journalctl -u nomad -f"
EOF
|
||||
|
||||
# Final summary in the setup log.
log "🎉 Nomad 节点配置完成!"
log "📍 数据中心: ${datacenter}"
log "🌐 IP 地址: $BIND_ADDR"
log "🔗 Web UI: http://$BIND_ADDR:4646"
log "📝 使用 'nomad-status' 或 'ns' 命令查看状态"

# Install a login banner with the node details. The delimiter is unquoted, so
# the address detected above is written into the banner script as a literal.
cat > /etc/update-motd.d/99-nomad << EOF
#!/bin/bash
echo ""
echo "🚀 Nomad 节点信息:"
echo " 数据中心: ${datacenter}"
echo " IP 地址: $BIND_ADDR"
echo " Web UI: http://$BIND_ADDR:4646"
echo " 状态检查: nomad-status"
echo ""
EOF

chmod +x /etc/update-motd.d/99-nomad

log "节点配置脚本执行完成"
|
||||
118
tofu/modules/nomad-cluster/variables.tf
Normal file
118
tofu/modules/nomad-cluster/variables.tf
Normal file
@@ -0,0 +1,118 @@
|
||||
# Nomad 多数据中心集群变量定义
|
||||
|
||||
# Deployment toggles: each flag is used as the count condition of one node module.
variable "deploy_korea_node" {
  type        = bool
  default     = true
  description = "是否部署韩国节点 (Oracle Cloud)"
}

variable "deploy_us_node" {
  type        = bool
  default     = true
  description = "是否部署美国节点 (华为云)"
}
|
||||
|
||||
# Oracle Cloud inputs (Korea node, dc2).

# Credentials and region as one object; the whole object is sensitive.
variable "oracle_config" {
  sensitive   = true
  description = "Oracle Cloud 配置"

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
  })
}

# NOTE(review): the original comment says the image is looked up through a
# data source when this is empty, but no such lookup is visible in this module;
# confirm the compute module handles the empty default.
variable "oracle_ubuntu_image_id" {
  type        = string
  default     = ""
  description = "Oracle Cloud Ubuntu 镜像 ID"
}

variable "oracle_subnet_id" {
  type        = string
  description = "Oracle Cloud 子网 ID"
}

variable "oracle_security_group_id" {
  type        = string
  description = "Oracle Cloud 安全组 ID"
}
|
||||
|
||||
# Huawei Cloud inputs (US node, dc3).

# Credentials and region as one object; the whole object is sensitive.
variable "huawei_config" {
  sensitive   = true
  description = "华为云配置"

  type = object({
    access_key = string
    secret_key = string
    region     = string
  })
}

# NOTE(review): the original comment says the image is looked up through a
# data source when this is empty, but no such lookup is visible in this module;
# confirm the compute module handles the empty default.
variable "huawei_ubuntu_image_id" {
  type        = string
  default     = ""
  description = "华为云 Ubuntu 镜像 ID"
}

variable "huawei_subnet_id" {
  type        = string
  description = "华为云子网 ID"
}

variable "huawei_security_group_id" {
  type        = string
  description = "华为云安全组 ID"
}
|
||||
|
||||
# Inputs shared by all nodes.

# Base tags; each node merges its own Name/Datacenter/Role/Provider on top.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"

  default = {
    Project     = "nomad-multi-dc"
    Environment = "production"
    ManagedBy   = "opentofu"
  }
}

# NOTE(review): this value is not referenced by the node modules in main.tf;
# confirm how the key is meant to reach the instances.
variable "ssh_public_key" {
  type        = string
  description = "SSH 公钥"
}

# The default is open to the world; restrict it in production.
variable "allowed_cidr_blocks" {
  type        = list(string)
  default     = ["0.0.0.0/0"]
  description = "允许访问的 CIDR 块"
}
|
||||
|
||||
# Nomad-specific inputs.

variable "nomad_version" {
  type        = string
  default     = "1.10.5"
  description = "Nomad 版本"
}

# NOTE(security): the default below is a literal key stored in version control.
# Callers should always pass a freshly generated key and treat the committed
# value as compromised.
variable "nomad_encrypt_key" {
  type        = string
  sensitive   = true
  default     = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
  description = "Nomad 集群加密密钥"
}
|
||||
|
||||
# Network inputs.
# NOTE(review): neither variable is referenced in this module's main.tf or
# outputs.tf; confirm whether they are still needed.

variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

variable "availability_zones" {
  type        = list(string)
  default     = ["a", "b"]
  description = "可用区列表"
}
|
||||
Reference in New Issue
Block a user