1 feat: 重构基础设施架构并完善Consul集群配置

2
     3	主要变更:
     4	- 重构Terraform/OpenTofu目录结构,统一迁移至infrastructure/opentofu
     5	- 添加"7天创造世界"文档,记录基础设施建设演进逻辑
     6	- 更新Consul集群配置管理经验,添加实际案例和解决方案
     7	- 修正README中的Sticky Note,反映Consul集群健康状态
     8	- 添加Ansible部署配置和inventory文件
     9	- 完善项目文档结构,添加各组件配置指南
    10
    11	技术架构演进:
    12	- 第1天: Tailscale网络连接基础 
    13	- 第2天: Ansible分布式控制 
    14	- 第3天: Nomad服务感知与任务调度 
    15	- 第4天: Consul配置集中管理 
    16	- 第5天: OpenTofu状态一致性 
    17	- 第6天: Vault密钥管理 
    18	- 第7天: Waypoint应用部署 
This commit is contained in:
2025-09-30 03:46:33 +00:00
parent c0064b2cad
commit e8bfc76038
119 changed files with 1772 additions and 631 deletions

View File

@@ -0,0 +1,123 @@
# Consul KV 命名规范
本文档描述了在Consul KV中存储配置信息的统一命名规范,以确保所有配置管理的一致性和可维护性。
## 命名规范
### 基本格式
```
config/{environment}/{provider}/{region_or_service}/{key}
```
### 各部分说明
- **config**: 固定前缀,表示这是一个配置项
- **environment**: 环境名称,如 `dev`、`staging`、`prod`
- **provider**: 云服务提供商,如 `oracle`、`digitalocean`、`aws`、`gcp`
- **region_or_service**: 区域或服务名称,如 `kr`、`us`、`sgp`
- **key**: 具体的配置键名,如 `token`、`tenancy_ocid`、`user_ocid`
### 示例
#### Oracle Cloud 配置
```
config/dev/oracle/kr/tenancy_ocid
config/dev/oracle/kr/user_ocid
config/dev/oracle/kr/fingerprint
config/dev/oracle/kr/private_key
config/dev/oracle/kr/region
config/dev/oracle/us/tenancy_ocid
config/dev/oracle/us/user_ocid
config/dev/oracle/us/fingerprint
config/dev/oracle/us/private_key
config/dev/oracle/us/region
```
#### DigitalOcean 配置
```
config/dev/digitalocean/token
```
#### 其他云服务商配置(示例)
```
config/dev/aws/access_key
config/dev/aws/secret_key
config/dev/aws/region
config/dev/gcp/project_id
config/dev/gcp/credentials_file
config/dev/gcp/region
```
## 使用说明
### 添加新配置
当需要为新的云服务商或环境添加配置时,请遵循上述命名规范:
1. 确定环境名称(如 `dev`)
2. 确定云服务提供商(如 `aws`)
3. 确定区域或服务(如 `ap-northeast-2`)
4. 确定具体的配置键名(如 `access_key`)
例如:
```
consul kv put config/dev/aws/ap-northeast-2/access_key your_access_key
```
### 在Terraform中使用
在Terraform配置中使用 `consul_keys` 数据源获取配置:
```hcl
data "consul_keys" "aws_config" {
key {
name = "access_key"
path = "config/dev/aws/ap-northeast-2/access_key"
}
key {
name = "secret_key"
path = "config/dev/aws/ap-northeast-2/secret_key"
}
}
provider "aws" {
access_key = data.consul_keys.aws_config.var.access_key
secret_key = data.consul_keys.aws_config.var.secret_key
region = "ap-northeast-2"
}
```
### 与Vault集成
当需要与Vault集成时,可以使用相同的命名规范,确保Consul和Vault中的配置路径保持一致。
## 维护说明
- 所有Agent在添加新的Consul KV键时必须遵循此命名规范
- 定期检查Consul KV中的键,确保符合规范
- 如需修改命名规范,请更新此文档并通知所有相关Agent
## 常见问题
### Q: 为什么不使用服务名称作为前缀(如 `oracle/config/dev/...`)?
A: 使用 `config` 作为统一前缀可以更容易地区分配置项和其他类型的键值对,便于管理和筛选。
### Q: 如何处理敏感信息?
A: 敏感信息如API密钥、私钥等应存储在Vault中,Consul主要用于非敏感配置。如果必须在Consul中存储敏感信息,请确保Consul集群的安全性。
### Q: 如何处理多环境配置?
A: 通过修改 `environment` 部分来区分不同环境,如 `config/dev/...`、`config/staging/...`、`config/prod/...`
## 更新历史
- 2024-01-01: 初始版本,定义了基本的命名规范
- 2024-01-02: 统一DigitalOcean配置路径,由 `consul/digitalocean/token` 改为 `config/dev/digitalocean/token`

View File

@@ -0,0 +1,13 @@
# Read the DigitalOcean API token from Consul KV.
# The declared default makes a missing key resolve to an empty string.
data "consul_keys" "do_token" {
  key {
    path    = "config/dev/digitalocean/token"
    name    = "token"
    default = ""
  }
}
# DigitalOcean provider, authenticated with the token looked up in Consul KV.
provider "digitalocean" {
  token = data.consul_keys.do_token.var.token
}

View File

@@ -0,0 +1,162 @@
# Root configuration for the dev environment.
# Pins the tool version, the providers in use, and the state backend.
terraform {
  required_version = ">= 1.6"

  # State is stored in a local file.
  backend "local" {
    path = "terraform.tfstate"
  }

  required_providers {
    # Consul (source of the KV-stored configuration)
    consul = {
      source  = "hashicorp/consul"
      version = "~> 2.22.0"
    }

    # DigitalOcean
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }

    # Utility providers
    local = {
      source  = "hashicorp/local"
      version = "~> 2.1"
    }

    # Oracle Cloud Infrastructure
    oci = {
      source  = "oracle/oci"
      version = "~> 7.20"
    }

    random = {
      source  = "hashicorp/random"
      version = "~> 3.1"
    }

    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }

    # HashiCorp Vault
    vault = {
      source  = "hashicorp/vault"
      version = "~> 4.0"
    }
  }
}
# Consul provider: plain-HTTP connection to the agent on localhost, datacenter dc1.
provider "consul" {
  datacenter = "dc1"
  scheme     = "http"
  address    = "localhost:8500"
}
# Vault provider.
# The address comes from var.vault_config, while the token is taken from the
# separate var.vault_token variable.
provider "vault" {
  token   = var.vault_token
  address = var.vault_config.address
}
# Oracle Cloud credentials for the "kr" path, read from Consul KV
# under config/dev/oracle/kr/.
data "consul_keys" "oracle_config" {
  key {
    path = "config/dev/oracle/kr/tenancy_ocid"
    name = "tenancy_ocid"
  }

  key {
    path = "config/dev/oracle/kr/user_ocid"
    name = "user_ocid"
  }

  key {
    path = "config/dev/oracle/kr/fingerprint"
    name = "fingerprint"
  }

  key {
    path = "config/dev/oracle/kr/private_key"
    name = "private_key"
  }
}
# Oracle Cloud credentials for the US region, read from Consul KV
# under config/dev/oracle/us/.
data "consul_keys" "oracle_config_us" {
  key {
    path = "config/dev/oracle/us/tenancy_ocid"
    name = "tenancy_ocid"
  }

  key {
    path = "config/dev/oracle/us/user_ocid"
    name = "user_ocid"
  }

  key {
    path = "config/dev/oracle/us/fingerprint"
    name = "fingerprint"
  }

  key {
    path = "config/dev/oracle/us/private_key"
    name = "private_key"
  }
}
# Default OCI provider, configured from the values fetched from Consul KV.
# The region is fixed to ap-chuncheon-1 rather than read from Consul.
provider "oci" {
  region       = "ap-chuncheon-1"
  tenancy_ocid = data.consul_keys.oracle_config.var.tenancy_ocid
  user_ocid    = data.consul_keys.oracle_config.var.user_ocid
  fingerprint  = data.consul_keys.oracle_config.var.fingerprint
  private_key  = data.consul_keys.oracle_config.var.private_key
}
# Aliased OCI provider ("us") for the US region, configured from the
# US-specific values fetched from Consul KV.
provider "oci" {
  alias        = "us"
  region       = "us-ashburn-1"
  tenancy_ocid = data.consul_keys.oracle_config_us.var.tenancy_ocid
  user_ocid    = data.consul_keys.oracle_config_us.var.user_ocid
  fingerprint  = data.consul_keys.oracle_config_us.var.fingerprint
  private_key  = data.consul_keys.oracle_config_us.var.private_key
}
# Oracle Cloud 基础设施 - 暂时注释掉以避免VCN数量限制问题
# module "oracle_cloud" {
# source = "../../providers/oracle-cloud"
#
# # 传递变量
# environment = var.environment
# project_name = var.project_name
# owner = var.owner
# vpc_cidr = var.vpc_cidr
# availability_zones = var.availability_zones
# common_tags = var.common_tags
#
# # 使用从Consul获取的配置
# oci_config = {
# tenancy_ocid = data.consul_keys.oracle_config.var.tenancy_ocid
# user_ocid = data.consul_keys.oracle_config.var.user_ocid
# fingerprint = data.consul_keys.oracle_config.var.fingerprint
# private_key = data.consul_keys.oracle_config.var.private_key
# region = "ap-chuncheon-1"
# compartment_ocid = data.consul_keys.oracle_config.var.tenancy_ocid # 使用tenancy_ocid作为compartment_ocid
# }
#
# # 开发环境特定配置
# instance_count = 1
# instance_size = "VM.Standard.E2.1.Micro" # 免费层
#
# providers = {
# oci = oci
# }
# }
# 输出
# output "oracle_cloud_outputs" {
# description = "Oracle Cloud 基础设施输出"
# value = module.oracle_cloud
# }

View File

@@ -0,0 +1,61 @@
# Example variable values for the dev environment.
# Copy this file to terraform.tfvars and fill in real values.

# Basic settings
environment  = "dev"
project_name = "mgmt"
owner        = "ben"

# Cloud providers to enable
cloud_providers = ["oracle", "huawei"]

# Networking
vpc_cidr           = "10.0.0.0/16"
availability_zones = ["a", "b"]

# Tags applied to all resources
common_tags = {
  Environment = "dev"
  Project     = "mgmt"
  Owner       = "ben"
  ManagedBy   = "opentofu"
}

# Oracle Cloud
oci_config = {
  tenancy_ocid     = "ocid1.tenancy.oc1..your-tenancy-id"
  user_ocid        = "ocid1.user.oc1..your-user-id"
  fingerprint      = "your-key-fingerprint"
  private_key_path = "~/.oci/oci_api_key.pem"
  region           = "ap-seoul-1"
  compartment_ocid = "ocid1.compartment.oc1..your-compartment-id"
}

# Huawei Cloud
huawei_config = {
  access_key = "your-access-key"
  secret_key = "your-secret-key"
  region     = "cn-north-4"
  project_id = "your-project-id"
}

# Google Cloud (optional)
gcp_config = {
  project_id       = "your-project-id"
  region           = "asia-northeast3"
  zone             = "asia-northeast3-a"
  credentials_file = "~/.gcp/service-account.json"
}

# AWS (optional)
aws_config = {
  region     = "ap-northeast-2"
  access_key = "your-access-key"
  secret_key = "your-secret-key"
}

# DigitalOcean (optional)
do_config = {
  token  = "your-do-token"
  region = "sgp1"
}

View File

@@ -0,0 +1,154 @@
# Input variables for the dev environment.

# Environment name.
variable "environment" {
  type        = string
  default     = "dev"
  description = "环境名称"
}

# Project name.
variable "project_name" {
  type        = string
  default     = "mgmt"
  description = "项目名称"
}

# Project owner.
variable "owner" {
  type        = string
  default     = "ben"
  description = "项目所有者"
}

# Cloud providers to enable.
variable "cloud_providers" {
  type        = list(string)
  default     = ["oracle"]
  description = "要启用的云服务商列表"
}

# CIDR block of the VPC.
variable "vpc_cidr" {
  type        = string
  default     = "10.0.0.0/16"
  description = "VPC CIDR 块"
}

# Availability zones.
variable "availability_zones" {
  type        = list(string)
  default     = ["a", "b"]
  description = "可用区列表"
}

# Tags shared by all resources.
variable "common_tags" {
  type        = map(string)
  description = "通用标签"

  default = {
    Environment = "dev"
    Project     = "mgmt"
    ManagedBy   = "opentofu"
  }
}
# Oracle Cloud (OCI) connection settings.
# Marked sensitive so the tenancy/user OCIDs, key fingerprint and key path are
# redacted from plan/apply output, matching the other credential-bearing
# provider variables (huawei_config, aws_config, do_config, vault_config).
variable "oci_config" {
  description = "Oracle Cloud 配置"

  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
    compartment_ocid = optional(string)
  })

  default = {
    tenancy_ocid     = ""
    user_ocid        = ""
    fingerprint      = ""
    private_key_path = ""
    region           = "ap-seoul-1"
    compartment_ocid = ""
  }

  sensitive = true
}
# Huawei Cloud connection settings (sensitive: carries the access/secret key pair).
variable "huawei_config" {
  sensitive   = true
  description = "华为云配置"

  type = object({
    access_key = string
    secret_key = string
    region     = string
    project_id = optional(string)
  })

  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
    project_id = ""
  }
}
# Google Cloud connection settings.
variable "gcp_config" {
  description = "Google Cloud 配置"

  type = object({
    project_id       = string
    region           = string
    zone             = string
    credentials_file = string
  })

  default = {
    project_id       = ""
    region           = "asia-northeast3"
    zone             = "asia-northeast3-a"
    credentials_file = ""
  }
}
# AWS connection settings (sensitive: carries the access/secret key pair).
variable "aws_config" {
  sensitive   = true
  description = "AWS 配置"

  type = object({
    region     = string
    access_key = string
    secret_key = string
  })

  default = {
    region     = "ap-northeast-2"
    access_key = ""
    secret_key = ""
  }
}
# DigitalOcean connection settings (sensitive: carries the API token).
variable "do_config" {
  sensitive   = true
  description = "DigitalOcean 配置"

  type = object({
    token  = string
    region = string
  })

  default = {
    token  = ""
    region = "sgp1"
  }
}
# HashiCorp Vault connection settings (address plus a token field).
variable "vault_config" {
  sensitive   = true
  description = "HashiCorp Vault 配置"

  type = object({
    address = string
    token   = string
  })

  default = {
    address = "http://localhost:8200"
    token   = ""
  }
}

# Stand-alone Vault access token.
variable "vault_token" {
  sensitive   = true
  type        = string
  default     = ""
  description = "Vault 访问令牌"
}

View File

@@ -0,0 +1,169 @@
# Nomad multi-datacenter production environment.
# Deployment layout: CN (dc1) + KR (dc2) + US (dc3)
terraform {
  required_version = ">= 1.0"

  required_providers {
    oci = {
      source  = "oracle/oci"
      version = "~> 7.20"
    }

    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }

    # This configuration uses the local_file data source and resource, so the
    # provider is declared explicitly instead of relying on implicit,
    # unconstrained resolution of hashicorp/local.
    local = {
      source  = "hashicorp/local"
      version = "~> 2.1"
    }
  }
}
# Oracle Cloud provider, aliased "korea".
provider "oci" {
  alias  = "korea"
  region = "ap-seoul-1" # Seoul, Korea

  tenancy_ocid     = var.oracle_tenancy_ocid
  user_ocid        = var.oracle_user_ocid
  fingerprint      = var.oracle_fingerprint
  private_key_path = var.oracle_private_key_path
}
# Huawei Cloud provider, aliased "us".
provider "huaweicloud" {
  alias  = "us"
  region = "us-east-1" # US East

  access_key = var.huawei_access_key
  secret_key = var.huawei_secret_key
}
# Local values shared by the modules in this configuration.
locals {
  environment  = "production"
  project_name = "nomad-multi-dc"

  # Tags attached to everything this configuration creates.
  common_tags = {
    Project     = local.project_name
    Environment = local.environment
    ManagedBy   = "opentofu"
    Owner       = "devops-team"
  }
}
# Data source: reads the SSH public key from the invoking user's
# ~/.ssh/id_rsa.pub (the path is expanded with pathexpand).
data "local_file" "ssh_public_key" {
  filename = pathexpand("~/.ssh/id_rsa.pub")
}
# Oracle Cloud infrastructure (Korea - dc2), built with the "korea" OCI provider.
module "oracle_infrastructure" {
  source = "../../providers/oracle-cloud"

  providers = {
    oci = oci.korea
  }

  environment  = local.environment
  project_name = local.project_name
  common_tags  = local.common_tags
  vpc_cidr     = "10.1.0.0/16"

  oci_config = {
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
    region           = "ap-seoul-1"
  }
}
# Huawei Cloud infrastructure (US - dc3), built with the "us" huaweicloud provider.
module "huawei_infrastructure" {
  source = "../../providers/huawei-cloud"

  providers = {
    huaweicloud = huaweicloud.us
  }

  environment        = local.environment
  project_name       = local.project_name
  common_tags        = local.common_tags
  vpc_cidr           = "10.2.0.0/16"
  availability_zones = ["us-east-1a", "us-east-1b"]
}
# Nomad multi-datacenter cluster.
# Takes the subnet and security-group IDs from the two infrastructure modules.
module "nomad_cluster" {
  source = "../../modules/nomad-cluster"

  # Which nodes to deploy
  deploy_korea_node = var.deploy_korea_node
  deploy_us_node    = var.deploy_us_node

  # Nomad settings
  nomad_version     = "1.10.5"
  nomad_encrypt_key = var.nomad_encrypt_key

  # Shared settings
  ssh_public_key = data.local_file.ssh_public_key.content
  common_tags    = local.common_tags

  # Oracle Cloud
  oracle_subnet_id         = module.oracle_infrastructure.public_subnet_ids[0]
  oracle_security_group_id = module.oracle_infrastructure.security_group_id

  oracle_config = {
    tenancy_ocid     = var.oracle_tenancy_ocid
    user_ocid        = var.oracle_user_ocid
    fingerprint      = var.oracle_fingerprint
    private_key_path = var.oracle_private_key_path
    region           = "ap-seoul-1"
  }

  # Huawei Cloud
  huawei_subnet_id         = module.huawei_infrastructure.public_subnet_ids[0]
  huawei_security_group_id = module.huawei_infrastructure.security_group_id

  huawei_config = {
    access_key = var.huawei_access_key
    secret_key = var.huawei_secret_key
    region     = "us-east-1"
  }
}
# Render an Ansible inventory (YAML) for the Nomad servers reported by the
# nomad_cluster module.
# NOTE(review): the SSH arguments below turn off host-key checking — confirm
# that this is acceptable for a production inventory.
resource "local_file" "ansible_inventory" {
  content = yamlencode({
    all = {
      children = {
        nomad_servers = {
          hosts = module.nomad_cluster.ansible_inventory.all.children.nomad_servers.hosts
        }
      }

      vars = {
        ansible_user                 = "ubuntu"
        ansible_ssh_private_key_file = "~/.ssh/id_rsa"
        ansible_ssh_common_args      = "-o StrictHostKeyChecking=no"
      }
    }
  })

  filename = "${path.module}/generated/nomad-cluster-inventory.yml"
}
# Render the post-deployment script from its template and write it out
# with mode 0755.
resource "local_file" "post_deploy_script" {
  file_permission = "0755"
  filename        = "${path.module}/generated/post-deploy.sh"

  content = templatefile("${path.module}/templates/post-deploy.sh", {
    cluster_overview = module.nomad_cluster.cluster_overview
    endpoints        = module.nomad_cluster.cluster_endpoints
  })
}
# Render the cross-datacenter test job from its template, passing the
# datacenter list dc1, dc2, dc3.
resource "local_file" "cross_dc_test_job" {
  content = templatefile("${path.module}/templates/cross-dc-test.nomad", {
    datacenters = ["dc1", "dc2", "dc3"]
  })

  filename = "${path.module}/generated/cross-dc-test.nomad"
}

View File

@@ -0,0 +1,46 @@
# Outputs of the Nomad multi-datacenter production environment.
# Each of these re-exports the same-named output of the nomad_cluster module.

output "cluster_overview" {
  value       = module.nomad_cluster.cluster_overview
  description = "Nomad 多数据中心集群概览"
}

output "cluster_endpoints" {
  value       = module.nomad_cluster.cluster_endpoints
  description = "集群连接端点"
}

output "oracle_korea_node" {
  value       = module.nomad_cluster.oracle_korea_node
  description = "Oracle Cloud 韩国节点信息"
}

output "huawei_us_node" {
  value       = module.nomad_cluster.huawei_us_node
  description = "华为云美国节点信息"
}
# Deployment summary: node count, datacenter names, suggested follow-up steps,
# and the UI/SSH endpoints reported by the nomad_cluster module.
output "deployment_summary" {
  description = "部署摘要"

  value = {
    total_nodes = module.nomad_cluster.cluster_overview.total_nodes
    datacenters = keys(module.nomad_cluster.cluster_overview.datacenters)

    next_steps = [
      "1. 等待所有节点启动完成 (约 5-10 分钟)",
      "2. 运行: ./generated/post-deploy.sh",
      "3. 验证集群: nomad server members",
      "4. 测试跨 DC 调度: nomad job run generated/cross-dc-test.nomad",
      "5. 访问 Web UI 查看集群状态"
    ]

    web_ui_urls  = module.nomad_cluster.cluster_endpoints.nomad_ui_urls
    ssh_commands = module.nomad_cluster.cluster_endpoints.ssh_commands
  }
}

# Verification commands, re-exported from the nomad_cluster module.
output "verification_commands" {
  value       = module.nomad_cluster.verification_commands
  description = "验证命令"
}

View File

@@ -0,0 +1,22 @@
# Example values for the Nomad multi-datacenter production environment.
# Copy this file to terraform.tfvars and fill in real values.

# Deployment switches
deploy_korea_node = true # whether to deploy the Korea node
deploy_us_node    = true # whether to deploy the US node

# Oracle Cloud settings (Korea - dc2)
# How to obtain: https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm
oracle_tenancy_ocid     = "ocid1.tenancy.oc1..aaaaaaaa..."
oracle_user_ocid        = "ocid1.user.oc1..aaaaaaaa..."
oracle_fingerprint      = "aa:bb:cc:dd:ee:ff:..."
oracle_private_key_path = "~/.oci/oci_api_key.pem"

# Huawei Cloud settings (US - dc3)
# How to obtain: https://console.huaweicloud.com/iam/#/mine/accessKey
huawei_access_key = "YOUR_HUAWEI_ACCESS_KEY"
huawei_secret_key = "YOUR_HUAWEI_SECRET_KEY"

# Nomad cluster encryption key (optional; the variable has a default).
# Generate a key of your own with: nomad operator keygen
# An example file must never carry a concrete key, so the line is left
# commented out with a placeholder; uncomment it and paste your generated key.
# nomad_encrypt_key = "<output of: nomad operator keygen>"

View File

@@ -0,0 +1,60 @@
# Input variables for the Nomad multi-datacenter production environment.

# Deployment switches.
variable "deploy_korea_node" {
  type        = bool
  default     = false # off by default so that no compute resources are created
  description = "是否部署韩国节点 (Oracle Cloud)"
}

variable "deploy_us_node" {
  type        = bool
  default     = false # off by default so that no compute resources are created
  description = "是否部署美国节点 (华为云)"
}
# Oracle Cloud settings. All are required (no defaults) and marked sensitive.
variable "oracle_tenancy_ocid" {
  sensitive   = true
  type        = string
  description = "Oracle Cloud 租户 OCID"
}

variable "oracle_user_ocid" {
  sensitive   = true
  type        = string
  description = "Oracle Cloud 用户 OCID"
}

variable "oracle_fingerprint" {
  sensitive   = true
  type        = string
  description = "Oracle Cloud API 密钥指纹"
}

variable "oracle_private_key_path" {
  sensitive   = true
  type        = string
  description = "Oracle Cloud 私钥文件路径"
}
# Huawei Cloud settings. Both are required (no defaults) and marked sensitive.
variable "huawei_access_key" {
  sensitive   = true
  type        = string
  description = "华为云访问密钥"
}

variable "huawei_secret_key" {
  sensitive   = true
  type        = string
  description = "华为云秘密密钥"
}
# Nomad settings.
# NOTE(security): the default below is a concrete key checked into source
# control, so any deployment relying on it shares a publicly known key.
# Supply a per-cluster key (generate with: nomad operator keygen) through
# tfvars or the environment, and plan to drop this default once every caller
# passes its own value.
variable "nomad_encrypt_key" {
  sensitive   = true
  type        = string
  default     = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
  description = "Nomad 集群加密密钥"
}