REMOVE: 删除不再使用的 Terraform 配置文件
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 7m45s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 2m33s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Simple Test / test (push) Failing after 2m48s

- 移除 nomad-terraform.tf 和 test_opentofu_consul.tf 文件
- 更新 Ansible inventory,注释掉不存在的节点 hcp2
- 修改 inventory.ini,确保节点配置的准确性
- 在 nomad-config 模块中添加 null_provider 以支持新配置
- 更新 influxdb1.hcl,添加 Grafana 和 Prometheus 数据卷配置
This commit is contained in:
2025-10-10 13:53:41 +00:00
parent 45f93cc68c
commit eff8d3ec6d
50 changed files with 3683 additions and 239 deletions

View File

@@ -0,0 +1,78 @@
# OpenTofu 小王 - 修复不安全的服务器配置
# terraform 块已在 onecloud1-deploy-clean.tf 中定义
# Server nodes whose insecure Nomad configuration must be replaced.
# Each entry is used as the short hostname inside the tailnet domain.
variable "insecure_servers" {
  type    = list(string)
  default = ["ash1d", "ash2e"]
}
# Render one hardened server config per node: the shared template is read
# verbatim and every literal NODE_NAME occurrence is replaced by the node name.
resource "local_file" "secure_server_configs" {
  for_each = toset(var.insecure_servers)

  content = replace(
    file("${path.module}/../nomad-configs-tofu/server-template-secure.hcl"),
    "NODE_NAME",
    each.key
  )

  # Output lands under <module>/generated/, one file per node.
  filename = "${path.module}/generated/${each.key}-server-secure.hcl"
}
# Push the rendered secure config to each server node and restart Nomad.
#
# Flow per node (runs on the machine executing OpenTofu, via local-exec):
#   1. ping the node (best effort only; a failed ping does not abort),
#   2. scp the rendered config to /tmp/nomad-secure.hcl on the node,
#   3. over ssh: stop Nomad, back up the current config, install the new one,
#      wipe the server Raft directory, start Nomad and check its status.
#
# Failure handling:
#   - If the upload fails the script stops with a non-zero exit code, so a
#     stale /tmp/nomad-secure.hcl is never installed and Raft data is never
#     wiped on the strength of a failed upload.
#   - Remote steps are chained with `&&`; the first failing step aborts the
#     rest and the failure is propagated, so OpenTofu marks the resource as
#     failed instead of always reporting success.
#
# NOTE(review): the SSH/sudo password is hard-coded in plain text and host key
# checking is disabled. The secret is already in version control history, so it
# should be rotated and replaced by key-based SSH (or at least a sensitive
# variable). Left as-is here to keep the current invocation working.
resource "null_resource" "fix_insecure_servers" {
  for_each   = toset(var.insecure_servers)
  depends_on = [local_file.secure_server_configs]

  triggers = {
    # Re-run when the rendered config changes ...
    config_hash = local_file.secure_server_configs[each.key].content_md5
    # ... and, because timestamp() differs on every run, on every apply.
    deploy_time = timestamp()
  }

  provisioner "local-exec" {
    command = <<-EOF
      echo "=== 修复 ${each.key} 的不安全配置 ==="
      echo "开始时间: $(date)"
      echo "1. 测试连接 ${each.key}..."
      ping -c 1 ${each.key}.tailnet-68f9.ts.net || echo " - ${each.key} ping 失败"
      echo "2. 上传安全配置文件..."
      if sshpass -p '3131' scp -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
        ${path.module}/generated/${each.key}-server-secure.hcl \
        ben@${each.key}.tailnet-68f9.ts.net:/tmp/nomad-secure.hcl
      then
        echo " - 文件上传成功"
      else
        echo " - 文件上传失败"
        echo " - ${each.key} 安全修复失败"
        exit 1
      fi
      echo "3. 备份旧配置并部署安全配置..."
      if sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
        ben@${each.key}.tailnet-68f9.ts.net \
        "echo '=== ${each.key} 安全配置部署开始 ===' && \
         echo '3131' | sudo -S systemctl stop nomad && \
         echo '备份不安全的配置...' && \
         echo '3131' | sudo -S cp /etc/nomad.d/nomad.hcl /etc/nomad.d/nomad.hcl.insecure.backup.\$(date +%Y%m%d_%H%M%S) && \
         echo '部署安全配置...' && \
         echo '3131' | sudo -S cp /tmp/nomad-secure.hcl /etc/nomad.d/nomad.hcl && \
         echo '清理 Raft 数据以重新加入集群...' && \
         echo '3131' | sudo -S rm -rf /opt/nomad/data/server/raft/ && \
         echo '启动服务...' && \
         echo '3131' | sudo -S systemctl start nomad && \
         sleep 10 && \
         echo '检查服务状态...' && \
         echo '3131' | sudo -S systemctl status nomad --no-pager && \
         echo '=== ${each.key} 安全配置部署完成 ==='"
      then
        echo " - ${each.key} 安全修复成功"
      else
        echo " - ${each.key} 安全修复失败"
        exit 1
      fi
      echo "=== ${each.key} 安全修复完成!时间: $(date) ==="
    EOF
  }
}
# Summary of the remediation run: the targeted nodes, the file name rendered
# for each of them, and the time at which this output was evaluated.
output "security_fix_summary" {
  value = {
    fixed_servers = var.insecure_servers
    config_files  = [for node in var.insecure_servers : "${node}-server-secure.hcl"]
    deploy_time   = timestamp()
  }
}

View File

@@ -0,0 +1,68 @@
# Secure Nomad server agent configuration, rendered for node ash1d.
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level  = "INFO"
name       = "ash1d"

# Bind only to this node's tailnet (Tailscale) address.
bind_addr = "ash1d.tailnet-68f9.ts.net"

addresses {
  http = "ash1d.tailnet-68f9.ts.net"
  rpc  = "ash1d.tailnet-68f9.ts.net"
  serf = "ash1d.tailnet-68f9.ts.net"
}

advertise {
  http = "ash1d.tailnet-68f9.ts.net:4646"
  rpc  = "ash1d.tailnet-68f9.ts.net:4647"
  serf = "ash1d.tailnet-68f9.ts.net:4648"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

server {
  enabled = true

  # Server discovery: the seven server peers this node keeps retrying to join.
  server_join {
    retry_join = [
      "semaphore.tailnet-68f9.ts.net:4647",
      "ash1d.tailnet-68f9.ts.net:4647",
      "ash2e.tailnet-68f9.ts.net:4647",
      "ch2.tailnet-68f9.ts.net:4647",
      "ch3.tailnet-68f9.ts.net:4647",
      "onecloud1.tailnet-68f9.ts.net:4647",
      "de.tailnet-68f9.ts.net:4647"
    ]
  }
}

# Consul integration: talk to the Consul agent on the loopback address.
consul {
  address             = "127.0.0.1:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Vault integration.
vault {
  enabled = false # disabled for now, until the Vault cluster is deployed
}

# Telemetry settings.
telemetry {
  collection_interval        = "1s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}

View File

@@ -0,0 +1,68 @@
# Secure Nomad server agent configuration, rendered for node ash2e.
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level  = "INFO"
name       = "ash2e"

# Bind only to this node's tailnet (Tailscale) address.
bind_addr = "ash2e.tailnet-68f9.ts.net"

addresses {
  http = "ash2e.tailnet-68f9.ts.net"
  rpc  = "ash2e.tailnet-68f9.ts.net"
  serf = "ash2e.tailnet-68f9.ts.net"
}

advertise {
  http = "ash2e.tailnet-68f9.ts.net:4646"
  rpc  = "ash2e.tailnet-68f9.ts.net:4647"
  serf = "ash2e.tailnet-68f9.ts.net:4648"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

server {
  enabled = true

  # Server discovery: the seven server peers this node keeps retrying to join.
  server_join {
    retry_join = [
      "semaphore.tailnet-68f9.ts.net:4647",
      "ash1d.tailnet-68f9.ts.net:4647",
      "ash2e.tailnet-68f9.ts.net:4647",
      "ch2.tailnet-68f9.ts.net:4647",
      "ch3.tailnet-68f9.ts.net:4647",
      "onecloud1.tailnet-68f9.ts.net:4647",
      "de.tailnet-68f9.ts.net:4647"
    ]
  }
}

# Consul integration: talk to the Consul agent on the loopback address.
consul {
  address             = "127.0.0.1:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Vault integration.
vault {
  enabled = false # disabled for now, until the Vault cluster is deployed
}

# Telemetry settings.
telemetry {
  collection_interval        = "1s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}

View File

@@ -0,0 +1,79 @@
# OpenTofu 小王 - onecloud1 配置部署
# Provider requirements for this module: the null provider, pinned to an exact
# version and resolved from the OpenTofu registry.
terraform {
  required_providers {
    null = {
      source  = "registry.opentofu.org/hashicorp/null"
      version = "3.2.4"
    }
  }
}
# Deploy Nomad server configs to ash1d and onecloud1 over SSH using local-exec.
#
# For each node, in order: ping (best effort only), scp the node's config from
# ../nomad-configs-tofu/<node>-server.hcl to /tmp/nomad-new.hcl, then over ssh
# stop Nomad, wipe the server Raft directory, install the config, start Nomad
# and check its status.
#
# Failure handling:
#   - A failed upload skips the remote steps for that node, so a stale
#     /tmp/nomad-new.hcl is never installed and Raft data is not wiped.
#   - Remote steps are chained with `&&`, so the first failing step aborts the
#     rest and the node is reported as failed.
#   - Both nodes are always attempted; the script exits non-zero if any node
#     failed, so OpenTofu no longer records a broken deploy as a success.
#
# Config paths are anchored to path.module so the upload does not depend on
# the directory OpenTofu was started from.
#
# NOTE(review): the SSH/sudo password is hard-coded in plain text and host key
# checking is disabled. The secret is already in version control history, so it
# should be rotated and replaced by key-based SSH (or at least a sensitive
# variable). Left as-is here to keep the current invocation working.
resource "null_resource" "onecloud1_deploy_via_ssh" {
  triggers = {
    # timestamp() differs on every run, so this resource is replaced (and the
    # provisioner re-executed) on every apply.
    deploy_time = timestamp()
  }

  provisioner "local-exec" {
    command = <<-EOF
      echo "=== OpenTofu 小王开始部署 onecloud1 ==="
      echo "开始时间: $(date)"
      failed=0
      step=0
      for node in ash1d onecloud1; do
        step=$((step + 1))
        echo "$step. 部署 $node 服务器配置..."
        echo " - 测试连接 $node..."
        ping -c 1 $node.tailnet-68f9.ts.net || echo " - $node ping 失败"
        echo " - 上传配置文件..."
        if sshpass -p '3131' scp -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
          ${path.module}/../nomad-configs-tofu/$node-server.hcl \
          ben@$node.tailnet-68f9.ts.net:/tmp/nomad-new.hcl
        then
          echo " - 文件上传成功"
        else
          echo " - 文件上传失败"
          echo " - $node 部署失败"
          failed=1
          continue
        fi
        echo " - 执行配置部署..."
        if sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
          ben@$node.tailnet-68f9.ts.net \
          "echo '=== $node 配置部署开始 ===' && \
           echo '3131' | sudo -S systemctl stop nomad && \
           echo '清理 Raft 数据...' && \
           echo '3131' | sudo -S rm -rf /opt/nomad/data/server/raft/ && \
           echo '替换配置文件...' && \
           echo '3131' | sudo -S cp /tmp/nomad-new.hcl /etc/nomad.d/nomad.hcl && \
           echo '启动服务...' && \
           echo '3131' | sudo -S systemctl start nomad && \
           sleep 5 && \
           echo '检查服务状态...' && \
           echo '3131' | sudo -S systemctl status nomad --no-pager && \
           echo '=== $node 部署完成 ==='"
        then
          echo " - $node 部署成功"
        else
          echo " - $node 部署失败"
          failed=1
        fi
      done
      echo "=== 小王部署完成!时间: $(date) ==="
      exit $failed
    EOF
  }
}
# Fixed status message; it is a constant string and does not reflect the
# actual outcome of the provisioner.
output "deploy_result" {
  value = "OpenTofu 小王已完成 onecloud1 部署"
}