REMOVE: 删除不再使用的 Terraform 配置文件
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 7m45s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 2m33s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Simple Test / test (push) Failing after 2m48s

- 移除 nomad-terraform.tf 和 test_opentofu_consul.tf 文件
- 更新 Ansible inventory,注释掉不存在的节点 hcp2
- 修改 inventory.ini,确保节点配置的准确性
- 在 nomad-config 模块中添加 null_provider 以支持新配置
- 更新 influxdb1.hcl,添加 Grafana 和 Prometheus 数据卷配置
This commit is contained in:
2025-10-10 13:53:41 +00:00
parent 45f93cc68c
commit eff8d3ec6d
50 changed files with 3683 additions and 239 deletions

View File

@@ -0,0 +1,48 @@
---
# Ansible scout playbook: survey the baseline environment of every client node.
# All shell tasks below only read state, so they are marked
# `changed_when: false` to keep the play recap from reporting spurious changes.
- name: 侦察客户端节点基础环境
  hosts: all
  gather_facts: true
  tasks:
    # Uses facts collected by gather_facts.
    - name: 收集系统架构信息
      debug:
        msg: "节点 {{ inventory_hostname }} - 架构: {{ ansible_architecture }} - 系统: {{ ansible_distribution }} {{ ansible_distribution_version }}"

    # Each probe falls back to a placeholder when the binary cannot be run.
    - name: 检查 HashiCorp 软件包安装状态
      shell: |
        echo "=== HashiCorp 软件包检查 ==="
        echo "Nomad: $(nomad version 2>/dev/null || echo '未安装')"
        echo "Consul: $(consul version 2>/dev/null || echo '未安装')"
        echo "Vault: $(vault version 2>/dev/null || echo '未安装')"
      register: hashicorp_status
      changed_when: false

    - name: 检查 HashiCorp 软件源配置
      shell: |
        echo "=== 软件源配置检查 ==="
        if [ -f /etc/apt/sources.list.d/hashicorp.list ]; then
          echo "HashiCorp 源文件存在:"
          cat /etc/apt/sources.list.d/hashicorp.list
        else
          echo "HashiCorp 源文件不存在"
        fi
      register: sources_status
      changed_when: false

    # NOTE(review): `systemctl is-active` presumably prints its own state
    # (e.g. "inactive") before exiting non-zero, in which case the fallback
    # text is appended after it - confirm the output is as intended.
    - name: 检查系统服务状态
      shell: |
        echo "=== 系统服务状态 ==="
        echo "Nomad: $(systemctl is-active nomad 2>/dev/null || echo '未配置')"
        echo "Consul: $(systemctl is-active consul 2>/dev/null || echo '未配置')"
        echo "Podman: $(systemctl is-active podman 2>/dev/null || echo '未配置')"
      register: services_status
      changed_when: false

    # Combine the three registered outputs into one per-node report.
    - name: 显示侦察结果
      debug:
        msg: |
          ==========================================
          节点: {{ inventory_hostname }}
          架构: {{ ansible_architecture }}
          ==========================================
          {{ hashicorp_status.stdout }}
          {{ sources_status.stdout }}
          {{ services_status.stdout }}
          ==========================================

View File

@@ -0,0 +1,170 @@
#!/bin/bash
# HCP cluster prerequisite check script.
# Checks the HashiCorp package installation status on every client node.
#
# Environment:
#   HCP_SSH_PASSWORD  optional; SSH/sudo password used for user "ben" on the
#                     nodes. Falls back to the built-in default when unset.

set -e

# Client nodes to inspect.
CLIENT_NODES=(
  "ash2e.tailnet-68f9.ts.net"
  "ash1d.tailnet-68f9.ts.net"
  "hcp1.tailnet-68f9.ts.net"
  "influxdb.tailnet-68f9.ts.net"
  "ash3c.tailnet-68f9.ts.net"
  "ch4.tailnet-68f9.ts.net"
  "warden.tailnet-68f9.ts.net"
  "browser.tailnet-68f9.ts.net"
)

# Kept as a plain string on purpose: the functions below expand it unquoted
# so that it word-splits into separate ssh options.
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

# NOTE(security): a password committed to the repository is readable by anyone
# with access to it. Supply HCP_SSH_PASSWORD from the environment instead; the
# literal default is retained only so existing invocations keep working.
PASSWORD="${HCP_SSH_PASSWORD:-3131}"

echo "=== HCP 集群先决条件检查开始 ==="
echo "检查时间: $(date)"
echo
#######################################
# Check one node: reachability, HashiCorp apt source, binaries and services.
# Globals:   PASSWORD, SSH_OPTS (read)
# Arguments: $1 - hostname of the node to check
# Outputs:   human-readable report on stdout
# Returns:   0 - node reachable, apt source file and all binaries present
#            1 - node not reachable via ping or SSH
#            2 - node reachable, but the apt source or binary check did not
#                succeed (something is missing or the remote command failed)
#######################################
check_node_prerequisites() {
  local node=$1
  local rc=0

  echo "检查节点: $node"

  # Network reachability.
  if ! ping -c 1 -W 2 "$node" >/dev/null 2>&1; then
    echo " ❌ 网络不通"
    return 1
  fi

  # SSH login. $SSH_OPTS is intentionally unquoted so it splits into options.
  if ! sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "echo 'SSH OK'" >/dev/null 2>&1; then
    echo " ❌ SSH 连接失败"
    return 1
  fi
  echo " ✅ 网络和 SSH 连接正常"

  # apt source configuration. The remote snippet exits non-zero when the
  # source file is absent so the caller can tell the node needs repair.
  echo " 检查 HashiCorp 软件源..."
  sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "
    if [ -f /etc/apt/sources.list.d/hashicorp.list ]; then
      echo ' ✅ HashiCorp 软件源文件存在'
      if grep -q 'trusted=yes' /etc/apt/sources.list.d/hashicorp.list; then
        echo ' ✅ 已配置 trusted=yes'
      else
        echo ' ⚠️ 未配置 trusted=yes'
      fi
      sed 's/^/ /' /etc/apt/sources.list.d/hashicorp.list
    else
      echo ' ❌ HashiCorp 软件源文件不存在'
      exit 1
    fi
  " || rc=2

  # Installed binaries. Any missing binary makes the remote snippet fail.
  echo " 检查 HashiCorp 二进制文件..."
  sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "
    missing=0
    for binary in nomad consul vault; do
      if command -v \$binary >/dev/null 2>&1; then
        version=\$(\$binary version | head -n1)
        echo \" ✅ \$binary: \$version\"
      else
        echo \" ❌ \$binary: 未安装\"
        missing=1
      fi
    done
    exit \$missing
  " || rc=2

  # Service state is informational only and does not affect the return code.
  echo " 检查系统服务状态..."
  sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "
    for service in nomad consul; do
      if systemctl is-enabled \$service >/dev/null 2>&1; then
        status=\$(systemctl is-active \$service)
        echo \" \$service: \$status\"
      else
        echo \" \$service: 未配置\"
      fi
    done
  "
  echo

  return "$rc"
}
#######################################
# Rewrite the HashiCorp apt source on a node and refresh the package lists.
# Globals:   PASSWORD, SSH_OPTS (read)
# Arguments: $1 - hostname of the node to repair
# Outputs:   progress messages on stdout
# Returns:   0 on success; non-zero if ssh or any remote step fails
#
# NOTE(security): `trusted=yes` disables apt signature verification for this
# repository, and the password is embedded in the remote command text. Both
# are kept from the original behaviour; review before wider use.
#######################################
fix_hashicorp_sources() {
  local node=$1

  echo "修复节点 $node 的 HashiCorp 软件源配置..."

  # Remote steps:
  #   1. `set -e` so a failed sudo/apt step stops the script instead of
  #      falling through to the success message.
  #   2. Back up the existing list file, if any.
  #   3. Write the new list through `sudo sh -c`: stdin is reserved for the
  #      sudo password, so the file content cannot also arrive on stdin
  #      (a heredoc there would be read as the password).
  #   4. Use the node's own dpkg architecture instead of assuming amd64.
  #   5. Refresh the package lists.
  # $SSH_OPTS is intentionally unquoted so it splits into options.
  sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "
    set -e
    echo '修复 HashiCorp 软件源配置...'
    list=/etc/apt/sources.list.d/hashicorp.list
    if [ -f \"\$list\" ]; then
      echo '$PASSWORD' | sudo -S cp \"\$list\" \"\${list}.bak\"
    fi
    arch=\$(dpkg --print-architecture)
    echo '$PASSWORD' | sudo -S sh -c \"echo 'deb [arch=\$arch trusted=yes] https://apt.releases.hashicorp.com jammy main' > \$list\"
    echo '$PASSWORD' | sudo -S apt update
    echo '✅ HashiCorp 软件源配置已修复'
  "
}
#######################################
# Install the nomad, consul and vault packages on a node via apt.
# Globals:   PASSWORD, SSH_OPTS (read)
# Arguments: $1 - hostname of the node
# Outputs:   progress messages on stdout
# Returns:   0 on success; non-zero if ssh or the remote apt install fails
#######################################
install_missing_packages() {
  local node=$1

  echo "在节点 $node 上安装 HashiCorp 软件包..."

  # `set -e` on the remote side makes a failed install stop the script, so
  # the completion message is only printed when apt actually succeeded.
  # $SSH_OPTS is intentionally unquoted so it splits into options.
  sshpass -p "$PASSWORD" ssh $SSH_OPTS ben@"$node" "
    set -e
    echo '安装 HashiCorp 软件包...'
    echo '$PASSWORD' | sudo -S apt install -y nomad consul vault
    echo '✅ HashiCorp 软件包安装完成'
  "
}
#######################################
# Main flow: check every node, print a summary, optionally repair the failed
# nodes and check them again.
# Globals:   CLIENT_NODES (read)
# Arguments: none used
# Outputs:   progress and summary on stdout; failure notices on stderr;
#            reads a y/N confirmation from stdin
# Returns:   0 when every node passes, when repair is declined, or when all
#            repaired nodes pass the recheck; 1 when a node still fails after
#            the repair attempt
#######################################
main() {
  local node
  local response=""
  local failed_nodes=()
  local still_failing=()

  # First pass: check every node.
  for node in "${CLIENT_NODES[@]}"; do
    if ! check_node_prerequisites "$node"; then
      failed_nodes+=("$node")
    fi
  done

  # Summary.
  echo "=== 检查结果汇总 ==="
  if [ ${#failed_nodes[@]} -eq 0 ]; then
    echo "✅ 所有节点先决条件检查通过"
    return 0
  fi

  echo "⚠️ 以下节点需要修复:"
  for node in "${failed_nodes[@]}"; do
    echo " - $node"
  done
  echo
  echo "是否要自动修复这些节点? (y/N)"
  # `read` returns non-zero at EOF; do not let `set -e` abort on that.
  read -r response || true
  if [[ ! "$response" =~ ^[Yy]$ ]]; then
    return 0
  fi

  # Repair each failed node; one node failing must not stop the others.
  for node in "${failed_nodes[@]}"; do
    echo "修复节点: $node"
    if ! { fix_hashicorp_sources "$node" && install_missing_packages "$node"; }; then
      echo "❌ 节点 $node 修复失败" >&2
    fi
    echo
  done

  # Recheck inside an `if` so a still-failing node does not trip `set -e`
  # and cut the remaining rechecks short.
  echo "=== 重新检查修复后的节点 ==="
  for node in "${failed_nodes[@]}"; do
    if ! check_node_prerequisites "$node"; then
      still_failing+=("$node")
    fi
  done

  if [ ${#still_failing[@]} -ne 0 ]; then
    echo "⚠️ 以下节点修复后仍未通过检查:" >&2
    for node in "${still_failing[@]}"; do
      echo " - $node" >&2
    done
    return 1
  fi
  return 0
}

main "$@"