feat: 更新OCI Provider版本至7.20并集成Vault配置
refactor: 重构Terraform配置以使用Consul和Vault存储敏感信息
docs: 添加Vault实施文档和配置指南
chore: 清理不再使用的配置文件和脚本
feat: 添加Nomad集群领导者发现脚本和文档
feat: 实现MCP配置共享方案和同步脚本
style: 更新README中的网络访问注意事项
test: 添加Consul Provider集成测试脚本
This commit is contained in:
193
scripts/nomad-leader-discovery.sh
Executable file
193
scripts/nomad-leader-discovery.sh
Executable file
@@ -0,0 +1,193 @@
|
||||
#!/bin/bash
#
# Nomad cluster leader discovery and access script.
# Discovers the current Nomad cluster leader automatically and runs the
# requested nomad command against it.

# Default server list, probed in this order (adjust to the environment).
SERVERS=(
  "100.116.158.95" # bj-semaphore.global
  "100.81.26.3"    # ash1d.global
  "100.103.147.94" # ash2e.global
  "100.90.159.68"  # ch2.global
  "100.86.141.112" # ch3.global
  "100.98.209.50"  # bj-onecloud1.global
  "100.120.225.29" # de.global
)

# Per-request timeout in seconds; the -t/--timeout option overrides it.
TIMEOUT=5

# ANSI color sequences. They are stored with a literal "\033", so they only
# render when printed through `echo -e` (or printf's %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
|
||||
|
||||
#######################################
# Print the usage/help text for this script.
# Globals:   TIMEOUT (read) - displayed as the default timeout value
# Arguments: none
# Outputs:   help text on stdout, ending with one blank line
#######################################
function show_help() {
  # Unquoted delimiter on purpose: $0 and $TIMEOUT must expand.
  cat <<EOF
Nomad 集群领导者发现与访问脚本

用法: $0 [选项] [nomad命令]

选项:
 -h, --help 显示此帮助信息
 -s, --server IP 指定初始服务器IP
 -t, --timeout SECS 设置超时时间(默认: $TIMEOUT 秒)
 -l, --list-servers 列出所有配置的服务器
 -c, --check-leader 仅检查领导者,不执行命令

示例:
 $0 node status # 使用自动发现的领导者查看节点状态
 $0 -s 100.116.158.95 job status # 指定初始服务器查看作业状态
 $0 -c # 仅检查当前领导者

EOF
}
|
||||
|
||||
#######################################
# List every configured server, one per line.
# Globals:   SERVERS (read), YELLOW, NC (read)
# Arguments: none
# Outputs:   a colored heading followed by " - <server>" lines on stdout
#######################################
function list_servers() {
  # Keep the loop variable local so a caller's $server is not clobbered.
  local server

  echo -e "${YELLOW}配置的服务器列表:${NC}"
  for server in "${SERVERS[@]}"; do
    printf ' - %s\n' "$server"
  done
}
|
||||
|
||||
#######################################
# Ask a single Nomad server who the current leader is.
# Globals:   TIMEOUT (read) - curl --max-time value in seconds
# Arguments: $1 - host or IP of the server to query (HTTP port 4646)
# Outputs:   the leader as "host:port" on stdout, with the RPC port 4647
#            rewritten to the HTTP port 4646; nothing on failure
# Returns:   0 when a plausible address was obtained, 1 otherwise
#######################################
function _nomad_query_leader() {
  local server=$1
  local raw
  local -r addr_re='^[^[:space:]]+:[0-9]+$'

  # -f turns HTTP error responses into a failure instead of returning their
  # body, which would otherwise be mistaken for a leader address.
  raw=$(curl -sf --max-time "$TIMEOUT" \
    "http://${server}:4646/v1/status/leader" 2>/dev/null) || return 1

  # The endpoint answers with a JSON string; drop the surrounding quotes.
  raw=${raw//\"/}

  # Reject empty answers and anything that is not "<host>:<port>".
  [[ "$raw" =~ $addr_re ]] || return 1

  # Only a trailing ":4647" is rewritten, exactly like the former sed call.
  printf '%s\n' "${raw/%:4647/:4646}"
}

#######################################
# Discover the current Nomad cluster leader.
# Tries the optional initial server first, then every entry of SERVERS in
# order, and stops at the first server that reports a leader.
# Globals:   SERVERS, TIMEOUT (read); RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - optional server to try first (may be empty or omitted)
# Outputs:   leader "host:port" on stdout; progress messages on stderr
# Returns:   0 when a leader was found, 1 otherwise
#######################################
function discover_leader() {
  local initial_server=${1-}
  local leader server

  # If an initial server was given, try it before anything else.
  if [ -n "$initial_server" ]; then
    echo -e "${YELLOW}尝试从服务器 $initial_server 发现领导者...${NC}" >&2
    if leader=$(_nomad_query_leader "$initial_server"); then
      echo -e "${GREEN}发现领导者: $leader${NC}" >&2
      echo "$leader"
      return 0
    fi
    echo -e "${RED}无法从 $initial_server 获取领导者信息${NC}" >&2
  fi

  # Walk the configured servers until one of them answers.
  echo -e "${YELLOW}遍历所有服务器寻找领导者...${NC}" >&2
  for server in "${SERVERS[@]}"; do
    echo -n " 检查 $server ... " >&2
    if leader=$(_nomad_query_leader "$server"); then
      echo -e "${GREEN}成功${NC}" >&2
      echo -e "${GREEN}发现领导者: $leader${NC}" >&2
      echo "$leader"
      return 0
    fi
    echo -e "${RED}失败${NC}" >&2
  done

  echo -e "${RED}无法发现领导者,请检查集群状态${NC}" >&2
  return 1
}
|
||||
|
||||
# Command-line state filled in by parse_args and read by the main flow.
INITIAL_SERVER=""
CHECK_LEADER_ONLY=false
NOMAD_COMMAND=()

#######################################
# Parse the script's command-line arguments.
# Globals:   INITIAL_SERVER, TIMEOUT, CHECK_LEADER_ONLY, NOMAD_COMMAND
#            (written); RED, NC (read)
# Arguments: the script's arguments ("$@")
# Outputs:   error messages on stderr
# Returns:   does not return for -h/-l (exit 0) or on invalid usage (exit 1)
#######################################
function parse_args() {
  local -r timeout_re='^[0-9]+([.][0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -s|--server)
        # Without this guard `shift 2` fails without shifting when the value
        # is missing, and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}选项 $1 需要一个参数${NC}" >&2
          exit 1
        fi
        INITIAL_SERVER="$2"
        shift 2
        ;;
      -t|--timeout)
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}选项 $1 需要一个参数${NC}" >&2
          exit 1
        fi
        # The value is handed to curl --max-time, so it must be a number.
        if [[ ! "$2" =~ $timeout_re ]]; then
          echo -e "${RED}无效的超时时间: $2${NC}" >&2
          exit 1
        fi
        TIMEOUT="$2"
        shift 2
        ;;
      -l|--list-servers)
        list_servers
        exit 0
        ;;
      -c|--check-leader)
        CHECK_LEADER_ONLY=true
        shift
        ;;
      --)
        # Everything after "--" belongs to nomad verbatim, so nomad flags
        # that collide with the options above (e.g. -t) can be passed.
        shift
        NOMAD_COMMAND+=("$@")
        break
        ;;
      *)
        NOMAD_COMMAND+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Main flow: discover the leader, then run the requested nomad command
# against it, retrying once if the leader changed in the meantime.
echo -e "${YELLOW}Nomad 集群领导者发现与访问脚本${NC}" >&2
echo "==================================" >&2

# Discover the leader; discover_leader reports its own errors on stderr.
if ! LEADER=$(discover_leader "$INITIAL_SERVER"); then
  exit 1
fi

# Split the "host:port" pair into its two parts.
LEADER_IP=${LEADER%%:*}
LEADER_PORT=${LEADER##*:}

# In check-only mode report the leader and stop.
if [ "$CHECK_LEADER_ONLY" = true ]; then
  echo -e "${GREEN}当前领导者: $LEADER${NC}" >&2
  exit 0
fi

# No command given: offer an interactive menu.
if [ ${#NOMAD_COMMAND[@]} -eq 0 ]; then
  echo -e "${YELLOW}未指定命令,请选择要执行的操作:${NC}" >&2
  echo "1) 查看节点状态" >&2
  echo "2) 查看作业状态" >&2
  echo "3) 查看服务器成员" >&2
  echo "4) 查看集群状态" >&2
  echo "5) 自定义命令" >&2
  echo "0) 退出" >&2

  # -r keeps backslashes in the typed input literal.
  read -r -p "请输入选项 (0-5): " choice

  case "$choice" in
    1) NOMAD_COMMAND=("node" "status") ;;
    2) NOMAD_COMMAND=("job" "status") ;;
    3) NOMAD_COMMAND=("server" "members") ;;
    4) NOMAD_COMMAND=("operator" "raft" "list-peers") ;;
    5)
      read -r -p "请输入完整的 Nomad 命令: " -a NOMAD_COMMAND
      # An empty answer would otherwise run a bare `nomad`.
      if [ ${#NOMAD_COMMAND[@]} -eq 0 ]; then
        echo -e "${RED}未输入命令${NC}" >&2
        exit 1
      fi
      ;;
    0) exit 0 ;;
    *)
      echo -e "${RED}无效选项${NC}" >&2
      exit 1
      ;;
  esac
fi

# Run the command. The address is passed through NOMAD_ADDR rather than a
# trailing -address flag: nomad only parses flags that come before the
# positional arguments, so an appended flag breaks commands such as
# "job status <job>".
echo -e "${YELLOW}执行命令: NOMAD_ADDR=http://${LEADER} nomad ${NOMAD_COMMAND[*]}${NC}" >&2
if NOMAD_ADDR="http://${LEADER}" nomad "${NOMAD_COMMAND[@]}"; then
  echo -e "${GREEN}命令执行成功${NC}" >&2
else
  echo -e "${RED}命令执行失败,可能需要重新发现领导者${NC}" >&2
  echo -e "${YELLOW}尝试重新发现领导者...${NC}" >&2
  # Retry only when discovery yields a different leader; otherwise the
  # failure was not caused by a leadership change.
  if NEW_LEADER=$(discover_leader) && [ "$NEW_LEADER" != "$LEADER" ]; then
    echo -e "${YELLOW}领导者已更改,重新执行命令...${NC}" >&2
    NOMAD_ADDR="http://${NEW_LEADER}" nomad "${NOMAD_COMMAND[@]}"
  else
    echo -e "${RED}无法恢复,请检查集群状态${NC}" >&2
    exit 1
  fi
fi
|
||||
275
scripts/test-traefik-deployment.sh
Normal file
275
scripts/test-traefik-deployment.sh
Normal file
@@ -0,0 +1,275 @@
|
||||
#!/bin/bash
#
# Traefik deployment test script.
# Tests the deployment and behavior of Traefik on the Nomad cluster.

# Abort on the first unhandled command failure.
set -e

# ANSI color sequences. They are stored with a literal "\033", so they only
# render when printed through `echo -e` (or printf's %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
|
||||
|
||||
# Logging helpers.

#######################################
# Print an informational message prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   the tagged message on stdout
#######################################
log_info() {
  # %b interprets the "\033" escapes stored in the color variables, as
  # `echo -e` would.
  printf '%b\n' "${GREEN}[INFO]${NC} ${1-}"
}
|
||||
|
||||
#######################################
# Print a warning prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   the tagged message on stderr, keeping diagnostics out of any
#            captured stdout
#######################################
log_warn() {
  # %b interprets the "\033" escapes stored in the color variables, as
  # `echo -e` would.
  printf '%b\n' "${YELLOW}[WARN]${NC} ${1-}" >&2
}
|
||||
|
||||
#######################################
# Print an error prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   the tagged message on stderr, keeping diagnostics out of any
#            captured stdout
#######################################
log_error() {
  # %b interprets the "\033" escapes stored in the color variables, as
  # `echo -e` would.
  printf '%b\n' "${RED}[ERROR]${NC} ${1-}" >&2
}
|
||||
|
||||
#######################################
# Check the state of the Nomad cluster.
# Uses the leader discovery script when it exists, otherwise falls back to
# `nomad server members`.
# Globals:   NOMAD_LEADER_DISCOVERY_SCRIPT (read, optional) - path of the
#              discovery script; defaults to the path under /root/mgmt
#            LEADER_INFO (written) - combined output of the discovery script
# Arguments: none
# Outputs:   progress and results through log_info/log_warn/log_error
# Returns:   1 when the discovery script fails, 0 otherwise
#######################################
check_nomad_cluster() {
  local script="${NOMAD_LEADER_DISCOVERY_SCRIPT:-/root/mgmt/scripts/nomad-leader-discovery.sh}"

  log_info "检查Nomad集群状态..."

  if [ -f "$script" ]; then
    # Run it through bash so the file need not be executable; this replaces
    # the former chmod +x, which modified the file and could itself fail.
    # Testing the status in the `if` keeps `set -e` from aborting silently
    # before the failure is reported.
    if LEADER_INFO=$(bash "$script" -c 2>&1); then
      log_info "Nomad领导者信息: $LEADER_INFO"
    else
      log_error "无法获取Nomad领导者信息: $LEADER_INFO"
      return 1
    fi
  else
    log_warn "未找到Nomad领导者发现脚本,使用默认方式检查"
    # stderr is dropped on purpose; the failure is reported by log_error.
    nomad server members 2>/dev/null || log_error "无法连接到Nomad集群"
  fi
}
|
||||
|
||||
#######################################
# Check the state of the Consul cluster.
# Lists the members and asks the local agent for the current leader.
# Globals:   CONSUL_LEADER (written) - leader address without JSON quotes,
#              empty when it could not be determined
# Arguments: none
# Outputs:   `consul members` output on stdout; results through
#            log_info/log_error
# Returns:   0; failures are reported but are not fatal
#######################################
check_consul_cluster() {
  log_info "检查Consul集群状态..."

  # stderr is dropped on purpose; the failure is reported by log_error.
  consul members 2>/dev/null || log_error "无法连接到Consul集群"

  # Query the leader. The `||` keeps `set -e` from aborting on a curl
  # failure, so the error branch below can actually run. -f rejects HTTP
  # error bodies and --max-time bounds the wait on an unresponsive agent.
  CONSUL_LEADER=$(curl -sf --max-time 5 \
    http://127.0.0.1:8500/v1/status/leader 2>/dev/null) || CONSUL_LEADER=""

  # The endpoint answers with a JSON string; an empty one ("") means there
  # is no leader, so strip the quotes before testing for emptiness.
  CONSUL_LEADER=${CONSUL_LEADER//\"/}

  if [ -n "$CONSUL_LEADER" ]; then
    log_info "Consul领导者: $CONSUL_LEADER"
  else
    log_error "无法获取Consul领导者信息"
  fi
}
|
||||
|
||||
#######################################
# Deploy the Traefik job to Nomad and show its status.
# Globals:   TRAEFIK_JOB_FILE (read, optional) - job file to submit;
#              defaults to /root/mgmt/jobs/traefik.nomad
#            TRAEFIK_DEPLOY_WAIT (read, optional) - seconds to wait after
#              submitting the job; defaults to 10
# Arguments: none
# Outputs:   nomad output on stdout; progress through log_info/log_error
# Returns:   1 when the job submission fails; exits the script with 1 when
#            the job file is missing
#######################################
deploy_traefik() {
  local job_file="${TRAEFIK_JOB_FILE:-/root/mgmt/jobs/traefik.nomad}"
  local wait_secs="${TRAEFIK_DEPLOY_WAIT:-10}"

  log_info "部署Traefik..."

  # Make sure the job file exists before submitting it.
  if [ ! -f "$job_file" ]; then
    log_error "Traefik作业文件不存在: $job_file"
    exit 1
  fi

  # Submit the job; the failure is checked here instead of relying on the
  # caller running under `set -e`.
  if ! nomad run "$job_file"; then
    log_error "Traefik作业提交失败"
    return 1
  fi

  # Give the deployment some time to settle.
  log_info "等待Traefik部署完成..."
  sleep "$wait_secs"

  # Show the resulting job status.
  nomad status traefik
}
|
||||
|
||||
#######################################
# Check the Traefik job, its allocations and its Consul registration.
# Globals:   JOB_STATUS (written) - status string of the traefik job
# Arguments: none
# Outputs:   results through log_info/log_warn/log_error; the matching
#            `consul catalog services` line on stdout
# Returns:   1 when the job is not "running", 0 otherwise (unhealthy
#            allocations and a missing registration are only reported)
#######################################
check_traefik_status() {
  local allocs_json alloc alloc_status

  log_info "检查Traefik状态..."

  # Job status. nomad only parses flags placed before the positional
  # arguments, so -json has to precede the job name.
  JOB_STATUS=$(nomad job status -json traefik | jq -r '.Status')
  if [ "$JOB_STATUS" == "running" ]; then
    log_info "Traefik作业状态: $JOB_STATUS"
  else
    log_error "Traefik作业状态异常: $JOB_STATUS"
    return 1
  fi

  # Allocation status. Reading the JSON listing avoids slicing the text
  # table with tail/head, which dropped the first and the last allocation,
  # and gives ID and ClientStatus in a single call.
  allocs_json=$(nomad job allocs -json traefik) || allocs_json='[]'
  while read -r alloc alloc_status; do
    [ -n "$alloc" ] || continue
    if [ "$alloc_status" == "running" ]; then
      log_info "分配 $alloc 状态: $alloc_status"
    else
      log_error "分配 $alloc 状态异常: $alloc_status"
    fi
  done < <(jq -r '.[]? | "\(.ID) \(.ClientStatus)"' <<<"$allocs_json")

  # Service registration. A real if/else, so a failing log_info can no
  # longer fall through into the warning branch.
  log_info "检查Consul中的服务注册..."
  if consul catalog services | grep traefik; then
    log_info "Traefik服务已注册到Consul"
  else
    log_warn "Traefik服务未注册到Consul"
  fi
}
|
||||
|
||||
#######################################
# Probe a running Traefik instance: API ping, dashboard and HTTP entrypoint.
# Globals:   TRAEFIK_ADDR (written) - address used for the probes, taken
#              from the Consul catalog or 127.0.0.1 as a fallback
# Arguments: none
# Outputs:   results through log_info/log_warn/log_error
# Returns:   0; failed probes are reported but are not fatal
#######################################
test_traefik_functionality() {
  local -r curl_timeout=5
  local catalog_json
  # First non-empty value of ServiceAddress / Address for the first catalog
  # entry; prints nothing (rather than "null") when there is none.
  local -r addr_filter='(.[0] // {}) | [.ServiceAddress, .Address]
    | map(select(. != null and . != "")) | first // empty'

  log_info "测试Traefik功能..."

  # Look the service up through the local agent's catalog HTTP endpoint;
  # the consul CLI has no `catalog service <name>` subcommand.
  catalog_json=$(curl -sf --max-time "$curl_timeout" \
    http://127.0.0.1:8500/v1/catalog/service/traefik 2>/dev/null) \
    || catalog_json='[]'
  TRAEFIK_ADDR=$(jq -r "$addr_filter" <<<"$catalog_json" 2>/dev/null) \
    || TRAEFIK_ADDR=""
  if [ -z "$TRAEFIK_ADDR" ]; then
    log_warn "无法从Consul获取Traefik地址,使用本地地址"
    TRAEFIK_ADDR="127.0.0.1"
  fi

  # API endpoint. -f makes HTTP error statuses count as failures and
  # --max-time keeps an unreachable host from hanging the test.
  log_info "测试Traefik API端点..."
  if curl -sf --max-time "$curl_timeout" \
    "http://${TRAEFIK_ADDR}:8080/ping" >/dev/null; then
    log_info "Traefik API端点响应正常"
  else
    log_error "Traefik API端点无响应"
  fi

  # Dashboard.
  log_info "测试Traefik仪表板..."
  if curl -sf --max-time "$curl_timeout" \
    "http://${TRAEFIK_ADDR}:8080/dashboard/" >/dev/null; then
    log_info "Traefik仪表板可访问"
  else
    log_error "无法访问Traefik仪表板"
  fi

  # HTTP entrypoint: expect a redirect to HTTPS. Header names are
  # case-insensitive, so the match is too.
  log_info "测试HTTP入口点..."
  if curl -s -I --max-time "$curl_timeout" "http://${TRAEFIK_ADDR}:80" \
    | grep -qi "^Location: https://"; then
    log_info "HTTP到HTTPS重定向正常工作"
  else
    log_warn "HTTP到HTTPS重定向可能未正常工作"
  fi
}
|
||||
|
||||
#######################################
# Deploy a small nginx test job and check that it is reachable via Traefik.
# Globals:   TRAEFIK_ADDR (read, optional) - Traefik address to probe;
#              defaults to 127.0.0.1 when unset, e.g. when this step is run
#              on its own
#            TEST_SERVICE_WAIT (read, optional) - seconds to wait for the
#              service to start; defaults to 15
# Arguments: none
# Outputs:   nomad output on stdout; results through log_info/log_error
# Returns:   1 when the job file cannot be created or submitted, 0 otherwise
#######################################
create_test_service() {
  local addr="${TRAEFIK_ADDR:-127.0.0.1}"
  local wait_secs="${TEST_SERVICE_WAIT:-15}"
  local job_file

  log_info "创建测试服务..."

  # Use an unpredictable temp file instead of a fixed name under /tmp.
  job_file=$(mktemp "${TMPDIR:-/tmp}/test-service.XXXXXX") || {
    log_error "无法创建临时作业文件"
    return 1
  }

  # Quoted delimiter: the job spec is written literally. Unquoted, the
  # backticks in the Host(...) rule would run as a command substitution.
  # The port maps to 80 because that is where the nginx image listens.
  cat > "$job_file" <<'EOF'
job "test-web" {
  datacenters = ["dc1"]
  type = "service"

  group "web" {
    count = 1

    network {
      port "http" {
        to = 80
      }
    }

    task "nginx" {
      driver = "podman"

      config {
        image = "nginx:alpine"
        ports = ["http"]
      }

      resources {
        cpu = 100
        memory = 64
      }

      service {
        name = "test-web"
        port = "http"
        tags = [
          "traefik.enable=true",
          "traefik.http.routers.test-web.rule=Host(`test-web.service.consul`)",
          "traefik.http.routers.test-web.entrypoints=https"
        ]

        check {
          type = "http"
          path = "/"
          interval = "10s"
          timeout = "2s"
        }
      }
    }
  }
}
EOF

  # Submit the test job; the spec file is not needed afterwards.
  if ! nomad run "$job_file"; then
    rm -f -- "$job_file"
    log_error "测试服务部署失败"
    return 1
  fi
  rm -f -- "$job_file"

  # Wait for the service to start.
  sleep "$wait_secs"

  # NOTE(review): the router above is bound to the "https" entrypoint while
  # this probe uses plain HTTP on port 80; if port 80 only redirects to
  # HTTPS this check cannot succeed. Confirm the intended entrypoint/port.
  log_info "测试服务是否可通过Traefik访问..."
  if curl -s --max-time 5 -H "Host: test-web.service.consul" \
    "http://${addr}:80" | grep -q "Welcome to nginx"; then
    log_info "测试服务可通过Traefik正常访问"
  else
    log_error "无法通过Traefik访问测试服务"
  fi
}
|
||||
|
||||
#######################################
# Remove everything the test run created: the test job, the Traefik job and
# the temporary job file.
# Arguments: none
# Outputs:   nomad output on stdout; progress through log_info
# Returns:   0; every step is best effort
#######################################
cleanup_test_resources() {
  log_info "清理测试资源..."

  # Stop and purge the jobs. Purging is an option of `job stop`; nomad has
  # no `job purge` subcommand. Errors are ignored on purpose because the
  # jobs may not exist.
  nomad job stop -purge test-web 2>/dev/null || true
  nomad job stop -purge traefik 2>/dev/null || true

  # Remove the temporary job file.
  rm -f /tmp/test-service.nomad

  log_info "清理完成"
}
|
||||
|
||||
#######################################
# Print the list of supported steps.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
print_usage() {
  cat <<EOF
用法: $0 {check|deploy|status|test|test-service|cleanup|all}
 check - 检查集群状态
 deploy - 部署Traefik
 status - 检查Traefik状态
 test - 测试Traefik功能
 test-service - 创建并测试示例服务
 cleanup - 清理测试资源
 all - 执行所有步骤(默认)
EOF
}

#######################################
# Entry point: run the step named by the first argument.
# Arguments: $1 - step name (check, deploy, status, test, test-service,
#                 cleanup, all); defaults to "all"
# Outputs:   whatever the selected steps print
# Returns:   exits with 0 after printing help, with 1 (usage on stderr) for
#            an unknown step
#######################################
main() {
  case "${1:-all}" in
    check)
      check_nomad_cluster
      check_consul_cluster
      ;;
    deploy)
      deploy_traefik
      ;;
    status)
      check_traefik_status
      ;;
    test)
      test_traefik_functionality
      ;;
    test-service)
      create_test_service
      ;;
    cleanup)
      cleanup_test_resources
      ;;
    all)
      # Cleanup is not part of "all"; it has to be requested explicitly.
      check_nomad_cluster
      check_consul_cluster
      deploy_traefik
      check_traefik_status
      test_traefik_functionality
      create_test_service
      log_info "所有测试完成"
      ;;
    -h|--help|help)
      print_usage
      exit 0
      ;;
    *)
      print_usage >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Run the main function with the script's command-line arguments.
main "$@"
|
||||
Reference in New Issue
Block a user