mgmt/scripts/test-traefik-deployment.sh

275 lines
7.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Traefik deployment test script.
# Tests the deployment and functionality of Traefik in a Nomad cluster.
# Usage: <script> {check|deploy|status|test|test-service|cleanup|all}
#   (the mode defaults to "all" when no argument is given)
# Abort as soon as any unguarded command exits with a non-zero status.
set -e
# Color definitions: backslash-escape strings that are interpreted when the
# log helpers print them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Logging functions
# Print an informational message prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read) - color escape strings
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
log_info() {
  # %b expands the backslash escapes stored in the color variables, while %s
  # prints the message literally, so backslashes inside the message (for
  # example in captured command output) are not reinterpreted as escapes.
  printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
}
# Print a warning message prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read) - color escape strings
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
log_warn() {
  # %b expands the escapes in the color variables only; %s keeps the message
  # literal so backslashes in it are not treated as escape sequences.
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
}
# Print an error message prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read) - color escape strings
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
log_error() {
  # %b expands the escapes in the color variables only; %s keeps the message
  # literal so backslashes in it are not treated as escape sequences.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}
# Check the Nomad cluster status.
# Runs the leader discovery helper script when it is present; otherwise falls
# back to `nomad server members`.
# Globals:   LEADER_INFO (written) - combined stdout/stderr of the helper
#            NOMAD_LEADER_DISCOVERY_SCRIPT (read, optional) - overrides the
#              helper script path; defaults to the path under /root/mgmt
# Returns:   non-zero when the helper script exists but exits with an error
check_nomad_cluster() {
  local discovery_script="${NOMAD_LEADER_DISCOVERY_SCRIPT:-/root/mgmt/scripts/nomad-leader-discovery.sh}"

  log_info "检查Nomad集群状态..."
  # Use the leader discovery script created earlier, if it is available
  if [ -f "$discovery_script" ]; then
    chmod +x "$discovery_script"
    # Run the helper inside an `if` so that, with `set -e` active, a failure
    # is reported together with the helper's output instead of silently
    # terminating the whole script at the assignment.
    if LEADER_INFO=$("$discovery_script" -c 2>&1); then
      log_info "Nomad领导者信息: $LEADER_INFO"
    else
      log_error "Nomad领导者发现脚本执行失败: $LEADER_INFO"
      return 1
    fi
  else
    log_warn "未找到Nomad领导者发现脚本使用默认方式检查"
    # Best effort: a connection failure is logged but does not abort
    nomad server members 2>/dev/null || log_error "无法连接到Nomad集群"
  fi
}
# Check the Consul cluster status.
# Lists the cluster members and queries the local agent's HTTP API for the
# current leader.
# Globals:   CONSUL_LEADER (written) - leader address without JSON quotes,
#            or empty when it could not be determined
# Outputs:   member list and log lines on stdout
check_consul_cluster() {
  log_info "检查Consul集群状态..."
  # Best effort: a connection failure is logged but does not abort
  consul members 2>/dev/null || log_error "无法连接到Consul集群"

  # Check the Consul leader.
  # -f turns HTTP error responses into a curl failure, and the `||` fallback
  # keeps `set -e` from terminating the script at the assignment, so the
  # error branch below is actually reachable when the agent is down.
  CONSUL_LEADER=$(curl -sf http://127.0.0.1:8500/v1/status/leader) || CONSUL_LEADER=""
  # The endpoint answers with a JSON string; strip the quotes so that a
  # leaderless reply of "" (presumably what Consul sends without a leader -
  # confirm against the API docs) is treated as empty rather than as a value.
  CONSUL_LEADER=${CONSUL_LEADER//\"/}
  if [ -n "$CONSUL_LEADER" ]; then
    log_info "Consul领导者: $CONSUL_LEADER"
  else
    log_error "无法获取Consul领导者信息"
  fi
}
# Deploy Traefik.
# Submits the Traefik job file to Nomad, waits a fixed 10 seconds and then
# prints the job status. Exits the script with status 1 when the job file
# does not exist.
deploy_traefik() {
  local job_file="/root/mgmt/jobs/traefik.nomad"

  log_info "部署Traefik..."

  # Guard clause: there is nothing to deploy without the job file
  [ -f "$job_file" ] || {
    log_error "Traefik作业文件不存在: ${job_file}"
    exit 1
  }

  # Submit the job
  nomad run "$job_file"

  # Give the deployment time to settle before querying it
  log_info "等待Traefik部署完成..."
  sleep 10

  # Show the resulting job status
  nomad status traefik
}
# Check the Traefik status.
# Verifies the Nomad job status, the client status of each allocation of the
# job, and whether a "traefik" service is listed in the Consul catalog.
# Globals:   JOB_STATUS (written)  - status string of the traefik job
#            ALLOCATIONS (written) - newline-separated allocation IDs
# Returns:   1 when the job status is not "running", otherwise 0
check_traefik_status() {
  local alloc alloc_status

  log_info "检查Traefik状态..."

  # Check the job status. Options are placed before the job name, which is
  # the order the Nomad CLI documents (`nomad job status [options] <job>`).
  JOB_STATUS=$(nomad job status -json traefik | jq -r '.Status')
  if [ "$JOB_STATUS" == "running" ]; then
    log_info "Traefik作业状态: $JOB_STATUS"
  else
    log_error "Traefik作业状态异常: $JOB_STATUS"
    return 1
  fi

  # Check the allocation status. The IDs come from the JSON output rather
  # than from slicing the human-readable table with tail/head, which dropped
  # real allocation rows (a job with a single allocation was never checked).
  ALLOCATIONS=$(nomad job allocs -json traefik | jq -r '.[].ID')
  if [ -z "$ALLOCATIONS" ]; then
    log_warn "未找到Traefik分配"
  fi
  while IFS= read -r alloc; do
    # The here-string yields one empty line when there are no allocations
    [ -n "$alloc" ] || continue
    alloc_status=$(nomad alloc status -json "$alloc" | jq -r '.ClientStatus')
    if [ "$alloc_status" == "running" ]; then
      log_info "分配 $alloc 状态: $alloc_status"
    else
      log_error "分配 $alloc 状态异常: $alloc_status"
    fi
  done <<< "$ALLOCATIONS"

  # Check the service registration. An explicit if/else is used so the
  # warning is tied to the grep result only; matching lines are still printed.
  log_info "检查Consul中的服务注册..."
  if consul catalog services | grep traefik; then
    log_info "Traefik服务已注册到Consul"
  else
    log_warn "Traefik服务未注册到Consul"
  fi
}
# Test Traefik functionality.
# Resolves the Traefik address from the Consul catalog (falling back to
# 127.0.0.1) and probes the ping endpoint, the dashboard and the HTTP
# entry point.
# Globals:   TRAEFIK_ADDR (written) - address used for all probes
# Outputs:   log lines on stdout; probe failures are logged, not fatal
test_traefik_functionality() {
  log_info "测试Traefik功能..."

  # Get the Traefik service address from the local agent's catalog HTTP API.
  # `// empty` stops jq from printing the literal "null" when no instance is
  # registered, and the `||` fallback keeps `set -e` from terminating the
  # script at the assignment when curl or jq fails.
  TRAEFIK_ADDR=$(curl -sf http://127.0.0.1:8500/v1/catalog/service/traefik 2>/dev/null \
    | jq -r '.[0].ServiceAddress // empty' 2>/dev/null) || TRAEFIK_ADDR=""
  if [ -z "$TRAEFIK_ADDR" ] || [ "$TRAEFIK_ADDR" = "null" ]; then
    log_warn "无法从Consul获取Traefik地址使用本地地址"
    TRAEFIK_ADDR="127.0.0.1"
  fi

  # Test the API endpoint; -f makes HTTP error statuses count as failures
  log_info "测试Traefik API端点..."
  if curl -sf "http://${TRAEFIK_ADDR}:8080/ping" > /dev/null; then
    log_info "Traefik API端点响应正常"
  else
    log_error "Traefik API端点无响应"
  fi

  # Test the dashboard
  log_info "测试Traefik仪表板..."
  if curl -sf "http://${TRAEFIK_ADDR}:8080/dashboard/" > /dev/null; then
    log_info "Traefik仪表板可访问"
  else
    log_error "无法访问Traefik仪表板"
  fi

  # Test the HTTP entry point; header names are case-insensitive, hence -i
  log_info "测试HTTP入口点..."
  if curl -s -I "http://${TRAEFIK_ADDR}:80" | grep -qi "Location: https://"; then
    log_info "HTTP到HTTPS重定向正常工作"
  else
    log_warn "HTTP到HTTPS重定向可能未正常工作"
  fi
}
# Create the test service.
# Writes a Nomad job file for an nginx test service, submits it, waits a
# fixed 15 seconds and then requests the service through Traefik.
# Globals:   TRAEFIK_ADDR (read) - Traefik address; 127.0.0.1 is used when
#            it is unset or empty (e.g. when only this step is run)
# Outputs:   log lines on stdout; writes /tmp/test-service.nomad
create_test_service() {
  local traefik_addr="${TRAEFIK_ADDR:-127.0.0.1}"

  log_info "创建测试服务..."

  # Create a simple job file for the test service.
  # The heredoc delimiter is quoted so the body is written literally:
  # unquoted, the shell would run the backticks in the Host(...) rule as a
  # command substitution and emit an empty rule.
  # The path stays fixed because cleanup_test_resources removes this file.
  cat > /tmp/test-service.nomad <<'EOF'
job "test-web" {
  datacenters = ["dc1"]
  type = "service"
  group "web" {
    count = 1
    network {
      port "http" {
        # nginx listens on port 80 inside the container
        to = 80
      }
    }
    task "nginx" {
      driver = "podman"
      config {
        image = "nginx:alpine"
        ports = ["http"]
      }
      resources {
        cpu = 100
        memory = 64
      }
      service {
        name = "test-web"
        port = "http"
        tags = [
          "traefik.enable=true",
          "traefik.http.routers.test-web.rule=Host(`test-web.service.consul`)",
          "traefik.http.routers.test-web.entrypoints=https"
        ]
        check {
          type = "http"
          path = "/"
          interval = "10s"
          timeout = "2s"
        }
      }
    }
  }
}
EOF

  # Deploy the test service
  nomad run /tmp/test-service.nomad

  # Wait for the service to start
  sleep 15

  # Test whether the service is reachable through Traefik.
  # NOTE(review): the router is bound to the "https" entry point while this
  # request goes to port 80 without following redirects - confirm that the
  # Traefik configuration serves this host on port 80.
  log_info "测试服务是否可通过Traefik访问..."
  if curl -s -H "Host: test-web.service.consul" "http://${traefik_addr}:80" | grep -q "Welcome to nginx"; then
    log_info "测试服务可通过Traefik正常访问"
  else
    log_error "无法通过Traefik访问测试服务"
  fi
}
# Clean up the test resources.
# Stops and purges the test-web and traefik jobs and removes the temporary
# job file. Every Nomad call is best effort: failures are ignored so the
# cleanup can run even when a job was never deployed.
cleanup_test_resources() {
  local job

  log_info "清理测试资源..."

  # Stop the test service first, then Traefik. `job stop -purge` stops the
  # job and removes it from Nomad in one step; the Nomad CLI reference lists
  # no separate `job purge` subcommand, so the old second call only failed
  # silently and the jobs were never purged.
  for job in test-web traefik; do
    nomad job stop -purge "$job" 2>/dev/null || true
  done

  # Remove the temporary job file
  rm -f /tmp/test-service.nomad
  log_info "清理完成"
}
# Main function.
# Dispatches on the first argument (default "all") to the matching step or
# sequence of steps. An unrecognised mode prints the usage text and exits
# the script with status 1.
main() {
  local action="${1:-all}"

  case "$action" in
    check)
      check_nomad_cluster
      check_consul_cluster
      ;;
    deploy)
      deploy_traefik
      ;;
    status)
      check_traefik_status
      ;;
    test)
      test_traefik_functionality
      ;;
    test-service)
      create_test_service
      ;;
    cleanup)
      cleanup_test_resources
      ;;
    all)
      # Full run: every step in order, without the cleanup step
      check_nomad_cluster
      check_consul_cluster
      deploy_traefik
      check_traefik_status
      test_traefik_functionality
      create_test_service
      log_info "所有测试完成"
      ;;
    *)
      # Unknown mode: print one usage line per argument, then fail
      printf '%s\n' \
        "用法: $0 {check|deploy|status|test|test-service|cleanup|all}" \
        " check - 检查集群状态" \
        " deploy - 部署Traefik" \
        " status - 检查Traefik状态" \
        " test - 测试Traefik功能" \
        " test-service - 创建并测试示例服务" \
        " cleanup - 清理测试资源" \
        " all - 执行所有步骤(默认)"
      exit 1
      ;;
  esac
}
# Run the main function, forwarding all command-line arguments unchanged
main "$@"