mgmt/scripts/utilities/consul-cluster-manager.sh

233 lines
6.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Consul cluster management script.
# Provides cluster status checks plus restart, stop and related operations.
set -o errexit

# Locations are derived from the directory that holds this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)
INVENTORY_FILE="${PROJECT_ROOT}/configuration/inventories/production/consul-cluster.ini"

# Colour definitions. The escape sequences are stored unexpanded (single
# quotes) and are interpreted by whatever prints them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Print an informational message prefixed with a coloured [INFO] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed literally
# Outputs:   one line on stdout
print_status() {
  # %b expands the escape sequences held in the colour variables; %s keeps
  # the message literal so backslashes inside it are not interpreted.
  printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
}
# Print a warning message prefixed with a coloured [WARN] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed literally
# Outputs:   one line on stdout
print_warning() {
  # %b expands the escape sequences held in the colour variables; %s keeps
  # the message literal so backslashes inside it are not interpreted.
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
}
# Print an error message prefixed with a coloured [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed literally
# Outputs:   one line on stderr, so errors stay visible when stdout is
#            captured or piped
print_error() {
  # %b expands the escape sequences held in the colour variables; %s keeps
  # the message literal so backslashes inside it are not interpreted.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}
# Print a coloured section header of the form "=== title ===".
# Globals:   BLUE, NC (read)
# Arguments: $1 - header title, printed literally
# Outputs:   one line on stdout
print_header() {
  # %b expands the escape sequences held in the colour variables; %s keeps
  # the title literal so backslashes inside it are not interpreted.
  printf '%b=== %s ===%b\n' "$BLUE" "$1" "$NC"
}
# Verify that everything the script depends on is available.
# Globals:   INVENTORY_FILE (read)
# Outputs:   an error via print_error when a prerequisite is missing
# Exits:     with status 1 if the inventory is not a regular file or the
#            ansible command cannot be found
check_prerequisites() {
  [[ -f "$INVENTORY_FILE" ]] || {
    print_error "清单文件不存在: $INVENTORY_FILE"
    exit 1
  }

  command -v ansible > /dev/null 2>&1 || {
    print_error "未找到 ansible 命令"
    exit 1
  }
}
# Print usage information: a title, the invocation syntax (using $0) and
# the list of supported commands, followed by a trailing blank line.
# Outputs: help text on stdout
show_help() {
  # One printf call emits every line; empty arguments become blank lines.
  printf '%s\n' \
    "Consul 集群管理脚本" \
    "" \
    "用法: $0 [命令]" \
    "" \
    "命令:" \
    " status - 检查集群状态" \
    " members - 显示集群成员" \
    " leader - 显示集群领导者" \
    " restart - 重启 Consul 服务" \
    " stop - 停止 Consul 服务" \
    " start - 启动 Consul 服务" \
    " logs - 查看服务日志" \
    " health - 健康检查" \
    " cleanup - 清理 Consul 数据(危险操作)" \
    " help - 显示此帮助信息" \
    ""
}
# Show the Consul service state and the Consul processes on every host in
# the consul_cluster inventory group.
# Globals:   INVENTORY_FILE (read)
# Outputs:   section headers and ansible one-line results on stdout
# Returns:   0 if both ansible runs succeeded, otherwise the exit status of
#            the last run that failed
check_status() {
  local rc=0

  print_header "Consul 服务状态"
  # The script runs under "set -e"; without "|| rc=$?" a failing service
  # check would abort here and the process check below would never be shown.
  ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
    -a "systemctl is-active consul" -o || rc=$?
  echo
  print_header "Consul 进程状态"
  ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
    -a "ps aux | grep consul | grep -v grep" -o || rc=$?

  return "$rc"
}
# List the Consul cluster members as reported by each host in the
# consul_cluster inventory group.
# Globals:   INVENTORY_FILE (read)
# Outputs:   a section header and ansible one-line results on stdout
show_members() {
  local remote_cmd="consul members"
  local -a ansible_args=(
    -i "$INVENTORY_FILE"
    consul_cluster
    -m shell
    -a "$remote_cmd"
    -o
  )

  print_header "Consul 集群成员"
  ansible "${ansible_args[@]}"
}
# Show the cluster leader twice: via the raft peer list and via the local
# HTTP API on each host in the consul_cluster inventory group.
# Globals:   INVENTORY_FILE (read)
# Outputs:   section headers and ansible one-line results on stdout
# Returns:   0 if both ansible runs succeeded, otherwise the exit status of
#            the last run that failed
show_leader() {
  local rc=0

  print_header "Consul 集群领导者"
  # The script runs under "set -e"; without "|| rc=$?" a failing raft query
  # would abort here and the API check below would never be shown.
  ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
    -a "consul operator raft list-peers" -o || rc=$?
  echo
  print_header "通过 API 检查领导者"
  ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
    -a "curl -s http://localhost:8500/v1/status/leader" -o || rc=$?

  return "$rc"
}
# Restart the consul systemd unit on every host in the consul_cluster
# inventory group after an interactive y/Y confirmation, then wait 10
# seconds and show the cluster status.
# Globals:   INVENTORY_FILE (read)
# Inputs:    one line from stdin (the confirmation answer)
# Returns:   0 when cancelled; otherwise the status of check_status
# NOTE(review): all hosts are restarted by a single ansible run; consider
# whether a serial (one host at a time) restart is needed to keep quorum.
restart_service() {
  local confirm=""

  print_header "重启 Consul 服务"
  print_warning "即将重启所有 Consul 节点..."

  # -r keeps backslashes literal. "|| true" prevents "set -e" from killing
  # the script when stdin hits EOF; the empty answer then cancels cleanly.
  read -r -p "确认继续? (y/N): " confirm || true
  if [[ "$confirm" != [yY] ]]; then
    print_status "操作已取消"
    return 0
  fi

  ansible -i "$INVENTORY_FILE" consul_cluster -m systemd \
    -a "name=consul state=restarted" -b
  print_status "等待服务启动..."
  sleep 10
  check_status
}
# Stop the consul systemd unit on every host in the consul_cluster
# inventory group after an interactive y/Y confirmation.
# Globals:   INVENTORY_FILE (read)
# Inputs:    one line from stdin (the confirmation answer)
# Returns:   0 when cancelled; otherwise the status of the ansible run
stop_service() {
  local confirm=""

  print_header "停止 Consul 服务"
  print_warning "即将停止所有 Consul 节点..."

  # -r keeps backslashes literal. "|| true" prevents "set -e" from killing
  # the script when stdin hits EOF; the empty answer then cancels cleanly.
  read -r -p "确认继续? (y/N): " confirm || true
  if [[ "$confirm" != [yY] ]]; then
    print_status "操作已取消"
    return 0
  fi

  ansible -i "$INVENTORY_FILE" consul_cluster -m systemd \
    -a "name=consul state=stopped" -b
}
# Start the consul systemd unit on every host in the consul_cluster
# inventory group, wait 10 seconds, then show the cluster status.
# Globals:   INVENTORY_FILE (read)
# Outputs:   headers, ansible output and the check_status report on stdout
start_service() {
  local unit_state="name=consul state=started"
  local -a ansible_args=(
    -i "$INVENTORY_FILE"
    consul_cluster
    -m systemd
    -a "$unit_state"
    -b
  )

  print_header "启动 Consul 服务"
  ansible "${ansible_args[@]}"

  # Pause before reporting status (message text: "waiting for service start").
  print_status "等待服务启动..."
  sleep 10
  check_status
}
# Show the last 20 journal entries of the consul unit from every host in
# the consul_cluster inventory group.
# Globals:   INVENTORY_FILE (read)
# Outputs:   a section header and ansible one-line results on stdout
show_logs() {
  local remote_cmd="journalctl -u consul --no-pager -n 20"
  local -a ansible_args=(
    -i "$INVENTORY_FILE"
    consul_cluster
    -m shell
    -a "$remote_cmd"
    -o
  )

  print_header "Consul 服务日志"
  ansible "${ansible_args[@]}"
}
# Run a series of health probes on every host in the consul_cluster
# inventory group: service state, port 8500 listener, member count and
# HTTP API status code.
# Globals:   INVENTORY_FILE (read)
# Outputs:   a header, one status line per probe and ansible results on
#            stdout, with a blank line between probes
# Returns:   0 if every probe's ansible run succeeded, otherwise the exit
#            status of the last run that failed
health_check() {
  # Parallel arrays: labels[i] is announced before commands[i] is run.
  local -a labels=(
    "检查服务状态..."
    "检查端口监听..."
    "检查集群成员..."
    "检查 API 响应..."
  )
  local -a commands=(
    "systemctl is-active consul"
    "ss -tlnp | grep :8500"
    "consul members | wc -l"
    "curl -s -o /dev/null -w '%{http_code}' http://localhost:8500/v1/status/leader"
  )
  local rc=0
  local i

  print_header "Consul 健康检查"
  for i in "${!labels[@]}"; do
    if (( i > 0 )); then
      echo
    fi
    print_status "${labels[i]}"
    # The script runs under "set -e"; "|| rc=$?" lets the remaining probes
    # run after one fails, so a single failure does not hide the others.
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
      -a "${commands[i]}" -o || rc=$?
  done

  return "$rc"
}
# Wipe Consul data on every host in the consul_cluster inventory group
# (dangerous operation): stop the service, remove everything matched by
# /opt/consul/data/*, then start the service again.
# Globals:   INVENTORY_FILE (read)
# Inputs:    one line from stdin; only the exact answer YES proceeds
# Returns:   0 when cancelled or when all steps succeed; otherwise the
#            status of the first ansible step that failed
cleanup_data() {
  local confirm=""

  print_header "清理 Consul 数据"
  print_error "警告: 此操作将删除所有 Consul 数据包括服务注册、KV 存储等!"
  print_error "此操作不可逆!"
  echo

  # -r keeps backslashes literal. "|| true" prevents "set -e" from killing
  # the script when stdin hits EOF; the empty answer then cancels cleanly.
  read -r -p "确认要清理所有数据? 请输入 'YES' 确认: " confirm || true
  if [[ "$confirm" != "YES" ]]; then
    print_status "操作已取消"
    return 0
  fi

  # Each step is guarded explicitly instead of relying on "set -e", which is
  # ignored when a function is called from a conditional context: data must
  # never be deleted unless the stop step succeeded.
  print_status "停止 Consul 服务..."
  ansible -i "$INVENTORY_FILE" consul_cluster -m systemd \
    -a "name=consul state=stopped" -b || return

  print_status "清理数据目录..."
  ansible -i "$INVENTORY_FILE" consul_cluster -m shell \
    -a "rm -rf /opt/consul/data/*" -b || return

  print_status "启动 Consul 服务..."
  ansible -i "$INVENTORY_FILE" consul_cluster -m systemd \
    -a "name=consul state=started" -b || return

  print_status "数据清理完成"
}
# Entry point: dispatch the sub-command given as the first argument.
# Arguments: $1 - command name; defaults to "help" when missing or empty
# Outputs:   whatever the selected command prints
# Exits:     with status 1 on an unknown command (after printing an error
#            and the help text)
main() {
  local action="${1:-help}"

  # Help and unknown commands are handled before the prerequisite check so
  # that usage information is available even when ansible or the inventory
  # file is missing.
  case "$action" in
    help|--help|-h)
      show_help
      return
      ;;
    status|members|leader|restart|stop|start|logs|health|cleanup)
      ;;
    *)
      print_error "未知命令: $1"
      echo
      show_help
      exit 1
      ;;
  esac

  check_prerequisites

  case "$action" in
    status)  check_status ;;
    members) show_members ;;
    leader)  show_leader ;;
    restart) restart_service ;;
    stop)    stop_service ;;
    start)   start_service ;;
    logs)    show_logs ;;
    health)  health_check ;;
    cleanup) cleanup_data ;;
  esac
}
# Run main with all command-line arguments passed through unchanged.
main "$@"