46 lines
1.5 KiB
Bash
Executable File
46 lines
1.5 KiB
Bash
Executable File
#!/bin/bash
#
# Nomad cluster disk-monitoring deployment script.
#
# Installs Telegraf on every node through Ansible, reusing the existing
# InfluxDB + Grafana monitoring stack.
#
# Usage: run from the directory that contains inventories/ and playbooks/;
# every path below is relative to the current working directory.
#
# Exit status: 0 on successful deployment, 1 when the config file is missing,
# the operator declines the confirmation, or the playbook run fails.

set -euo pipefail

readonly CONFIG_FILE="inventories/production/group_vars/all.yml"
readonly INVENTORY_FILE="inventories/production/nomad-cluster.ini"
readonly PLAYBOOK_FILE="playbooks/setup-disk-monitoring.yml"

#######################################
# Print a message to stderr and terminate the script with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Abort unless the group_vars configuration file exists.
# Globals: CONFIG_FILE (read)
#######################################
require_config() {
  [[ -f "${CONFIG_FILE}" ]] || die "❌ 配置文件不存在,请先配置 InfluxDB 连接信息"
}

#######################################
# Show the monitoring-related settings so the operator can review them.
# Globals: CONFIG_FILE (read)
# Outputs: matching config lines on stdout
#######################################
show_config() {
  echo "📋 当前监控配置:"
  # grep exits non-zero when nothing matches; test it explicitly so strict
  # mode does not abort the script, and tell the operator the list is empty
  # instead of silently asking them to confirm nothing.
  if ! grep -E "influxdb_|disk_usage_|collection_interval" "${CONFIG_FILE}"; then
    printf '%s\n' "⚠️ 未找到监控相关配置项" >&2
  fi
  echo ""
}

#######################################
# Ask the operator to confirm the configuration; abort on anything other
# than a single "y" or "Y".
#######################################
confirm_config() {
  local confirm=""
  # -r keeps backslashes literal. read fails on EOF (e.g. non-interactive
  # stdin); treat that as "no" rather than letting `set -e` exit silently.
  read -r -p "🤔 确认配置正确吗?(y/N): " confirm || confirm=""
  [[ "${confirm}" == [yY] ]] || die "❌ 部署取消,请修改配置后重试"
}

#######################################
# Run the disk-monitoring playbook against the production inventory.
# Globals: INVENTORY_FILE, PLAYBOOK_FILE (read)
#######################################
deploy() {
  echo "📦 开始安装 Telegraf 到所有节点..."
  # Test the command directly instead of inspecting $? afterwards.
  ansible-playbook -i "${INVENTORY_FILE}" "${PLAYBOOK_FILE}" \
    || die "❌ 部署失败,请检查错误信息"
}

#######################################
# Print the post-deployment summary and suggested next steps.
# Globals: INVENTORY_FILE (read)
#######################################
print_summary() {
  # NOTE(review): the 30-second interval below is hard-coded in the message,
  # while collection_interval is a setting in the config file — confirm the
  # two agree.
  printf '%s\n' \
    "✅ 硬盘监控部署完成!" \
    "" \
    "📊 监控信息:" \
    "- 数据将发送到你现有的 InfluxDB" \
    "- 可以在 Grafana 中创建仪表板查看数据" \
    "- 已禁用本地日志文件以节省硬盘空间" \
    "- 监控数据每30秒收集一次" \
    "" \
    "🔧 下一步:" \
    "1. 在 Grafana 中创建 Nomad 集群监控仪表板" \
    "2. 设置硬盘使用率告警规则" \
    "3. 可以运行以下命令检查监控状态:" \
    " ansible all -i ${INVENTORY_FILE} -m shell -a 'systemctl status telegraf'"
}

main() {
  echo "🚀 开始部署 Nomad 集群硬盘监控..."
  require_config
  show_config
  confirm_config
  deploy
  print_summary
}

main "$@"