mgmt/configuration/fix-telegraf-install.sh

53 lines
2.1 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Simplified Telegraf install script - uses the Ubuntu official repository.
#
# Steps, all run as Ansible ad-hoc commands against FAILED_NODES:
#   1. Install the telegraf package (apt module, shell fallback on failure).
#   2. Deploy /etc/default/telegraf (InfluxDB credentials) and a systemd unit
#      that loads the Telegraf configuration from the remote InfluxDB URL.
#   3. Reload systemd, enable the service and restart it.
#   4. Print the first lines of `systemctl status telegraf` for each node.
#
# Optional environment overrides (defaults are the previously hard-coded values):
#   FAILED_NODES  comma-separated Ansible host pattern
#   INVENTORY     inventory path (the default is relative to the current directory)
#   INFLUX_TOKEN  InfluxDB API token written to /etc/default/telegraf
#
# Exit status: 0 when every step succeeded, 1 when at least one step failed.

# No `-e` on purpose: ansible exits non-zero when a single host fails, and the
# remaining steps should still run for the hosts that are healthy. Failures
# are counted instead and reported at the end.
set -uo pipefail

# Nodes whose earlier install failed (they need manual handling).
readonly FAILED_NODES="${FAILED_NODES:-ch3,ch2,ash1d,ash2e,ash3c,syd}"
readonly INVENTORY="${INVENTORY:-inventories/production/nomad-cluster.ini}"

# NOTE(review): a token is committed in this file as the fallback value.
# Rotate it and supply INFLUX_TOKEN through the environment instead.
readonly INFLUX_TOKEN="${INFLUX_TOKEN:-VU_dOCVZzqEHb9jSFsDe0bJlEBaVbiG4LqfoczlnmcbfrbmklSt904HJPL4idYGvVi0c2eHkYDi2zCTni7Ay4w==}"
readonly INFLUX_ORG="nomad"
readonly INFLUX_BUCKET="nomad_monitoring"
readonly INFLUX_URL="http://influxdb1.tailnet-68f9.ts.net:8086"
readonly TELEGRAF_CONFIG_ID="0f8a73496790c000"

failed_steps=0

#######################################
# Run one Ansible ad-hoc module on FAILED_NODES with privilege escalation.
# Globals:   FAILED_NODES (read), INVENTORY (read)
# Arguments: $1 - module name, $2 - module argument string
# Returns:   exit status of ansible
#######################################
run_on_nodes() {
  local module=$1
  local module_args=$2
  ansible "$FAILED_NODES" -i "$INVENTORY" -m "$module" -a "$module_args" --become
}

#######################################
# Report a failed step on stderr and remember it for the final exit status.
# Globals:   failed_steps (written)
# Arguments: $* - message describing the failed step
#######################################
note_failure() {
  printf '❌ %s\n' "$*" >&2
  failed_steps=$((failed_steps + 1))
}

echo "🚀 使用简化方案安装 Telegraf..."

if ! command -v ansible >/dev/null 2>&1; then
  printf '%s\n' "❌ 未找到 ansible 命令,请先安装 Ansible" >&2
  exit 127
fi

# Files are staged locally and pushed with `copy src=` so the token never
# appears in ansible's argv and no multi-line quoting is needed.
# mktemp -d creates the directory with mode 0700, keeping the token private.
staging_dir=$(mktemp -d) || {
  printf '%s\n' "❌ 无法创建临时目录" >&2
  exit 1
}
trap 'rm -rf -- "$staging_dir"' EXIT

env_file="$staging_dir/telegraf.env"
unit_file="$staging_dir/telegraf.service"

# Environment file read by the unit through EnvironmentFile=.
printf '%s\n' \
  "INFLUX_TOKEN=${INFLUX_TOKEN}" \
  "INFLUX_ORG=${INFLUX_ORG}" \
  "INFLUX_BUCKET=${INFLUX_BUCKET}" \
  "INFLUX_URL=${INFLUX_URL}" >"$env_file" || {
  printf '%s\n' "❌ 无法生成本地配置文件" >&2
  exit 1
}

# systemd unit; \$MAINPID is escaped so systemd, not this shell, expands it.
cat >"$unit_file" <<EOF || { printf '%s\n' "❌ 无法生成本地配置文件" >&2; exit 1; }
[Unit]
Description=Telegraf - 节点监控服务
Documentation=https://github.com/influxdata/telegraf
After=network.target
[Service]
Type=notify
User=telegraf
Group=telegraf
ExecStart=/usr/bin/telegraf --config ${INFLUX_URL}/api/v2/telegrafs/${TELEGRAF_CONFIG_ID}
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=control-group
Restart=on-failure
RestartSec=5
TimeoutStopSec=20
EnvironmentFile=/etc/default/telegraf
[Install]
WantedBy=multi-user.target
EOF

echo "📦 第一步:在失败的节点安装 Telegraf(Ubuntu 官方版本)..."
if run_on_nodes apt "name=telegraf state=present update_cache=yes"; then
  echo "✅ Telegraf 安装成功"
else
  echo "❌ 安装失败,尝试手动方式..."
  # Manual fallback: plain apt through the shell module.
  run_on_nodes shell "apt update && apt install -y telegraf" \
    || note_failure "手动安装 Telegraf 失败"
fi

echo "🔧 第二步:配置 Telegraf 使用远程配置..."
# Environment file holding the InfluxDB credentials (root-only, it has the token).
run_on_nodes copy "src=$env_file dest=/etc/default/telegraf owner=root group=root mode=0600" \
  || note_failure "写入 /etc/default/telegraf 失败"
# systemd unit that replaces the packaged one.
run_on_nodes copy "src=$unit_file dest=/etc/systemd/system/telegraf.service owner=root group=root mode=0644" \
  || note_failure "写入 telegraf.service 失败"

echo "🔄 第三步:启动服务..."
# state=restarted rather than started: a telegraf instance that is already
# running would otherwise keep its old unit and environment.
run_on_nodes systemd "daemon_reload=yes name=telegraf state=restarted enabled=yes" \
  || note_failure "重启 Telegraf 服务失败"

echo "✅ 检查结果..."
run_on_nodes shell "systemctl status telegraf --no-pager -l | head -5" \
  || note_failure "获取 Telegraf 服务状态失败"

if ((failed_steps > 0)); then
  printf '⚠️ 共有 %d 个步骤失败,请检查上面的输出\n' "$failed_steps" >&2
  exit 1
fi