mgmt/configuration/fix-telegraf-simple.sh

52 lines
2.1 KiB
Bash
Executable File

#!/bin/bash
# Simplified Telegraf rollout: install the Telegraf binary and run it directly
# against the remote configuration served by InfluxDB, on the nodes where the
# regular setup failed.
#
# Usage (from the directory that contains inventories/):
#   INFLUX_TOKEN=<token> ./fix-telegraf-simple.sh
#
# Environment:
#   INFLUX_TOKEN  required; InfluxDB API token written into the start script.
#   FAILED_NODES  optional; Ansible host pattern of the nodes to repair
#                 (default: ch3,ch2,ash1d,ash2e,syd).
#
# NOTE(review): an earlier revision of this script embedded the token in plain
# text; that token should be treated as leaked and rotated.
set -euo pipefail

# Nodes on which the regular Telegraf setup failed.
FAILED_NODES="${FAILED_NODES:-ch3,ch2,ash1d,ash2e,syd}"
readonly FAILED_NODES
readonly INVENTORY="inventories/production/nomad-cluster.ini"
readonly TELEGRAF_VERSION="1.36.1"
readonly INFLUX_BASE_URL="http://influxdb1.tailnet-68f9.ts.net:8086"
readonly TELEGRAF_CONFIG_ID="0f8a73496790c000"
# The token is interpolated into a single-quoted Ansible argument and into an
# unquoted `export` line, so restrict it to characters that are safe in both.
readonly TOKEN_PATTERN='^[A-Za-z0-9_=+/-]+$'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Run one Ansible ad-hoc command, as root, against every failed node.
# Arguments: the module selection and its arguments (e.g. -m copy -a "...").
run_on_nodes() {
  ansible "${FAILED_NODES}" -i "${INVENTORY}" --become "$@"
}

main() {
  local install_cmd start_script unit_file

  : "${INFLUX_TOKEN:?INFLUX_TOKEN must be set in the environment}"
  [[ "${INFLUX_TOKEN}" =~ ${TOKEN_PATTERN} ]] \
    || die "INFLUX_TOKEN contains characters that cannot be embedded safely"

  echo "🚀 创建简化的 Telegraf 服务..."

  echo "📥 第一步:下载并安装 Telegraf 二进制文件..."
  # Runs on the remote host (already root via --become, so no sudo).
  # Downloads into a private temp dir that is removed on exit; curl -f makes
  # an HTTP error fail the chain instead of saving the error page.
  install_cmd="
workdir=\$(mktemp -d) &&
trap 'rm -rf \"\$workdir\"' EXIT &&
cd \"\$workdir\" &&
curl -fsSL https://dl.influxdata.com/telegraf/releases/telegraf-${TELEGRAF_VERSION}_linux_amd64.tar.gz -o telegraf.tar.gz &&
tar -xzf telegraf.tar.gz &&
install -m 0755 telegraf-${TELEGRAF_VERSION}/usr/bin/telegraf /usr/bin/telegraf &&
telegraf version
"
  run_on_nodes -m shell -a "${install_cmd}"

  # The unit below runs as telegraf:telegraf, but this script only copies the
  # binary, so make sure the account exists (no-op where it already does).
  run_on_nodes -m group -a "name=telegraf system=yes"
  run_on_nodes -m user -a "name=telegraf group=telegraf system=yes create_home=no"

  echo "🔧 第二步:创建简化的启动脚本..."
  # exec replaces the wrapper shell so telegraf itself is the service's
  # main process.
  start_script="#!/bin/bash
export INFLUX_TOKEN=${INFLUX_TOKEN}
export INFLUX_ORG=seekkey
export INFLUX_BUCKET=VPS
export INFLUX_URL=${INFLUX_BASE_URL}
exec /usr/bin/telegraf --config ${INFLUX_BASE_URL}/api/v2/telegrafs/${TELEGRAF_CONFIG_ID}
"
  # The file holds the API token: readable by root and the telegraf group only.
  run_on_nodes -m copy -a "content='${start_script}' dest=/usr/local/bin/telegraf-start.sh owner=root group=telegraf mode=0750"

  echo "🔄 第三步:停止旧服务并启动新的简化服务..."
  # Best effort on purpose: the old telegraf unit may not exist on every node.
  run_on_nodes -m systemd -a "name=telegraf state=stopped enabled=no" || true

  # Create the simplified systemd unit.
  unit_file='[Unit]
Description=Telegraf (Simplified)
After=network.target
[Service]
Type=simple
User=telegraf
Group=telegraf
ExecStart=/usr/local/bin/telegraf-start.sh
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target'
  run_on_nodes -m copy -a "content='${unit_file}' dest=/etc/systemd/system/telegraf-simple.service owner=root group=root mode=0644"

  echo "🚀 第四步:启动简化服务..."
  run_on_nodes -m systemd -a "daemon_reload=yes name=telegraf-simple state=started enabled=yes"

  echo "✅ 检查结果..."
  run_on_nodes -m shell -a "systemctl status telegraf-simple --no-pager -l | head -10"
}

main "$@"