53 lines
2.1 KiB
Bash
Executable File
53 lines
2.1 KiB
Bash
Executable File
#!/bin/bash
#
# Simplified Telegraf install script - uses the Ubuntu official repository.
#
# For the hosts listed in FAILED_NODES this script:
#   1. installs the `telegraf` package (Ansible apt module, falling back to
#      a plain `apt update && apt install` through the shell module),
#   2. writes /etc/default/telegraf (InfluxDB connection settings) and
#      /etc/systemd/system/telegraf.service (unit that loads its config
#      from the remote InfluxDB URL),
#   3. reloads systemd, enables and restarts the service,
#   4. prints the first lines of `systemctl status telegraf` per host.
#
# Every step is attempted even if an earlier one failed; the script exits
# non-zero at the end if any step reported a failure.
#
# Environment:
#   INFLUX_TOKEN  optional override for the InfluxDB token written to
#                 /etc/default/telegraf.

# No `set -e` on purpose: a failure on one host must not prevent the
# remaining steps from running on the other hosts.
set -uo pipefail

# Nodes where the installation failed (need manual handling).
readonly FAILED_NODES="ch3,ch2,ash1d,ash2e,ash3c,syd"
readonly INVENTORY="inventories/production/nomad-cluster.ini"

# NOTE(security): a credential is committed here as the default value so the
# script keeps working without extra setup. Prefer exporting INFLUX_TOKEN
# before running, and rotate this token. The token is also passed to ansible
# on the command line, so it is visible in the process list while running.
INFLUX_TOKEN="${INFLUX_TOKEN:-VU_dOCVZzqEHb9jSFsDe0bJlEBaVbiG4LqfoczlnmcbfrbmklSt904HJPL4idYGvVi0c2eHkYDi2zCTni7Ay4w==}"

# Overall exit status; set to 1 as soon as any step fails.
status=0

# Print a diagnostic to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Run an ad-hoc Ansible command with privilege escalation against
# FAILED_NODES. Arguments: module selection and module args (-m ... -a ...).
run_ansible() {
  ansible "${FAILED_NODES}" -i "${INVENTORY}" "$@" --become
}

echo "🚀 使用简化方案安装 Telegraf..."

echo "📦 第一步:在失败的节点安装 Telegraf(Ubuntu 官方版本)..."
if run_ansible -m apt -a "name=telegraf state=present update_cache=yes"; then
  echo "✅ Telegraf 安装成功"
else
  echo "❌ 安装失败,尝试手动方式..."
  # Manual install fallback.
  if ! run_ansible -m shell -a "apt update && apt install -y telegraf"; then
    warn "manual Telegraf installation failed on at least one node"
    status=1
  fi
fi

echo "🔧 第二步:配置 Telegraf 使用远程配置..."

# Create the environment file (referenced by EnvironmentFile= in the unit).
env_file_content="INFLUX_TOKEN=${INFLUX_TOKEN}
INFLUX_ORG=nomad
INFLUX_BUCKET=nomad_monitoring
INFLUX_URL=http://influxdb1.tailnet-68f9.ts.net:8086"

if ! run_ansible -m copy -a "content='${env_file_content}' dest=/etc/default/telegraf owner=root group=root mode=0600"; then
  warn "failed to write /etc/default/telegraf on at least one node"
  status=1
fi

# Create the systemd service file. The here-doc delimiter is quoted so that
# $MAINPID reaches the unit file literally instead of being expanded here.
unit_file_content=$(cat <<'EOF'
[Unit]
Description=Telegraf - 节点监控服务
Documentation=https://github.com/influxdata/telegraf
After=network.target

[Service]
Type=notify
User=telegraf
Group=telegraf
ExecStart=/usr/bin/telegraf --config http://influxdb1.tailnet-68f9.ts.net:8086/api/v2/telegrafs/0f8a73496790c000
ExecReload=/bin/kill -HUP $MAINPID
KillMode=control-group
Restart=on-failure
RestartSec=5
TimeoutStopSec=20
EnvironmentFile=/etc/default/telegraf

[Install]
WantedBy=multi-user.target
EOF
)

if ! run_ansible -m copy -a "content='${unit_file_content}' dest=/etc/systemd/system/telegraf.service owner=root group=root mode=0644"; then
  warn "failed to write telegraf.service on at least one node"
  status=1
fi

echo "🔄 第三步:启动服务..."
# state=restarted rather than state=started: if Telegraf is already running
# (e.g. started by the package install), "started" is a no-op and the new
# unit file and environment file would not take effect.
if ! run_ansible -m systemd -a "daemon_reload=yes name=telegraf state=restarted enabled=yes"; then
  warn "failed to (re)start telegraf on at least one node"
  status=1
fi

echo "✅ 检查结果..."
if ! run_ansible -m shell -a "systemctl status telegraf --no-pager -l | head -5"; then
  warn "status check failed on at least one node"
  status=1
fi

exit "${status}"