1 feat: 重构基础设施架构并完善Consul集群配置
2
3 主要变更:
4 - 重构Terraform/OpenTofu目录结构,统一迁移至infrastructure/opentofu
5 - 添加"7天创造世界"文档,记录基础设施建设演进逻辑
6 - 更新Consul集群配置管理经验,添加实际案例和解决方案
7 - 修正README中的Sticky Note,反映Consul集群健康状态
8 - 添加Ansible部署配置和inventory文件
9 - 完善项目文档结构,添加各组件配置指南
10
11 技术架构演进:
12 - 第1天: Tailscale网络连接基础 ✅
13 - 第2天: Ansible分布式控制 ✅
14 - 第3天: Nomad服务感知与任务调度 ✅
15 - 第4天: Consul配置集中管理 ✅
16 - 第5天: OpenTofu状态一致性 ✅
17 - 第6天: Vault密钥管理 ⏳
18 - 第7天: Waypoint应用部署 ⏳
This commit is contained in:
187
deployment/ansible/playbooks/setup/setup-disk-monitoring.yml
Normal file
187
deployment/ansible/playbooks/setup/setup-disk-monitoring.yml
Normal file
@@ -0,0 +1,187 @@
|
||||
---
# Installs Telegraf from the InfluxData apt repository on every targeted host,
# renders its configuration (remote-URL mode when telegraf_config_url is set,
# local-file mode otherwise), starts the service and prints a disk-usage
# summary per host.
#
# Required variable:
#   influxdb_token       - supply with -e, inventory or vault.
# Optional overrides (defaults are applied by the set_fact task below):
#   influxdb_url, influxdb_org, influxdb_bucket,
#   use_remote_config, telegraf_config_url
- name: 部署 Telegraf 硬盘监控到 Nomad 集群
  hosts: all
  become: true
  vars:
    # Disk-usage thresholds, in percent.
    disk_usage_warning: 80
    disk_usage_critical: 90

    # Collection interval, in seconds.
    collection_interval: 30

  tasks:
    - name: 显示正在处理的节点
      debug:
        msg: "🔧 正在为节点 {{ inventory_hostname }} 安装硬盘监控"

    # The token has no sensible default, so fail early with a clear message
    # instead of failing later inside a template.
    - name: 校验必需变量 influxdb_token
      assert:
        that:
          - influxdb_token is defined
          - influxdb_token | string | length > 0
        fail_msg: "influxdb_token is required; pass it with -e influxdb_token=... or define it in inventory/vault"
        quiet: true

    # Defaults are resolved here rather than under play-level `vars:`.
    # A play var that references its own name
    # (`influxdb_url: "{{ influxdb_url | default(...) }}"`) templates itself
    # recursively and aborts with "recursive loop detected" whenever the value
    # is not supplied as an extra var. set_fact evaluates the expression once
    # against the currently known value, so inventory values and extra vars
    # are both honoured.
    - name: 设置监控连接参数默认值
      set_fact:
        influxdb_url: "{{ influxdb_url | default('http://influxdb1.tailnet-68f9.ts.net:8086') }}"
        influxdb_org: "{{ influxdb_org | default('nomad') }}"
        influxdb_bucket: "{{ influxdb_bucket | default('nomad_monitoring') }}"
        use_remote_config: "{{ use_remote_config | default(true) }}"
        telegraf_config_url: "{{ telegraf_config_url | default('') }}"

    - name: 添加 InfluxData 仓库密钥
      apt_key:
        url: https://repos.influxdata.com/influxdata-archive_compat.key
        state: present
      retries: 3
      delay: 5

    - name: 添加 InfluxData 仓库
      apt_repository:
        repo: "deb https://repos.influxdata.com/ubuntu {{ ansible_distribution_release }} stable"
        state: present
        update_cache: true
      retries: 3
      delay: 5

    - name: 安装 Telegraf
      apt:
        name: telegraf
        state: present
        update_cache: true
      retries: 3
      delay: 10

    - name: 创建 Telegraf 配置目录
      file:
        path: /etc/telegraf/telegraf.d
        state: directory
        owner: telegraf
        group: telegraf
        mode: '0755'

    # Best-effort removal of old log locations to save disk space. This single
    # loop also covers /var/log/telegraf, which a second, duplicate task used
    # to delete again.
    - name: 清理旧的 Telegraf 日志文件(节省硬盘空间)
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /var/log/telegraf
        - /var/log/telegraf.log
      ignore_errors: true

    # Mode 0600: the rendered file is only readable by root.
    - name: 创建 Telegraf 环境变量文件
      template:
        src: telegraf-env.j2
        dest: /etc/default/telegraf
        owner: root
        group: root
        mode: '0600'
        backup: true
      notify: restart telegraf

    # Remote-config mode: only when a config URL was provided.
    - name: 创建 Telegraf systemd 服务文件(支持远程配置)
      template:
        src: telegraf.service.j2
        dest: /etc/systemd/system/telegraf.service
        owner: root
        group: root
        mode: '0644'
        backup: true
      notify:
        - reload systemd
        - restart telegraf
      when: telegraf_config_url is defined and telegraf_config_url != ''

    # Local-config mode: only when no config URL was provided.
    - name: 生成 Telegraf 主配置文件(本地配置模式)
      template:
        src: telegraf.conf.j2
        dest: /etc/telegraf/telegraf.conf
        owner: telegraf
        group: telegraf
        mode: '0644'
        backup: true
      notify: restart telegraf
      when: telegraf_config_url is not defined or telegraf_config_url == ''

    - name: 生成硬盘监控配置
      template:
        src: disk-monitoring.conf.j2
        dest: /etc/telegraf/telegraf.d/disk-monitoring.conf
        owner: telegraf
        group: telegraf
        mode: '0644'
        backup: true
      notify: restart telegraf

    - name: 生成系统监控配置
      template:
        src: system-monitoring.conf.j2
        dest: /etc/telegraf/telegraf.d/system-monitoring.conf
        owner: telegraf
        group: telegraf
        mode: '0644'
        backup: true
      notify: restart telegraf

    - name: 启用并启动 Telegraf 服务
      systemd:
        name: telegraf
        state: started
        enabled: true
        daemon_reload: true

    # Handlers normally run at the end of the play. Flush them now so the
    # status reported below reflects the service after any pending restart.
    - name: 立即应用待处理的配置变更
      meta: flush_handlers

    - name: 验证 Telegraf 状态
      systemd:
        name: telegraf
      register: telegraf_status

    # Probed once, from the control node, not from every managed host.
    - name: 检查 InfluxDB 连接
      uri:
        url: "{{ influxdb_url }}/ping"
        method: GET
        timeout: 5
      register: influxdb_ping
      ignore_errors: true
      delegate_to: localhost
      run_once: true

    # default(-1) keeps this message from erroring if the probe result carries
    # no status field.
    - name: 显示 InfluxDB 连接状态
      debug:
        msg: "{{ '✅ InfluxDB 连接正常' if (influxdb_ping.status | default(-1)) == 204 else '❌ InfluxDB 连接失败,请检查配置' }}"
      run_once: true

    - name: 显示 Telegraf 状态
      debug:
        msg: "✅ Telegraf 状态: {{ telegraf_status.status.ActiveState }}"

    # Read-only report. `df -P` keeps each filesystem on one line so the awk
    # field positions stay stable; shell variables are quoted so an empty
    # value cannot break the `[` test.
    - name: 检查硬盘使用情况
      shell: |
        df -hP | grep -vE '^Filesystem|tmpfs|cdrom|udev' | awk '{print $5 " " $1 " " $6}' | while read -r output;
        do
          usage=$(echo "$output" | awk '{print $1}' | sed 's/%//g')
          partition=$(echo "$output" | awk '{print $2}')
          mount=$(echo "$output" | awk '{print $3}')
          if [ "$usage" -ge {{ disk_usage_warning }} ]; then
            echo "⚠️ 警告: $mount ($partition) 使用率 $usage%"
          else
            echo "✅ $mount ($partition) 使用率 $usage%"
          fi
        done
      register: disk_check
      changed_when: false

    - name: 显示硬盘检查结果
      debug:
        msg: "{{ disk_check.stdout_lines }}"

  handlers:
    # Defined before the restart handler so a changed unit file is reloaded
    # before the service is restarted.
    - name: reload systemd
      systemd:
        daemon_reload: true

    - name: restart telegraf
      systemd:
        name: telegraf
        state: restarted
|
||||
76
deployment/ansible/playbooks/setup/setup-new-nomad-nodes.yml
Normal file
76
deployment/ansible/playbooks/setup/setup-new-nomad-nodes.yml
Normal file
@@ -0,0 +1,76 @@
|
||||
---
# Installs container tooling (podman, buildah, skopeo) and the Nomad binary on
# the target host, reports the installed versions and enables the Podman
# socket. Fact gathering is disabled; package facts are collected explicitly
# where they are needed.
- name: 安装并配置新的 Nomad Server 节点
  hosts: influxdb1
  become: true
  gather_facts: false
  vars:
    # Nomad release to install; override with -e nomad_version=X.Y.Z.
    nomad_version: "1.10.5"
    nomad_release_url: "https://releases.hashicorp.com/nomad/{{ nomad_version }}"

  tasks:
    - name: 更新包缓存
      apt:
        update_cache: true
        cache_valid_time: 3600
      retries: 3
      delay: 10

    - name: 安装依赖包
      apt:
        name:
          - wget
          - curl
          - unzip
          - podman
          - buildah
          - skopeo
        state: present
      retries: 3
      delay: 10

    # Prints the literal "not_found" when no nomad binary is on PATH; the
    # install block below keys off that exact string.
    - name: 检查 Nomad 是否已安装
      shell: command -v nomad || echo "not_found"
      register: nomad_check
      changed_when: false

    - name: 下载并安装 Nomad
      when: nomad_check.stdout == "not_found"
      block:
        # The archive is unpacked into /usr/bin as root, so verify it against
        # the SHA256SUMS file published alongside the release.
        - name: "下载 Nomad {{ nomad_version }}"
          get_url:
            url: "{{ nomad_release_url }}/nomad_{{ nomad_version }}_linux_amd64.zip"
            dest: "/tmp/nomad.zip"
            mode: '0644'
            checksum: "sha256:{{ nomad_release_url }}/nomad_{{ nomad_version }}_SHA256SUMS"

        - name: 解压 Nomad
          unarchive:
            src: "/tmp/nomad.zip"
            dest: "/usr/bin/"
            remote_src: true
            owner: root
            group: root
            mode: '0755'

        - name: 清理临时文件
          file:
            path: "/tmp/nomad.zip"
            state: absent

    # Read-only check: `command` avoids an unnecessary shell, and
    # changed_when stops the task reporting "changed" on every run.
    - name: 验证 Nomad 安装
      command: nomad version
      register: nomad_version_output
      changed_when: false

    # Facts are not gathered for this play, so ansible_facts.packages is only
    # populated by this task; without it the Podman line below always falls
    # back to 'checking...'.
    - name: 收集已安装软件包信息
      package_facts:
        manager: auto

    - name: 显示安装结果
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 软件安装完成
          📦 Podman: {{ ansible_facts.packages.podman[0].version if ansible_facts.packages.podman is defined else 'checking...' }}
          🎯 Nomad: {{ nomad_version_output.stdout.split('\n')[0] }}

    # Best effort: a failure here does not abort the play.
    - name: 启用 Podman socket
      systemd:
        name: podman.socket
        enabled: true
        state: started
      ignore_errors: true

    - name: 继续完整配置
      debug:
        msg: "软件安装完成,现在将运行完整的 Nomad 配置..."
|
||||
114
deployment/ansible/playbooks/setup/setup-xfce-chrome-dev.yml
Normal file
114
deployment/ansible/playbooks/setup/setup-xfce-chrome-dev.yml
Normal file
@@ -0,0 +1,114 @@
|
||||
---
# Installs the Xfce desktop with LightDM, Google Chrome (unstable/dev channel)
# and Python browser-automation packages on the `browser` hosts, and
# configures LightDM to log `target_user` into an Xfce session automatically.
- name: Setup Xfce desktop environment and Chrome Dev for browser automation
  hosts: browser
  become: true
  vars:
    # Account that is auto-logged-in and owns the desktop session.
    target_user: ben

  tasks:
    - name: Update package lists
      apt:
        update_cache: true
        cache_valid_time: 3600

    - name: Install Xfce desktop environment
      apt:
        name:
          - xfce4
          - xfce4-goodies
          - lightdm
          - xorg
          - dbus-x11
        state: present

    - name: Install additional useful packages for desktop environment
      apt:
        name:
          - firefox-esr
          - geany
          - thunar-archive-plugin
          - xfce4-terminal
          - gvfs
          - fonts-noto
          - fonts-noto-cjk
        state: present

    # The apt module downloads a .deb itself when `deb` is a URL. This
    # replaces the former download / install / delete sequence, which reported
    # "changed" on every run and left nothing to clean up only on success.
    - name: Install Google Chrome Dev
      apt:
        deb: https://dl.google.com/linux/direct/google-chrome-unstable_current_amd64.deb

    - name: Install Chrome automation dependencies
      apt:
        name:
          - python3-pip
          - python3-venv
          - python3-dev
          - build-essential
          - libssl-dev
          - libffi-dev
        state: present

    # NOTE(review): this installs into the system Python through pip3. On
    # distributions that mark the system environment as externally managed
    # this may be refused - confirm against the target OS release.
    - name: Install Python packages for browser automation
      pip:
        name:
          - selenium
          - webdriver-manager
          - pyvirtualdisplay
        executable: pip3

    # Notifies the handler so a changed configuration is applied to an already
    # running display manager; previously the handler was defined but never
    # triggered.
    - name: Set up Xfce as default desktop environment
      copy:
        dest: /etc/lightdm/lightdm.conf
        owner: root
        group: root
        mode: '0644'
        content: |
          [Seat:*]
          autologin-user={{ target_user }}
          autologin-user-timeout=0
          autologin-session=xfce
          user-session=xfce
      notify: restart lightdm

    # append: true adds these groups without removing existing memberships.
    - name: Ensure user is in necessary groups
      user:
        name: "{{ target_user }}"
        groups:
          - audio
          - video
          - input
          - netdev
        append: true

    # NOTE(review): lightdm.conf above already selects the xfce session, and
    # display managers typically source ~/.xprofile during session start-up,
    # so launching startxfce4 from it may start a nested or blocking session.
    # Content kept unchanged - confirm whether this file is still wanted.
    - name: Create .xprofile for user
      copy:
        dest: /home/{{ target_user }}/.xprofile
        content: |
          # Start Xfce on login
          startxfce4
        owner: "{{ target_user }}"
        group: "{{ target_user }}"
        mode: '0644'

    - name: Enable and start lightdm service
      systemd:
        name: lightdm
        enabled: true
        state: started

    - name: Display success message
      debug:
        msg: "Xfce desktop environment and Chrome Dev have been configured for user {{ target_user }} on {{ inventory_hostname }}"

  handlers:
    - name: restart lightdm
      systemd:
        name: lightdm
        state: restarted
|
||||
Reference in New Issue
Block a user