feat: 重构基础设施架构并完善Consul集群配置

主要变更:
- 重构Terraform/OpenTofu目录结构,统一迁移至infrastructure/opentofu
- 添加"7天创造世界"文档,记录基础设施建设演进逻辑
- 更新Consul集群配置管理经验,添加实际案例和解决方案
- 修正README中的Sticky Note,反映Consul集群健康状态
- 添加Ansible部署配置和inventory文件
- 完善项目文档结构,添加各组件配置指南

技术架构演进:
- 第1天: Tailscale网络连接基础
- 第2天: Ansible分布式控制
- 第3天: Nomad服务感知与任务调度
- 第4天: Consul配置集中管理
- 第5天: OpenTofu状态一致性
- 第6天: Vault密钥管理
- 第7天: Waypoint应用部署
This commit is contained in:
2025-09-30 03:46:33 +00:00
parent c0064b2cad
commit e8bfc76038
119 changed files with 1772 additions and 631 deletions

View File

@@ -0,0 +1,168 @@
---
# Disk space analysis play.
#
# For every host: installs ncdu, prints `df -h`, exports ncdu scans of `/`
# (and of `/var` and `/opt` when they are separate mount points) as JSON,
# then writes a plain-text usage report and a cleanup-suggestions file under
# `output_dir` and echoes both through `debug`.
#
# Relies on gathered facts (`ansible_mounts`), so fact gathering must stay on.
- name: 磁盘空间分析 - 使用 ncdu 工具
  hosts: all
  become: true
  vars:
    # NOTE(review): no task in this play references `ncdu_scan_paths`; the
    # scanned paths are hard-coded per task below. Kept for compatibility
    # with anything that may override or read it.
    ncdu_scan_paths:
      - "/"
      - "/var"
      - "/opt"
      - "/home"
    # Directory on the managed host that receives all generated files.
    output_dir: "/tmp/disk-analysis"

  tasks:
    - name: 安装 ncdu 工具
      package:
        name: ncdu
        state: present
      register: ncdu_install

    - name: 创建输出目录
      file:
        path: "{{ output_dir }}"
        state: directory
        mode: "0755"

    # Read-only query: no shell features needed and it never changes the host.
    - name: 检查磁盘空间使用情况
      command: df -h
      register: disk_usage
      changed_when: false

    - name: 显示当前磁盘使用情况
      debug:
        msg: |
          === {{ inventory_hostname }} 磁盘使用情况 ===
          {{ disk_usage.stdout }}

    # The three scans are started in the background (poll: 0) so they run
    # concurrently; the matching async_status tasks below collect them.
    # `-x` keeps each scan on a single filesystem, which is why /var and /opt
    # get their own scan only when they are separate mount points.
    - name: 使用 ncdu 扫描根目录并生成报告
      shell: |
        ncdu -x -o {{ output_dir }}/ncdu-root-{{ inventory_hostname }}.json /
      async: 300
      poll: 0
      register: ncdu_root_scan

    - name: 使用 ncdu 扫描 /var 目录
      shell: |
        ncdu -x -o {{ output_dir }}/ncdu-var-{{ inventory_hostname }}.json /var
      async: 180
      poll: 0
      register: ncdu_var_scan
      # The original condition OR-ed two equivalent "is /var a mount point"
      # checks; one membership test expresses the same thing.
      when: "'/var' in (ansible_mounts | map(attribute='mount') | list)"

    - name: 使用 ncdu 扫描 /opt 目录
      shell: |
        ncdu -x -o {{ output_dir }}/ncdu-opt-{{ inventory_hostname }}.json /opt
      async: 120
      poll: 0
      register: ncdu_opt_scan
      when: "'/opt' in (ansible_mounts | map(attribute='mount') | list)"

    # retries * delay matches the async timeout of the corresponding scan.
    - name: 等待根目录扫描完成
      async_status:
        jid: "{{ ncdu_root_scan.ansible_job_id }}"
      register: ncdu_root_result
      until: ncdu_root_result.finished
      retries: 60
      delay: 5

    # A skipped scan still registers its variable but without a job id, so
    # the job id is what decides whether there is anything to wait for.
    - name: 等待 /var 目录扫描完成
      async_status:
        jid: "{{ ncdu_var_scan.ansible_job_id }}"
      register: ncdu_var_result
      until: ncdu_var_result.finished
      retries: 36
      delay: 5
      when: ncdu_var_scan is defined and ncdu_var_scan.ansible_job_id is defined

    - name: 等待 /opt 目录扫描完成
      async_status:
        jid: "{{ ncdu_opt_scan.ansible_job_id }}"
      register: ncdu_opt_result
      until: ncdu_opt_result.finished
      retries: 24
      delay: 5
      when: ncdu_opt_scan is defined and ncdu_opt_scan.ansible_job_id is defined

    # All output of the command group is redirected once into the report
    # file instead of appending line by line.
    - name: 生成磁盘使用分析报告
      vars:
        report_file: "{{ output_dir }}/disk-report-{{ inventory_hostname }}.txt"
      shell: |
        {
          echo "=== {{ inventory_hostname }} 磁盘分析报告 ==="
          echo "生成时间: $(date)"
          echo ""
          echo "=== 磁盘使用情况 ==="
          df -h
          echo ""
          echo "=== 最大的目录 (前10个) ==="
          du -h --max-depth=2 / 2>/dev/null | sort -hr | head -10
          echo ""
          echo "=== /var 目录最大文件 ==="
          find /var -type f -size +100M -exec ls -lh {} \; 2>/dev/null | head -10
          echo ""
          echo "=== /tmp 目录使用情况 ==="
          du -sh /tmp/* 2>/dev/null | sort -hr | head -5
          echo ""
          echo "=== 日志文件大小 ==="
          find /var/log -name "*.log" -type f -size +50M -exec ls -lh {} \; 2>/dev/null
        } > "{{ report_file }}"

    - name: 显示分析报告
      command: cat {{ output_dir }}/disk-report-{{ inventory_hostname }}.txt
      register: disk_report
      changed_when: false

    - name: 输出磁盘分析结果
      debug:
        msg: "{{ disk_report.stdout }}"

    # `df -P` guarantees one line per filesystem (no wrapping of long device
    # names). `$5 + 0` forces a numeric comparison: after gsub() rewrites the
    # field it may be compared as a string, where "9" > "80" and "100" < "80".
    - name: 检查是否有磁盘使用率超过 80%
      shell: |
        df -hP | awk 'NR>1 {gsub(/%/, "", $5); if ($5 + 0 > 80) print $0}'
      register: high_usage_disks
      changed_when: false

    - name: 警告高磁盘使用率
      debug:
        msg: |
          ⚠️ 警告: {{ inventory_hostname }} 发现高磁盘使用率!
          {{ high_usage_disks.stdout }}
      when: high_usage_disks.stdout != ""

    # The podman counts use `-q` (one ID per line, no header). The original
    # passed Go-template placeholders in double curly braces via --format;
    # Ansible renders the whole script through Jinja2 first, so those
    # placeholders made the task fail with a template error, and the `table`
    # header row was counted as an extra entry.
    - name: 创建清理建议
      vars:
        suggestions_file: "{{ output_dir }}/cleanup-suggestions-{{ inventory_hostname }}.txt"
      shell: |
        {
          echo "=== {{ inventory_hostname }} 清理建议 ==="
          echo ""
          echo "1. 检查日志文件:"
          find /var/log -name "*.log" -type f -size +100M -exec echo " 大日志文件: {}" \; 2>/dev/null
          echo ""
          echo "2. 检查临时文件:"
          find /tmp -type f -size +50M -exec echo " 大临时文件: {}" \; 2>/dev/null
          echo ""
          echo "3. 检查包缓存:"
          if [ -d /var/cache/apt ]; then
            echo " APT 缓存大小: $(du -sh /var/cache/apt 2>/dev/null | cut -f1)"
          fi
          if [ -d /var/cache/yum ]; then
            echo " YUM 缓存大小: $(du -sh /var/cache/yum 2>/dev/null | cut -f1)"
          fi
          echo ""
          echo "4. 检查容器相关:"
          if command -v podman >/dev/null 2>&1; then
            echo " Podman 镜像: $(podman images -q 2>/dev/null | wc -l) 个"
            echo " Podman 容器: $(podman ps -a -q 2>/dev/null | wc -l) 个"
          fi
        } > "{{ suggestions_file }}"

    - name: 显示清理建议
      command: cat {{ output_dir }}/cleanup-suggestions-{{ inventory_hostname }}.txt
      register: cleanup_suggestions
      changed_when: false

    - name: 输出清理建议
      debug:
        msg: "{{ cleanup_suggestions.stdout }}"

    - name: 保存 ncdu 文件位置信息
      debug:
        msg: |
          📁 ncdu 扫描文件已保存到:
          - 根目录: {{ output_dir }}/ncdu-root-{{ inventory_hostname }}.json
          - /var 目录: {{ output_dir }}/ncdu-var-{{ inventory_hostname }}.json (如果存在)
          - /opt 目录: {{ output_dir }}/ncdu-opt-{{ inventory_hostname }}.json (如果存在)
          💡 使用方法:
          ncdu -f {{ output_dir }}/ncdu-root-{{ inventory_hostname }}.json
          📊 完整报告: {{ output_dir }}/disk-report-{{ inventory_hostname }}.txt
          🧹 清理建议: {{ output_dir }}/cleanup-suggestions-{{ inventory_hostname }}.txt

View File

@@ -0,0 +1,96 @@
---
# Disk cleanup play.
#
# Records `df -h` before and after, and in between optionally trims logs,
# package-manager caches, temporary files, Podman resources and core dumps.
# Each category is gated by one of the `cleanup_*` switches below; core-dump
# removal always runs.
#
# Relies on gathered facts (`ansible_os_family`).
- name: 磁盘清理工具
  hosts: all
  become: true
  vars:
    cleanup_logs: true
    cleanup_cache: true
    cleanup_temp: true
    # Off by default: enabling it stops every container on the host before
    # pruning, so all containers end up removed.
    cleanup_containers: false  # 谨慎操作

  tasks:
    - name: 检查磁盘使用情况 (清理前)
      command: df -h
      register: disk_before
      changed_when: false

    - name: 显示清理前磁盘使用情况
      debug:
        msg: |
          === {{ inventory_hostname }} 清理前磁盘使用情况 ===
          {{ disk_before.stdout }}

    # Vacuum the journal to 7 days, empty (not delete) stale *.log files so
    # any process holding them open keeps a valid handle, and delete rotated
    # *.log.* files older than 7 days.
    - name: 清理系统日志 (保留最近7天)
      shell: |
        journalctl --vacuum-time=7d
        find /var/log -name "*.log" -type f -mtime +7 -exec truncate -s 0 {} \;
        find /var/log -name "*.log.*" -type f -mtime +7 -delete
      when: cleanup_logs | bool
      register: log_cleanup

    - name: 清理包管理器缓存
      when: cleanup_cache | bool
      block:
        # Note: `autoremove` also uninstalls packages that are no longer
        # required, not just cached archives.
        - name: 清理 APT 缓存 (Debian/Ubuntu)
          shell: |
            apt-get clean
            apt-get autoclean
            apt-get autoremove -y
          when: ansible_os_family == "Debian"

        - name: 清理 YUM/DNF 缓存 (RedHat/CentOS)
          shell: |
            if command -v dnf >/dev/null 2>&1; then
              dnf clean all
            elif command -v yum >/dev/null 2>&1; then
              yum clean all
            fi
          when: ansible_os_family == "RedHat"

    # Only regular files not accessed for more than 7 days are removed.
    # `find -type f` already matches hidden files, so the former
    # `rm -rf /tmp/.*` was dropped: it ignored the 7-day policy and deleted
    # live hidden entries such as /tmp/.X11-unix out from under running
    # processes.
    - name: 清理临时文件
      shell: |
        find /tmp -type f -atime +7 -delete 2>/dev/null || true
        find /var/tmp -type f -atime +7 -delete 2>/dev/null || true
      when: cleanup_temp | bool

    - name: 清理 Podman 资源 (谨慎操作)
      when: cleanup_containers | bool
      block:
        - name: 停止所有容器
          command: podman stop --all
          ignore_errors: true

        - name: 删除未使用的容器
          command: podman container prune -f
          ignore_errors: true

        - name: 删除未使用的镜像
          command: podman image prune -f
          ignore_errors: true

        - name: 删除未使用的卷
          command: podman volume prune -f
          ignore_errors: true

    # `-xdev` keeps the whole-tree search on the root filesystem so it does
    # not descend into /proc, /sys or other mounted filesystems while
    # deleting files named "core".
    - name: 清理核心转储文件
      shell: |
        find /var/crash -name "core.*" -type f -delete 2>/dev/null || true
        find / -xdev -name "core" -type f -size +10M -delete 2>/dev/null || true
      ignore_errors: true

    - name: 检查磁盘使用情况 (清理后)
      command: df -h
      register: disk_after
      changed_when: false

    - name: 显示清理结果
      debug:
        msg: |
          === {{ inventory_hostname }} 清理完成 ===
          清理前:
          {{ disk_before.stdout }}
          清理后:
          {{ disk_after.stdout }}
          🧹 清理操作完成!