diff --git a/README.md b/README.md index f492a69..21cda4d 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,364 @@ -# Management Scripts +# 🛠️ 服务器管理自动化项目 -这个仓库包含系统管理相关的脚本和配置文件。 +这是一个基于 Ansible 的服务器管理自动化项目,用于管理多台服务器的系统更新、配置和维护。 -## 目录结构 +## 📁 项目结构 -- `ansible/` - Ansible 自动化脚本 - - `system-update.yml` - 系统更新 playbook (apt update && apt upgrade) - - `inventory.ini` - 服务器清单文件 - - `ansible.cfg` - Ansible 配置文件 -- `config.json` - Semaphore 配置文件 - -## Ansible 使用方法 - -```bash -cd ansible -LANG=C ansible-playbook -i inventory.ini system-update.yml +``` +mgmt/ +├── ansible/ +│ ├── inventory.ini # 服务器清单 +│ ├── ansible.cfg # Ansible 配置 +│ ├── system-update.yml # 系统更新 playbook +│ ├── cloud-providers-update.yml # 云服务商更新 playbook +│ ├── system-cleanup.yml # 系统清理和维护 +│ ├── service-health-check.yml # 服务健康检查 +│ ├── security-hardening.yml # 安全加固和备份 +│ ├── docker-management.yml # Docker 容器管理 +│ ├── network-connectivity.yml # 网络连通性检查 +│ ├── certificate-management.yml # SSL 证书管理 +│ ├── ops-toolkit.yml # 运维工具包 +│ ├── cron-setup.yml # 定时任务配置 +│ └── run.sh # 执行脚本 +├── scripts/ +│ └── ops-manager.sh # 运维管理脚本 +├── config.json # Semaphore 配置 +├── keys-info.md # SSH 密钥信息 +├── semaphore-setup-guide.md # Semaphore 设置指南 +└── README.md # 项目说明 ``` -## Semaphore 配置 +## 🚀 快速开始 -- Web 界面: http://localhost:3000 -- 用户名: admin -- 密码: admin123 \ No newline at end of file +### 1. 环境准备 + +确保已安装 Ansible: +```bash +# Ubuntu/Debian +sudo apt update && sudo apt install ansible + +# CentOS/RHEL +sudo yum install ansible +``` + +### 2. 配置服务器清单 + +编辑 `ansible/inventory.ini` 文件,服务器已按功能分组: + +- **lxc**: Debian/Ubuntu 容器 +- **alpine**: Alpine Linux 容器 +- **proxmox**: Proxmox VE 物理机 +- **armbian**: ARM 设备 +- **hcp**: HCP 云服务器 +- **feiniu**: 飞牛服务器 +- **germany**: 德国服务器 +- 以及各种云服务商组 + +### 3. 
使用运维管理脚本 + +```bash +# 给脚本执行权限 +chmod +x scripts/ops-manager.sh + +# 交互式模式 +./scripts/ops-manager.sh + +# 直接执行 +./scripts/ops-manager.sh update lxc # 更新 LXC 容器 +./scripts/ops-manager.sh cleanup all # 清理所有服务器 +./scripts/ops-manager.sh health proxmox # 检查 Proxmox 健康状态 +./scripts/ops-manager.sh docker lxc # 管理 LXC 中的 Docker +./scripts/ops-manager.sh toolkit germany # 运行德国服务器工具包 + +# 检查模式(不做实际更改) +./scripts/ops-manager.sh update all --check +``` + +## 🛠️ 可用的运维脚本 + +### 核心功能 +- **system-update.yml**: 系统包更新 +- **system-cleanup.yml**: 磁盘清理、日志清理、缓存清理 +- **service-health-check.yml**: 服务状态监控 +- **security-hardening.yml**: 安全加固和备份 + +### 专业工具 +- **docker-management.yml**: Docker 容器和镜像管理 +- **network-connectivity.yml**: 网络连通性和性能测试 +- **certificate-management.yml**: SSL 证书监控和管理 +- **ops-toolkit.yml**: 统一运维仪表板 + +### 自动化 +- **cron-setup.yml**: 配置定时任务自动化 +- **ops-manager.sh**: 便捷的命令行管理工具 + +## 🤖 自动化定时任务 + +设置自动化定时任务: +```bash +ansible-playbook -i ansible/inventory.ini ansible/cron-setup.yml +``` + +配置的定时任务: +- **每日 08:00**: 系统健康检查 +- **每日 01:00**: Docker 清理 (LXC 组) +- **每周日 02:00**: 系统清理 +- **每周一 04:30**: 证书检查 +- **每周二 06:00**: 网络连通性检查 +- **每月1日 03:00**: 安全检查 + +查看自动化状态: +```bash +automation-status +``` + +## 📊 使用 Semaphore Web UI + +参考 `semaphore-setup-guide.md` 文件设置 Semaphore Web 界面管理。 + +推送到 Gitea 后,Semaphore 可以: +- ✅ 直接识别 Ansible 项目结构 +- ✅ 使用现有的 inventory 分组 +- ✅ 运行预定义的 playbooks +- ✅ 支持按组选择性更新 +- ✅ 提供 Web 界面管理和监控 + +## 💡 最佳实践 + +### 日常运维 +```bash +# 每日快速检查 +./scripts/ops-manager.sh toolkit all + +# 每周系统维护 +./scripts/ops-manager.sh cleanup all +./scripts/ops-manager.sh health all + +# 每月安全检查 +./scripts/ops-manager.sh security all --check +./scripts/ops-manager.sh cert all +``` + +### 紧急情况 +```bash +# 紧急安全更新 +./scripts/ops-manager.sh update all + +# 网络问题诊断 +./scripts/ops-manager.sh network all + +# 服务状态检查 +./scripts/ops-manager.sh health all +``` + +### 容器管理 +```bash +# LXC 容器管理 +./scripts/ops-manager.sh docker lxc +./scripts/ops-manager.sh cleanup lxc + +# Alpine 容器更新 
+./scripts/ops-manager.sh update alpine +``` + +## 🔧 高级用法 + +### 按组管理 +```bash +# 物理机维护 +./scripts/ops-manager.sh cleanup proxmox +./scripts/ops-manager.sh health armbian + +# 云服务商管理 +./scripts/ops-manager.sh update huawei +./scripts/ops-manager.sh network google + +# 容器管理 +./scripts/ops-manager.sh docker lxc +./scripts/ops-manager.sh update alpine +``` + +### 检查模式 +```bash +# 检查更新但不执行 +./scripts/ops-manager.sh update all --check + +# 详细输出 +./scripts/ops-manager.sh health all --verbose + +# 仅显示命令 +./scripts/ops-manager.sh cleanup all --dry-run +``` + +## 📋 服务器组说明 + +- **lxc**: Debian/Ubuntu 容器 (warden, gitea, mysql, postgresql, influxdb) +- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb) +- **proxmox**: Proxmox VE 物理机 (pve, xgp, nuc12) +- **armbian**: ARM 设备 (onecloud1) +- **hcp**: HCP 云服务器 (hcp1, hcp2) +- **feiniu**: 飞牛服务器 (snail) +- **germany**: 德国服务器 (de) +- **dev**: 开发服务器 (dev1, dev2) +- **oci_kr/oci_us**: Oracle 云服务器 +- **huawei/google/aws**: 各云服务商 + +## 📝 注意事项 + +- 确保 SSH 密钥已正确配置 +- LXC 组更新需要顺序执行,避免同时更新 +- Alpine 容器使用 `apk` 包管理器 +- 建议先在测试环境验证 +- 定期备份重要数据 +- 监控自动化日志:`tail -f /var/log/daily-health-check.log` + +## 🆘 故障排除 + +### 连接问题 +```bash +# 测试连接 +ansible all -i ansible/inventory.ini -m ping + +# 检查特定组 +ansible lxc -i ansible/inventory.ini -m ping --ask-pass +``` + +### 权限问题 +```bash +# 检查 sudo 权限 +ansible all -i ansible/inventory.ini -m shell -a "whoami" --become +``` + +### 日志查看 +```bash +# 查看自动化日志 +ls -la /var/log/*-*.log +tail -f /var/log/daily-health-check.log +``` + +## 🎯 运维脚本使用示例 + +### 系统更新 +```bash +# 更新所有服务器 +./scripts/ops-manager.sh update all + +# 更新特定组 +./scripts/ops-manager.sh update lxc +./scripts/ops-manager.sh update alpine +./scripts/ops-manager.sh update proxmox +``` + +### 系统清理 +```bash +# 清理所有服务器 +./scripts/ops-manager.sh cleanup all + +# 清理特定组 +./scripts/ops-manager.sh cleanup lxc +``` + +### 健康检查 +```bash +# 检查所有服务器健康状态 +./scripts/ops-manager.sh health all + +# 检查特定组 +./scripts/ops-manager.sh health proxmox
+``` + +### Docker 管理 +```bash +# 管理 LXC 组的 Docker +./scripts/ops-manager.sh docker lxc + +# 检查 Docker 状态 +./scripts/ops-manager.sh docker all +``` + +### 网络诊断 +```bash +# 检查网络连通性 +./scripts/ops-manager.sh network all + +# 检查特定组网络 +./scripts/ops-manager.sh network germany +``` + +### 证书管理 +```bash +# 检查所有证书 +./scripts/ops-manager.sh cert all + +# 检查特定组证书 +./scripts/ops-manager.sh cert proxmox +``` + +### 安全检查 +```bash +# 安全检查(检查模式) +./scripts/ops-manager.sh security all --check + +# 执行安全加固 +./scripts/ops-manager.sh security all +``` + +### 运维工具包 +```bash +# 运行完整的运维工具包 +./scripts/ops-manager.sh toolkit all + +# 检查特定服务器 +./scripts/ops-manager.sh toolkit germany +``` + +## 📈 监控和日志 + +### 自动化监控 +```bash +# 查看自动化状态 +automation-status + +# 查看定时任务 +crontab -l + +# 查看最近的健康检查 +tail -20 /var/log/daily-health-check.log +``` + +### 手动日志查看 +```bash +# 查看所有自动化日志 +ls -la /var/log/*-*.log + +# 实时监控日志 +tail -f /var/log/daily-health-check.log + +# 查看清理日志 +cat /var/log/weekly-cleanup.log +``` + +## 🔄 定期维护建议 + +### 每日 +- 运行 `./scripts/ops-manager.sh toolkit all` 快速检查 +- 查看 `automation-status` 了解自动化状态 + +### 每周 +- 运行 `./scripts/ops-manager.sh cleanup all` 系统清理 +- 运行 `./scripts/ops-manager.sh health all` 健康检查 +- 检查 `/var/log/` 下的日志文件 + +### 每月 +- 运行 `./scripts/ops-manager.sh security all --check` 安全检查 +- 运行 `./scripts/ops-manager.sh cert all` 证书检查 +- 运行 `./scripts/ops-manager.sh network all` 网络检查 + +### 按需 +- 系统更新:`./scripts/ops-manager.sh update [group]` +- Docker 清理:`./scripts/ops-manager.sh docker lxc` +- 网络诊断:`./scripts/ops-manager.sh network all` + +## 许可证 + +MIT License \ No newline at end of file diff --git a/ansible/certificate-management.yml b/ansible/certificate-management.yml new file mode 100644 index 0000000..600bbe9 --- /dev/null +++ b/ansible/certificate-management.yml @@ -0,0 +1,152 @@ +--- +- name: SSL Certificate Management and Monitoring + hosts: all + gather_facts: yes + + vars: + # 常见证书路径 + cert_paths: + - /etc/ssl/certs + - /etc/letsencrypt/live + - /etc/nginx/ssl 
+ - /etc/apache2/ssl + - /usr/local/share/ca-certificates + + # 需要检查的服务端口 + ssl_services: + - { name: "HTTPS", port: 443 } + - { name: "SMTPS", port: 465 } + - { name: "IMAPS", port: 993 } + - { name: "LDAPS", port: 636 } + + tasks: + # 检查证书目录 + - name: Check certificate directories + stat: + path: "{{ item }}" + register: cert_dirs + loop: "{{ cert_paths }}" + + - name: List existing certificate directories + debug: + msg: "📁 Certificate directory {{ item.item }}: {{ 'EXISTS' if item.stat.exists else 'NOT FOUND' }}" + loop: "{{ cert_dirs.results }}" + + # 查找证书文件 + - name: Find certificate files + find: + paths: "{{ cert_paths }}" + patterns: "*.crt,*.pem,*.cert" + recurse: yes + register: cert_files + + - name: Display found certificates + debug: + msg: "🔐 Found {{ cert_files.files | length }} certificate files" + + # 检查证书过期时间 + - name: Check certificate expiration + shell: | + if [ -f "{{ item.path }}" ]; then + openssl x509 -in "{{ item.path }}" -noout -enddate 2>/dev/null | cut -d= -f2 + fi + register: cert_expiry + loop: "{{ cert_files.files[:10] }}" # 限制检查前10个证书 + failed_when: false + + - name: Display certificate expiration dates + debug: + msg: "📅 {{ item.item.path | basename }}: expires {{ item.stdout if item.stdout else 'INVALID/UNREADABLE' }}" + loop: "{{ cert_expiry.results }}" + when: item.stdout != "" + + # 检查即将过期的证书 (30天内) + - name: Check certificates expiring soon + shell: | + if [ -f "{{ item.path }}" ]; then + exp_date=$(openssl x509 -in "{{ item.path }}" -noout -enddate 2>/dev/null | cut -d= -f2) + if [ ! 
-z "$exp_date" ]; then + exp_epoch=$(date -d "$exp_date" +%s 2>/dev/null) + now_epoch=$(date +%s) + days_left=$(( (exp_epoch - now_epoch) / 86400 )) + if [ $days_left -lt 30 ]; then + echo "WARNING: $days_left days left" + else + echo "OK: $days_left days left" + fi + fi + fi + register: cert_warnings + loop: "{{ cert_files.files[:10] }}" + failed_when: false + + - name: Display certificate warnings + debug: + msg: "⚠️ {{ item.item.path | basename }}: {{ item.stdout }}" + loop: "{{ cert_warnings.results }}" + when: item.stdout != "" and "WARNING" in item.stdout + + # Check Let's Encrypt certificates + - name: Check Let's Encrypt certificates + shell: certbot certificates 2>/dev/null || echo "Certbot not installed" + register: letsencrypt_certs + failed_when: false + + - name: Display Let's Encrypt status + debug: + msg: "🔒 Let's Encrypt: {{ letsencrypt_certs.stdout_lines }}" + when: "'not installed' not in letsencrypt_certs.stdout" + + # Check SSL service ports; ignore_errors (unlike failed_when: false) keeps each result's real failed flag for the tasks below + - name: Check SSL service ports + wait_for: + port: "{{ item.port }}" + timeout: 3 + register: ssl_ports + loop: "{{ ssl_services }}" + ignore_errors: yes + + - name: Display SSL service status + debug: + msg: "🔌 {{ item.item.name }} (port {{ item.item.port }}): {{ 'LISTENING' if not item.failed else 'NOT AVAILABLE' }}" + loop: "{{ ssl_ports.results }}" + + # Test the HTTPS connection; uri reports status -1 when no HTTP response was received, so only a positive status counts as success + - name: Test HTTPS connection to localhost + uri: + url: "https://{{ ansible_default_ipv4.address }}" + method: GET + validate_certs: no + timeout: 5 + register: https_test + failed_when: false + when: ssl_ports.results[0] is defined and not ssl_ports.results[0].failed + + - name: Display HTTPS test result + debug: + msg: "🌐 HTTPS Test: {{ 'SUCCESS' if (https_test.status | default(-1)) > 0 else 'FAILED' }}" + when: https_test is defined + + # Check the certificate chain (skipped when port 443 is not listening) + - name: Check certificate chain for HTTPS + shell: | + echo | openssl s_client -connect {{ ansible_default_ipv4.address }}:443 -servername {{ ansible_hostname }} 2>/dev/null | openssl x509 -noout -subject -issuer + register: 
cert_chain + failed_when: false + when: ssl_ports.results[0] is defined and not ssl_ports.results[0].failed + + - name: Display certificate chain info + debug: + msg: "🔗 Certificate Chain: {{ cert_chain.stdout_lines }}" + when: cert_chain.rc is defined and cert_chain.rc == 0 + + # Generate the certificate health report + - name: Generate certificate health summary + debug: + msg: | + 🔐 Certificate Health Summary for {{ inventory_hostname }}: + 📁 Certificate directories found: {{ (cert_dirs.results | selectattr('stat.exists') | list | length) }} + 📄 Certificate files found: {{ cert_files.files | length }} + ⚠️ Certificates expiring soon: {{ (cert_warnings.results | selectattr('stdout', 'search', 'WARNING') | list | length) }} + 🔒 Let's Encrypt: {{ 'Configured' if 'not installed' not in letsencrypt_certs.stdout else 'Not installed' }} + 🌐 SSL Services: {{ (ssl_ports.results | rejectattr('failed') | list | length) }}/{{ ssl_services | length }} available \ No newline at end of file diff --git a/ansible/cron-setup.yml b/ansible/cron-setup.yml new file mode 100644 index 0000000..4512919 --- /dev/null +++ b/ansible/cron-setup.yml @@ -0,0 +1,183 @@ +--- +- name: Setup Automated Maintenance Cron Jobs + hosts: localhost + gather_facts: no + + vars: + # Cron job definitions + cron_jobs: + # Daily quick check + - name: "Daily system health check" + job: "cd /root/mgmt && ./scripts/ops-manager.sh toolkit all --check > /var/log/daily-health-check.log 2>&1" + minute: "0" + hour: "8" + day: "*" + month: "*" + weekday: "*" + + # Weekly system cleanup + - name: "Weekly system cleanup" + job: "cd /root/mgmt && ./scripts/ops-manager.sh cleanup all > /var/log/weekly-cleanup.log 2>&1" + minute: "0" + hour: "2" + day: "*" + month: "*" + weekday: "0" # Sunday + + # Monthly security check + - name: "Monthly security hardening check" + job: "cd /root/mgmt && ./scripts/ops-manager.sh security all --check > /var/log/monthly-security-check.log 2>&1" + minute: "0" + hour: "3" + day: "1" + month: "*" + weekday: "*" + + # Weekly certificate check + - name: "Weekly certificate check" + job: "cd
/root/mgmt && ./scripts/ops-manager.sh cert all > /var/log/weekly-cert-check.log 2>&1" + minute: "30" + hour: "4" + day: "*" + month: "*" + weekday: "1" # Monday + + # Daily Docker cleanup (LXC group only). NOTE(review): this job embeds the SSH password in plain text, so it ends up in the repository and in root's crontab; move it to Ansible Vault or a root-only vars file and rotate the password. + - name: "Daily Docker cleanup for LXC" + job: "cd /root/mgmt && ansible lxc -i ansible/inventory.ini -m shell -a 'docker system prune -f' --become -e 'ansible_ssh_pass=313131' > /var/log/daily-docker-cleanup.log 2>&1" + minute: "0" + hour: "1" + day: "*" + month: "*" + weekday: "*" + + # Weekly network connectivity check + - name: "Weekly network connectivity check" + job: "cd /root/mgmt && ./scripts/ops-manager.sh network all > /var/log/weekly-network-check.log 2>&1" + minute: "0" + hour: "6" + day: "*" + month: "*" + weekday: "2" # Tuesday + + tasks: + # Create the log directory. NOTE(review): the cron jobs above redirect to /var/log/*.log, not into this directory - confirm it is still needed. + - name: Create log directory + file: + path: /var/log/ansible-automation + state: directory + mode: '0755' + become: yes + + # Make the ops-manager script executable + - name: Make ops-manager.sh executable + file: + path: /root/mgmt/scripts/ops-manager.sh + mode: '0755' + + # Install one cron entry for root per item in cron_jobs + - name: Setup cron jobs for automated maintenance + cron: + name: "{{ item.name }}" + job: "{{ item.job }}" + minute: "{{ item.minute }}" + hour: "{{ item.hour }}" + day: "{{ item.day }}" + month: "{{ item.month }}" + weekday: "{{ item.weekday }}" + user: root + loop: "{{ cron_jobs }}" + become: yes + + # Write the logrotate configuration for the automation logs + - name: Setup log rotation for automation logs + copy: + content: | + /var/log/*-health-check.log + /var/log/*-cleanup.log + /var/log/*-security-check.log + /var/log/*-cert-check.log + /var/log/*-docker-cleanup.log + /var/log/*-network-check.log { + daily + missingok + rotate 30 + compress + delaycompress + notifempty + copytruncate + } + dest: /etc/logrotate.d/ansible-automation + mode: '0644' + become: yes + + # Install the automation-status monitoring script + - name: Create monitoring dashboard script + copy: + content: | + #!/bin/bash + # Automation Monitoring Dashboard + + echo "🤖 Ansible Automation Status Dashboard" + echo "======================================" + echo "" + + echo "📅 Last Execution Times:"
+ echo "------------------------" + for log in /var/log/*-check.log /var/log/*-cleanup.log; do + if [ -f "$log" ]; then + echo "$(basename "$log" .log): $(stat -c %y "$log" | cut -d. -f1)" + fi + done + echo "" + + echo "📊 Recent Log Summary:" + echo "---------------------" + for log in /var/log/daily-health-check.log /var/log/weekly-cleanup.log; do + if [ -f "$log" ]; then + echo "=== $(basename "$log") ===" + tail -5 "$log" | grep -E "(TASK|PLAY RECAP|ERROR|WARNING)" || echo "No recent activity" + echo "" + fi + done + + echo "⏰ Next Scheduled Jobs:" + echo "----------------------" + crontab -l | grep -E "(health|cleanup|security|cert|docker|network)" | while read line; do + echo "$line" + done + echo "" + + echo "💾 Log File Sizes:" + echo "-----------------" + ls -lh /var/log/*-*.log 2>/dev/null | awk '{print $5, $9}' || echo "No log files found" + dest: /usr/local/bin/automation-status + mode: '0755' + become: yes + + # 显示设置完成信息 + - name: Display setup completion info + debug: + msg: | + 🎉 自动化定时任务设置完成! 
+ + 📋 已配置的定时任务: + • 每日 08:00 - 系统健康检查 + • 每日 01:00 - Docker 清理 (LXC 组) + • 每周日 02:00 - 系统清理 + • 每周一 04:30 - 证书检查 + • 每周二 06:00 - 网络连通性检查 + • 每月1日 03:00 - 安全检查 + + 📊 监控命令: + • 查看状态: automation-status + • 查看定时任务: crontab -l + • 查看日志: tail -f /var/log/daily-health-check.log + + 📁 日志位置: /var/log/ + 🔄 日志轮转: 30天自动清理 + + 💡 手动执行示例: + • ./scripts/ops-manager.sh toolkit all + • ./scripts/ops-manager.sh cleanup lxc + • ./scripts/ops-manager.sh health proxmox \ No newline at end of file diff --git a/ansible/docker-management.yml b/ansible/docker-management.yml new file mode 100644 index 0000000..5f359e9 --- /dev/null +++ b/ansible/docker-management.yml @@ -0,0 +1,128 @@ +--- +- name: Docker Container Management + hosts: all + become: yes + gather_facts: yes + + tasks: + # Check whether Docker is installed; every later task is gated on this result + - name: Check if Docker is installed + command: which docker + register: docker_installed + failed_when: false + changed_when: false + + - name: Skip Docker tasks if not installed + debug: + msg: "Docker not installed on {{ inventory_hostname }}, skipping Docker tasks" + when: docker_installed.rc != 0 + + # Docker system information + - name: Get Docker system info + shell: docker system df + register: docker_system_info + when: docker_installed.rc == 0 + + - name: Display Docker system usage + debug: + msg: "🐳 Docker System Usage: {{ docker_system_info.stdout_lines }}" + when: docker_installed.rc == 0 + + # List running containers (the Go template is wrapped in raw/endraw so Jinja2 passes it through unrendered) + - name: List running containers + shell: docker ps --format "table {% raw %}{{.Names}}\t{{.Status}}\t{{.Ports}}{% endraw %}" + register: running_containers + when: docker_installed.rc == 0 + + - name: Display running containers + debug: + msg: "📦 Running Containers: {{ running_containers.stdout_lines }}" + when: docker_installed.rc == 0 + + # List stopped containers (Go template protected with raw/endraw) + - name: List stopped containers + shell: docker ps -a --filter "status=exited" --format "table {% raw %}{{.Names}}\t{{.Status}}{% endraw %}" + register: stopped_containers + when: docker_installed.rc == 0 + + - name: Display stopped containers + debug: + msg: "⏹️ Stopped Containers: 
{{ stopped_containers.stdout_lines }}" + when: docker_installed.rc == 0 and stopped_containers.stdout_lines | length > 1 + + # List Docker images (Go template protected with raw/endraw) + - name: List Docker images + shell: docker images --format "table {% raw %}{{.Repository}}\t{{.Tag}}\t{{.Size}}{% endraw %}" + register: docker_images + when: docker_installed.rc == 0 + + - name: Display Docker images + debug: + msg: "🖼️ Docker Images: {{ docker_images.stdout_lines }}" + when: docker_installed.rc == 0 + + # Check for dangling images + - name: Check for dangling images + shell: docker images -f "dangling=true" -q + register: dangling_images + when: docker_installed.rc == 0 + + - name: Report dangling images + debug: + msg: "🗑️ Found {{ dangling_images.stdout_lines | length }} dangling images" + when: docker_installed.rc == 0 + + # List Docker volumes + - name: List Docker volumes + shell: docker volume ls + register: docker_volumes + when: docker_installed.rc == 0 + + - name: Display Docker volumes + debug: + msg: "💾 Docker Volumes: {{ docker_volumes.stdout_lines }}" + when: docker_installed.rc == 0 + + # List Docker networks + - name: List Docker networks + shell: docker network ls + register: docker_networks + when: docker_installed.rc == 0 + + - name: Display Docker networks + debug: + msg: "🌐 Docker Networks: {{ docker_networks.stdout_lines }}" + when: docker_installed.rc == 0 + + # Check container resource usage (Go template protected with raw/endraw) + - name: Check container resource usage + shell: docker stats --no-stream --format "table {% raw %}{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}{% endraw %}" + register: container_stats + when: docker_installed.rc == 0 + + - name: Display container resource usage + debug: + msg: "📊 Container Stats: {{ container_stats.stdout_lines }}" + when: docker_installed.rc == 0 + + # Check the Docker service status + - name: Check Docker service status + systemd: + name: docker + register: docker_service_status + when: docker_installed.rc == 0 + + - name: Display Docker service status + debug: + msg: "🔧 Docker Service: {{ docker_service_status.status.ActiveState }}" + when: docker_installed.rc == 0 + + # Cleanup suggestions + -
name: Suggest cleanup if needed + debug: + msg: | + 💡 Cleanup suggestions: + - Run 'docker system prune -f' to remove unused data + - Run 'docker image prune -f' to remove dangling images + - Run 'docker volume prune -f' to remove unused volumes + when: docker_installed.rc == 0 and (dangling_images.stdout_lines | length > 0 or stopped_containers.stdout_lines | length > 1) \ No newline at end of file diff --git a/ansible/network-connectivity.yml b/ansible/network-connectivity.yml new file mode 100644 index 0000000..989749b --- /dev/null +++ b/ansible/network-connectivity.yml @@ -0,0 +1,143 @@ +--- +- name: Network Connectivity and Performance Check + hosts: all + gather_facts: yes + + vars: + test_domains: + - google.com + - github.com + - docker.io + - tailscale.com + test_ports: + - { host: "8.8.8.8", port: 53, name: "Google DNS" } + - { host: "1.1.1.1", port: 53, name: "Cloudflare DNS" } + - { host: "github.com", port: 443, name: "GitHub HTTPS" } + - { host: "docker.io", port: 443, name: "Docker Hub" } + + tasks: + # 基本网络信息 + - name: Get network interfaces + shell: ip addr show | grep -E "^[0-9]+:|inet " + register: network_interfaces + + - name: Display network interfaces + debug: + msg: "🌐 Network Interfaces: {{ network_interfaces.stdout_lines }}" + + # 检查默认路由 + - name: Check default route + shell: ip route | grep default + register: default_route + + - name: Display default route + debug: + msg: "🛣️ Default Route: {{ default_route.stdout }}" + + # DNS 解析测试 + - name: Test DNS resolution + shell: nslookup {{ item }} | grep -A2 "Name:" + register: dns_test + loop: "{{ test_domains }}" + failed_when: false + + - name: Display DNS test results + debug: + msg: "🔍 DNS Test for {{ item.item }}: {{ 'SUCCESS' if item.rc == 0 else 'FAILED' }}" + loop: "{{ dns_test.results }}" + + # 网络连通性测试 + - name: Test network connectivity (ping) + shell: ping -c 3 {{ item }} + register: ping_test + loop: "{{ test_domains }}" + failed_when: false + + - name: Display ping test results + 
debug: + msg: "🏓 Ping to {{ item.item }}: {{ 'SUCCESS' if item.rc == 0 else 'FAILED' }}" + loop: "{{ ping_test.results }}" + + # Port connectivity test; ignore_errors (unlike failed_when: false) keeps each result's real failed flag for the report and summary + - name: Test port connectivity + wait_for: + host: "{{ item.host }}" + port: "{{ item.port }}" + timeout: 5 + register: port_test + loop: "{{ test_ports }}" + ignore_errors: yes + + - name: Display port test results + debug: + msg: "🔌 {{ item.item.name }} ({{ item.item.host }}:{{ item.item.port }}): {{ 'SUCCESS' if not item.failed else 'FAILED' }}" + loop: "{{ port_test.results }}" + + # Check Tailscale status + - name: Check Tailscale status + shell: tailscale status + register: tailscale_status + failed_when: false + + - name: Display Tailscale status + debug: + msg: "🔗 Tailscale Status: {{ 'CONNECTED' if tailscale_status.rc == 0 else 'NOT CONNECTED' }}" + + - name: Show Tailscale details + debug: + msg: "{{ tailscale_status.stdout_lines }}" + when: tailscale_status.rc == 0 + + # Check firewall status + - name: Check UFW status (Ubuntu/Debian) + shell: ufw status + register: ufw_status + failed_when: false + when: ansible_os_family == "Debian" + + - name: Display UFW status + debug: + msg: "🛡️ UFW Firewall: {{ ufw_status.stdout_lines }}" + when: ansible_os_family == "Debian" and ufw_status.rc == 0 + + # Check iptables rules + - name: Check iptables rules + shell: iptables -L -n | head -20 + register: iptables_rules + failed_when: false + become: yes + + - name: Display iptables summary + debug: + msg: "🔥 Iptables Rules: {{ iptables_rules.stdout_lines[:10] }}" + when: iptables_rules.rc == 0 + + # Network performance test + - name: Test download speed (small file) + shell: curl -o /dev/null -s -w "%{time_total}" http://speedtest.wdc01.softlayer.com/downloads/test10.zip + register: download_speed + failed_when: false + + - name: Display download speed test + debug: + msg: "⚡ Download Speed Test: {{ download_speed.stdout }}s for 10MB file" + when: download_speed.rc == 0 + + # Collect network statistics + - name: Get network statistics + shell: cat /proc/net/dev | grep -v "lo:" | grep ":" + 
register: network_stats + + - name: Display network statistics + debug: + msg: "📊 Network Stats: {{ network_stats.stdout_lines }}" + + # Generate the network health report; port count = results whose failed flag is false + - name: Generate network health summary + debug: + msg: | + 🌐 Network Health Summary for {{ inventory_hostname }}: + ✅ DNS Resolution: {{ (dns_test.results | selectattr('rc', 'equalto', 0) | list | length) }}/{{ test_domains | length }} domains + ✅ Ping Connectivity: {{ (ping_test.results | selectattr('rc', 'equalto', 0) | list | length) }}/{{ test_domains | length }} hosts + ✅ Port Connectivity: {{ (port_test.results | rejectattr('failed') | list | length) }}/{{ test_ports | length }} ports + ✅ Tailscale: {{ 'Connected' if tailscale_status.rc == 0 else 'Disconnected' }} \ No newline at end of file diff --git a/ansible/ops-toolkit.yml b/ansible/ops-toolkit.yml new file mode 100644 index 0000000..46428c8 --- /dev/null +++ b/ansible/ops-toolkit.yml @@ -0,0 +1,131 @@ +--- +- name: Operations Toolkit - Unified Management Dashboard + hosts: all + gather_facts: yes + + vars: + # Available operations playbooks + available_scripts: + - { name: "system-update", desc: "System package updates", file: "system-update.yml" } + - { name: "system-cleanup", desc: "System cleanup and maintenance", file: "system-cleanup.yml" } + - { name: "service-health", desc: "Service health monitoring", file: "service-health-check.yml" } + - { name: "security-hardening", desc: "Security hardening and backup", file: "security-hardening.yml" } + - { name: "docker-management", desc: "Docker container management", file: "docker-management.yml" } + - { name: "network-connectivity", desc: "Network connectivity check", file: "network-connectivity.yml" } + - { name: "certificate-management", desc: "SSL certificate monitoring", file: "certificate-management.yml" } + + tasks: + # Display the system overview + - name: Display system overview + debug: + msg: | + 🖥️ System Overview for {{ inventory_hostname }}: + 📊 OS: {{ ansible_distribution }} {{ ansible_distribution_version }} + 💾 Memory: {{
(ansible_memtotal_mb/1024)|round(1) }}GB total, {{ (ansible_memfree_mb/1024)|round(1) }}GB free + 💿 CPU: {{ ansible_processor_vcpus }} cores + 🏠 Architecture: {{ ansible_architecture }} + 🌐 IP: {{ ansible_default_ipv4.address }} + ⏰ Uptime: {{ ansible_uptime_seconds//86400 }}d {{ (ansible_uptime_seconds%86400)//3600 }}h {{ ((ansible_uptime_seconds%3600)//60) }}m + + # 快速系统状态检查 + - name: Quick system status check + shell: | + echo "=== DISK USAGE ===" + df -h | grep -E "(Filesystem|/dev/)" + echo "" + echo "=== MEMORY USAGE ===" + free -h + echo "" + echo "=== LOAD AVERAGE ===" + uptime + echo "" + echo "=== TOP PROCESSES ===" + ps aux --sort=-%cpu | head -6 + register: quick_status + + - name: Display quick status + debug: + msg: "{{ quick_status.stdout_lines }}" + + # 检查关键服务状态 + - name: Check critical services + systemd: + name: "{{ item }}" + register: service_status + loop: + - ssh + - systemd-resolved + - cron + failed_when: false + + - name: Display service status + debug: + msg: "🔧 {{ item.item }}: {{ item.status.ActiveState if item.status is defined else 'NOT FOUND' }}" + loop: "{{ service_status.results }}" + + # 检查最近的系统日志错误 + - name: Check recent system errors + shell: journalctl --since "1 hour ago" --priority=err --no-pager | tail -10 + register: recent_errors + failed_when: false + + - name: Display recent errors + debug: + msg: "🚨 Recent Errors: {{ recent_errors.stdout_lines if recent_errors.stdout_lines else ['No recent errors found'] }}" + + # 检查网络连接 + - name: Quick network check + shell: | + echo "=== NETWORK INTERFACES ===" + ip -br addr show + echo "" + echo "=== DEFAULT ROUTE ===" + ip route | grep default + echo "" + echo "=== DNS TEST ===" + nslookup google.com | grep -A1 "Name:" || echo "DNS resolution failed" + register: network_check + failed_when: false + + - name: Display network status + debug: + msg: "🌐 Network Status: {{ network_check.stdout_lines }}" + + # 显示可用的运维脚本 + - name: Display available operations scripts + debug: + msg: | + 🛠️ 
Available Operations Scripts:
          {% for script in available_scripts %}
          {{ loop.index }}. {{ script.name }}: {{ script.desc }}
          {% endfor %}

          💡 Usage Examples:
          ansible-playbook -i inventory.ini system-cleanup.yml --limit {{ inventory_hostname }}
          ansible-playbook -i inventory.ini docker-management.yml --limit lxc
          ansible-playbook -i inventory.ini network-connectivity.yml --limit proxmox

    # Print a per-host maintenance schedule
    - name: Generate maintenance recommendations
      debug:
        msg: |
          💡 Maintenance Recommendations for {{ inventory_hostname }}:

          🔄 Regular Tasks (Weekly):
          - Run system-cleanup.yml to free up disk space
          - Check service-health-check.yml for service status
          - Review certificate-management.yml for expiring certificates

          🔒 Security Tasks (Monthly):
          - Execute security-hardening.yml for security updates
          - Review network-connectivity.yml for network security

          🐳 Container Tasks (As needed):
          - Use docker-management.yml for Docker maintenance

          📊 Monitoring Tasks (Daily):
          - Quick check with ops-toolkit.yml (this script)

          ⚡ Emergency Tasks:
          - Use system-update.yml for critical security patches
          - Run network-connectivity.yml for connectivity issues
\ No newline at end of file
diff --git a/ansible/security-hardening.yml b/ansible/security-hardening.yml
new file mode 100644
index 0000000..f5cdd20
--- /dev/null
+++ b/ansible/security-hardening.yml
@@ -0,0 +1,119 @@
---
# Hardens sshd on Debian-family hosts, reports firewall / login / file
# permission findings, and snapshots key config files under /backup/configs.
- name: Security Hardening and Backup
  hosts: all
  become: yes
  gather_facts: yes

  tasks:
    # SSH hardening: enforce the settings listed in the loop
    - name: Check SSH configuration security
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: "{{ item.regexp }}"
        line: "{{ item.line }}"
        backup: yes
        # Refuse to install a file sshd cannot parse: the handler restarts
        # the daemon, and a broken config would lock everyone out.
        validate: '/usr/sbin/sshd -t -f %s'
      loop:
        - { regexp: '^#?PermitRootLogin', line: 'PermitRootLogin no' }
        - { regexp: '^#?PasswordAuthentication', line: 'PasswordAuthentication no' }
        - { regexp: '^#?X11Forwarding', line: 'X11Forwarding no' }
        - { regexp: '^#?MaxAuthTries', line: 'MaxAuthTries 3' }
      notify: restart ssh
      when: ansible_os_family == "Debian"

    # Firewall status (report only; a missing ufw is not an error)
    - name: Check UFW firewall status
      shell: ufw status
      register: ufw_status
      changed_when: false
      failed_when: false
      when: ansible_os_family == "Debian"

    - name: Display firewall status
      debug:
        msg: "🔥 Firewall Status: {{ ufw_status.stdout_lines }}"
      when: ansible_os_family == "Debian" and ufw_status.stdout_lines is defined

    # Suspicious logins: last 10 failed password attempts
    - name: Check for failed login attempts
      shell: grep "Failed password" /var/log/auth.log | tail -10
      register: failed_logins
      changed_when: false
      failed_when: false

    - name: Report suspicious login attempts
      debug:
        msg: "🚨 Recent failed logins: {{ failed_logins.stdout_lines }}"
      when: failed_logins.stdout_lines | length > 0

    # Root activity: last 5 sudo-to-root entries
    - name: Check recent root activity
      shell: grep "sudo.*root" /var/log/auth.log | tail -5
      register: root_activity
      changed_when: false
      failed_when: false

    - name: Display root activity
      debug:
        msg: "👑 Recent root activity: {{ root_activity.stdout_lines }}"
      when: root_activity.stdout_lines | length > 0

    # Back up important configuration files
    - name: Create backup directory
      file:
        path: /backup/configs
        state: directory
        mode: '0700'

    # Each copy is suffixed with the epoch gathered at fact time; files that
    # do not exist on a host are tolerated (failed_when: false).
    - name: Backup important configuration files
      copy:
        src: "{{ item }}"
        dest: "/backup/configs/{{ item | basename }}.{{ ansible_date_time.epoch }}"
        remote_src: yes
        backup: yes
      loop:
        - /etc/ssh/sshd_config
        - /etc/hosts
        - /etc/fstab
        - /etc/crontab
      failed_when: false

    # System integrity: world-writable files in system directories
    - name: Check for world-writable files
      shell: find /etc /usr /bin /sbin -type f -perm -002 2>/dev/null | head -10
      register: world_writable
      changed_when: false

    - name: Report world-writable files
      debug:
        msg: "⚠️ World-writable files found: {{ world_writable.stdout_lines }}"
      when: world_writable.stdout_lines | length > 0

    # SUID binaries (count only)
    - name: Check for SUID files
      shell: find /usr /bin /sbin -type f -perm -4000 2>/dev/null
      register: suid_files
      changed_when: false

    - name: Display SUID files count
      debug:
        msg: "🔐 Found {{ suid_files.stdout_lines | length }} SUID files"

    # Time synchronisation
    - name: Sync system time
      shell: timedatectl set-ntp true
      failed_when: false

    # Read-only query. Hosts without timedatectl must not abort the play
    # here, matching the tolerant "Sync system time" task above.
    - name: Check time synchronization
      shell: timedatectl status
      register: time_status
      changed_when: false
      failed_when: false

    - name: Display time sync status
      debug:
        msg: "🕐 Time sync: {{ time_status.stdout_lines | default([]) | select('match', '.*synchronized.*') | list }}"

  handlers:
    - name: restart ssh
      systemd:
        name: ssh
        state: restarted
      when: ansible_os_family == "Debian"
\ No newline at end of file
diff --git a/ansible/service-health-check.yml b/ansible/service-health-check.yml
new file mode 100644
index 0000000..51e36d9
--- /dev/null
+++ b/ansible/service-health-check.yml
@@ -0,0 +1,135 @@
---
# Queries the state of the service groups listed in `vars` and reports load,
# disk, memory and recent journal errors. Nothing is started or stopped.
- name: Service Health Check and Monitoring
  hosts: all
  become: yes
  gather_facts: yes

  vars:
    critical_services:
      - ssh
      - systemd-resolved
      - cron
    web_services:
      - nginx
      - apache2
    database_services:
      - mysql
      - mariadb
      - postgresql
    container_services:
      - docker
      - containerd
    network_services:
      - tailscale
      - cloudflared

  tasks:
    # Critical system services: report only those that are not active
    - name: Check critical system services
      systemd:
        name: "{{ item }}"
      register: critical_service_status
      loop: "{{ critical_services }}"
      failed_when: false

    - name: Report critical service issues
      debug:
        msg: "⚠️ Critical service {{ item.item }} is {{ item.status.ActiveState | default('not found') }}"
      loop: "{{ critical_service_status.results }}"
      when: item.status is defined and item.status.ActiveState != "active"

    # Web services
    - name: Check web services
      systemd:
        name: "{{ item }}"
      register: web_service_status
      loop: "{{ web_services }}"
      failed_when: false

    - name: Report web service status
      debug:
        msg: "🌐 Web service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ web_service_status.results }}"
      when: item.status is defined

    # Database services
    - name: Check database services
      systemd:
        name: "{{ item }}"
      register: db_service_status
      loop: "{{ database_services }}"
      failed_when: false

    - name: Report database service status
      debug:
        msg: "🗄️ Database service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ db_service_status.results }}"
      when: item.status is defined

    # Container services
    - name: Check container services
      systemd:
        name: "{{ item }}"
      register: container_service_status
      loop: "{{ container_services }}"
      failed_when: false

    - name: Report container service status
      debug:
        msg: "📦 Container service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ container_service_status.results }}"
      when: item.status is defined

    # Network services
    - name: Check network services
      systemd:
        name: "{{ item }}"
      register: network_service_status
      loop: "{{ network_services }}"
      failed_when: false

    - name: Report network service status
      debug:
        msg: "🌐 Network service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ network_service_status.results }}"
      when: item.status is defined

    # System load
    - name: Check system load
      shell: uptime
      register: system_load
      changed_when: false

    - name: Display system load
      debug:
        msg: "📊 System Load: {{ system_load.stdout }}"

    # Disk space warning (filesystems above 80% used)
    # `$5` is text such as "45%"; adding 0 forces a numeric comparison, and
    # NR > 1 skips the header row. -P keeps every filesystem on one line.
    - name: Check disk space usage
      shell: df -hP | awk 'NR > 1 && $5 + 0 > 80 {print $0}'
      register: disk_warning
      changed_when: false

    - name: Warn about high disk usage
      debug:
        msg: "⚠️ High disk usage detected: {{ disk_warning.stdout_lines }}"
      when: disk_warning.stdout_lines | length > 0

    # Memory usage as a percentage of total (row 2 of `free`)
    - name: Check memory usage percentage
      shell: free | awk 'NR==2{printf "%.2f%%", $3*100/$2}'
      register: memory_percent
      changed_when: false

    - name: Display memory usage
      debug:
        msg: "🧠 Memory Usage: {{ memory_percent.stdout }}"

    # Recent system errors (last hour, priority err and above)
    - name: Check recent system errors
      shell: journalctl --since "1 hour ago" --priority=err --no-pager | tail -10
      register: recent_errors
      changed_when: false

    - name: Display recent errors
      debug:
        msg: "🚨 Recent system errors: {{ recent_errors.stdout_lines }}"
      when: recent_errors.stdout_lines | length > 0
\ No newline at end of file
diff --git a/ansible/system-cleanup.yml b/ansible/system-cleanup.yml
new file mode 100644
index 0000000..b7c741c
--- /dev/null
+++ b/ansible/system-cleanup.yml
@@ -0,0 +1,83 @@
---
# Frees disk space: package caches, journal, rotated logs, stale /tmp files
# and unused Docker data, then prints disk and memory usage.
- name: System Cleanup and Maintenance
  hosts: all
  become: yes
  gather_facts: yes

  tasks:
    # Package cache and orphaned packages
    - name: Clean package cache (Debian/Ubuntu)
      apt:
        autoclean: yes
        autoremove: yes
      when: ansible_os_family == "Debian"

    # Same effect as `apt-get autoremove --purge -y`, via the apt module.
    - name: Remove orphaned packages (Debian/Ubuntu)
      apt:
        autoremove: yes
        purge: yes
      when: ansible_os_family == "Debian"

    # Log files
    # journalctl only exists on systemd hosts; skip elsewhere instead of
    # aborting the rest of the cleanup for that host.
    - name: Clean old journal logs (keep 7 days)
      shell: journalctl --vacuum-time=7d
      when: ansible_service_mgr == "systemd"

    - name: Clean old log files
      find:
        paths: /var/log
        patterns: "*.log.*,*.gz"
        age: "7d"
        recurse: yes
      register: old_logs

    - name: Remove old log files
      file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ old_logs.files }}"
      when: old_logs.files is defined

    # Temporary files
    - name: Clean /tmp directory (files older than 7 days)
      find:
        paths: /tmp
        age: "7d"
        recurse: yes
      register: tmp_files

    - name: Remove old temp files
      file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ tmp_files.files }}"
      when: tmp_files.files is defined

    # Docker cleanup (only when docker is on PATH)
    - name: Check if Docker is installed
      command: which docker
      register: docker_check
      failed_when: false
      changed_when: false

    - name: Clean Docker system
      shell: |
        docker system prune -f
        docker image prune -f
        docker volume prune -f
      when: docker_check.rc == 0

    # Disk usage report
    - name: Check disk usage
      shell: df -h
      register: disk_usage
      changed_when: false

    - name: Display disk usage
      debug:
        msg: "{{ disk_usage.stdout_lines }}"

    # Memory usage report
    - name: Check memory usage
      shell: free -h
      register: memory_usage
      changed_when: false

    - name:
Display memory usage
      debug:
        msg: "{{ memory_usage.stdout_lines }}"
\ No newline at end of file
diff --git a/scripts/ops-manager.sh b/scripts/ops-manager.sh
new file mode 100644
index 0000000..31be6f3
--- /dev/null
+++ b/scripts/ops-manager.sh
@@ -0,0 +1,260 @@
#!/bin/bash

# Operations Manager - convenience front end for the Ansible ops playbooks.
# Usage: ./ops-manager.sh [action] [target] [options]

# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

# ANSI colour sequences (expanded by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Paths, resolved relative to this script's location
ANSIBLE_DIR="$(dirname "$0")/../ansible"
INVENTORY="$ANSIBLE_DIR/inventory.ini"

# Action name -> playbook file inside $ANSIBLE_DIR
declare -A OPERATIONS=(
    ["update"]="system-update.yml"
    ["cleanup"]="system-cleanup.yml"
    ["health"]="service-health-check.yml"
    ["security"]="security-hardening.yml"
    ["docker"]="docker-management.yml"
    ["network"]="network-connectivity.yml"
    ["cert"]="certificate-management.yml"
    ["toolkit"]="ops-toolkit.yml"
    ["cloud"]="cloud-providers-update.yml"
)

# Target name accepted on the command line -> inventory host pattern
declare -A TARGETS=(
    ["all"]="all"
    ["lxc"]="lxc"
    ["alpine"]="alpine"
    ["proxmox"]="proxmox"
    ["armbian"]="armbian"
    ["hcp"]="hcp"
    ["feiniu"]="feiniu"
    ["dev"]="dev"
    ["oci-kr"]="oci_kr"
    ["oci-us"]="oci_us"
    ["huawei"]="huawei"
    ["google"]="google"
    ["aws"]="aws"
    ["germany"]="germany"
)

#######################################
# Print usage: every action, every target, examples and options.
# Globals:  OPERATIONS, TARGETS, colour constants (read)
# Outputs:  help text on stdout
#######################################
show_help() {
    local op target

    echo -e "${CYAN}🛠️ Operations Manager - 运维脚本管理工具${NC}"
    echo ""
    echo -e "${YELLOW}使用方法:${NC}"
    echo " $0 [操作] [目标] [选项]"
    echo ""
    echo -e "${YELLOW}可用操作:${NC}"
    for op in "${!OPERATIONS[@]}"; do
        echo -e " ${GREEN}$op${NC} - ${OPERATIONS[$op]}"
    done
    echo ""
    echo -e "${YELLOW}可用目标:${NC}"
    for target in "${!TARGETS[@]}"; do
        echo -e " ${BLUE}$target${NC} - ${TARGETS[$target]}"
    done
    echo ""
    echo -e "${YELLOW}示例:${NC}"
    echo -e " $0 ${GREEN}update${NC} ${BLUE}lxc${NC} # 更新 LXC 容器"
    echo -e " $0 ${GREEN}cleanup${NC} ${BLUE}all${NC} # 清理所有服务器"
    echo -e " $0 ${GREEN}health${NC} ${BLUE}proxmox${NC} # 检查 Proxmox 健康状态"
    echo -e " $0 ${GREEN}docker${NC} ${BLUE}lxc${NC} # 管理 LXC 中的 Docker"
    echo -e " $0 ${GREEN}toolkit${NC} ${BLUE}germany${NC} # 运行德国服务器工具包"
    echo ""
    echo -e "${YELLOW}选项:${NC}"
    echo -e " ${PURPLE}--dry-run${NC} 仅显示将要执行的命令"
    echo -e " ${PURPLE}--verbose${NC} 显示详细输出"
    echo -e " ${PURPLE}--check${NC} 检查模式(不做实际更改)"
    echo -e " ${PURPLE}--help${NC} 显示此帮助信息"
}

#######################################
# Verify the tooling is present and list inventory groups with host counts.
# Globals:  INVENTORY, colour constants (read)
# Outputs:  status report on stdout, errors on stderr
# Exits:    1 when ansible, jq or the inventory file is missing
#######################################
show_status() {
    local group count

    echo -e "${CYAN}📊 系统状态概览${NC}"
    echo ""

    # Is Ansible available?
    if command -v ansible >/dev/null 2>&1; then
        echo -e "${GREEN}✅ Ansible 已安装${NC}"
    else
        echo -e "${RED}❌ Ansible 未安装${NC}"
        exit 1
    fi

    # Does the inventory file exist?
    if [ -f "$INVENTORY" ]; then
        echo -e "${GREEN}✅ Inventory 文件存在${NC}"
        echo -e " 📁 路径: $INVENTORY"
    else
        echo -e "${RED}❌ Inventory 文件不存在${NC}"
        exit 1
    fi

    # The group listing is produced with jq; fail with a clear message
    # instead of a bare "command not found" from inside the pipeline.
    if ! command -v jq >/dev/null 2>&1; then
        echo -e "${RED}❌ jq 未安装${NC}" >&2
        exit 1
    fi

    # List host groups. The inventory is parsed once and jq emits
    # "<group><TAB><host count>" per group, rather than re-running
    # ansible-inventory for every group and splicing the group name into
    # the jq program text.
    echo ""
    echo -e "${YELLOW}📋 可用主机组:${NC}"
    ansible-inventory -i "$INVENTORY" --list \
        | jq -r 'to_entries[]
                 | select(.key != "_meta")
                 | "\(.key)\t\(.value.hosts // [] | length)"' \
        | sort -t $'\t' -k1,1 \
        | while IFS=$'\t' read -r group count; do
            echo -e " ${BLUE}$group${NC}: $count 台主机"
        done
}

#######################################
# Resolve an action/target pair and run the matching playbook.
# Globals:   OPERATIONS, TARGETS, INVENTORY, ANSIBLE_DIR (read)
# Arguments: $1 - action name (key of OPERATIONS)
#            $2 - target name (key of TARGETS)
#            $3 - option string; may contain --check, --verbose, --dry-run
# Outputs:   summary of what will run, then ansible-playbook's own output
# Returns:   0 after a dry run; otherwise ansible-playbook's status
# Exits:     1 on unknown action/target or unreadable confirmation,
#            0 when the user declines
#######################################
run_ansible() {
    local operation=$1
    local target=$2
    local options=${3:-}

    # An empty subscript is an error for associative arrays, so only index
    # with non-empty keys; unknown keys fall back to "".
    local playbook="" host_pattern=""
    if [[ -n "$operation" ]]; then
        playbook=${OPERATIONS[$operation]:-}
    fi
    if [[ -n "$target" ]]; then
        host_pattern=${TARGETS[$target]:-}
    fi

    if [ -z "$playbook" ]; then
        echo -e "${RED}❌ 未知操作: $operation${NC}"
        show_help
        exit 1
    fi

    if [ -z "$host_pattern" ]; then
        echo -e "${RED}❌ 未知目标: $target${NC}"
        show_help
        exit 1
    fi

    # Build the command as an argv array so paths containing spaces or shell
    # metacharacters reach ansible-playbook intact; no eval is needed.
    local -a ansible_cmd=(
        ansible-playbook -i "$INVENTORY" "$ANSIBLE_DIR/$playbook"
        --limit "$host_pattern"
    )

    # Optional flags
    if [[ "$options" == *"--check"* ]]; then
        ansible_cmd+=(--check)
    fi

    if [[ "$options" == *"--verbose"* ]]; then
        ansible_cmd+=(-v)
    fi

    echo -e "${CYAN}🚀 执行操作${NC}"
    echo -e "操作: ${GREEN}$operation${NC} ($playbook)"
    echo -e "目标: ${BLUE}$target${NC} ($host_pattern)"
    echo -e "命令: ${PURPLE}${ansible_cmd[*]}${NC}"
    echo ""

    if [[ "$options" == *"--dry-run"* ]]; then
        echo -e "${YELLOW}🔍 DRY RUN 模式 - 仅显示命令,不执行${NC}"
        return 0
    fi

    # Ask for confirmation. A failed read (e.g. stdin closed under cron)
    # is reported instead of letting `set -e` end the script silently.
    local reply=""
    if ! read -r -n 1 -p "确认执行? (y/N): " reply; then
        echo
        echo -e "${RED}❌ 无法读取确认输入${NC}" >&2
        exit 1
    fi
    echo
    if [[ ! $reply =~ ^[Yy]$ ]]; then
        echo -e "${YELLOW}⏹️ 操作已取消${NC}"
        exit 0
    fi

    echo -e "${GREEN}▶️ 开始执行...${NC}"
    "${ansible_cmd[@]}"
}

#######################################
# Menu-driven front end: pick an action, a target and one run option,
# then hand over to run_ansible.
# Globals:  OPERATIONS, TARGETS, colour constants (read)
# Exits:    1 on an invalid menu choice
#######################################
interactive_mode() {
    local i
    local op_choice="" target_choice="" option_choice=""
    local -a ops=() targets=()

    echo -e "${CYAN}🎯 交互式运维管理${NC}"
    echo ""

    # Choose the action
    echo -e "${YELLOW}选择操作:${NC}"
    mapfile -t ops < <(printf '%s\n' "${!OPERATIONS[@]}" | sort)
    for i in "${!ops[@]}"; do
        echo -e " $((i+1)). ${GREEN}${ops[i]}${NC} - ${OPERATIONS[${ops[i]}]}"
    done

    read -r -p "请选择操作 (1-${#ops[@]}): " op_choice
    # 10# forces base 10 so input such as "08" is not parsed as octal.
    if [[ ! "$op_choice" =~ ^[0-9]+$ ]] \
        || (( 10#$op_choice < 1 || 10#$op_choice > ${#ops[@]} )); then
        echo -e "${RED}❌ 无效选择${NC}"
        exit 1
    fi

    local selected_op="${ops[$((10#$op_choice - 1))]}"

    # Choose the target
    echo ""
    echo -e "${YELLOW}选择目标:${NC}"
    mapfile -t targets < <(printf '%s\n' "${!TARGETS[@]}" | sort)
    for i in "${!targets[@]}"; do
        echo -e " $((i+1)). ${BLUE}${targets[i]}${NC} - ${TARGETS[${targets[i]}]}"
    done

    read -r -p "请选择目标 (1-${#targets[@]}): " target_choice
    if [[ ! "$target_choice" =~ ^[0-9]+$ ]] \
        || (( 10#$target_choice < 1 || 10#$target_choice > ${#targets[@]} )); then
        echo -e "${RED}❌ 无效选择${NC}"
        exit 1
    fi

    local selected_target="${targets[$((10#$target_choice - 1))]}"

    # Choose the run option; anything other than 2-4 means a normal run.
    echo ""
    echo -e "${YELLOW}选择执行选项:${NC}"
    echo -e " 1. ${GREEN}正常执行${NC}"
    echo -e " 2. ${PURPLE}检查模式${NC} (--check)"
    echo -e " 3. ${PURPLE}详细输出${NC} (--verbose)"
    echo -e " 4. ${PURPLE}仅显示命令${NC} (--dry-run)"

    read -r -p "请选择选项 (1-4): " option_choice

    local options=""
    case "$option_choice" in
        2) options="--check" ;;
        3) options="--verbose" ;;
        4) options="--dry-run" ;;
    esac

    run_ansible "$selected_op" "$selected_target" "$options"
}

#######################################
# Entry point: dispatch on the first argument.
# Arguments: none                     -> interactive menu
#            --help | --status | --interactive (also -h/-s/-i or bare word)
#            <action> <target> [options...]
#######################################
main() {
    # No arguments: fall back to the interactive menu
    if [ $# -eq 0 ]; then
        interactive_mode
        exit 0
    fi

    case "$1" in
        --help|-h|help)
            show_help
            ;;
        --status|-s|status)
            show_status
            ;;
        --interactive|-i|interactive)
            interactive_mode
            ;;
        *)
            if [ $# -lt 2 ]; then
                echo -e "${RED}❌ 参数不足${NC}"
                show_help
                exit 1
            fi

            local operation=$1
            local target=$2
            # Join the remaining words into one string; run_ansible matches
            # flags by substring.
            local options="${*:3}"

            run_ansible "$operation" "$target" "$options"
            ;;
    esac
}

# Run the entry point
main "$@"
\ No newline at end of file