# Listing metadata: 135 lines, 4.0 KiB, YAML
---
# Read-only health report. For each service group the play asks systemd for
# the unit state and prints it, then prints system load, filesystems above
# 80% usage, memory usage, and the last error-priority journal entries from
# the past hour.
- name: Service Health Check and Monitoring
  hosts: all
  become: true
  gather_facts: true

  vars:
    # Units that are reported with a warning when they are not "active".
    # NOTE(review): "ssh" is the Debian/Ubuntu unit name; other distributions
    # commonly use "sshd" - confirm against the target hosts.
    critical_services:
      - ssh
      - systemd-resolved
      - cron
    web_services:
      - nginx
      - apache2
    database_services:
      - mysql
      - mariadb
      - postgresql
    container_services:
      - docker
      - containerd
    # NOTE(review): the Tailscale daemon unit is usually named "tailscaled";
    # left unchanged here - confirm against the target hosts.
    network_services:
      - tailscale
      - cloudflared

  tasks:
    # Critical system services.
    # Only `name` is passed, so no state change is requested; the registered
    # result is inspected by the report task. `failed_when: false` keeps a
    # unit that cannot be queried from aborting the play.
    - name: Check critical system services
      systemd:
        name: "{{ item }}"
      register: critical_service_status
      loop: "{{ critical_services }}"
      failed_when: false

    # Only units whose ActiveState is something other than "active" are
    # reported; results without a `status` key are skipped.
    - name: Report critical service issues
      debug:
        msg: "⚠️ Critical service {{ item.item }} is {{ item.status.ActiveState | default('not found') }}"
      loop: "{{ critical_service_status.results }}"
      loop_control:
        # Show the unit name instead of the whole registered result.
        label: "{{ item.item }}"
      when: item.status is defined and item.status.ActiveState != "active"

    # Web services.
    - name: Check web services
      systemd:
        name: "{{ item }}"
      register: web_service_status
      loop: "{{ web_services }}"
      failed_when: false

    - name: Report web service status
      debug:
        msg: "🌐 Web service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ web_service_status.results }}"
      loop_control:
        label: "{{ item.item }}"
      when: item.status is defined

    # Database services.
    - name: Check database services
      systemd:
        name: "{{ item }}"
      register: db_service_status
      loop: "{{ database_services }}"
      failed_when: false

    - name: Report database service status
      debug:
        msg: "🗄️ Database service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ db_service_status.results }}"
      loop_control:
        label: "{{ item.item }}"
      when: item.status is defined

    # Container services.
    - name: Check container services
      systemd:
        name: "{{ item }}"
      register: container_service_status
      loop: "{{ container_services }}"
      failed_when: false

    - name: Report container service status
      debug:
        msg: "📦 Container service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ container_service_status.results }}"
      loop_control:
        label: "{{ item.item }}"
      when: item.status is defined

    # Network services.
    - name: Check network services
      systemd:
        name: "{{ item }}"
      register: network_service_status
      loop: "{{ network_services }}"
      failed_when: false

    - name: Report network service status
      debug:
        msg: "🌐 Network service {{ item.item }}: {{ item.status.ActiveState | default('not installed') }}"
      loop: "{{ network_service_status.results }}"
      loop_control:
        label: "{{ item.item }}"
      when: item.status is defined

    # System load.
    # `uptime` needs no shell features, so it runs through `command`; it is a
    # pure read, hence never "changed".
    - name: Check system load
      command: uptime
      register: system_load
      changed_when: false

    - name: Display system load
      debug:
        msg: "📊 System Load: {{ system_load.stdout }}"

    # Disk space warning.
    # `NR > 1` skips the header row and `$5 + 0` turns "85%" into the number
    # 85, so the comparison is numeric rather than a string comparison.
    # `-P` keeps every filesystem on a single output line.
    - name: Check disk space usage
      shell: df -hP | awk 'NR > 1 && $5 + 0 > 80'
      register: disk_warning
      changed_when: false

    - name: Warn about high disk usage
      debug:
        msg: "⚠️ High disk usage detected: {{ disk_warning.stdout_lines }}"
      when: disk_warning.stdout_lines | length > 0

    # Memory usage percentage.
    # Second line of `free` output: column 3 (used) as a share of column 2
    # (total), formatted with two decimals.
    - name: Check memory usage percentage
      shell: free | awk 'NR==2{printf "%.2f%%", $3*100/$2}'
      register: memory_percent
      changed_when: false

    - name: Display memory usage
      debug:
        msg: "🧠 Memory Usage: {{ memory_percent.stdout }}"

    # Recent system errors: last 10 journal lines of priority "err" or worse
    # from the past hour.
    - name: Check recent system errors
      shell: journalctl --since "1 hour ago" --priority=err --no-pager | tail -10
      register: recent_errors
      changed_when: false

    - name: Display recent errors
      debug:
        msg: "🚨 Recent system errors: {{ recent_errors.stdout_lines }}"
      when: recent_errors.stdout_lines | length > 0