# Source listing: mgmt/pve/deep-595-investigation-part... (path truncated in the listing)
# Listing metadata: 169 lines, 5.8 KiB, YAML

---
# Read-only diagnostics play for the "595" errors under investigation.
# Runs on every host in the pve_cluster inventory group. Each probe is a
# shell task whose output is registered and then printed by a debug task.
#
# Conventions used by every shell probe below:
#   changed_when: false - the commands only read state, so they must never
#                         be reported as a change.
#   failed_when: false  - many probes end in grep (exit 1 when nothing
#                         matches), systemctl status (non-zero when the unit
#                         is not active) or curl (non-zero when the
#                         connection fails). Those outcomes are findings,
#                         not task failures, and must not drop the host from
#                         the remaining probes.
- name: Deep 595 Error Investigation - Part 2
  hosts: pve_cluster
  gather_facts: true
  tasks:
    # --- pveproxy and system journal ------------------------------------
    - name: Check PVE proxy real-time logs
      shell: |
        echo "=== PVE Proxy Logs (last 50 lines) ==="
        journalctl -u pveproxy -n 50 --no-pager
        echo "=== System Logs with 595 errors ==="
        journalctl -n 200 --no-pager | grep -i "595\|no route\|connection.*refused\|connection.*reset"
      register: pve_proxy_logs
      changed_when: false
      failed_when: false

    - name: Display PVE proxy logs
      debug:
        msg: "{{ pve_proxy_logs.stdout_lines }}"

    # --- addresses, routes, neighbours, socket summary -------------------
    - name: Check system network errors
      shell: |
        echo "=== Network Interface Status ==="
        ip addr show
        echo "=== Routing Table ==="
        ip route show
        echo "=== ARP Table ==="
        arp -a 2>/dev/null || echo "ARP table empty"
        echo "=== Network Statistics ==="
        ss -s
      register: network_status
      changed_when: false
      failed_when: false

    - name: Display network status
      debug:
        msg: "{{ network_status.stdout_lines }}"

    # --- cluster membership and quorum ------------------------------------
    - name: Check PVE cluster communication
      shell: |
        echo "=== PVE Cluster Status ==="
        pvecm status 2>/dev/null || echo "Cluster status failed"
        echo "=== PVE Cluster Nodes ==="
        pvecm nodes 2>/dev/null || echo "Cluster nodes failed"
        echo "=== PVE Cluster Quorum ==="
        # pvecm does not document a "quorum" subcommand, so the previous
        # "pvecm quorum status" always fell through to the failure message.
        # corosync-quorumtool -s prints the quorum state directly.
        corosync-quorumtool -s 2>/dev/null || echo "Quorum status failed"
      register: cluster_status
      changed_when: false
      failed_when: false

    - name: Display cluster status
      debug:
        msg: "{{ cluster_status.stdout_lines }}"

    # --- packet filtering -------------------------------------------------
    - name: Check firewall and iptables
      shell: |
        echo "=== PVE Firewall Status ==="
        pve-firewall status 2>/dev/null || echo "PVE firewall status failed"
        echo "=== UFW Status ==="
        ufw status 2>/dev/null || echo "UFW not available"
        echo "=== iptables Rules ==="
        iptables -L -n 2>/dev/null || echo "iptables not available"
        echo "=== iptables NAT Rules ==="
        iptables -t nat -L -n 2>/dev/null || echo "iptables NAT not available"
      register: firewall_status
      changed_when: false
      failed_when: false

    - name: Display firewall status
      debug:
        msg: "{{ firewall_status.stdout_lines }}"

    # --- reachability of host "pve" from every other host ----------------
    # Skipped on the host whose inventory name is 'pve'.
    - name: Test connectivity with detailed output
      shell: |
        echo "=== Testing connectivity to PVE ==="
        echo "1. DNS Resolution:"
        nslookup pve 2>/dev/null || echo "DNS resolution failed"
        echo "2. Ping Test:"
        ping -c 3 pve
        echo "3. Port Connectivity:"
        # nc -zv reports its verdict on stderr; merge it into stdout so the
        # display task (which prints stdout_lines) shows it. -w bounds the
        # wait when packets are silently dropped.
        nc -zv -w 5 pve 8006 2>&1
        echo "4. HTTP Test:"
        curl -k -v -m 10 https://pve:8006 2>&1 | head -20
        echo "5. HTTP Status Code:"
        # Same 10 s ceiling as the verbose request above so an unreachable
        # target cannot stall the play.
        curl -k -s -m 10 -o /dev/null -w "HTTP Status: %{http_code}, Time: %{time_total}s, Size: %{size_download} bytes\n" https://pve:8006
      register: connectivity_test
      changed_when: false
      failed_when: false
      when: inventory_hostname != 'pve'

    - name: Display connectivity test results
      debug:
        msg: "{{ connectivity_test.stdout_lines }}"
      when: inventory_hostname != 'pve'

    # --- pveproxy process, listener, config files, unit state -----------
    - name: Check PVE proxy configuration
      shell: |
        echo "=== PVE Proxy Process Info ==="
        ps aux | grep pveproxy | grep -v grep
        echo "=== PVE Proxy Port Binding ==="
        ss -tlnp | grep 8006
        echo "=== PVE Proxy Configuration Files ==="
        find /etc -name "*pveproxy*" -type f 2>/dev/null
        echo "=== PVE Proxy Service Status ==="
        systemctl status pveproxy --no-pager
      register: pve_proxy_config
      changed_when: false
      failed_when: false

    - name: Display PVE proxy configuration
      debug:
        msg: "{{ pve_proxy_config.stdout_lines }}"

    # --- memory, disk, load, sockets on 8006 ------------------------------
    - name: Check system resources
      shell: |
        echo "=== Memory Usage ==="
        free -h
        echo "=== Disk Usage ==="
        df -h
        echo "=== Load Average ==="
        uptime
        echo "=== Network Connections ==="
        ss -tuln | grep 8006
      register: system_resources
      changed_when: false
      failed_when: false

    - name: Display system resources
      debug:
        msg: "{{ system_resources.stdout_lines }}"

    # --- generic error keywords in the journal ----------------------------
    - name: Check for any error patterns
      shell: |
        echo "=== Recent Error Patterns ==="
        journalctl -n 500 --no-pager | grep -i "error\|fail\|refuse\|deny\|timeout\|connection.*reset" | tail -20
        echo "=== PVE Specific Errors ==="
        journalctl -u pveproxy -n 100 --no-pager | grep -i "error\|fail\|refuse\|deny\|timeout"
      register: error_patterns
      changed_when: false
      failed_when: false

    - name: Display error patterns
      debug:
        msg: "{{ error_patterns.stdout_lines }}"

    # --- local API request, only on the host named 'pve' -----------------
    - name: Test PVE API access
      uri:
        url: "https://localhost:8006/api2/json/version"
        method: GET
        validate_certs: false
        timeout: 10
      register: pve_api_test
      ignore_errors: true
      when: inventory_hostname == 'pve'

    - name: Display PVE API test result
      debug:
        # default(-1) keeps the expression safe if the failed request left no
        # "status" key in the registered result.
        msg: "PVE API access: {{ 'SUCCESS' if (pve_api_test.status | default(-1)) == 200 else 'FAILED' }}"
      when: inventory_hostname == 'pve' and pve_api_test is defined

    # --- request and error lines logged by pveproxy -----------------------
    - name: Check PVE proxy access control
      shell: |
        echo "=== PVE Proxy Access Logs ==="
        journalctl -u pveproxy -n 100 --no-pager | grep -E "GET|POST|PUT|DELETE" | tail -10
        echo "=== PVE Proxy Error Logs ==="
        journalctl -u pveproxy -n 100 --no-pager | grep -i "error\|fail\|refuse\|deny" | tail -10
      register: pve_proxy_access
      changed_when: false
      failed_when: false

    - name: Display PVE proxy access logs
      debug:
        msg: "{{ pve_proxy_access.stdout_lines }}"

    # --- link layer: interfaces, bridges, VLANs ---------------------------
    - name: Check network interface details
      shell: |
        echo "=== Network Interface Details ==="
        ip link show
        echo "=== Bridge Information ==="
        bridge link show 2>/dev/null || echo "Bridge command not available"
        echo "=== VLAN Information ==="
        ip link show type vlan 2>/dev/null || echo "No VLAN interfaces"
      register: network_interface_details
      changed_when: false
      failed_when: false

    - name: Display network interface details
      debug:
        msg: "{{ network_interface_details.stdout_lines }}"