diff --git a/configuration/inventories/production/hosts b/configuration/inventories/production/hosts index f20a4e0..3a65d9d 100644 --- a/configuration/inventories/production/hosts +++ b/configuration/inventories/production/hosts @@ -1,15 +1,22 @@ -[nomad_nodes] -# 本机节点 (已通过PVE挂载NFS) -semaphore ansible_host=100.116.158.95 ansible_user=root - -# 云服务器节点 (需要配置NFS挂载) +[nomad_servers] +# 服务器节点 (7个服务器节点) +bj-semaphore ansible_host=100.116.158.95 ansible_user=root ash1d.global ansible_host=100.81.26.3 ansible_user=ben ansible_password=3131 ansible_become_password=3131 ash2e.global ansible_host=100.103.147.94 ansible_user=ben ansible_password=3131 ansible_become_password=3131 ch2.global ansible_host=100.90.159.68 ansible_user=ben ansible_password=3131 ansible_become_password=3131 ch3.global ansible_host=100.86.141.112 ansible_user=ben ansible_password=3131 ansible_become_password=3131 +onecloud1 ansible_host=100.98.209.50 ansible_user=ben ansible_password=3131 ansible_become_password=3131 +de ansible_host=100.120.225.29 ansible_user=ben ansible_password=3131 ansible_become_password=3131 + +[nomad_clients] +# 客户端节点 master ansible_host=100.117.106.136 ansible_user=ben ansible_password=3131 ansible_become_password=3131 ansible_port=60022 ash3c ansible_host=100.116.80.94 ansible_user=ben ansible_password=3131 ansible_become_password=3131 +[nomad_nodes:children] +nomad_servers +nomad_clients + [nomad_nodes:vars] # NFS配置 nfs_server=snail diff --git a/configuration/inventories/production/inventory.ini b/configuration/inventories/production/inventory.ini index 517f056..ddfac77 100644 --- a/configuration/inventories/production/inventory.ini +++ b/configuration/inventories/production/inventory.ini @@ -66,7 +66,8 @@ hcp2 ansible_host=hcp2 ansible_user=root ansible_become=yes ansible_become_pass= snail ansible_host=snail ansible_user=houzhongxu ansible_ssh_pass=Aa313131@ben ansible_become=yes ansible_become_pass=Aa313131@ben [armbian] -onecloud1 ansible_host=onecloud1 ansible_user=ben ansible_ssh_pass=3131 ansible_become=yes ansible_become_pass=3131 +onecloud1 ansible_host=100.98.209.50 ansible_user=ben ansible_password=3131 ansible_become_password=3131 +de ansible_host=100.120.225.29 ansible_user=ben ansible_password=3131 ansible_become_password=3131 [beijing:children] nomadlxc @@ -85,6 +86,7 @@ ditigalocean oci_us oci_kr semaphore +armbian [nomad_cluster:children] nomad_servers diff --git a/configuration/inventories/production/nomad-local.ini b/configuration/inventories/production/nomad-local.ini deleted file mode 100644 index 50ed83d..0000000 --- a/configuration/inventories/production/nomad-local.ini +++ /dev/null @@ -1,15 +0,0 @@ -[nomad_servers] -localhost ansible_connection=local nomad_role=server nomad_bootstrap_expect=1 - -[nomad_clients] -# 如果需要客户端节点,可以在这里添加 - -[nomad_cluster:children] -nomad_servers -nomad_clients - -[nomad_cluster:vars] -ansible_user=root -nomad_version=1.6.2 -nomad_datacenter=dc1 -nomad_region=global \ No newline at end of file diff --git a/configuration/playbooks/debug/debug-nomad-nodes.yml b/configuration/playbooks/debug/debug-nomad-nodes.yml deleted file mode 100644 index abd0b0f..0000000 --- a/configuration/playbooks/debug/debug-nomad-nodes.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- -- name: Gather Nomad debug information from multiple nodes - hosts: all - become: yes - tasks: - - name: Get Nomad service status - shell: systemctl status nomad --no-pager -l - register: nomad_status - changed_when: false - failed_when: false - - - name: Get last 50 lines of Nomad journal logs - shell: journalctl -u nomad -n 50 --no-pager - register: nomad_journal - changed_when: false - failed_when: false - - - name: Display Nomad Status - debug: - msg: | - --- Nomad Status for {{ inventory_hostname }} --- - {{ nomad_status.stdout }} - {{ nomad_status.stderr }} - - - name: Display Nomad Journal - debug: - msg: | - --- Nomad Journal for {{ inventory_hostname }} --- - {{ nomad_journal.stdout }} - {{ nomad_journal.stderr }} \ No newline at end of file diff --git a/configuration/playbooks/debug/debug-nomad-podman.yml b/configuration/playbooks/debug/debug-nomad-podman.yml deleted file mode 100644 index 368f9fb..0000000 --- a/configuration/playbooks/debug/debug-nomad-podman.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Debug Nomad Podman Driver Issues - hosts: all - become: yes - vars: - nomad_user: nomad - - tasks: - - name: Check Nomad configuration - shell: cat /etc/nomad.d/nomad.hcl - register: nomad_config - - - name: Display Nomad configuration - debug: - var: nomad_config.stdout_lines - - - name: Check plugin directory contents - shell: ls -la /opt/nomad/data/plugins/ - register: plugin_dir - - - name: Display plugin directory - debug: - var: plugin_dir.stdout_lines - - - name: Check Nomad logs for plugin loading - shell: journalctl -u nomad -n 50 --no-pager | grep -E "(plugin|driver|podman)" - register: nomad_logs - failed_when: false - - - name: Display relevant Nomad logs - debug: - var: nomad_logs.stdout_lines - - - name: Check if plugin is executable - stat: - path: /opt/nomad/data/plugins/nomad-driver-podman - register: plugin_stat - - - name: Display plugin file info - debug: - var: plugin_stat - - - name: Test plugin directly - shell: /opt/nomad/data/plugins/nomad-driver-podman --version - register: plugin_version - failed_when: false - become_user: "{{ nomad_user }}" - - - name: Display plugin version - debug: - msg: "Plugin version test: {{ 'SUCCESS' if plugin_version.rc == 0 else 'FAILED' }} - {{ plugin_version.stdout if plugin_version.rc == 0 else plugin_version.stderr }}" - - - name: Check Podman socket accessibility - shell: sudo -u {{ nomad_user }} curl --unix-socket /run/user/1001/podman/podman.sock http://localhost/v1.0.0/libpod/info 2>/dev/null | head -3 - register: podman_socket_test - failed_when: false - - - name: Display Podman socket test - debug: - msg: "Podman socket test: {{ 'SUCCESS' if podman_socket_test.rc == 0 else 'FAILED' }}" \ No newline at end of file diff --git a/configuration/playbooks/debug/debug-syd.yml b/configuration/playbooks/debug/debug-syd.yml deleted file mode 100644 index 4786e17..0000000 --- a/configuration/playbooks/debug/debug-syd.yml +++ /dev/null @@ -1,12 +0,0 @@ -- name: Distribute new podman binary to syd - hosts: syd - gather_facts: false - tasks: - - name: Copy new podman binary to /usr/local/bin - copy: - src: /root/mgmt/configuration/podman-remote-static-linux_amd64 - dest: /usr/local/bin/podman - owner: root - group: root - mode: '0755' - become: yes \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-apt-errors.yml b/configuration/playbooks/fix/fix-apt-errors.yml deleted file mode 100644 index ca8c0d5..0000000 --- a/configuration/playbooks/fix/fix-apt-errors.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- -- name: Debug apt repository issues - hosts: beijing:children - become: yes - ignore_unreachable: yes - tasks: - - name: Run apt-get update to capture error - ansible.builtin.shell: apt-get update - register: apt_update_result - failed_when: false - changed_when: false - - - name: Display apt-get update stderr - ansible.builtin.debug: - var: apt_update_result.stderr - verbosity: 2 \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-duplicate-podman-config.yml b/configuration/playbooks/fix/fix-duplicate-podman-config.yml deleted file mode 100644 index 15b6852..0000000 --- a/configuration/playbooks/fix/fix-duplicate-podman-config.yml +++ /dev/null @@ -1,126 +0,0 @@ ---- -- name: Fix duplicate Podman configuration in Nomad - hosts: nomad_cluster - become: yes - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Backup current configuration - copy: - src: /etc/nomad.d/nomad.hcl - dest: /etc/nomad.d/nomad.hcl.backup-duplicate-fix - remote_src: yes - - - name: Read current configuration - slurp: - src: /etc/nomad.d/nomad.hcl - register: current_config - - - name: Create clean configuration for clients - copy: - content: | - datacenter = "{{ nomad_datacenter }}" - region = "{{ nomad_region }}" - data_dir = "/opt/nomad/data" - bind_addr = "{{ tailscale_ip }}" - - server { - enabled = false - } - - client { - enabled = true - servers = ["100.116.158.95:4647", "100.117.106.136:4647", "100.86.141.112:4647", "100.81.26.3:4647", "100.103.147.94:4647"] - } - - ui { - enabled = true - } - - addresses { - http = "0.0.0.0" - rpc = "{{ tailscale_ip }}" - serf = "{{ tailscale_ip }}" - } - - ports { - http = 4646 - rpc = 4647 - serf = 4648 - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - recover_stopped = true - } - } - - consul { - auto_advertise = false - server_auto_join = false - client_auto_join = false - } - - log_level = "INFO" - enable_syslog = true - dest: /etc/nomad.d/nomad.hcl - owner: nomad - group: nomad - mode: '0640' - when: nomad_role == "client" - - - name: Ensure Podman is installed - package: - name: podman - state: present - - - name: Enable and start Podman socket - systemd: - name: podman.socket - enabled: yes - state: started - - - name: Set proper permissions on Podman socket - file: - path: /run/podman/podman.sock - mode: '0666' - ignore_errors: yes - - - name: Validate Nomad configuration - shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: config_validation.rc != 0 - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 - - - name: Wait for drivers to load - pause: - seconds: 20 - - - name: Check driver status - shell: | - /usr/local/bin/nomad node status -self | grep -A 10 "Driver Status" || /usr/bin/nomad node status -self | grep -A 10 "Driver Status" - register: driver_status - failed_when: false - - - name: Display driver status - debug: - var: driver_status.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-hashicorp-apt-source.yml b/configuration/playbooks/fix/fix-hashicorp-apt-source.yml deleted file mode 100644 index 5f85617..0000000 --- a/configuration/playbooks/fix/fix-hashicorp-apt-source.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -- name: 直接复制正确的 HashiCorp APT 源配置 - hosts: nomad_cluster - become: yes - - tasks: - - name: 创建正确的 HashiCorp APT 源配置 - copy: - content: "deb [trusted=yes] http://apt.releases.hashicorp.com {{ ansible_distribution_release }} main\n" - dest: "/etc/apt/sources.list.d/hashicorp.list" - owner: root - group: root - mode: '0644' - - - name: 更新 APT 缓存 - apt: - update_cache: yes - ignore_errors: yes - - - name: 验证配置 - command: cat /etc/apt/sources.list.d/hashicorp.list - register: config_check - changed_when: false - - - name: 显示配置内容 - debug: - msg: "HashiCorp APT 源配置: {{ config_check.stdout }}" \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-hcp-podman.yml b/configuration/playbooks/fix/fix-hcp-podman.yml deleted file mode 100644 index d76a533..0000000 --- a/configuration/playbooks/fix/fix-hcp-podman.yml +++ /dev/null @@ -1,83 +0,0 @@ ---- -- name: Fix HCP1 and HCP2 Podman Configuration - hosts: hcp1,hcp2 - become: yes - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Ensure nomad user exists - user: - name: nomad - system: yes - shell: /bin/false - home: /home/nomad - create_home: yes - - - name: Ensure Podman socket is running - systemd: - name: podman.socket - state: started - enabled: yes - - - name: Set proper permissions on Podman socket - file: - path: /run/podman/podman.sock - mode: '0666' - ignore_errors: yes - - - name: Create nomad data directory - file: - path: /opt/nomad/data - state: directory - owner: nomad - group: nomad - mode: '0755' - - - name: Create nomad log directory - file: - path: /var/log/nomad - state: directory - owner: nomad - group: nomad - mode: '0755' - - - name: Test Podman access for nomad user - shell: sudo -u nomad podman version - register: podman_test - failed_when: false - - - name: Display Podman test result - debug: - var: podman_test.stdout_lines - - - name: Validate Nomad configuration - shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: false - - - name: Display configuration validation - debug: - var: config_validation - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - timeout: 60 - - - name: Check Nomad node status - shell: /usr/local/bin/nomad node status -self - register: node_status - failed_when: false - - - name: Display node status - debug: - var: node_status.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-hcs-dpkg-issue.yml b/configuration/playbooks/fix/fix-hcs-dpkg-issue.yml deleted file mode 100644 index 7db31b8..0000000 --- a/configuration/playbooks/fix/fix-hcs-dpkg-issue.yml +++ /dev/null @@ -1,56 +0,0 @@ ---- -- name: Fix dpkg and initramfs issues on hcs - hosts: hcs - become: yes - tasks: - - name: Check current dpkg status - shell: dpkg --audit - register: dpkg_status - ignore_errors: yes - - - name: Display dpkg status - debug: - var: dpkg_status.stdout_lines - - - name: Fix broken btrfs hook - shell: | - # Remove problematic btrfs hook temporarily - mv /usr/share/initramfs-tools/hooks/btrfs /usr/share/initramfs-tools/hooks/btrfs.bak || true - - # Try to reconfigure the failed package - dpkg --configure -a - - # If that works, restore the hook - if [ $? -eq 0 ]; then - mv /usr/share/initramfs-tools/hooks/btrfs.bak /usr/share/initramfs-tools/hooks/btrfs || true - fi - register: fix_result - ignore_errors: yes - - - name: Display fix result - debug: - var: fix_result - - - name: Alternative fix - reinstall initramfs-tools - apt: - name: initramfs-tools - state: latest - force: yes - when: fix_result.rc != 0 - ignore_errors: yes - - - name: Clean up and update - shell: | - apt autoremove -y - apt update - apt upgrade -y - ignore_errors: yes - - - name: Check final dpkg status - shell: dpkg --audit - register: final_status - ignore_errors: yes - - - name: Display final status - debug: - var: final_status.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-nomad-cluster.yml b/configuration/playbooks/fix/fix-nomad-cluster.yml deleted file mode 100644 index f546ff7..0000000 --- a/configuration/playbooks/fix/fix-nomad-cluster.yml +++ /dev/null @@ -1,98 +0,0 @@ ---- -- name: Fix Nomad Cluster Configuration - hosts: nomad_servers - become: yes - vars: - nomad_servers_list: - - "100.116.158.95" # semaphore - - "100.103.147.94" # ash2e - - "100.81.26.3" # ash1d - - "100.90.159.68" # ch2 - - "{{ ansible_default_ipv4.address }}" # ch3 (will be determined dynamically) - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - ignore_errors: yes - - - name: Create nomad user - user: - name: nomad - system: yes - shell: /bin/false - home: /opt/nomad - create_home: no - - - name: Create Nomad configuration directory - file: - path: /etc/nomad.d - state: directory - mode: '0755' - - - name: Create Nomad data directory - file: - path: /opt/nomad/data - state: directory - mode: '0755' - owner: nomad - group: nomad - ignore_errors: yes - - - name: Create Nomad log directory - file: - path: /var/log/nomad - state: directory - mode: '0755' - owner: nomad - group: nomad - ignore_errors: yes - - - name: Generate Nomad server configuration - template: - src: nomad-server.hcl.j2 - dest: /etc/nomad.d/nomad.hcl - mode: '0644' - notify: restart nomad - - - name: Create Nomad systemd service file - copy: - content: | - [Unit] - Description=Nomad - Documentation=https://www.nomadproject.io/ - Requires=network-online.target - After=network-online.target - ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl - - [Service] - Type=notify - User=nomad - Group=nomad - ExecStart=/usr/bin/nomad agent -config=/etc/nomad.d/nomad.hcl - ExecReload=/bin/kill -HUP $MAINPID - KillMode=process - Restart=on-failure - LimitNOFILE=65536 - - [Install] - WantedBy=multi-user.target - dest: /etc/systemd/system/nomad.service - mode: '0644' - - - name: Reload systemd daemon - systemd: - daemon_reload: yes - - - name: Enable and start Nomad service - systemd: - name: nomad - enabled: yes - state: started - - handlers: - - name: restart nomad - systemd: - name: nomad - state: restarted \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-nomad-local.yml b/configuration/playbooks/fix/fix-nomad-local.yml deleted file mode 100644 index b75fdff..0000000 --- a/configuration/playbooks/fix/fix-nomad-local.yml +++ /dev/null @@ -1,99 +0,0 @@ ---- -- name: Update Nomad configuration for Podman and fix issues - hosts: localhost - become: yes - connection: local - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Update Nomad configuration to use Podman and disable Consul - copy: - content: | - datacenter = "dc1" - region = "global" - data_dir = "/opt/nomad/data" - - bind_addr = "100.116.158.95" - - server { - enabled = true - bootstrap_expect = 1 - encrypt = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ=" - } - - client { - enabled = true - } - - ui { - enabled = true - } - - addresses { - http = "0.0.0.0" - rpc = "100.116.158.95" - serf = "100.116.158.95" - } - - ports { - http = 4646 - rpc = 4647 - serf = 4648 - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - } - } - - # Disable Consul integration for now - consul { - address = "" - } - - log_level = "INFO" - log_file = "/var/log/nomad/nomad.log" - dest: /etc/nomad.d/nomad.hcl - owner: nomad - group: nomad - mode: '0640' - backup: yes - - - name: Enable Podman socket for systemd - systemd: - name: podman.socket - enabled: yes - state: started - ignore_errors: yes - - - name: Start Nomad service - systemd: - name: nomad - state: started - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 5 - timeout: 30 - - - name: Check Nomad status - uri: - url: http://localhost:4646/v1/status/leader - method: GET - register: nomad_status - retries: 3 - delay: 5 - - - name: Display Nomad status - debug: - msg: "Nomad leader: {{ nomad_status.json if nomad_status.json is defined else 'No leader elected' }}" \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-nomad-podman-config.yml b/configuration/playbooks/fix/fix-nomad-podman-config.yml deleted file mode 100644 index d8e498c..0000000 --- a/configuration/playbooks/fix/fix-nomad-podman-config.yml +++ /dev/null @@ -1,72 +0,0 @@ ---- -- name: Fix Nomad Podman Driver Configuration - hosts: all - become: yes - vars: - nomad_user: nomad - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Update Nomad configuration to properly reference Podman plugin - replace: - path: /etc/nomad.d/nomad.hcl - regexp: 'plugin "podman" \{\n config \{\n socket_path = "unix:///run/user/1001/podman/podman.sock"\n volumes \{\n enabled = true\n \}\n \}\n\}' - replace: | - plugin "nomad-driver-podman" { - config { - socket_path = "unix:///run/user/1001/podman/podman.sock" - volumes { - enabled = true - } - } - } - - - name: Start Nomad service - systemd: - name: nomad - state: started - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 - - - name: Wait for plugins to load - pause: - seconds: 15 - - - name: Check if Podman driver is now loaded - shell: | - sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self | grep -A 20 "Driver Status" - register: driver_status - - - name: Display driver status - debug: - var: driver_status.stdout_lines - - - name: Check Nomad logs for successful plugin loading - shell: journalctl -u nomad -n 20 --no-pager | grep -E "(podman|plugin)" - register: recent_logs - failed_when: false - - - name: Display recent plugin logs - debug: - var: recent_logs.stdout_lines - - - name: Final verification - Test Podman functionality - shell: | - sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -json | jq -r '.Drivers | keys[]' | grep -i podman - register: podman_driver_check - failed_when: false - - - name: Display final result - debug: - msg: | - Podman driver status: {{ 'SUCCESS - Driver loaded!' if 'podman' in (podman_driver_check.stdout | default('')) else 'Still checking...' }} - Available drivers: {{ podman_driver_check.stdout_lines | default(['none']) | join(', ') }} \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-nomad-server-config.yml b/configuration/playbooks/fix/fix-nomad-server-config.yml deleted file mode 100644 index eb3a473..0000000 --- a/configuration/playbooks/fix/fix-nomad-server-config.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- -- name: Fix Nomad server configuration - hosts: localhost - gather_facts: no - become: yes - tasks: - - name: Create corrected nomad.hcl - copy: - dest: /etc/nomad.d/nomad.hcl - content: | - datacenter = "dc1" - data_dir = "/opt/nomad/data" - log_level = "INFO" - - bind_addr = "100.116.158.95" - - server { - enabled = true - bootstrap_expect = 5 - encrypt = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ=" - retry_join = [ - "100.116.158.95", # semaphore - "100.81.26.3", # ash1d - "100.103.147.94", # ash2e - "100.90.159.68", # ch2 - "100.86.141.112" # ch3 - ] - } - - client { - enabled = false - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - } - } - - consul { - address = "100.116.158.95:8500" - } \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-nomad-systemd.yml b/configuration/playbooks/fix/fix-nomad-systemd.yml deleted file mode 100644 index 959ab7b..0000000 --- a/configuration/playbooks/fix/fix-nomad-systemd.yml +++ /dev/null @@ -1,88 +0,0 @@ ---- -- name: Fix Nomad systemd service binary path - hosts: nomad_cluster - become: yes - - tasks: - - name: Check Nomad binary location - shell: which nomad - register: nomad_binary_path - - - name: Display binary path - debug: - msg: "Nomad binary 位于: {{ nomad_binary_path.stdout }}" - - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - ignore_errors: yes - - - name: Update Nomad systemd service with correct binary path - copy: - content: | - [Unit] - Description=Nomad - Documentation=https://www.nomadproject.io/ - Requires=network-online.target - After=network-online.target - ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl - - [Service] - Type=notify - User=nomad - Group=nomad - ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl - ExecReload=/bin/kill -HUP $MAINPID - KillMode=process - Restart=on-failure - LimitNOFILE=65536 - - [Install] - WantedBy=multi-user.target - dest: /etc/systemd/system/nomad.service - mode: '0644' - notify: reload systemd - - - name: Reload systemd and start Nomad servers first - systemd: - name: nomad - state: started - enabled: yes - daemon_reload: yes - when: inventory_hostname in groups['nomad_servers'] - - - name: Wait for servers to be ready - pause: - seconds: 15 - when: inventory_hostname in groups['nomad_servers'] - - - name: Start Nomad clients - systemd: - name: nomad - state: started - enabled: yes - daemon_reload: yes - when: inventory_hostname in groups['nomad_clients'] - - - name: Wait for clients to connect - pause: - seconds: 10 - when: inventory_hostname in groups['nomad_clients'] - - - name: Check final service status - shell: systemctl status nomad --no-pager -l - register: service_status - ignore_errors: yes - - - name: Display service status - debug: - msg: | - ✅ 节点 {{ inventory_hostname }} 服务状态: - 📊 状态: {{ 'SUCCESS' if service_status.rc == 0 else 'FAILED' }} - 💾 二进制路径: {{ nomad_binary_path.stdout }} - - handlers: - - name: reload systemd - systemd: - daemon_reload: yes \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-podman-installation.yml b/configuration/playbooks/fix/fix-podman-installation.yml deleted file mode 100644 index 27aa892..0000000 --- a/configuration/playbooks/fix/fix-podman-installation.yml +++ /dev/null @@ -1,79 +0,0 @@ ---- -- name: Fix Podman installation on remaining nodes - hosts: semaphore,master,ash3c,hcs - become: yes - serial: 1 # 逐个处理,避免同时影响多个节点 - - tasks: - - name: Current node status - debug: - msg: "🔧 修复节点: {{ inventory_hostname }}" - - - name: Check if Podman is already installed - shell: podman --version 2>/dev/null || echo "NOT_INSTALLED" - register: podman_check - - - name: Install Podman if not present (semaphore special handling) - apt: - name: - - podman - - buildah - - skopeo - state: present - update_cache: yes - force_apt_get: yes - when: inventory_hostname == 'semaphore' and 'NOT_INSTALLED' in podman_check.stdout - ignore_errors: yes - - - name: Install Podman on other nodes - apt: - name: - - podman - - buildah - - skopeo - state: present - when: inventory_hostname != 'semaphore' - ignore_errors: yes - - - name: Install Python dependencies for podman-compose - apt: - name: - - python3-pip - - python3-setuptools - - python3-yaml - - python3-dotenv - state: present - ignore_errors: yes - - - name: Install podman-compose via pip - pip: - name: - - podman-compose - state: present - executable: pip3 - ignore_errors: yes - - - name: Alternative podman-compose installation via apt - apt: - name: podman-compose - state: present - ignore_errors: yes - - - name: Verify installations - shell: | - echo "Podman: $(podman --version 2>/dev/null || echo 'FAILED')" - echo "Podman Compose: $(podman-compose --version 2>/dev/null || echo 'FAILED')" - register: verify_result - - - name: Display verification results - debug: - msg: | - ✅ 节点 {{ inventory_hostname }} 验证结果: - {{ verify_result.stdout }} - - - name: Enable Podman socket - systemd: - name: podman.socket - enabled: yes - state: started - ignore_errors: yes \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-server-config.yml b/configuration/playbooks/fix/fix-server-config.yml deleted file mode 100644 index aa44bc4..0000000 --- a/configuration/playbooks/fix/fix-server-config.yml +++ /dev/null @@ -1,109 +0,0 @@ ---- -- name: Fix Nomad server configuration - hosts: nomad_servers - become: yes - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Backup current configuration - copy: - src: /etc/nomad.d/nomad.hcl - dest: /etc/nomad.d/nomad.hcl.backup-server-fix - remote_src: yes - - - name: Create clean server configuration - copy: - content: | - datacenter = "{{ nomad_datacenter }}" - region = "{{ nomad_region }}" - data_dir = "/opt/nomad/data" - bind_addr = "{{ ansible_default_ipv4.address }}" - - server { - enabled = true - bootstrap_expect = {{ nomad_bootstrap_expect }} - encrypt = "{{ nomad_encrypt_key }}" - - retry_join = [ - "100.116.158.95", - "100.103.147.94", - "100.81.26.3", - "100.90.159.68", - "100.86.141.112" - ] - } - - client { - enabled = true - } - - ui { - enabled = true - } - - addresses { - http = "0.0.0.0" - rpc = "{{ ansible_default_ipv4.address }}" - serf = "{{ ansible_default_ipv4.address }}" - } - - ports { - http = 4646 - rpc = 4647 - serf = 4648 - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - recover_stopped = true - } - } - - consul { - auto_advertise = false - server_auto_join = false - client_auto_join = false - } - - log_level = "INFO" - log_file = "/var/log/nomad/nomad.log" - dest: /etc/nomad.d/nomad.hcl - owner: nomad - group: nomad - mode: '0640' - - - name: Ensure Podman is installed - package: - name: podman - state: present - - - name: Enable and start Podman socket - systemd: - name: podman.socket - enabled: yes - state: started - - - name: Validate Nomad configuration - shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: config_validation.rc != 0 - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-server-network-config.yml b/configuration/playbooks/fix/fix-server-network-config.yml deleted file mode 100644 index dab81fa..0000000 --- a/configuration/playbooks/fix/fix-server-network-config.yml +++ /dev/null @@ -1,103 +0,0 @@ ---- -- name: Fix Nomad server network configuration - hosts: nomad_servers - become: yes - vars: - server_ips: - semaphore: "100.116.158.95" - ash2e: "100.103.147.94" - ash1d: "100.81.26.3" - ch2: "100.90.159.68" - ch3: "100.86.141.112" - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Get server IP for this host - set_fact: - server_ip: "{{ server_ips[inventory_hostname] }}" - - - name: Create corrected server configuration - copy: - content: | - datacenter = "{{ nomad_datacenter }}" - region = "{{ nomad_region }}" - data_dir = "/opt/nomad/data" - bind_addr = "{{ server_ip }}" - - server { - enabled = true - bootstrap_expect = {{ nomad_bootstrap_expect }} - encrypt = "{{ nomad_encrypt_key }}" - - retry_join = [ - "100.116.158.95", - "100.103.147.94", - "100.81.26.3", - "100.90.159.68", - "100.86.141.112" - ] - } - - client { - enabled = true - } - - ui { - enabled = true - } - - addresses { - http = "0.0.0.0" - rpc = "{{ server_ip }}" - serf = "{{ server_ip }}" - } - - ports { - http = 4646 - rpc = 4647 - serf = 4648 - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - recover_stopped = true - } - } - - consul { - auto_advertise = false - server_auto_join = false - client_auto_join = false - } - - log_level = "INFO" - log_file = "/var/log/nomad/nomad.log" - dest: /etc/nomad.d/nomad.hcl - owner: nomad - group: nomad - mode: '0640' - - - name: Validate Nomad configuration - shell: /usr/local/bin/nomad config validate /etc/nomad.d/nomad.hcl || /usr/bin/nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: config_validation.rc != 0 - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 \ No newline at end of file diff --git a/configuration/playbooks/fix/fix-warden-compose.yml b/configuration/playbooks/fix/fix-warden-compose.yml deleted file mode 100644 index b904d65..0000000 --- a/configuration/playbooks/fix/fix-warden-compose.yml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- name: Fix Warden docker-compose.yml - hosts: warden - become: yes - gather_facts: no - - tasks: - - name: Ensure /opt/warden directory exists - file: - path: /opt/warden - state: directory - owner: root - group: root - mode: '0755' - - - name: Create or update docker-compose.yml with correct indentation - copy: - dest: /opt/warden/docker-compose.yml - content: | - services: - vaultwarden: - image: hub.git4ta.fun/vaultwarden/server:latest - security_opt: - - "seccomp=unconfined" - env_file: - - .env - volumes: - - ./data:/data - ports: - - "980:80" - restart: always - networks: - - vaultwarden_network - - networks: - vaultwarden_network: - owner: root - group: root - mode: '0644' \ No newline at end of file diff --git a/configuration/playbooks/other/check-podman-version.yml b/configuration/playbooks/other/check-podman-version.yml deleted file mode 100644 index 7fd02ba..0000000 --- a/configuration/playbooks/other/check-podman-version.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -- name: 检查 Podman 版本 - hosts: warden - become: yes - gather_facts: yes - - tasks: - - name: 检查当前 Podman 版本 - shell: podman --version - register: current_podman_version - ignore_errors: yes - - - name: 显示当前版本 - debug: - msg: "当前 Podman 版本: {{ current_podman_version.stdout if current_podman_version.rc == 0 else '未安装或无法获取' }}" \ No newline at end of file diff --git a/configuration/playbooks/other/check-podman-versions.yml b/configuration/playbooks/other/check-podman-versions.yml deleted file mode 100644 index 6dac3c6..0000000 --- a/configuration/playbooks/other/check-podman-versions.yml +++ /dev/null @@ -1,22 +0,0 @@ -- name: Check podman version on semaphore (local) - hosts: semaphore - connection: local - gather_facts: false - tasks: - - name: Check podman version - command: /usr/local/bin/podman --version - register: podman_version - - name: Display podman version - debug: - msg: "Podman version on {{ inventory_hostname }} is: {{ podman_version.stdout }}" - -- name: Check podman version on other beijing nodes - hosts: beijing:!semaphore - gather_facts: false - tasks: - - name: Check podman version - command: /usr/local/bin/podman --version - register: podman_version - - name: Display podman version - debug: - msg: "Podman version on {{ inventory_hostname }} is: {{ podman_version.stdout }}" \ No newline at end of file diff --git a/configuration/playbooks/other/cleanup-hashicorp-backups.yml b/configuration/playbooks/other/cleanup-hashicorp-backups.yml deleted file mode 100644 index 7adfc92..0000000 --- a/configuration/playbooks/other/cleanup-hashicorp-backups.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -- name: 清理 HashiCorp APT 源备份文件 - hosts: nomad_cluster - become: yes - - tasks: - - name: 查找所有 HashiCorp 备份文件 - find: - paths: "/etc/apt/sources.list.d/" - patterns: "hashicorp.list.backup-*" - register: backup_files - - - name: 删除所有备份文件 - file: - path: "{{ item.path }}" - state: absent - loop: "{{ backup_files.files }}" - when: backup_files.files | length > 0 - - - name: 显示清理结果 - debug: - msg: "已删除 {{ backup_files.files | length }} 个备份文件" \ No newline at end of file diff --git a/configuration/playbooks/other/clear-aliases.yml b/configuration/playbooks/other/clear-aliases.yml deleted file mode 100644 index 98f44cf..0000000 --- a/configuration/playbooks/other/clear-aliases.yml +++ /dev/null @@ -1,89 +0,0 @@ ---- -- name: Clear all aliases on hcp1 and hcp2 - hosts: hcp1,hcp2 - become: yes - - tasks: - - name: Check current aliases - shell: alias || echo "No aliases found" - register: current_aliases - - - name: Display current aliases - debug: - msg: "Current aliases: {{ current_aliases.stdout_lines }}" - - - name: Clear aliases from /root/.bashrc - shell: | - sed -i '/^alias /d' /root/.bashrc - sed -i '/^alias\t/d' /root/.bashrc - ignore_errors: yes - - - name: Clear aliases from /root/.profile - shell: | - sed -i '/^alias /d' /root/.profile - sed -i '/^alias\t/d' /root/.profile - ignore_errors: yes - - - name: Clear aliases from /root/.zshrc - shell: | - sed -i '/^alias /d' /root/.zshrc - sed -i '/^alias\t/d' /root/.zshrc - ignore_errors: yes - - - name: Clear aliases from /etc/bash.bashrc - shell: | - sed -i '/^alias /d' /etc/bash.bashrc - sed -i '/^alias\t/d' /etc/bash.bashrc - ignore_errors: yes - - - name: Clear aliases from /etc/profile - shell: | - sed -i '/^alias /d' /etc/profile - sed -i '/^alias\t/d' /etc/profile - ignore_errors: yes - - - name: Find and clear custom alias files - find: - paths: ["/root", "/etc", "/home"] - patterns: ["*.aliases", ".aliases", "aliases"] - recurse: yes - register: alias_files - - - name: Remove found alias files - file: - path: "{{ item.path }}" - state: absent - loop: "{{ alias_files.files }}" - when: alias_files.files is defined - - - name: Clear aliases from /etc/profile.d/aliases.sh - ansible.builtin.file: - path: /etc/profile.d/aliases.sh - state: absent - - - name: Clear aliases from /root/.bashrc - ansible.builtin.lineinfile: - path: /root/.bashrc - state: absent - regexp: "^alias " - - - name: Clear aliases from /root/.bash_aliases - ansible.builtin.file: - path: /root/.bash_aliases - state: absent - - - name: Clear history - ansible.builtin.command: - cmd: > /root/.bash_history - - - name: Restart shell to apply changes - ansible.builtin.command: - cmd: pkill -f bash || true - - - name: Test network connectivity after clearing aliases - shell: ping -c 2 8.8.8.8 || echo "Ping failed" - register: ping_test - - - name: Display ping test result - debug: - msg: "Ping test: {{ ping_test.stdout_lines }}" \ No newline at end of file diff --git a/configuration/playbooks/other/clear-all-aliases.yml b/configuration/playbooks/other/clear-all-aliases.yml deleted file mode 100644 index b0412b2..0000000 --- a/configuration/playbooks/other/clear-all-aliases.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -- name: Remove all aliases from user shell configuration files - hosts: all - become: yes - gather_facts: false - - tasks: - - name: Find all relevant shell configuration files - find: - paths: /home - patterns: .bashrc, .bash_aliases, .profile - register: shell_config_files - - - name: Remove aliases from shell configuration files - replace: - path: "{{ item.path }}" - regexp: '^alias .*' - replace: '' - loop: "{{ shell_config_files.files }}" - when: shell_config_files.files is defined - - - name: Remove functions from shell configuration files - replace: - path: "{{ item.path }}" - regexp: '^function .*' - replace: '' - loop: "{{ shell_config_files.files }}" - when: shell_config_files.files is defined - - - name: Display completion message - debug: - msg: "All aliases and functions have been removed from user shell configuration files." \ No newline at end of file diff --git a/configuration/playbooks/other/clear-proxy-settings.yml b/configuration/playbooks/other/clear-proxy-settings.yml deleted file mode 100644 index 201d379..0000000 --- a/configuration/playbooks/other/clear-proxy-settings.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -- name: Clear proxy settings from the system - hosts: all - become: yes - gather_facts: false - - tasks: - - name: Remove proxy environment file - file: - path: /root/mgmt/configuration/proxy.env - state: absent - ignore_errors: yes - - - name: Unset proxy environment variables - shell: | - unset http_proxy - unset https_proxy - unset HTTP_PROXY - unset HTTPS_PROXY - unset no_proxy - unset NO_PROXY - unset ALL_PROXY - unset all_proxy - unset DOCKER_BUILDKIT - unset BUILDKIT_PROGRESS - unset GIT_HTTP_PROXY - unset GIT_HTTPS_PROXY - unset CURL_PROXY - unset WGET_PROXY - ignore_errors: yes - - - name: Remove proxy settings from /etc/environment - lineinfile: - path: /etc/environment - state: absent - regexp: '^(http_proxy|https_proxy|no_proxy|ALL_PROXY|DOCKER_BUILDKIT|BUILDKIT_PROGRESS|GIT_HTTP_PROXY|GIT_HTTPS_PROXY|CURL_PROXY|WGET_PROXY)=' - ignore_errors: yes - - - name: Remove proxy settings from /etc/apt/apt.conf.d/proxy.conf - file: - path: /etc/apt/apt.conf.d/proxy.conf - state: absent - ignore_errors: yes - - - name: Display completion message - debug: - msg: "Proxy settings have been cleared from the system." \ No newline at end of file diff --git a/configuration/playbooks/other/clear-proxy.yml b/configuration/playbooks/other/clear-proxy.yml deleted file mode 100644 index be77bcb..0000000 --- a/configuration/playbooks/other/clear-proxy.yml +++ /dev/null @@ -1,76 +0,0 @@ ---- -- name: Clear proxy settings on hcp1 and hcp2 - hosts: hcp1,hcp2 - become: yes - - tasks: - - name: Check current proxy environment variables - shell: env | grep -i proxy || echo "No proxy vars found" - register: proxy_env_before - - - name: Display current proxy settings - debug: - msg: "Current proxy env: {{ proxy_env_before.stdout_lines }}" - - - name: Clear proxy from /etc/environment - lineinfile: - path: /etc/environment - regexp: "{{ item }}" - state: absent - loop: - - "^http_proxy=" - - "^https_proxy=" - - "^HTTP_PROXY=" - - "^HTTPS_PROXY=" - - "^ftp_proxy=" - - "^FTP_PROXY=" - - "^no_proxy=" - - "^NO_PROXY=" - - - name: Clear proxy from /etc/apt/apt.conf.d/ - file: - path: "{{ item }}" - state: absent - loop: - - /etc/apt/apt.conf.d/95proxies - - /etc/apt/apt.conf.d/proxy.conf - - /etc/apt/apt.conf.d/00proxy - - - name: Clear proxy from user profiles - lineinfile: - path: "{{ item }}" - regexp: ".*proxy.*" - state: absent - loop: - - /root/.bashrc - - /root/.profile - - /home/root/.bashrc - - /home/root/.profile - ignore_errors: yes - - - name: Unset proxy variables in current session - shell: | - unset http_proxy - unset https_proxy - unset HTTP_PROXY - unset HTTPS_PROXY - unset ftp_proxy - unset FTP_PROXY - unset no_proxy - unset NO_PROXY - - - name: Check APT proxy configuration - shell: apt-config dump | grep -i proxy || echo "No APT proxy found" - register: apt_proxy_check - - - name: Display APT proxy status - debug: - msg: "APT proxy config: {{ apt_proxy_check.stdout_lines }}" - - - name: Test direct connection to HashiCorp - shell: curl -I --connect-timeout 10 https://releases.hashicorp.com/ || echo "Connection failed" - register: connection_test - - - name: Display connection test result - debug: - msg: "Connection test: {{ connection_test.stdout_lines }}" \ No newline at end of file diff --git a/configuration/playbooks/other/ensure-nomad-user.yml b/configuration/playbooks/other/ensure-nomad-user.yml deleted file mode 100644 index 7bead5c..0000000 --- a/configuration/playbooks/other/ensure-nomad-user.yml +++ /dev/null @@ -1,25 +0,0 @@ ---- -- name: Ensure nomad user and plugin directory exist - hosts: nomad_clients - become: yes - tasks: - - name: Ensure nomad group exists - group: - name: nomad - state: present - - - name: Ensure nomad user exists - user: - name: nomad - group: nomad - shell: /usr/sbin/nologin - system: yes - create_home: no - - - name: Ensure plugin directory exists with correct ownership - file: - path: /opt/nomad/data/plugins - state: directory - owner: nomad - group: nomad - mode: '0755' \ No newline at end of file diff --git a/configuration/playbooks/other/final-podman-fix.yml b/configuration/playbooks/other/final-podman-fix.yml deleted file mode 100644 index c0832ef..0000000 --- a/configuration/playbooks/other/final-podman-fix.yml +++ /dev/null @@ -1,105 +0,0 @@ ---- -- name: Final Podman Permission Fix for Nomad - hosts: all - become: yes - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Install podman for nomad user (system-wide) - package: - name: podman - state: present - - - name: Enable podman socket for nomad user - systemd: - name: podman.socket - enabled: yes - state: started - scope: system - daemon_reload: yes - - - name: Create nomad user podman configuration directory - file: - path: /home/nomad/.config/containers - state: directory - owner: nomad - group: nomad - mode: '0755' - recurse: yes - - - name: Configure podman for nomad user to use system socket - copy: - content: | - [containers] - - [engine] - remote = true - - [service_destinations] - [service_destinations.system] - uri = "unix:///run/podman/podman.sock" - dest: /home/nomad/.config/containers/containers.conf - owner: nomad - group: nomad - mode: '0644' - - - name: Update Nomad configuration to use system podman socket - replace: - path: /etc/nomad.d/nomad.hcl - regexp: 'socket_path = "unix:///run/user/1001/podman/podman.sock"' - replace: 'socket_path = "unix:///run/podman/podman.sock"' - - - name: Add nomad user to necessary groups - user: - name: nomad - groups: - - podman - append: yes - - - name: Create podman group if it doesn't exist - group: - name: podman - state: present - - - name: Set proper permissions on system podman socket directory - file: - path: /run/podman - state: directory - mode: '0755' - group: podman - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - timeout: 60 - - - name: Wait for plugins to load - pause: - seconds: 20 - - - name: Final verification - Check driver status - shell: sudo -u nomad /usr/local/bin/nomad node status -self | grep -A 10 "Driver Status" - register: final_driver_status - failed_when: false - - - name: Display final driver status - debug: - var: final_driver_status.stdout_lines - - - name: Test podman access for nomad user - shell: sudo -u nomad podman version - register: podman_test - failed_when: false - - - name: Display podman test result - debug: - var: podman_test.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/other/get-tailscale-ips.yml b/configuration/playbooks/other/get-tailscale-ips.yml deleted file mode 100644 index 0cb6f11..0000000 --- a/configuration/playbooks/other/get-tailscale-ips.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -- name: Get Tailscale IP for specified nodes - hosts: all - gather_facts: no - tasks: - - name: Get tailscale IP - shell: "tailscale ip -4" - register: tailscale_ip - - - name: Display Tailscale IP - debug: - msg: "Node {{ inventory_hostname }} has IP: {{ tailscale_ip.stdout }}" \ No newline at end of file diff --git a/configuration/playbooks/other/hack-podman-upgrade.yml b/configuration/playbooks/other/hack-podman-upgrade.yml deleted file mode 100644 index 96439b8..0000000 --- a/configuration/playbooks/other/hack-podman-upgrade.yml +++ /dev/null @@ -1,67 +0,0 @@ ---- -- name: 强制升级 Podman 到最新版本 - hosts: warden - become: yes - gather_facts: yes - - tasks: - - name: 检查当前 Podman 版本 - shell: podman --version - register: current_podman_version - ignore_errors: yes - - - name: 显示当前版本 - debug: - msg: "升级前版本: {{ current_podman_version.stdout if current_podman_version.rc == 0 else '未安装' }}" - - - name: 卸载现有 Podman - shell: apt-get remove -y --purge podman* containerd* runc* - ignore_errors: yes - - - name: 清理残留配置 - shell: | - rm -rf /etc/containers - rm -rf /usr/share/containers - rm -rf /var/lib/containers - ignore_errors: yes - - - name: 直接下载并安装最新版Podman二进制文件 - shell: | - # 清理可能存在的旧版本 - rm -f /tmp/podman-latest.tar.gz - rm -f /usr/local/bin/podman - - # 获取最新版本号 - LATEST_VERSION="v5.6.1" # 硬编码最新版本避免网络问题 - echo "安装版本: $LATEST_VERSION" - - # 使用GitHub镜像站点下载二进制文件 - echo "使用GitHub镜像站点下载..." - wget -O /tmp/podman-latest.tar.gz "https://gh.git4ta.fun/github.com/containers/podman/releases/download/${LATEST_VERSION}/podman-linux-static-amd64.tar.gz" - - # 检查文件是否下载成功,如果失败尝试直接下载 - if [ ! -f /tmp/podman-latest.tar.gz ]; then - echo "镜像下载失败,尝试直接下载..." - wget -O /tmp/podman-latest.tar.gz "https://github.com/containers/podman/releases/download/${LATEST_VERSION}/podman-linux-static-amd64.tar.gz" - fi - - # 解压并安装 - tar -xzf /tmp/podman-latest.tar.gz -C /usr/local/bin/ --strip-components=1 - chmod +x /usr/local/bin/podman - - # 更新PATH - echo 'export PATH=/usr/local/bin:$PATH' >> /etc/profile - . /etc/profile - - # 验证安装 - /usr/local/bin/podman --version - ignore_errors: yes - - - name: 验证安装结果 - shell: podman --version - register: new_podman_version - ignore_errors: yes - - - name: 显示最终版本 - debug: - msg: "升级后版本: {{ new_podman_version.stdout if new_podman_version.rc == 0 else '安装失败' }}" \ No newline at end of file diff --git a/configuration/playbooks/other/integrated-podman-setup.yml b/configuration/playbooks/other/integrated-podman-setup.yml deleted file mode 100644 index 871f85e..0000000 --- a/configuration/playbooks/other/integrated-podman-setup.yml +++ /dev/null @@ -1,218 +0,0 @@ ---- -- name: Integrated Podman Setup - Remove Docker, Install and Configure Podman with Compose for Nomad - hosts: all - become: yes - gather_facts: yes - - tasks: - - name: 显示当前处理的节点 - debug: - msg: "🔧 开始集成 Podman 设置: {{ inventory_hostname }}" - - - name: 检查 Docker 服务状态 - shell: systemctl is-active docker 2>/dev/null || echo "inactive" - register: docker_status - changed_when: false - - - name: 停止 Docker 服务 - systemd: - name: docker - state: stopped - enabled: no - ignore_errors: yes - when: docker_status.stdout == "active" - - - name: 停止 Docker socket - systemd: - name: docker.socket - state: stopped - enabled: no - ignore_errors: yes - - - name: 移除 Docker 相关包 - apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-buildx-plugin - - docker-compose-plugin - - docker.io - - docker-doc - - docker-compose - - docker-registry - - containerd - - runc - state: absent - purge: yes - ignore_errors: yes - - - name: 清理 Docker 数据目录 - file: - path: "{{ item }}" - state: absent - loop: - - /var/lib/docker - - /var/lib/containerd - - /etc/docker - - /etc/containerd - ignore_errors: yes - - - name: 清理 Docker 用户组 - group: - name: docker - state: absent - ignore_errors: yes - - - name: 更新包缓存 - apt: - update_cache: yes - cache_valid_time: 3600 - - - name: 安装 Podman 及相关工具 - apt: - name: - - podman - - buildah - - skopeo - - python3-pip - - python3-setuptools - state: present - retries: 3 - delay: 10 - - - name: 安装 Podman Compose via pip - pip: - name: podman-compose - state: present - ignore_errors: yes - - - name: 启用 Podman socket 服务 - systemd: - name: podman.socket - enabled: yes - state: started - ignore_errors: yes - - - name: 创建 Podman 用户服务目录 - file: - path: /etc/systemd/user - state: directory - mode: '0755' - - - name: 验证 Podman 安装 - shell: podman --version - register: podman_version - - - name: 验证 Podman Compose 安装 - shell: podman-compose --version 2>/dev/null || echo "未安装" - register: podman_compose_version - - - name: 检查 Docker 清理状态 - shell: systemctl is-active docker 2>/dev/null || echo "已移除" - register: final_docker_status - - - name: 显示 Docker 移除和 Podman 安装结果 - debug: - msg: | - ✅ 节点 {{ inventory_hostname }} Docker 移除和 Podman 安装完成 - 🐳 Docker 状态: {{ final_docker_status.stdout }} - 📦 Podman 版本: {{ podman_version.stdout }} - 🔧 Compose 状态: {{ podman_compose_version.stdout }} - - - name: 创建 Podman 系统配置目录 - file: - path: /etc/containers - state: directory - mode: '0755' - - - name: 配置 Podman 使用系统 socket - copy: - content: | - [engine] - # 使用系统级 socket 而不是用户级 socket - active_service = "system" - [engine.service_destinations] - [engine.service_destinations.system] - uri = "unix:///run/podman/podman.sock" - dest: /etc/containers/containers.conf - mode: '0644' - - - name: 检查是否存在 nomad 用户 - getent: - database: passwd - key: nomad - register: nomad_user_check - ignore_errors: yes - - - name: 为 nomad 用户创建配置目录 - file: - path: "/home/nomad/.config/containers" - state: directory - owner: nomad - group: nomad - mode: '0755' - when: nomad_user_check is succeeded - - - name: 为 nomad 用户配置 Podman - copy: - content: | - [engine] - active_service = "system" - [engine.service_destinations] - [engine.service_destinations.system] - uri = "unix:///run/podman/podman.sock" - dest: /home/nomad/.config/containers/containers.conf - owner: nomad - group: nomad - mode: '0644' - when: nomad_user_check is succeeded - - - name: 将 nomad 用户添加到 podman 组 - user: - name: nomad - groups: podman - append: yes - when: nomad_user_check is succeeded - ignore_errors: yes - - - name: 创建 podman 组(如果不存在) - group: - name: podman - state: present - ignore_errors: yes - - - name: 设置 podman socket 目录权限 - file: - path: /run/podman - state: directory - mode: '0755' - group: podman - ignore_errors: yes - - - name: 验证 Podman socket 权限 - file: - path: /run/podman/podman.sock - mode: '0666' - when: nomad_user_check is succeeded - ignore_errors: yes - - - name: 测试 Podman 功能 - shell: podman info - register: podman_info - ignore_errors: yes - - - name: 清理 apt 缓存 - apt: - autoclean: yes - autoremove: yes - - - name: 显示最终配置结果 - debug: - msg: | - 🎉 节点 {{ inventory_hostname }} 集成 Podman 设置完成! - 📦 Podman 版本: {{ podman_version.stdout }} - 🐳 Podman Compose: {{ podman_compose_version.stdout }} - 👤 Nomad 用户: {{ 'FOUND' if nomad_user_check is succeeded else 'NOT FOUND' }} - 🔧 Podman 状态: {{ 'SUCCESS' if podman_info.rc == 0 else 'WARNING' }} - 🚀 Docker 已移除,Podman 已配置为与 Nomad 集成 \ No newline at end of file diff --git a/configuration/playbooks/other/migrate-to-podman-simple.yml b/configuration/playbooks/other/migrate-to-podman-simple.yml deleted file mode 100644 index 7688caa..0000000 --- a/configuration/playbooks/other/migrate-to-podman-simple.yml +++ /dev/null @@ -1,167 +0,0 @@ ---- -- name: Migrate Nomad from Docker to Podman (Simple Version) - hosts: all - become: yes - vars: - nomad_user: nomad - nomad_config_dir: /etc/nomad.d - nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl" - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Backup current Nomad configuration - copy: - src: "{{ nomad_config_file }}" - dest: "{{ nomad_config_file }}.backup-{{ ansible_date_time.epoch }}" - remote_src: yes - - - name: Get nomad user info - getent: - database: passwd - key: "{{ nomad_user }}" - register: nomad_user_info - - - name: Set nomad user UID variable - set_fact: - nomad_uid: "{{ nomad_user_info.ansible_facts.getent_passwd[nomad_user][1] }}" - - - name: Enable lingering for nomad user - command: loginctl enable-linger {{ nomad_user }} - failed_when: false - - - name: Create runtime directory for nomad user - file: - path: "/run/user/{{ nomad_uid }}" - state: directory - owner: "{{ nomad_user }}" - group: "{{ nomad_user }}" - mode: '0700' - - - name: Start Podman socket as nomad user - shell: | - sudo -u {{ nomad_user }} XDG_RUNTIME_DIR=/run/user/{{ nomad_uid }} systemctl --user enable --now podman.socket - args: - creates: "/run/user/{{ nomad_uid }}/podman/podman.sock" - - - name: Create new Nomad configuration with Podman - copy: - content: | - datacenter = "dc1" - region = "global" - data_dir = "/opt/nomad/data" - - bind_addr = "0.0.0.0" - - client { - enabled = true - servers = [ - "100.116.158.95:4647", - ] - } - - # Docker plugin (disabled) - # plugin "docker" { - # config { - # allow_privileged = true - # volumes { - # enabled = true - # } - # } - # } - - plugin "podman" { - config { - socket_path = "unix:///run/user/{{ nomad_uid }}/podman/podman.sock" - volumes { - enabled = true - } - } - } - - consul { - address = "127.0.0.1:8500" - } - dest: "{{ nomad_config_file }}" - owner: root - group: root - mode: '0644' - - - name: Update Nomad systemd service to run as nomad user - copy: - content: | - [Unit] - Description=Nomad - Documentation=https://www.nomadproject.io/ - Requires=network-online.target - After=network-online.target - Wants=network-online.target - - [Service] - Type=notify - User={{ nomad_user }} - Group={{ nomad_user }} - ExecReload=/bin/kill -HUP $MAINPID - ExecStart=/usr/local/bin/nomad agent -config={{ nomad_config_dir }} - KillMode=process - Restart=on-failure - LimitNOFILE=65536 - Environment=XDG_RUNTIME_DIR=/run/user/{{ nomad_uid }} - - [Install] - WantedBy=multi-user.target - dest: /etc/systemd/system/nomad.service - owner: root - group: root - mode: '0644' - - - name: Reload systemd daemon - systemd: - daemon_reload: yes - - - name: Start Nomad service - systemd: - name: nomad - state: started - enabled: yes - - - name: Wait for Nomad to be ready (local check) - wait_for: - port: 4646 - host: localhost - delay: 5 - timeout: 60 - - - name: Verify Nomad is running - shell: systemctl is-active nomad - register: nomad_status - - - name: Display Nomad status - debug: - msg: "Nomad service status: {{ nomad_status.stdout }}" - - - name: Check Podman socket - stat: - path: "/run/user/{{ nomad_uid }}/podman/podman.sock" - register: podman_socket - - - name: Display Podman socket status - debug: - msg: "Podman socket exists: {{ podman_socket.stat.exists }}" - - - name: Test Podman as nomad user - shell: | - sudo -u {{ nomad_user }} XDG_RUNTIME_DIR=/run/user/{{ nomad_uid }} podman version --format json - register: podman_test - failed_when: false - - - name: Display Podman test result - debug: - msg: | - Podman test: {{ 'SUCCESS' if podman_test.rc == 0 else 'FAILED' }} - {% if podman_test.rc != 0 %} - Error: {{ podman_test.stderr }} - {% endif %} \ No newline at end of file diff --git a/configuration/playbooks/other/ping-nodes.yml b/configuration/playbooks/other/ping-nodes.yml deleted file mode 100644 index 8efc80e..0000000 --- a/configuration/playbooks/other/ping-nodes.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -- name: Ping nodes to check connectivity - hosts: all - gather_facts: no - tasks: - - name: Ping the host - ping: \ No newline at end of file diff --git a/configuration/playbooks/other/read-nomad-config.yml b/configuration/playbooks/other/read-nomad-config.yml deleted file mode 100644 index 18d15cf..0000000 --- a/configuration/playbooks/other/read-nomad-config.yml +++ /dev/null @@ -1,13 +0,0 @@ ---- -- name: Read Nomad config file - hosts: localhost - gather_facts: no - tasks: - - name: Read nomad.hcl - slurp: - src: /etc/nomad.d/nomad.hcl - register: nomad_config - - - name: Display Nomad config - debug: - msg: "{{ nomad_config['content'] | b64decode }}" \ No newline at end of file diff --git a/configuration/playbooks/other/restart-tailscale.yml b/configuration/playbooks/other/restart-tailscale.yml deleted file mode 100644 index 46688b7..0000000 --- a/configuration/playbooks/other/restart-tailscale.yml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- name: Restart Tailscale to fix DNS issues - hosts: hcp1,hcp2 - become: yes - - tasks: - - name: Check current DNS configuration - shell: cat /etc/resolv.conf - register: dns_before - - - name: Display current DNS config - debug: - msg: "Current DNS config: {{ dns_before.stdout_lines }}" - - - name: Restart tailscaled service - systemd: - name: tailscaled - state: restarted - - - name: Wait for tailscale to stabilize - wait_for: - timeout: 10 - - - name: Check DNS configuration after restart - shell: cat /etc/resolv.conf - register: dns_after - - - name: Display new DNS config - debug: - msg: "New DNS config: {{ dns_after.stdout_lines }}" - - - name: Test DNS resolution - shell: nslookup apt.releases.hashicorp.com - register: dns_test - ignore_errors: yes - - - name: Display DNS test result - debug: - msg: "DNS test result: {{ dns_test.stdout_lines }}" \ No newline at end of file diff --git a/configuration/playbooks/other/update-nomad-config.yml b/configuration/playbooks/other/update-nomad-config.yml deleted file mode 100644 index 6d3437a..0000000 --- a/configuration/playbooks/other/update-nomad-config.yml +++ /dev/null @@ -1,37 +0,0 @@ ---- -- name: Update Nomad config to run as a client - hosts: localhost - gather_facts: no - become: yes - tasks: - - name: Create new nomad.hcl - copy: - dest: /etc/nomad.d/nomad.hcl - content: | - datacenter = "dc1" - data_dir = "/opt/nomad/data" - log_level = "INFO" - - bind_addr = "100.116.158.95" - - server { - enabled = false - } - - client { - enabled = true - servers = ["100.81.26.3:4647", "100.103.147.94:4647", "100.90.159.68:4647"] - } - - plugin "podman" { - config { - socket_path = "unix:///run/podman/podman.sock" - volumes { - enabled = true - } - } - } - - consul { - address = "100.116.158.95:8500" - } \ No newline at end of file diff --git a/configuration/playbooks/other/upgrade-podman-to-5.yml b/configuration/playbooks/other/upgrade-podman-to-5.yml deleted file mode 100644 index 823fa0c..0000000 --- a/configuration/playbooks/other/upgrade-podman-to-5.yml +++ /dev/null @@ -1,77 +0,0 @@ ---- -- name: 升级 Podman 到最新版本 (warden 节点测试) - hosts: warden - become: yes - gather_facts: yes - - tasks: - - name: 检查当前 Podman 版本 - shell: podman --version - register: current_podman_version - ignore_errors: yes - - - name: 显示当前版本 - debug: - msg: "当前 Podman 版本: {{ current_podman_version.stdout if current_podman_version.rc == 0 else '未安装或无法获取' }}" - - - name: 备份现有 Podman 配置 - shell: | - if [ -d /etc/containers ]; then - cp -r /etc/containers /etc/containers.backup.$(date +%Y%m%d) - fi - if [ -d /usr/share/containers ]; then - cp -r /usr/share/containers /usr/share/containers.backup.$(date +%Y%m%d) - fi - ignore_errors: yes - - - name: 添加 Kubic 仓库 (HTTP 跳过签名) - shell: | - # 添加仓库并跳过签名验证 - echo "deb [trusted=yes] http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_22.04/ /" > /etc/apt/sources.list.d/kubic-containers.list - - - name: 更新包列表 (跳过签名验证) - shell: apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true - - - name: 检查仓库中可用的 Podman 版本 - shell: apt-cache policy podman - register: podman_versions - - - name: 显示可用的 Podman 版本 - debug: - msg: "{{ podman_versions.stdout }}" - - - name: 安装 Podman 5.x (强制跳过签名) - shell: apt-get install -y --allow-unauthenticated --allow-downgrades --allow-remove-essential --allow-change-held-packages podman - - - name: 验证 Podman 5.x 安装 - shell: | - podman --version - podman info --format json | jq -r '.Version.Version' - register: podman_5_verify - - - name: 显示升级结果 - debug: - msg: | - ✅ Podman 升级完成 - 🚀 新版本: {{ podman_5_verify.stdout_lines[0] }} - 📊 详细版本: {{ podman_5_verify.stdout_lines[1] }} - - - name: 测试基本功能 - shell: | - podman run --rm hello-world - register: podman_test - ignore_errors: yes - - - name: 显示测试结果 - debug: - msg: "Podman 功能测试: {{ '成功' if podman_test.rc == 0 else '失败 - ' + podman_test.stderr }}" - - - name: 检查相关服务状态 - shell: | - systemctl status podman.socket 2>/dev/null || echo "podman.socket 未运行" - systemctl status containerd 2>/dev/null || echo "containerd 未运行" - register: service_status - - - name: 显示服务状态 - debug: - msg: "{{ service_status.stdout }}" \ No newline at end of file diff --git a/configuration/playbooks/remove/remove-docker-install-podman-with-compose.yml b/configuration/playbooks/remove/remove-docker-install-podman-with-compose.yml deleted file mode 100644 index 686b660..0000000 --- a/configuration/playbooks/remove/remove-docker-install-podman-with-compose.yml +++ /dev/null @@ -1,126 +0,0 @@ ---- -- name: 移除 Docker 并安装带 Compose 功能的 Podman - hosts: all - become: yes - gather_facts: yes - - tasks: - - name: 显示当前处理的节点 - debug: - msg: "🔧 正在处理节点: {{ inventory_hostname }}" - - - name: 检查 Docker 服务状态 - shell: systemctl is-active docker 2>/dev/null || echo "inactive" - register: docker_status - changed_when: false - - - name: 停止 Docker 服务 - systemd: - name: docker - state: stopped - enabled: no - ignore_errors: yes - when: docker_status.stdout == "active" - - - name: 停止 Docker socket - systemd: - name: docker.socket - state: stopped - enabled: no - ignore_errors: yes - - - name: 移除 Docker 相关包 - apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-buildx-plugin - - docker-compose-plugin - - docker.io - - docker-doc - - docker-compose - - docker-registry - - containerd - - runc - state: absent - purge: yes - ignore_errors: yes - - - name: 清理 Docker 数据目录 - file: - path: "{{ item }}" - state: absent - loop: - - /var/lib/docker - - /var/lib/containerd - - /etc/docker - - /etc/containerd - ignore_errors: yes - - - name: 清理 Docker 用户组 - group: - name: docker - state: absent - ignore_errors: yes - - - name: 更新包缓存 - apt: - update_cache: yes - cache_valid_time: 3600 - - - name: 安装 Podman 及相关工具 - apt: - name: - - podman - - buildah - - skopeo - - python3-pip - - python3-setuptools - state: present - retries: 3 - delay: 10 - - - name: 安装 Podman Compose via pip - pip: - name: podman-compose - state: present - ignore_errors: yes - - - name: 启用 Podman socket 服务 - systemd: - name: podman.socket - enabled: yes - state: started - ignore_errors: yes - - - name: 创建 Podman 用户服务目录 - file: - path: /etc/systemd/user - state: directory - mode: '0755' - - - name: 验证 Podman 安装 - shell: podman --version - register: podman_version - - - name: 验证 Podman Compose 安装 - shell: podman-compose --version 2>/dev/null || echo "未安装" - register: podman_compose_version - - - name: 检查 Docker 清理状态 - shell: systemctl is-active docker 2>/dev/null || echo "已移除" - register: final_docker_status - - - name: 显示节点处理结果 - debug: - msg: | - ✅ 节点 {{ inventory_hostname }} 处理完成 - 🐳 Docker 状态: {{ final_docker_status.stdout }} - 📦 Podman 版本: {{ podman_version.stdout }} - 🔧 Compose 状态: {{ podman_compose_version.stdout }} - - - name: 清理 apt 缓存 - apt: - autoclean: yes - autoremove: yes \ No newline at end of file diff --git a/configuration/playbooks/remove/remove-docker-install-podman.yml b/configuration/playbooks/remove/remove-docker-install-podman.yml deleted file mode 100644 index 09ff808..0000000 --- a/configuration/playbooks/remove/remove-docker-install-podman.yml +++ /dev/null @@ -1,120 +0,0 @@ ---- -- name: 移除 Docker 并安装 Podman - 新 Server 节点 - hosts: ash2e,ash1d,ch2 - become: yes - gather_facts: no - serial: 1 # 逐个节点处理,避免并发冲突 - - tasks: - - name: 显示当前处理的节点 - debug: - msg: "🔧 正在处理节点: {{ inventory_hostname }}" - - - name: 检查 Docker 服务状态 - shell: systemctl is-active docker 2>/dev/null || echo "inactive" - register: docker_status - changed_when: false - - - name: 停止 Docker 服务 - systemd: - name: docker - state: stopped - enabled: no - ignore_errors: yes - when: docker_status.stdout == "active" - - - name: 停止 Docker socket - systemd: - name: docker.socket - state: stopped - enabled: no - ignore_errors: yes - - - name: 移除 Docker 相关包 - apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-buildx-plugin - - docker-compose-plugin - - docker.io - - docker-doc - - docker-compose - - docker-registry - - containerd - - runc - state: absent - purge: yes - ignore_errors: yes - - - name: 清理 Docker 数据目录 - file: - path: "{{ item }}" - state: absent - loop: - - /var/lib/docker - - /var/lib/containerd - - /etc/docker - - /etc/containerd - ignore_errors: yes - - - name: 清理 Docker 用户组 - group: - name: docker - state: absent - ignore_errors: yes - - - name: 更新包缓存 - apt: - update_cache: yes - cache_valid_time: 3600 - - - name: 安装 Podman 及相关工具 - apt: - name: - - podman - - buildah - - skopeo - - podman-compose - state: present - retries: 3 - delay: 10 - - - name: 启用 Podman socket 服务 - systemd: - name: podman.socket - enabled: yes - state: started - ignore_errors: yes - - - name: 创建 Podman 用户服务目录 - file: - path: /etc/systemd/user - state: directory - mode: '0755' - - - name: 验证 Podman 安装 - shell: podman --version - register: podman_version - - - name: 验证 Podman Compose 安装 - shell: podman-compose --version 2>/dev/null || echo "未安装" - register: podman_compose_version - - - name: 检查 Docker 清理状态 - shell: systemctl is-active docker 2>/dev/null || echo "已移除" - register: final_docker_status - - - name: 显示节点处理结果 - debug: - msg: | - ✅ 节点 {{ inventory_hostname }} 处理完成 - 🐳 Docker 状态: {{ final_docker_status.stdout }} - 📦 Podman 版本: {{ podman_version.stdout }} - 🔧 Compose 状态: {{ podman_compose_version.stdout }} - - - name: 清理 apt 缓存 - apt: - autoclean: yes - autoremove: yes \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/add-beijing-node-prefix.yml b/configuration/playbooks/root_playbooks/add-beijing-node-prefix.yml deleted file mode 100644 index 4cb6f7a..0000000 --- a/configuration/playbooks/root_playbooks/add-beijing-node-prefix.yml +++ /dev/null @@ -1,69 +0,0 @@ ---- -- name: Add Beijing prefix to LXC node names in Nomad configuration - hosts: beijing - become: yes - - vars: - node_prefixes: - influxdb: "bj-influxdb" - warden: "bj-warden" - hcp1: "bj-hcp1" - hcp2: "bj-hcp2" - tailscale_ips: - influxdb: "100.100.7.4" - warden: "100.122.197.112" - hcp1: "100.97.62.111" - hcp2: "100.116.112.45" - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Get current node name from inventory - set_fact: - current_node_name: "{{ inventory_hostname }}" - new_node_name: "{{ node_prefixes[inventory_hostname] }}" - tailscale_ip: "{{ tailscale_ips[inventory_hostname] }}" - - - name: Display node name change - debug: - msg: "Changing node name from {{ current_node_name }} to {{ new_node_name }}, using Tailscale IP {{ tailscale_ip }}" - - - name: Update node name in Nomad configuration - lineinfile: - path: /etc/nomad.d/nomad.hcl - regexp: '^name\s*=' - line: 'name = "{{ new_node_name }}"' - insertafter: 'datacenter = "dc1"' - state: present - - - name: Validate Nomad configuration - shell: nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: config_validation.rc != 0 - - - name: Start Nomad service - systemd: - name: nomad - state: started - - - name: Wait for Nomad to be ready on Tailscale IP - wait_for: - port: 4646 - host: "{{ tailscale_ip }}" - delay: 10 - timeout: 60 - - - name: Wait for node registration - pause: - seconds: 15 - - - name: Display new configuration - shell: cat /etc/nomad.d/nomad.hcl | grep -E "^(datacenter|name|bind_addr)\s*=" - register: nomad_config_check - - - name: Show updated configuration - debug: - var: nomad_config_check.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/fix-duplicate-plugin-dir.yml b/configuration/playbooks/root_playbooks/fix-duplicate-plugin-dir.yml deleted file mode 100644 index 6e73d96..0000000 --- a/configuration/playbooks/root_playbooks/fix-duplicate-plugin-dir.yml +++ /dev/null @@ -1,56 +0,0 @@ ---- -- name: Fix duplicate plugin_dir configuration - hosts: nomadlxc,hcp - become: yes - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Remove duplicate plugin_dir lines - lineinfile: - path: /etc/nomad.d/nomad.hcl - regexp: '^plugin_dir = "/opt/nomad/plugins"' - state: absent - - - name: Ensure only one plugin_dir configuration exists - lineinfile: - path: /etc/nomad.d/nomad.hcl - regexp: '^plugin_dir = "/opt/nomad/data/plugins"' - line: 'plugin_dir = "/opt/nomad/data/plugins"' - insertafter: 'data_dir = "/opt/nomad/data"' - state: present - - - name: Validate Nomad configuration - shell: nomad config validate /etc/nomad.d/nomad.hcl - register: config_validation - failed_when: config_validation.rc != 0 - - - name: Start Nomad service - systemd: - name: nomad - state: started - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 - - - name: Wait for plugins to load - pause: - seconds: 15 - - - name: Check driver status - shell: | - export NOMAD_ADDR=http://localhost:4646 - nomad node status -self | grep -A 10 "Driver Status" - register: driver_status - failed_when: false - - - name: Display driver status - debug: - var: driver_status.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/fix-podman-driver-config.yml b/configuration/playbooks/root_playbooks/fix-podman-driver-config.yml deleted file mode 100644 index 5e6e1d5..0000000 --- a/configuration/playbooks/root_playbooks/fix-podman-driver-config.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- -- name: Fix Nomad Podman Driver Configuration - hosts: nomadlxc,hcp - become: yes - vars: - nomad_user: nomad - - tasks: - - name: Stop Nomad service - systemd: - name: nomad - state: stopped - - - name: Install Podman driver plugin if missing - block: - - name: Check if plugin exists - stat: - path: /opt/nomad/data/plugins/nomad-driver-podman - register: plugin_exists - - - name: Download and install Podman driver plugin - block: - - name: Download Nomad Podman driver - get_url: - url: "https://releases.hashicorp.com/nomad-driver-podman/0.6.1/nomad-driver-podman_0.6.1_linux_amd64.zip" - dest: "/tmp/nomad-driver-podman.zip" - mode: '0644' - - - name: Extract Podman driver - unarchive: - src: "/tmp/nomad-driver-podman.zip" - dest: "/tmp" - remote_src: yes - - - name: Install Podman driver - copy: - src: "/tmp/nomad-driver-podman" - dest: "/opt/nomad/data/plugins/nomad-driver-podman" - owner: "{{ nomad_user }}" - group: "{{ nomad_user }}" - mode: '0755' - remote_src: yes - - - name: Clean up temporary files - file: - path: "{{ item }}" - state: absent - loop: - - "/tmp/nomad-driver-podman.zip" - - "/tmp/nomad-driver-podman" - when: not plugin_exists.stat.exists - - - name: Update Nomad configuration with correct plugin name and socket path - replace: - path: /etc/nomad.d/nomad.hcl - regexp: 'plugin "podman" \{' - replace: 'plugin "nomad-driver-podman" {' - - - name: Update socket path to system socket - replace: - path: /etc/nomad.d/nomad.hcl - regexp: 'socket_path = "unix:///run/user/1001/podman/podman.sock"' - replace: 'socket_path = "unix:///run/podman/podman.sock"' - - - name: Add plugin_dir configuration if missing - lineinfile: - path: /etc/nomad.d/nomad.hcl - line: 'plugin_dir = "/opt/nomad/data/plugins"' - insertafter: 'data_dir = "/opt/nomad/data"' - state: present - - - name: Ensure Podman socket is enabled and running - systemd: - name: podman.socket - enabled: yes - state: started - - - name: Start Nomad service - systemd: - name: nomad - state: started - - - name: Wait for Nomad to be ready - wait_for: - port: 4646 - host: localhost - delay: 10 - timeout: 60 - - - name: Wait for plugins to load - pause: - seconds: 20 - - - name: Check driver status - shell: | - export NOMAD_ADDR=http://localhost:4646 - nomad node status -self | grep -A 10 "Driver Status" - register: driver_status - failed_when: false - - - name: Display driver status - debug: - var: driver_status.stdout_lines - - - name: Check for Podman driver in logs - shell: journalctl -u nomad -n 30 --no-pager | grep -E "(podman|plugin)" | tail -10 - register: plugin_logs - failed_when: false - - - name: Display plugin logs - debug: - var: plugin_logs.stdout_lines \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/fix-warden-nfs.yml b/configuration/playbooks/root_playbooks/fix-warden-nfs.yml deleted file mode 100644 index 0e4372a..0000000 --- a/configuration/playbooks/root_playbooks/fix-warden-nfs.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -- name: Fix NFS mounting on warden node - hosts: warden - become: yes - tasks: - - name: Ensure rpcbind is running - systemd: - name: rpcbind - state: started - enabled: yes - - - name: Ensure nfs-client.target is active - systemd: - name: nfs-client.target - state: started - enabled: yes - - - name: Create consul-shared directory - file: - path: /opt/consul-shared - state: directory - mode: '0755' - - - name: Mount NFS share - mount: - path: /opt/consul-shared - src: snail:/fs/1000/nfs - fstype: nfs - opts: rw,sync,vers=3 - state: mounted - - - name: Add to fstab for persistence - mount: - path: /opt/consul-shared - src: snail:/fs/1000/nfs - fstype: nfs - opts: rw,sync,vers=3 - state: present - - - name: Verify mount - command: df -h /opt/consul-shared - register: mount_result - - - name: Display mount result - debug: - var: mount_result.stdout \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/setup-nfs-by-container-type.yml b/configuration/playbooks/root_playbooks/setup-nfs-by-container-type.yml deleted file mode 100644 index 02fa0a8..0000000 --- a/configuration/playbooks/root_playbooks/setup-nfs-by-container-type.yml +++ /dev/null @@ -1,82 +0,0 @@ ---- -- name: Setup NFS for different container types - hosts: all - become: yes - vars: - nfs_server: snail - nfs_export_path: /fs/1000/nfs/Fnsync - nfs_mount_path: /mnt/fnsync - nfs_options_local: "rw,sync,vers=4.2" - nfs_options_overseas: "rw,sync,vers=3,timeo=600,retrans=2" - - tasks: - - name: Detect container type and location - set_fact: - container_type: "{{ 'lxc' if inventory_hostname in groups['lxc'] else 'pve' }}" - is_overseas: "{{ inventory_hostname in ['ash1d', 'ash2e', 'ash3c', 'ch2', 'ch3'] }}" - - - name: Install NFS client for all nodes - package: - name: nfs-common - state: present - - - name: Create mount directory for all nodes - file: - path: "{{ nfs_mount_path }}" - state: directory - owner: root - group: root - mode: '0755' - - - name: Mount NFS for local LXC containers (direct mount) - mount: - path: "{{ nfs_mount_path }}" - src: "{{ nfs_server }}:{{ nfs_export_path }}" - fstype: nfs - opts: "{{ nfs_options_local }}" - state: mounted - when: container_type == 'lxc' and not is_overseas - - - name: Mount NFS for overseas PVE containers (with retry options) - mount: - path: "{{ nfs_mount_path }}" - src: "{{ nfs_server }}:{{ nfs_export_path }}" - fstype: nfs - opts: "{{ nfs_options_overseas }}" - state: mounted - when: container_type == 'pve' and is_overseas - - - name: Ensure NFS mount persists after reboot - mount: - path: "{{ nfs_mount_path }}" - src: "{{ nfs_server }}:{{ nfs_export_path }}" - fstype: nfs - opts: "{{ nfs_options_local if container_type == 'lxc' and not is_overseas else nfs_options_overseas }}" - state: present - - - name: Verify NFS mount - command: df -h "{{ nfs_mount_path }}" - register: mount_result - ignore_errors: yes - - - name: Display mount status - debug: - msg: "{{ inventory_hostname }} - {{ container_type }} - {{ '海外' if is_overseas else '本地' }} - Mount: {{ '成功' if mount_result.rc == 0 else '失败' }}" - - - name: Create Nomad directories for LXC containers - file: - path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}" - state: directory - owner: nomad - group: nomad - mode: '0755' - when: container_type == 'lxc' - - - name: Create shared volumes directory for PVE containers - file: - path: "{{ nfs_mount_path }}/nomad/volumes/{{ inventory_hostname }}" - state: directory - owner: nomad - group: nomad - mode: '0755' - when: container_type == 'pve' \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/setup-nfs-storage.yml b/configuration/playbooks/root_playbooks/setup-nfs-storage.yml deleted file mode 100644 index 76c93c6..0000000 --- a/configuration/playbooks/root_playbooks/setup-nfs-storage.yml +++ /dev/null @@ -1,75 +0,0 @@ ---- -- name: Setup NFS Storage for Consul Cluster - hosts: localhost - gather_facts: false - vars: - nfs_server: snail - nfs_export_path: /fs/1000/nfs/Fnsync - nfs_mount_path: /mnt/fnsync - - tasks: - - name: Install NFS client and mount on master - ansible.builtin.shell: | - ssh -o StrictHostKeyChecking=no -p 60022 ben@master ' - echo "3131" | sudo -S apt update && - echo "3131" | sudo -S apt install -y nfs-common && - echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }} && - echo "3131" | sudo -S mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} && - echo "{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0" | echo "3131" | sudo -S tee -a /etc/fstab - ' - delegate_to: localhost - register: master_result - - - name: Install NFS client and mount on ash3c - ansible.builtin.shell: | - ssh -o StrictHostKeyChecking=no ben@ash3c ' - echo "3131" | sudo -S apt update && - echo "3131" | sudo -S apt install -y nfs-common && - echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }} && - echo "3131" | sudo -S mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} && - echo "{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0" | echo "3131" | sudo -S tee -a /etc/fstab - ' - delegate_to: localhost - register: ash3c_result - - - name: Install NFS client and mount on warden - ansible.builtin.shell: | - ssh -o StrictHostKeyChecking=no ben@warden ' - echo "3131" | sudo -S apt update && - echo "3131" | sudo -S apt install -y nfs-common && - echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }} && - echo "3131" | sudo -S mount -t nfs {{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} && - echo "{{ nfs_server }}:{{ nfs_export_path }} {{ nfs_mount_path }} nfs defaults 0 0" | echo "3131" | sudo -S tee -a /etc/fstab - ' - delegate_to: localhost - register: warden_result - - - name: Test NFS connectivity on all nodes - ansible.builtin.shell: | - ssh -o StrictHostKeyChecking=no -p 60022 ben@master 'echo "3131" | sudo -S touch {{ nfs_mount_path }}/test-master-$(date +%s) && ls -la {{ nfs_mount_path }}/' - ssh -o StrictHostKeyChecking=no ben@ash3c 'echo "3131" | sudo -S touch {{ nfs_mount_path }}/test-ash3c-$(date +%s) && ls -la {{ nfs_mount_path }}/' - ssh -o StrictHostKeyChecking=no ben@warden 'echo "3131" | sudo -S touch {{ nfs_mount_path }}/test-warden-$(date +%s) && ls -la {{ nfs_mount_path }}/' - delegate_to: localhost - register: nfs_test_result - - - name: Display NFS test results - ansible.builtin.debug: - var: nfs_test_result.stdout_lines - - - name: Create Consul data directories on NFS - ansible.builtin.shell: | - ssh -o StrictHostKeyChecking=no -p 60022 ben@master 'echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-master' - ssh -o StrictHostKeyChecking=no ben@ash3c 'echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-ash3c' - ssh -o StrictHostKeyChecking=no ben@warden 'echo "3131" | sudo -S mkdir -p {{ nfs_mount_path }}/consul-warden' - delegate_to: localhost - register: consul_dirs_result - - - name: Display setup completion - ansible.builtin.debug: - msg: - - "NFS setup completed successfully!" - - "NFS mount point: {{ nfs_mount_path }}" - - "Consul data directories created:" - - " - {{ nfs_mount_path }}/consul-master" - - " - {{ nfs_mount_path }}/consul-ash3c" - - " - {{ nfs_mount_path }}/consul-warden" \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/setup-nomad-nfs-client.yml b/configuration/playbooks/root_playbooks/setup-nomad-nfs-client.yml deleted file mode 100644 index 77a233b..0000000 --- a/configuration/playbooks/root_playbooks/setup-nomad-nfs-client.yml +++ /dev/null @@ -1,50 +0,0 @@ ---- -- name: Configure Nomad client for NFS volumes - hosts: nomad_clients - become: yes - vars: - nfs_mount_path: /mnt/fnsync - - tasks: - - name: Create Nomad plugin directory for NFS - file: - path: /opt/nomad/plugins - state: directory - owner: nomad - group: nomad - mode: '0755' - - - name: Configure Nomad client to use NFS volumes - blockinfile: - path: /etc/nomad.d/nomad.hcl - marker: "# {mark} NFS VOLUME CONFIGURATION" - block: | - plugin "nomad-driver-podman" { - config { - volumes { - enabled = true - } - } - } - - client { - host_volume "nfs-shared" { - path = "{{ nfs_mount_path }}/nomad/volumes" - read_only = false - } - } - insertafter: 'data_dir = "/opt/nomad/data"' - - - name: Restart Nomad service to apply changes - systemd: - name: nomad - state: restarted - - - name: Verify Nomad client configuration - command: nomad node status -self - register: nomad_status - ignore_errors: yes - - - name: Display Nomad status - debug: - msg: "{{ inventory_hostname }} - Nomad status: {{ '运行中' if nomad_status.rc == 0 else '异常' }}" \ No newline at end of file diff --git a/configuration/playbooks/root_playbooks/setup-nomad-nfs.yml b/configuration/playbooks/root_playbooks/setup-nomad-nfs.yml deleted file mode 100644 index a813a3f..0000000 --- a/configuration/playbooks/root_playbooks/setup-nomad-nfs.yml +++ /dev/null @@ -1,63 +0,0 @@ ---- -- name: Setup NFS Storage for Nomad Cluster - hosts: nomad_cluster - become: yes - vars: - nfs_server: snail - nfs_export_path: /fs/1000/nfs/Fnsync - nfs_mount_path: /mnt/fnsync - nfs_options: "rw,sync,vers=4.2" - - tasks: - - name: Install NFS client packages - package: - name: nfs-common - state: present - - - name: Create NFS mount directory - file: - path: "{{ nfs_mount_path }}" - state: directory - owner: root - group: root - mode: '0755' - - - name: Mount NFS share - mount: - path: "{{ nfs_mount_path }}" - src: "{{ nfs_server }}:{{ nfs_export_path }}" - fstype: nfs - opts: "{{ nfs_options }}" - state: mounted - - - name: Ensure NFS mount persists after reboot - mount: - path: "{{ nfs_mount_path }}" - src: "{{ nfs_server }}:{{ nfs_export_path }}" - fstype: nfs - opts: "{{ nfs_options }}" - state: present - - - name: Verify NFS mount - command: df -h "{{ nfs_mount_path }}" - register: mount_result - - - name: Display mount result - debug: - var: mount_result.stdout - - - name: Create Nomad data directories on NFS - file: - path: "{{ nfs_mount_path }}/nomad/{{ inventory_hostname }}" - state: directory - owner: nomad - group: nomad - mode: '0755' - - - name: Create shared volumes directory - file: - path: "{{ nfs_mount_path }}/nomad/volumes" - state: directory - owner: nomad - group: nomad - mode: '0755' \ No newline at end of file diff --git a/configuration/playbooks/test/test-podman-snap-migration.yml b/configuration/playbooks/test/test-podman-snap-migration.yml deleted file mode 100644 index dc1241c..0000000 --- a/configuration/playbooks/test/test-podman-snap-migration.yml +++ /dev/null @@ -1,100 +0,0 @@ ---- -- name: 测试将 Podman 切换到 Snap 版本 (ch2 节点) - hosts: ch2 - become: yes - gather_facts: yes - - tasks: - - name: 检查当前 Podman 版本和安装方式 - shell: | - echo "=== 当前 Podman 信息 ===" - podman --version - echo "安装路径: $(which podman)" - echo "=== Snap 状态 ===" - which snap || echo "snap 未安装" - snap list podman 2>/dev/null || echo "Podman snap 未安装" - echo "=== 包管理器状态 ===" - dpkg -l | grep podman || echo "未通过 apt 安装" - register: current_status - - - name: 显示当前状态 - debug: - msg: "{{ current_status.stdout }}" - - - name: 检查 snap 是否已安装 - shell: which snap - register: snap_check - ignore_errors: yes - changed_when: false - - - name: 安装 snapd (如果未安装) - apt: - name: snapd - state: present - when: snap_check.rc != 0 - - - name: 确保 snapd 服务运行 - systemd: - name: snapd - state: started - enabled: yes - - - name: 检查当前 Podman snap 版本 - shell: snap info podman - register: snap_podman_info - ignore_errors: yes - - - name: 显示可用的 Podman snap 版本 - debug: - msg: "{{ snap_podman_info.stdout if snap_podman_info.rc == 0 else '无法获取 snap podman 信息' }}" - - - name: 停止当前 Podman 相关服务 - systemd: - name: podman - state: stopped - ignore_errors: yes - - - name: 移除通过包管理器安装的 Podman - apt: - name: podman - state: absent - purge: yes - ignore_errors: yes - - - name: 安装 Podman snap (edge 通道) - snap: - name: podman - state: present - classic: yes - channel: edge - - - name: 创建符号链接 (确保 podman 命令可用) - file: - src: /snap/bin/podman - dest: /usr/local/bin/podman - state: link - force: yes - - - name: 验证 Snap Podman 安装 - shell: | - /snap/bin/podman --version - which podman - register: snap_podman_verify - - - name: 显示安装结果 - debug: - msg: | - ✅ Snap Podman 安装完成 - 🚀 版本: {{ snap_podman_verify.stdout_lines[0] }} - 📍 路径: {{ snap_podman_verify.stdout_lines[1] }} - - - name: 测试 Podman 基本功能 - shell: | - /snap/bin/podman version - /snap/bin/podman info --format json | jq -r '.host.arch' - register: podman_test - ignore_errors: yes - - - name: 显示测试结果 - debug: - msg: "Podman 测试结果: {{ podman_test.stdout if podman_test.rc == 0 else '测试失败' }}" \ No newline at end of file diff --git a/jobs/consul/consul-cluster-arm64.nomad b/jobs/consul/consul-cluster-arm64.nomad deleted file mode 100644 index f02ad69..0000000 --- a/jobs/consul/consul-cluster-arm64.nomad +++ /dev/null @@ -1,87 +0,0 @@ -job "consul-cluster-arm64" { - datacenters = ["dc1"] - type = "service" - - # 只在 ARM64 节点上运行:master 和 ash3c - constraint { - attribute = "${attr.unique.hostname}" - operator = "regexp" - value = "(master|ash3c)" - } - - group "consul" { - count = 2 - - # 确保每个节点只运行一个实例 - constraint { - operator = "distinct_hosts" - value = "true" - } - - network { - port "http" { - static = 8500 - } - port "rpc" { - static = 8400 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - port "server" { - static = 8300 - } - port "dns" { - static = 8600 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-server", - "-bootstrap-expect=2", - "-data-dir=/tmp/consul-cluster-data", - "-bind=${NOMAD_IP_serf_lan}", - "-client=0.0.0.0", - "-retry-join=100.117.106.136", # master Tailscale IP - "-retry-join=100.116.80.94", # ash3c Tailscale IP - "-ui-config-enabled=true", - "-log-level=INFO", - "-node=${node.unique.name}-consul", - "-datacenter=dc1" - ] - } - - artifact { - source = "https://releases.hashicorp.com/consul/1.17.0/consul_1.17.0_linux_arm64.zip" - destination = "local/" - } - - resources { - cpu = 200 - memory = 256 - } - - service { - name = "consul-cluster-arm64" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "3s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-cluster-binary.nomad b/jobs/consul/consul-cluster-binary.nomad deleted file mode 100644 index e1acbfa..0000000 --- a/jobs/consul/consul-cluster-binary.nomad +++ /dev/null @@ -1,88 +0,0 @@ -job "consul-cluster" { - datacenters = ["dc1"] - type = "service" - - # 在三个节点上运行:bj-warden, master, ash3c - constraint { - attribute = "${node.unique.name}" - operator = "regexp" - value = "(bj-warden|master|ash3c)" - } - - group "consul" { - count = 3 - - # 确保每个节点只运行一个实例 - constraint { - operator = "distinct_hosts" - value = "true" - } - - network { - port "http" { - static = 8500 - } - port "rpc" { - static = 8400 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - port "server" { - static = 8300 - } - port "dns" { - static = 8600 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-server", - "-bootstrap-expect=3", - "-data-dir=/tmp/consul-cluster-data", - "-bind=${NOMAD_IP_serf_lan}", - "-client=0.0.0.0", - "-retry-join=100.122.197.112", # bj-warden Tailscale IP - "-retry-join=100.117.106.136", # master Tailscale IP - "-retry-join=100.116.80.94", # ash3c Tailscale IP - "-ui-config-enabled=true", - "-log-level=INFO", - "-node=${node.unique.name}-consul", - "-datacenter=dc1" - ] - } - - artifact { - source = "https://releases.hashicorp.com/consul/1.17.0/consul_1.17.0_linux_arm64.zip" - destination = "local/" - } - - resources { - cpu = 200 - memory = 256 - } - - service { - name = "consul-cluster" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "3s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-cluster-nomad.nomad b/jobs/consul/consul-cluster-nomad.nomad deleted file mode 100644 index 4567d24..0000000 --- a/jobs/consul/consul-cluster-nomad.nomad +++ /dev/null @@ -1,81 +0,0 @@ -job "consul-cluster" { - datacenters = ["dc1"] - type = "service" - - constraint { - attribute = "${node.unique.name}" - operator = "regexp" - value = "^(master|ash3c|semaphore)$" - } - - group "consul" { - count = 3 - - network { - port "http" { - static = 8500 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - port "server" { - static = 8300 - } - port "dns" { - static = 8600 - } - } - - service { - name = "consul" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - interval = "10s" - timeout = "2s" - } - } - - task "consul" { - driver = "podman" - - config { - image = "consul:1.15.4" - network_mode = "host" - - args = [ - "agent", - "-server", - "-bootstrap-expect=3", - "-ui", - "-data-dir=/consul/data", - "-config-dir=/consul/config", - "-bind={{ env \"attr.unique.network.ip-address\" }}", - "-client=0.0.0.0", - "-retry-join=100.117.106.136", - "-retry-join=100.116.80.94", - "-retry-join=100.116.158.95" - ] - - volumes = [ - "consul-data:/consul/data", - "consul-config:/consul/config" - ] - } - - resources { - cpu = 500 - memory = 512 - } - - env { - CONSUL_BIND_INTERFACE = "tailscale0" - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-cluster-simple.nomad b/jobs/consul/consul-cluster-simple.nomad index 8954536..ccfe067 100644 --- a/jobs/consul/consul-cluster-simple.nomad +++ b/jobs/consul/consul-cluster-simple.nomad @@ -7,7 +7,7 @@ job "consul-cluster-simple" { constraint { attribute = "${node.unique.name}" - value = "master" + value = "kr-master" } network { @@ -35,7 +35,7 @@ job "consul-cluster-simple" { "-server", "-bootstrap-expect=3", "-data-dir=/opt/nomad/data/consul", - "-client=100.64.0.0/10", + "-client=100.117.106.136", "-bind=100.117.106.136", "-advertise=100.117.106.136", "-retry-join=100.116.80.94", @@ -58,7 +58,7 @@ job "consul-cluster-simple" { constraint { attribute = "${node.unique.name}" - value = "ash3c" + value = "us-ash3c" } network { @@ -86,7 +86,7 @@ job "consul-cluster-simple" { "-server", "-bootstrap-expect=3", "-data-dir=/opt/nomad/data/consul", - "-client=100.64.0.0/10", + "-client=100.116.80.94", "-bind=100.116.80.94", "-advertise=100.116.80.94", "-retry-join=100.117.106.136", @@ -137,7 +137,7 @@ job "consul-cluster-simple" { "-server", "-bootstrap-expect=3", "-data-dir=/opt/nomad/data/consul", - "-client=100.64.0.0/10", + "-client=100.122.197.112", "-bind=100.122.197.112", "-advertise=100.122.197.112", "-retry-join=100.117.106.136", diff --git a/jobs/consul/consul-cluster-three-nodes.nomad b/jobs/consul/consul-cluster-three-nodes.nomad deleted file mode 100644 index 1b54047..0000000 --- a/jobs/consul/consul-cluster-three-nodes.nomad +++ /dev/null @@ -1,190 +0,0 @@ -job "consul-cluster-three-nodes" { - datacenters = ["dc1"] - type = "service" - - group "consul-master" { - count = 1 - - constraint { - attribute = "${node.unique.name}" - value = "master" - } - - network { - port "http" { - static = 8500 - } - port "rpc" { - static = 8300 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-server", - "-bootstrap-expect=3", - "-data-dir=/opt/nomad/data/consul", - "-client=0.0.0.0", - "-bind=100.117.106.136", - "-advertise=100.117.106.136", - "-retry-join=100.116.80.94", - "-retry-join=100.122.197.112", - "-ui-config-enabled=true" - ] - } - - resources { - cpu = 300 - memory = 512 - } - - service { - name = "consul-master" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "3s" - } - } - } - } - - group "consul-ash3c" { - count = 1 - - constraint { - attribute = "${node.unique.name}" - value = "ash3c" - } - - network { - port "http" { - static = 8500 - } - port "rpc" { - static = 8300 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-server", - "-bootstrap-expect=3", - "-data-dir=/opt/nomad/data/consul", - "-client=0.0.0.0", - "-bind=100.116.80.94", - "-advertise=100.116.80.94", - "-retry-join=100.117.106.136", - "-retry-join=100.122.197.112", - "-ui-config-enabled=true" - ] - } - - resources { - cpu = 300 - memory = 512 - } - - service { - name = "consul-ash3c" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "3s" - } - } - } - } - - group "consul-warden" { - count = 1 - - constraint { - attribute = "${node.unique.name}" - value = "bj-warden" - } - - network { - port "http" { - static = 8500 - } - port "rpc" { - static = 8300 - } - port "serf_lan" { - static = 8301 - } - port "serf_wan" { - static = 8302 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-server", - "-bootstrap-expect=3", - "-data-dir=/opt/nomad/data/consul", - "-client=0.0.0.0", - "-bind=100.122.197.112", - "-advertise=100.122.197.112", - "-retry-join=100.117.106.136", - "-retry-join=100.116.80.94", - "-ui-config-enabled=true" - ] - } - - resources { - cpu = 300 - memory = 512 - } - - service { - name = "consul-warden" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "3s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-single-member-new.nomad b/jobs/consul/consul-single-member-new.nomad deleted file mode 100644 index fabc4bb..0000000 --- a/jobs/consul/consul-single-member-new.nomad +++ /dev/null @@ -1,47 +0,0 @@ -job "consul-single-member" { - datacenters = ["dc1"] - type = "service" - priority = 50 - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "consul" { - count = 1 - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"] - } - - resources { - cpu = 200 - memory = 256 - network { - mbits = 10 - port "http" { - static = 8500 - } - } - } - - service { - name = "consul" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-single-member.nomad b/jobs/consul/consul-single-member.nomad deleted file mode 100644 index fabc4bb..0000000 --- a/jobs/consul/consul-single-member.nomad +++ /dev/null @@ -1,47 +0,0 @@ -job "consul-single-member" { - datacenters = ["dc1"] - type = "service" - priority = 50 - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "consul" { - count = 1 - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"] - } - - resources { - cpu = 200 - memory = 256 - network { - mbits = 10 - port "http" { - static = 8500 - } - } - } - - service { - name = "consul" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-test-warden.nomad b/jobs/consul/consul-test-warden.nomad deleted file mode 100644 index 08c3887..0000000 --- a/jobs/consul/consul-test-warden.nomad +++ /dev/null @@ -1,46 +0,0 @@ -job "consul-test-warden" { - datacenters = ["dc1"] - type = "service" - - constraint { - attribute = "${node.unique.name}" - value = "bj-warden" - } - - group "consul" { - count = 1 - - network { - port "http" { - static = 8500 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-test"] - } - - resources { - cpu = 200 - memory = 256 - } - - service { - name = "consul-test" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/consul-warden-only.nomad b/jobs/consul/consul-warden-only.nomad deleted file mode 100644 index cc7f4b8..0000000 --- a/jobs/consul/consul-warden-only.nomad +++ /dev/null @@ -1,46 +0,0 @@ -job "consul-warden" { - datacenters = ["dc1"] - type = "service" - priority = 50 - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "consul" { - count = 1 - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/consul-data"] - } - - resources { - cpu = 200 - memory = 256 - network { - port "http" { - static = 8500 - } - } - } - - service { - name = "consul" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/service-discovery-warden.nomad b/jobs/consul/service-discovery-warden.nomad deleted file mode 100644 index fe36d86..0000000 --- a/jobs/consul/service-discovery-warden.nomad +++ /dev/null @@ -1,46 +0,0 @@ -job "service-discovery-warden" { - datacenters = ["dc1"] - type = "service" - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "discovery" { - count = 1 - - network { - port "http" { - static = 8500 - } - } - - task "discovery" { - driver = "exec" - - config { - command = "consul" - args = ["agent", "-dev", "-client=0.0.0.0", "-data-dir=/tmp/discovery-data"] - } - - resources { - cpu = 200 - memory = 256 - } - - service { - name = "discovery-service" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/consul/simple-consul-warden.nomad b/jobs/consul/simple-consul-warden.nomad deleted file mode 100644 index fb35a87..0000000 --- a/jobs/consul/simple-consul-warden.nomad +++ /dev/null @@ -1,52 +0,0 @@ -job "simple-consul-test" { - datacenters = ["dc1"] - type = "service" - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "consul" { - count = 1 - - network { - port "http" { - static = 8500 - } - } - - task "consul" { - driver = "exec" - - config { - command = "consul" - args = [ - "agent", - "-dev", - "-client=0.0.0.0", - "-bind=100.122.197.112", - "-data-dir=/tmp/consul-test-data" - ] - } - - resources { - cpu = 200 - memory = 256 - } - - service { - name = "consul-test" - port = "http" - - check { - type = "http" - path = "/v1/status/leader" - port = "http" - interval = "10s" - timeout = "2s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/tests/test-job.nomad b/jobs/tests/test-job.nomad deleted file mode 100644 index bc0e9f7..0000000 --- a/jobs/tests/test-job.nomad +++ /dev/null @@ -1,40 +0,0 @@ -job "test-nginx" { - datacenters = ["dc1"] - type = "service" - - group "web" { - count = 1 - - network { - port "http" { - static = 8080 - } - } - - task "nginx" { - driver = "podman" - - config { - image = "nginx:alpine" - ports = ["http"] - } - - resources { - cpu = 100 - memory = 128 - } - - service { - name = "nginx-test" - port = "http" - - check { - type = "http" - path = "/" - interval = "10s" - timeout = "3s" - } - } - } - } -} \ No newline at end of file diff --git a/jobs/tests/test-podman-job.nomad b/jobs/tests/test-podman-job.nomad deleted file mode 100644 index 3392296..0000000 --- a/jobs/tests/test-podman-job.nomad +++ /dev/null @@ -1,24 +0,0 @@ -job "test-podman" { - datacenters = ["dc1"] - type = "batch" - - group "test" { - count = 1 - - task "hello" { - driver = "podman" - - config { - image = "docker.io/library/hello-world:latest" - logging = { - driver = "journald" - } - } - - resources { - cpu = 100 - memory = 128 - } - } - } -} \ No newline at end of file diff --git a/jobs/tests/test-podman-simple.nomad b/jobs/tests/test-podman-simple.nomad deleted file mode 100644 index 3674e05..0000000 --- a/jobs/tests/test-podman-simple.nomad +++ /dev/null @@ -1,23 +0,0 @@ -job "test-podman-simple" { - datacenters = ["dc1"] - type = "batch" - - group "test" { - count = 1 - - task "hello" { - driver = "podman" - - config { - image = "alpine:latest" - command = "echo" - args = ["Hello from Podman!"] - } - - resources { - cpu = 100 - memory = 64 - } - } - } -} \ No newline at end of file diff --git a/jobs/tests/test-private-registry.nomad b/jobs/tests/test-private-registry.nomad deleted file mode 100644 index 4b31f37..0000000 --- a/jobs/tests/test-private-registry.nomad +++ /dev/null @@ -1,31 +0,0 @@ -job "test-private-registry" { - datacenters = ["dc1"] - type = "batch" - - group "test" { - count = 1 - - # 指定运行在北京节点上 - constraint { - attribute = "${node.unique.name}" - operator = "regexp" - value = "bj-.*" - } - - task "hello" { - driver = "podman" - - config { - image = "hello-world:latest" - logging = { - driver = "journald" - } - } - - resources { - cpu = 100 - memory = 64 - } - } - } -} \ No newline at end of file diff --git a/jobs/tests/test-simple.nomad b/jobs/tests/test-simple.nomad deleted file mode 100644 index a1327b3..0000000 --- a/jobs/tests/test-simple.nomad +++ /dev/null @@ -1,27 +0,0 @@ -job "test-simple" { - datacenters = ["dc1"] - type = "service" - - constraint { - attribute = "${node.unique.name}" - value = "warden" - } - - group "test" { - count = 1 - - task "hello" { - driver = "exec" - - config { - command = "echo" - args = ["Hello from warden node!"] - } - - resources { - cpu = 100 - memory = 64 - } - } - } -} \ No newline at end of file diff --git a/scripts/utilities/fix-master-binary.sh b/scripts/utilities/fix-master-binary.sh deleted file mode 100755 index b774783..0000000 --- a/scripts/utilities/fix-master-binary.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -echo "🔧 使用 HashiCorp 官方脚本修复 master 节点二进制文件..." - -# 停止 nomad 服务 -echo '3131' | sudo -S systemctl stop nomad || true -echo '3131' | sudo -S pkill -9 -f nomad || true - -# 删除旧的二进制文件 -echo '3131' | sudo -S rm -f /usr/local/bin/nomad /usr/bin/nomad - -# 使用 HashiCorp 官方安装脚本(自动检测架构) -curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - -echo '3131' | sudo -S apt-add-repository "deb [arch=$(dpkg --print-architecture)] https://apt.releases.hashicorp.com $(lsb_release -cs) main" -echo '3131' | sudo -S apt-get update -echo '3131' | sudo -S apt-get install -y nomad=1.10.5-1 - -# 验证安装 -nomad version - -# 重启服务 -echo '3131' | sudo -S systemctl daemon-reload -echo '3131' | sudo -S systemctl enable nomad -echo '3131' | sudo -S systemctl start nomad - -echo "✅ Master 节点二进制文件修复完成!" \ No newline at end of file diff --git a/scripts/utilities/nomad-diagnosis.sh b/scripts/utilities/nomad-diagnosis.sh deleted file mode 100755 index f0caf8d..0000000 --- a/scripts/utilities/nomad-diagnosis.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash - -# 🔍 Nomad 集群快速诊断脚本 - -echo "🔍 Nomad 集群快速诊断" -echo "====================" -echo "" - -# 定义节点信息 -declare -A NODES=( - ["semaphore"]="local" - ["master"]="100.117.106.136:60022" - ["ash3c"]="100.116.80.94:22" -) - -declare -A TAILSCALE_IPS=( - ["semaphore"]="100.116.158.95" - ["master"]="100.117.106.136" - ["ash3c"]="100.116.80.94" -) - -echo "📊 1. 本地 Nomad 服务状态" -echo "------------------------" -systemctl status nomad --no-pager | head -10 || echo "❌ 本地 Nomad 服务异常" -echo "" - -echo "📊 2. 集群成员状态" -echo "----------------" -nomad server members 2>/dev/null || echo "❌ 无法获取集群成员状态" -echo "" - -echo "📊 3. 节点状态" -echo "------------" -nomad node status 2>/dev/null || echo "❌ 无法获取节点状态" -echo "" - -echo "🌐 4. 网络连通性测试" -echo "------------------" -for node in "${!NODES[@]}"; do - ip="${TAILSCALE_IPS[$node]}" - echo "测试 $node ($ip):" - - if [[ "$node" == "semaphore" ]]; then - echo " ✅ 本地节点" - else - # Ping 测试 - if ping -c 1 -W 3 "$ip" >/dev/null 2>&1; then - echo " ✅ Ping: 成功" - else - echo " ❌ Ping: 失败" - fi - - # 端口测试 - if timeout 5 bash -c "/dev/null; then - echo " ✅ RPC端口(4647): 开放" - else - echo " ❌ RPC端口(4647): 关闭" - fi - - if timeout 5 bash -c "/dev/null; then - echo " ✅ HTTP端口(4646): 开放" - else - echo " ❌ HTTP端口(4646): 关闭" - fi - fi - echo "" -done - -echo "🔧 5. 远程节点服务状态" -echo "-------------------" -for node in "${!NODES[@]}"; do - if [[ "$node" == "semaphore" ]]; then - continue - fi - - connection="${NODES[$node]}" - ip=$(echo "$connection" | cut -d: -f1) - port=$(echo "$connection" | cut -d: -f2) - - echo "检查 $node ($ip:$port):" - - if ssh -p "$port" -i ~/.ssh/id_ed25519 -o ConnectTimeout=10 -o StrictHostKeyChecking=no ben@"$ip" "echo '3131' | sudo -S systemctl is-active nomad" 2>/dev/null; then - status=$(ssh -p "$port" -i ~/.ssh/id_ed25519 -o ConnectTimeout=10 -o StrictHostKeyChecking=no ben@"$ip" "echo '3131' | sudo -S systemctl is-active nomad" 2>/dev/null) - echo " 服务状态: $status" - - # 检查配置文件中的 bind_addr - bind_addr=$(ssh -p "$port" -i ~/.ssh/id_ed25519 -o ConnectTimeout=10 -o StrictHostKeyChecking=no ben@"$ip" "echo '3131' | sudo -S grep 'bind_addr' /etc/nomad.d/nomad.hcl 2>/dev/null" | head -1) - echo " 配置绑定地址: $bind_addr" - - # 检查实际监听端口 - listening=$(ssh -p "$port" -i ~/.ssh/id_ed25519 -o ConnectTimeout=10 -o StrictHostKeyChecking=no ben@"$ip" "echo '3131' | sudo -S netstat -tlnp | grep :464" 2>/dev/null | head -3) - if [[ -n "$listening" ]]; then - echo " 监听端口:" - echo "$listening" | sed 's/^/ /' - else - echo " ❌ 未发现 Nomad 监听端口" - fi - else - echo " ❌ 无法连接或服务未运行" - fi - echo "" -done - -echo "📋 6. 问题总结和建议" -echo "==================" - -# 检查是否有 leader -if nomad server members 2>/dev/null | grep -q "leader"; then - echo "✅ 集群有 leader" -else - echo "❌ 集群没有 leader - 这是主要问题!" - echo "" - echo "🔧 建议的修复步骤:" - echo "1. 先尝试 ash3c IP 修复: ./scripts/utilities/fix-ash3c-ip.sh" - echo "2. 如果还不行,使用核弹级重置: ./scripts/utilities/nuclear-reset.sh" - echo "3. 检查 master 节点是否需要重启" -fi - -echo "" -echo "🔗 有用的链接:" -echo " Web UI: http://100.116.158.95:4646" -echo " 日志查看: journalctl -u nomad -f" -echo "" -echo "🔍 诊断完成!" \ No newline at end of file diff --git a/scripts/utilities/nuclear-reset.sh b/scripts/utilities/nuclear-reset.sh deleted file mode 100755 index a1f8d8b..0000000 --- a/scripts/utilities/nuclear-reset.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# ☢️ 核弹级 Nomad 重置执行脚本 ☢️ - -set -e - -echo "☢️☢️☢️ 核弹级 Nomad 集群重置 ☢️☢️☢️" -echo "" -echo "这个脚本将:" -echo "1. 完全摧毁所有 Nomad 进程和数据" -echo "2. 重新下载并安装 Nomad 二进制文件" -echo "3. 创建全新的配置文件" -echo "4. 重新启动整个集群" -echo "" -echo "⚠️ 警告:这是不可逆的操作!⚠️" -echo "" - -# 检查是否在正确的目录 -if [[ ! -f "scripts/utilities/NUCLEAR-NOMAD-RESET.yml" ]]; then - echo "❌ 错误:请在 /root/mgmt 目录下运行此脚本" - exit 1 -fi - -# 确认操作 -read -p "你确定要进行核弹级重置吗?输入 'NUCLEAR' 确认: " confirm -if [[ "$confirm" != "NUCLEAR" ]]; then - echo "❌ 操作已取消" - exit 1 -fi - -echo "" -echo "🚀 开始核弹级重置..." -echo "" - -# 设置 Ansible 配置 -export ANSIBLE_HOST_KEY_CHECKING=False -export ANSIBLE_STDOUT_CALLBACK=yaml - -# 执行核弹级重置 -echo "📡 执行 Ansible playbook..." -cd /root/mgmt/configuration - -ansible-playbook \ - -i inventories/production/nomad-cluster.ini \ - ../scripts/utilities/NUCLEAR-NOMAD-RESET.yml \ - --extra-vars "ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \ - -v - -echo "" -echo "⏰ 等待集群稳定..." -sleep 30 - -echo "" -echo "🔍 检查集群状态..." - -# 检查集群成员 -echo "📊 集群成员状态:" -nomad server members || echo "❌ 无法获取集群成员状态" - -echo "" -echo "📊 节点状态:" -nomad node status || echo "❌ 无法获取节点状态" - -echo "" -echo "🎯 如果上面显示错误,请等待几分钟后再次检查" -echo "集群可能需要一些时间来完全启动和同步" - -echo "" -echo "🔧 有用的命令:" -echo " 检查集群成员: nomad server members" -echo " 检查节点状态: nomad node status" -echo " 查看日志: journalctl -u nomad -f" -echo " Web UI: http://100.116.158.95:4646" - -echo "" -echo "☢️ 核弹级重置完成!☢️" \ No newline at end of file diff --git a/scripts/utilities/ultimate-nomad-fix.yml b/scripts/utilities/ultimate-nomad-fix.yml deleted file mode 100644 index d051a57..0000000 --- a/scripts/utilities/ultimate-nomad-fix.yml +++ /dev/null @@ -1,113 +0,0 @@ ---- -- name: Ultimate Nomad Cluster Fix - Complete Reset - hosts: nomad_cluster - become: yes - gather_facts: yes - vars: - nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ=" - - tasks: - - name: Stop and disable nomad service completely - systemd: - name: nomad - state: stopped - enabled: no - daemon_reload: yes - ignore_errors: yes - - - name: Kill any remaining nomad processes - shell: pkill -f nomad || true - ignore_errors: yes - - - name: Remove all nomad data and state - file: - path: "{{ item }}" - state: absent - loop: - - /opt/nomad/data - - /etc/nomad.d/nomad.hcl - - /var/log/nomad - - - name: Create clean nomad directories - file: - path: "{{ item }}" - state: directory - owner: nomad - group: nomad - mode: '0755' - loop: - - /etc/nomad.d - - /opt/nomad - - /opt/nomad/data - - /opt/nomad/alloc_mounts - - /var/log/nomad - - - name: Create minimal nomad configuration - copy: - content: | - datacenter = "dc1" - region = "global" - data_dir = "/opt/nomad/data" - - bind_addr = "{{ ansible_default_ipv4.address }}" - - server { - enabled = true - bootstrap_expect = 1 - encrypt = "{{ nomad_encrypt_key }}" - } - - client { - enabled = true - alloc_dir = "/opt/nomad/alloc_mounts" - } - - ui { - enabled = true - } - - addresses { - http = "0.0.0.0" - rpc = "{{ ansible_default_ipv4.address }}" - serf = "{{ ansible_default_ipv4.address }}" - } - - ports { - http = 4646 - rpc = 4647 - serf = 4648 - } - - log_level = "INFO" - log_file = "/var/log/nomad/nomad.log" - dest: /etc/nomad.d/nomad.hcl - owner: nomad - group: nomad - mode: '0640' - - - name: Enable and start nomad service - systemd: - name: nomad - state: started - enabled: yes - daemon_reload: yes - - - name: Wait for nomad to start - wait_for: - port: 4646 - host: "{{ ansible_default_ipv4.address }}" - delay: 10 - timeout: 60 - - - name: Check nomad status - uri: - url: "http://{{ ansible_default_ipv4.address }}:4646/v1/status/leader" - method: GET - register: nomad_leader - retries: 5 - delay: 5 - ignore_errors: yes - - - name: Display nomad status - debug: - msg: "Nomad leader status: {{ nomad_leader.json if nomad_leader.json is defined else 'No leader elected yet' }}" \ No newline at end of file diff --git a/scripts/utilities/verify-podman-migration.sh b/scripts/utilities/verify-podman-migration.sh deleted file mode 100755 index 391be9a..0000000 --- a/scripts/utilities/verify-podman-migration.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -echo "=== Nomad Podman Migration Verification ===" -echo - -# Check Nomad service status -echo "1. Checking Nomad service status..." -ssh ben@100.84.197.26 "sudo systemctl status nomad --no-pager -l" -echo - -# Check Nomad configuration -echo "2. Checking Nomad configuration..." -ssh ben@100.84.197.26 "sudo cat /etc/nomad.d/nomad.hcl | grep -A 10 -B 2 podman" -echo - -# Check Podman socket -echo "3. Checking Podman socket..." -ssh ben@100.84.197.26 "ls -la /run/user/*/podman/podman.sock 2>/dev/null || echo 'Podman socket not found'" -echo - -# Check Nomad node status -echo "4. Checking Nomad node status..." -ssh ben@100.84.197.26 "sudo -u nomad /usr/local/bin/nomad node status -self | grep -A 10 'Driver Status'" 2>/dev/null || echo "Could not get node status" -echo - -# Test Podman functionality -echo "5. Testing Podman as nomad user..." -ssh ben@100.84.197.26 "sudo -u nomad podman version --format '{{.Version}}'" 2>/dev/null || echo "Podman test failed" -echo - -echo "=== Verification Complete ===" \ No newline at end of file