deployment/ansible/cleanup-consul-clients.yml (new file, 57 lines)
@@ -0,0 +1,57 @@
---
- name: Clean up Consul configuration from dedicated clients
  hosts: hcp1,influxdb1,browser
  become: yes

  tasks:
    - name: Stop Consul service
      systemd:
        name: consul
        state: stopped
        enabled: no

    - name: Disable Consul service
      systemd:
        name: consul
        enabled: no

    - name: Kill any remaining Consul processes
      shell: |
        pkill -f consul || true
        sleep 2
        pkill -9 -f consul || true
      ignore_errors: yes

    - name: Remove Consul systemd service file
      file:
        path: /etc/systemd/system/consul.service
        state: absent

    - name: Remove Consul configuration directory
      file:
        path: /etc/consul.d
        state: absent

    - name: Remove Consul data directory
      file:
        path: /opt/consul
        state: absent

    - name: Reload systemd daemon
      systemd:
        daemon_reload: yes

    - name: Verify Consul is stopped
      shell: |
        if pgrep -f consul; then
          echo "Consul still running"
          exit 1
        else
          echo "Consul stopped successfully"
        fi
      register: consul_status
      failed_when: consul_status.rc != 0

    - name: Display cleanup status
      debug:
        msg: "Consul cleanup completed on {{ inventory_hostname }}"
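A minimal invocation sketch; the inventory path is the production inventory referenced later in this commit, so adjust if yours differs:

    ansible-playbook -i deployment/ansible/inventories/production/inventory.ini \
      deployment/ansible/cleanup-consul-clients.yml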
deployment/ansible/configure-consul-autodiscovery.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
---
- name: Configure Consul Auto-Discovery
  hosts: all
  become: yes
  vars:
    consul_servers:
      - "warden.tailnet-68f9.ts.net:8301"
      - "ch4.tailnet-68f9.ts.net:8301"
      - "ash3c.tailnet-68f9.ts.net:8301"

  tasks:
    - name: Backup current nomad.hcl
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}
        remote_src: yes
        backup: yes

    - name: Update Consul configuration for auto-discovery
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} ANSIBLE MANAGED CONSUL CONFIG"
        block: |
          consul {
            retry_join = [
              "warden.tailnet-68f9.ts.net:8301",
              "ch4.tailnet-68f9.ts.net:8301",
              "ash3c.tailnet-68f9.ts.net:8301"
            ]
            server_service_name = "nomad"
            client_service_name = "nomad-client"
          }
        # NOTE: blockinfile has no 'replace' option; any pre-existing consul
        # block must be removed separately or it will coexist with this one
        insertbefore: '^consul \{'

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: yes

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30

    - name: Verify Consul connection
      shell: |
        NOMAD_ADDR=http://localhost:4646 nomad node status | grep -q "ready"
      register: nomad_ready
      failed_when: nomad_ready.rc != 0
      retries: 3
      delay: 10
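A quick post-run check on any target node; consul members lists the gossip pool, and the NOMAD_ADDR form mirrors the verify task above:

    consul members
    NOMAD_ADDR=http://localhost:4646 nomad node status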
@@ -0,0 +1,75 @@
---
- name: Remove Consul configuration from Nomad servers
  hosts: semaphore,ash1d,ash2e,ch2,ch3,onecloud1,de
  become: yes

  tasks:
    - name: Remove entire Consul configuration block
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} ANSIBLE MANAGED CONSUL CONFIG"
        state: absent

    - name: Remove Consul configuration lines
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^consul \{'
        state: absent

    - name: Remove Consul configuration content
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ address ='
        state: absent

    - name: Remove Consul service names
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ server_service_name ='
        state: absent

    - name: Remove Consul client service name
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ client_service_name ='
        state: absent

    - name: Remove Consul auto-advertise
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ auto_advertise ='
        state: absent

    - name: Remove Consul server auto-join
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ server_auto_join ='
        state: absent

    - name: Remove Consul client auto-join
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ client_auto_join ='
        state: absent

    - name: Remove Consul closing brace
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^}'
        state: absent

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30

    - name: Display completion message
      debug:
        msg: "Removed Consul configuration from {{ inventory_hostname }}"
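The line-by-line deletion above depends on exact formatting, and note that the '^}' rule removes every top-level closing brace in the file, not just the consul block's. A manual spot check afterwards is cheap:

    grep -n 'consul' /etc/nomad.d/nomad.hcl || echo "no consul references left"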
deployment/ansible/enable-nomad-client-mode.yml (new file, 32 lines)
@@ -0,0 +1,32 @@
---
- name: Enable Nomad Client Mode on Servers
  hosts: ch2,ch3,de
  become: yes

  tasks:
    - name: Enable Nomad client mode
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^client \{'
        line: 'client {'
        state: present

    - name: Enable client mode
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ enabled = false'
        line: ' enabled = true'
        state: present

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30
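To confirm each node actually registers as a client after the restart (same check style the other playbooks in this commit use):

    NOMAD_ADDR=http://localhost:4646 nomad node status | grep -q ready && echo "client ready"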
deployment/ansible/fix-master-references.yml (new file, 62 lines)
@@ -0,0 +1,62 @@
---
- name: Fix all master references to ch4
  hosts: localhost
  gather_facts: no
  vars:
    files_to_fix:
      - "scripts/diagnose-consul-sync.sh"
      - "scripts/register-traefik-to-all-consul.sh"
      - "deployment/ansible/playbooks/update-nomad-consul-config.yml"
      - "deployment/ansible/templates/nomad-server.hcl.j2"
      - "deployment/ansible/templates/nomad-client.hcl"
      - "deployment/ansible/playbooks/fix-nomad-consul-roles.yml"
      - "deployment/ansible/onecloud1_nomad.hcl"
      - "ansible/templates/consul-client.hcl.j2"
      - "ansible/consul-client-deployment.yml"
      - "ansible/consul-client-simple.yml"

  tasks:
    - name: Replace master.tailnet-68f9.ts.net with ch4.tailnet-68f9.ts.net
      replace:
        path: "{{ item }}"
        regexp: 'master\.tailnet-68f9\.ts\.net'
        replace: 'ch4.tailnet-68f9.ts.net'
      loop: "{{ files_to_fix }}"
      when: item is file

    - name: Replace master hostname references
      replace:
        path: "{{ item }}"
        regexp: '\bmaster\b'
        replace: 'ch4'
      loop: "{{ files_to_fix }}"
      when: item is file

    - name: Replace master IP references in comments
      replace:
        path: "{{ item }}"
        regexp: '# master'
        replace: '# ch4'
      loop: "{{ files_to_fix }}"
      when: item is file

    - name: Fix inventory files
      replace:
        path: "{{ item }}"
        regexp: 'master ansible_host=master'
        replace: 'ch4 ansible_host=ch4'
      loop:
        - "deployment/ansible/inventories/production/inventory.ini"
        - "deployment/ansible/inventories/production/csol-consul-nodes.ini"
        - "deployment/ansible/inventories/production/nomad-clients.ini"
        - "deployment/ansible/inventories/production/master-ash3c.ini"
        - "deployment/ansible/inventories/production/consul-nodes.ini"
        - "deployment/ansible/inventories/production/vault.ini"

    - name: Fix IP address references (100.117.106.136 comments)
      replace:
        path: "{{ item }}"
        regexp: '100\.117\.106\.136.*# master'
        replace: '100.117.106.136 # ch4'
      loop: "{{ files_to_fix }}"
      when: item is file
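Because these are blind regex rewrites across many files, a dry run first is cheap insurance; --check with --diff previews every substitution without writing anything:

    ansible-playbook --check --diff deployment/ansible/fix-master-references.yml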
@@ -72,7 +72,7 @@
   "description": "Consul client nodes, used for service discovery and health checks",
   "nodes": [
     {
-      "name": "master",
+      "name": "ch4",
       "host": "100.117.106.136",
       "user": "ben",
       "password": "3131",
@@ -2,21 +2,21 @@
 # Server nodes (7 server nodes)
 # ⚠️ Warning: with great power comes great responsibility! Operate on server nodes with extreme caution!
 # ⚠️ Any operation on a server node can affect the stability of the entire cluster!
-semaphore ansible_host=semaphore.tailnet-68f9.ts.net ansible_user=root ansible_password=313131 ansible_become_password=313131
+semaphore ansible_host=semaphore.tailnet-68f9.ts.net ansible_user=root ansible_password=3131 ansible_become_password=3131
 ash1d ansible_host=ash1d.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 ash2e ansible_host=ash2e.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 ch2 ansible_host=ch2.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 ch3 ansible_host=ch3.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 onecloud1 ansible_host=onecloud1.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 de ansible_host=de.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 hcp1 ansible_host=hcp1.tailnet-68f9.ts.net ansible_user=root ansible_password=3131 ansible_become_password=3131

 [nomad_clients]
-# Client nodes
-master ansible_host=master.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131 ansible_port=60022
+# Client nodes (5 client nodes)
+ch4 ansible_host=ch4.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 ash3c ansible_host=ash3c.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 browser ansible_host=browser.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 influxdb1 ansible_host=influxdb1.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131
 hcp1 ansible_host=hcp1.tailnet-68f9.ts.net ansible_user=root ansible_password=3131 ansible_become_password=3131
 warden ansible_host=warden.tailnet-68f9.ts.net ansible_user=ben ansible_password=3131 ansible_become_password=3131

 [nomad_nodes:children]
@@ -11,7 +11,7 @@ ash1d ansible_host=ash1d ansible_user=ben ansible_become=yes ansible_become_pass
 ash2e ansible_host=ash2e ansible_user=ben ansible_become=yes ansible_become_pass=3131

 [oci_a1]
-master ansible_host=master ansible_port=60022 ansible_user=ben ansible_become=yes ansible_become_pass=3131
+ch4 ansible_host=ch4 ansible_user=ben ansible_become=yes ansible_become_pass=3131
 ash3c ansible_host=ash3c ansible_user=ben ansible_become=yes ansible_become_pass=3131
@@ -0,0 +1,62 @@
---
- name: Configure Nomad Dynamic Host Volumes for NFS
  hosts: nomad_clients
  become: yes
  vars:
    nfs_server: "snail"
    nfs_share: "/fs/1000/nfs/Fnsync"
    mount_point: "/mnt/fnsync"

  tasks:
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    - name: Update Nomad configuration for dynamic host volumes
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} DYNAMIC HOST VOLUMES CONFIGURATION"
        block: |
          client {
            # Enable dynamic host volumes
            host_volume "fnsync" {
              path = "{{ mount_point }}"
              read_only = false
            }

            # Add NFS-related node metadata
            meta {
              nfs_server = "{{ nfs_server }}"
              nfs_share = "{{ nfs_share }}"
              nfs_mounted = "true"
            }
          }
        # CAUTION: inserting after 'client {' places this whole block inside
        # any existing client block; review the rendered file after the run
        insertafter: 'client {'

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for Nomad to start
      wait_for:
        port: 4646
        delay: 10
        timeout: 60

    - name: Check Nomad status
      command: nomad node status
      register: nomad_status
      ignore_errors: yes

    - name: Display Nomad status
      debug:
        var: nomad_status.stdout_lines
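Once the client is back up, the volume should appear under Host Volumes in the node's verbose status; a sketch using standard nomad CLI flags:

    nomad node status -self -verbose | grep -A 3 'Host Volumes'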
deployment/ansible/playbooks/deploy-nomad-config.yml (new file, 41 lines)
@@ -0,0 +1,41 @@
---
- name: Deploy the Nomad server configuration template
  hosts: nomad_servers
  become: yes

  tasks:
    - name: Deploy the Nomad configuration file
      template:
        src: nomad-server.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        backup: yes
        owner: root
        group: root
        mode: '0644'

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: yes

    - name: Wait for the Nomad service to start
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        timeout: 30

    - name: Query Nomad service status
      systemd:
        name: nomad
      register: nomad_status

    - name: Display service status
      debug:
        msg: "{{ inventory_hostname }} Nomad service state: {{ nomad_status.status.ActiveState }}"
deployment/ansible/playbooks/fix-bootstrap-expect.yml (new file, 39 lines)
@@ -0,0 +1,39 @@
---
- name: Urgent fix for the Nomad bootstrap_expect setting
  hosts: nomad_servers
  become: yes

  tasks:
    - name: Set bootstrap_expect to 3
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ bootstrap_expect = \d+'
        line: ' bootstrap_expect = 3'
        backup: yes

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: yes

    - name: Wait for the Nomad service to start
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        timeout: 30

    - name: Check Nomad service status
      systemd:
        name: nomad
      register: nomad_status

    - name: Display Nomad service status
      debug:
        msg: "{{ inventory_hostname }} Nomad service state: {{ nomad_status.status.ActiveState }}"
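After the rolling restart, quorum can be confirmed from any server; nomad operator raft list-peers shows the voting members and the current leader:

    nomad operator raft list-peers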
deployment/ansible/playbooks/fix-ch4-nomad-config.yml (new file, 103 lines)
@@ -0,0 +1,103 @@
---
- name: Fix ch4 Nomad configuration - convert from server to client
  hosts: ch4
  become: yes
  vars:
    ansible_host: 100.117.106.136

  tasks:
    - name: Backup current Nomad config
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.backup
        remote_src: yes
        backup: yes

    - name: Update Nomad config to client mode
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} ANSIBLE MANAGED CLIENT CONFIG"
        block: |
          server {
            enabled = false
          }

          client {
            enabled = true
            network_interface = "tailscale0"

            servers = [
              "semaphore.tailnet-68f9.ts.net:4647",
              "ash1d.tailnet-68f9.ts.net:4647",
              "ash2e.tailnet-68f9.ts.net:4647",
              "ch2.tailnet-68f9.ts.net:4647",
              "ch3.tailnet-68f9.ts.net:4647",
              "onecloud1.tailnet-68f9.ts.net:4647",
              "de.tailnet-68f9.ts.net:4647"
            ]

            meta {
              consul = "true"
              consul_version = "1.21.5"
              consul_server = "true"
            }
          }
        # NOTE: blockinfile cannot replace the existing server block in place;
        # the old block must be removed separately or it will coexist with this one
        insertbefore: '^server \{'

    - name: Update client block
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} ANSIBLE MANAGED CLIENT BLOCK"
        block: |
          client {
            enabled = true
            network_interface = "tailscale0"

            servers = [
              "semaphore.tailnet-68f9.ts.net:4647",
              "ash1d.tailnet-68f9.ts.net:4647",
              "ash2e.tailnet-68f9.ts.net:4647",
              "ch2.tailnet-68f9.ts.net:4647",
              "ch3.tailnet-68f9.ts.net:4647",
              "onecloud1.tailnet-68f9.ts.net:4647",
              "de.tailnet-68f9.ts.net:4647"
            ]

            meta {
              consul = "true"
              consul_version = "1.21.5"
              consul_server = "true"
            }
          }
        # NOTE: this duplicates the client block written by the previous task
        insertbefore: '^client \{'

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: yes

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30

    - name: Verify Nomad client status
      shell: |
        NOMAD_ADDR=http://localhost:4646 nomad node status | grep -q "ready"
      register: nomad_ready
      failed_when: nomad_ready.rc != 0
      retries: 3
      delay: 10

    - name: Display completion message
      debug:
        msg: |
          ✅ Successfully converted ch4 from Nomad server to client
          ✅ Nomad service restarted
          ✅ Configuration updated
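Since both blockinfile tasks emit a client stanza, it is worth counting what actually landed in the file before trusting the restart:

    grep -c '^client {' /etc/nomad.d/nomad.hcl   # expect 1; more means duplicated stanzas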
deployment/ansible/playbooks/fix-master-to-ch4.yml (new file, 82 lines)
@@ -0,0 +1,82 @@
---
- name: Fix master node - rename to ch4 and restore SSH port 22
  hosts: master
  become: yes
  vars:
    new_hostname: ch4
    old_hostname: master

  tasks:
    - name: Backup current hostname
      copy:
        content: "{{ old_hostname }}"
        dest: /etc/hostname.backup
        mode: '0644'
      when: ansible_hostname == old_hostname

    - name: Update hostname to ch4
      hostname:
        name: "{{ new_hostname }}"
      when: ansible_hostname == old_hostname

    - name: Update /etc/hostname file
      copy:
        content: "{{ new_hostname }}"
        dest: /etc/hostname
        mode: '0644'
      when: ansible_hostname == old_hostname

    - name: Update /etc/hosts file
      lineinfile:
        path: /etc/hosts
        regexp: '^127\.0\.1\.1.*{{ old_hostname }}'
        line: '127.0.1.1 {{ new_hostname }}'
        state: present
      when: ansible_hostname == old_hostname

    - name: Update Tailscale hostname
      shell: |
        tailscale set --hostname={{ new_hostname }}
      when: ansible_hostname == old_hostname

    - name: Backup SSH config
      copy:
        src: /etc/ssh/sshd_config
        dest: /etc/ssh/sshd_config.backup
        remote_src: yes
        backup: yes

    - name: Restore SSH port to 22
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^Port '
        line: 'Port 22'
        state: present

    - name: Restart SSH service
      systemd:
        name: ssh
        state: restarted
        enabled: yes

    - name: Wait for SSH to be ready on port 22
      wait_for:
        port: 22
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30

    - name: Test SSH connection on port 22
      ping:
      delegate_to: "{{ inventory_hostname }}"
      vars:
        ansible_port: 22

    - name: Display completion message
      debug:
        msg: |
          ✅ Successfully renamed {{ old_hostname }} to {{ new_hostname }}
          ✅ SSH port restored to 22
          ✅ Tailscale hostname updated
          🔄 Please update your inventory file to use the new hostname and port
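After the rename, the old master alias and port 60022 stop working; a reconnect sketch using the user from the inventory above:

    ssh -p 22 ben@ch4.tailnet-68f9.ts.net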
deployment/ansible/playbooks/install-consul-clients.yml (new file, 71 lines)
@@ -0,0 +1,71 @@
---
- name: Install and configure Consul clients on all nodes
  hosts: all
  become: yes
  vars:
    consul_servers:
      - "100.117.106.136" # ch4 (Korea)
      - "100.122.197.112" # warden (Beijing)
      - "100.116.80.94"   # ash3c (US)

  tasks:
    - name: Get Tailscale IP address
      shell: ip addr show tailscale0 | grep 'inet ' | awk '{print $2}' | cut -d/ -f1
      register: tailscale_ip_result
      changed_when: false

    - name: Set Tailscale IP fact
      set_fact:
        tailscale_ip: "{{ tailscale_ip_result.stdout }}"

    - name: Install Consul
      apt:
        name: consul
        state: present
        update_cache: yes

    - name: Create Consul data directory
      file:
        path: /opt/consul/data
        state: directory
        owner: consul
        group: consul
        mode: '0755'

    - name: Create Consul log directory
      file:
        path: /var/log/consul
        state: directory
        owner: consul
        group: consul
        mode: '0755'

    - name: Create Consul config directory
      file:
        path: /etc/consul.d
        state: directory
        owner: consul
        group: consul
        mode: '0755'

    - name: Generate Consul client configuration
      template:
        src: consul-client.hcl.j2
        dest: /etc/consul.d/consul.hcl
        owner: consul
        group: consul
        mode: '0644'
      notify: restart consul

    - name: Enable and start Consul service
      systemd:
        name: consul
        enabled: yes
        state: started
        daemon_reload: yes

  handlers:
    - name: restart consul
      systemd:
        name: consul
        state: restarted
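Whether the new agents actually joined can be read off any node; members shows the gossip pool, catalog nodes the registered catalog:

    consul members
    consul catalog nodes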
@@ -0,0 +1,91 @@
---
- name: Install NFS CSI Plugin for Nomad
  hosts: nomad_nodes
  become: yes
  vars:
    nomad_user: nomad
    nomad_plugins_dir: /opt/nomad/plugins
    csi_driver_version: "v4.0.0"
    # NB: upstream csi-driver-nfs releases ship container images, not a
    # standalone binary; verify this URL resolves. CSI plugins are normally
    # run as Nomad jobs rather than via the agent plugin block.
    csi_driver_url: "https://github.com/kubernetes-csi/csi-driver-nfs/releases/download/{{ csi_driver_version }}/csi-nfs-driver"

  tasks:
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    - name: Create plugins directory
      file:
        path: "{{ nomad_plugins_dir }}"
        state: directory
        owner: "{{ nomad_user }}"
        group: "{{ nomad_user }}"
        mode: '0755'

    - name: Download NFS CSI driver
      get_url:
        url: "{{ csi_driver_url }}"
        dest: "{{ nomad_plugins_dir }}/csi-nfs-driver"
        owner: "{{ nomad_user }}"
        group: "{{ nomad_user }}"
        mode: '0755'

    - name: Install required packages for CSI
      package:
        name:
          - nfs-common
          - mount
        state: present

    - name: Create CSI mount directory
      file:
        path: /opt/nomad/csi
        state: directory
        owner: "{{ nomad_user }}"
        group: "{{ nomad_user }}"
        mode: '0755'

    - name: Update Nomad configuration for CSI plugin
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} CSI PLUGIN CONFIGURATION"
        block: |
          plugin_dir = "{{ nomad_plugins_dir }}"

          plugin "csi-nfs" {
            type = "csi"
            config {
              driver_name = "nfs.csi.k8s.io"
              mount_dir = "/opt/nomad/csi"
              health_timeout = "30s"
              log_level = "INFO"
            }
          }
        insertafter: 'data_dir = "/opt/nomad/data"'

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for Nomad to start
      wait_for:
        port: 4646
        delay: 10
        timeout: 60

    - name: Check Nomad status
      command: nomad node status
      register: nomad_status
      ignore_errors: yes

    - name: Display Nomad status
      debug:
        var: nomad_status.stdout_lines
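If the plugin does come up, it should be visible to the control plane; nomad plugin status lists registered CSI plugins and their health:

    nomad plugin status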
deployment/ansible/playbooks/start-nomad-servers.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
---
- name: Start all Nomad servers to form the cluster
  hosts: nomad_servers
  become: yes

  tasks:
    - name: Check Nomad service status
      systemd:
        name: nomad
      register: nomad_status

    - name: Start Nomad service (if not running)
      systemd:
        name: nomad
        state: started
        enabled: yes
      when: nomad_status.status.ActiveState != "active"

    - name: Wait for the Nomad service to start
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        timeout: 30

    - name: Display Nomad service status
      debug:
        # note: this shows the state captured before the start task ran
        msg: "{{ inventory_hostname }} Nomad service state: {{ nomad_status.status.ActiveState }}"
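Cluster formation is easiest to confirm from any one server once the play finishes:

    nomad server members   # all servers should list as alive, one as leader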
deployment/ansible/playbooks/templates/consul-client.hcl.j2 (new file, 61 lines)
@@ -0,0 +1,61 @@
# Consul Client Configuration for {{ inventory_hostname }}
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "{{ inventory_hostname }}"
bind_addr = "{{ hostvars[inventory_hostname]['tailscale_ip'] }}"

# Client mode (not server)
server = false

# Connect to Consul servers (the three-node server cluster)
retry_join = [
{% for server in consul_servers %}
  "{{ server }}"{% if not loop.last %},{% endif %}
{% endfor %}
]

# Performance optimization
performance {
  raft_multiplier = 5
}

# Ports configuration
ports {
  grpc = 8502
  http = 8500
  dns = 8600
}

# Enable Connect for service mesh
connect {
  enabled = true
}

# Cache configuration for performance
cache {
  entry_fetch_max_burst = 42
  entry_fetch_rate = 30
}

# Node metadata
node_meta = {
  region = "unknown"
  zone = "nomad-{{ 'server' if 'server' in group_names else 'client' }}"
}

# UI disabled for clients
ui_config {
  enabled = false
}

# ACL configuration (if needed)
acl = {
  enabled = false
  default_policy = "allow"
}

# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
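The rendered file can be sanity-checked on a target before the service restart picks it up:

    consul validate /etc/consul.d/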
deployment/ansible/playbooks/templates/nomad-server.hcl.j2 (new file, 106 lines)
@@ -0,0 +1,106 @@
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "{{ ansible_hostname }}"

bind_addr = "0.0.0.0"

addresses {
  http = "{{ ansible_host }}"
  rpc = "{{ ansible_host }}"
  serf = "{{ ansible_host }}"
}

advertise {
  http = "{{ ansible_host }}:4646"
  rpc = "{{ ansible_host }}:4647"
  serf = "{{ ansible_host }}:4648"
}

ports {
  http = 4646
  rpc = 4647
  serf = 4648
}

server {
  enabled = true
  bootstrap_expect = 3
  server_join {
    retry_join = [
      "semaphore.tailnet-68f9.ts.net:4648",
      "ash1d.tailnet-68f9.ts.net:4648",
      "ash2e.tailnet-68f9.ts.net:4648",
      "ch2.tailnet-68f9.ts.net:4648",
      "ch3.tailnet-68f9.ts.net:4648",
      "onecloud1.tailnet-68f9.ts.net:4648",
      "de.tailnet-68f9.ts.net:4648",
      "hcp1.tailnet-68f9.ts.net:4648"
    ]
  }
}

{% if ansible_hostname == 'hcp1' %}
client {
  enabled = true
  network_interface = "tailscale0"

  servers = [
    "semaphore.tailnet-68f9.ts.net:4647",
    "ash1d.tailnet-68f9.ts.net:4647",
    "ash2e.tailnet-68f9.ts.net:4647",
    "ch2.tailnet-68f9.ts.net:4647",
    "ch3.tailnet-68f9.ts.net:4647",
    "onecloud1.tailnet-68f9.ts.net:4647",
    "de.tailnet-68f9.ts.net:4647",
    "hcp1.tailnet-68f9.ts.net:4647"
  ]

  host_volume "traefik-certs" {
    path = "/opt/traefik/certs"
    read_only = false
  }

  host_volume "fnsync" {
    path = "/mnt/fnsync"
    read_only = false
  }

  meta {
    consul = "true"
    consul_version = "1.21.5"
    consul_client = "true"
  }

  gc_interval = "5m"
  gc_disk_usage_threshold = 80
  gc_inode_usage_threshold = 70
}

plugin "nomad-driver-podman" {
  config {
    socket_path = "unix:///run/podman/podman.sock"
    volumes {
      enabled = true
    }
  }
}
{% endif %}

consul {
  # NB: Nomad's consul block normally takes a single address; a
  # comma-separated list may not be parsed as intended
  address = "ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise = true
  server_auto_join = false
  client_auto_join = true
}

telemetry {
  collection_interval = "1s"
  disable_hostname = false
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}
@@ -19,7 +19,7 @@
   - ip: "100.120.225.29"
     hostnames: ["de"]
   - ip: "100.117.106.136"
-    hostnames: ["master"]
+    hostnames: ["ch4"]
   - ip: "100.116.80.94"
     hostnames: ["ash3c", "influxdb1"]
   - ip: "100.116.112.45"
deployment/ansible/playbooks/update-nomad-peers.yml (new file, 56 lines)
@@ -0,0 +1,56 @@
---
- name: Update Nomad server configuration to add hcp1 as a peer
  hosts: nomad_servers
  become: yes
  vars:
    hcp1_ip: "100.97.62.111"
    bootstrap_expect: 8

  tasks:
    - name: Back up the original configuration file
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.bak
        remote_src: yes
        backup: yes

    - name: Add hcp1 to the retry_join list
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        # backrefs keeps the rest of the existing list on the line;
        # without it the line would be truncated to an unterminated '['
        regexp: '^(\s*retry_join = \[)(.*)$'
        line: '\1"{{ hcp1_ip }}", \2'
        backrefs: yes
        backup: yes

    - name: Update bootstrap_expect to 8
      lineinfile:
        path: /etc/nomad.d/nomad.hcl
        regexp: '^ bootstrap_expect = \d+'
        line: ' bootstrap_expect = {{ bootstrap_expect }}'
        backup: yes

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: yes

    - name: Wait for the Nomad service to start
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        timeout: 30

    - name: Check Nomad service status
      systemd:
        name: nomad
      register: nomad_status

    - name: Display Nomad service status
      debug:
        msg: "Nomad service state: {{ nomad_status.status.ActiveState }}"
deployment/ansible/remove-consul-from-all-nomad-servers.yml (new file, 72 lines)
@@ -0,0 +1,72 @@
---
- name: Remove Consul configuration from all Nomad servers
  hosts: semaphore,ash1d,ash2e,ch2,ch3,onecloud1,de
  become: yes

  tasks:
    - name: Create clean Nomad server configuration
      copy:
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          plugin_dir = "/opt/nomad/plugins"
          log_level = "INFO"
          name = "{{ inventory_hostname }}"

          bind_addr = "{{ inventory_hostname }}.tailnet-68f9.ts.net"

          addresses {
            http = "{{ inventory_hostname }}.tailnet-68f9.ts.net"
            rpc = "{{ inventory_hostname }}.tailnet-68f9.ts.net"
            serf = "{{ inventory_hostname }}.tailnet-68f9.ts.net"
          }

          advertise {
            http = "{{ inventory_hostname }}.tailnet-68f9.ts.net:4646"
            rpc = "{{ inventory_hostname }}.tailnet-68f9.ts.net:4647"
            serf = "{{ inventory_hostname }}.tailnet-68f9.ts.net:4648"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = true
            bootstrap_expect = 7
            retry_join = ["ash1d.tailnet-68f9.ts.net","ash2e.tailnet-68f9.ts.net","ch2.tailnet-68f9.ts.net","ch3.tailnet-68f9.ts.net","onecloud1.tailnet-68f9.ts.net","de.tailnet-68f9.ts.net"]
          }

          client {
            enabled = false
          }

          plugin "nomad-driver-podman" {
            config {
              socket_path = "unix:///run/podman/podman.sock"
              volumes {
                enabled = true
              }
            }
          }
        dest: /etc/nomad.d/nomad.hcl
        mode: '0644'

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ ansible_default_ipv4.address }}"
        delay: 5
        timeout: 30

    - name: Display completion message
      debug:
        msg: "Removed Consul configuration from {{ inventory_hostname }}"
deployment/ansible/templates/consul-client.hcl.j2 (new file, 62 lines)
@@ -0,0 +1,62 @@
# Consul Client Configuration for {{ inventory_hostname }}
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "{{ inventory_hostname }}"
bind_addr = "{{ ansible_host }}"

# Client mode (not server)
server = false

# Connect to Consul servers (the three-node server cluster)
retry_join = [
{% for server in consul_servers %}
  "{{ server }}"{% if not loop.last %},{% endif %}
{% endfor %}
]

# Performance optimization
performance {
  raft_multiplier = 5
}

# Ports configuration
ports {
  grpc = 8502
  http = 8500
  dns = 8600
}

# Enable Connect for service mesh
connect {
  enabled = true
}

# Cache configuration for performance
cache {
  entry_fetch_max_burst = 42
  entry_fetch_rate = 30
}

# Node metadata
node_meta = {
  region = "unknown"
  zone = "nomad-{{ 'server' if 'server' in group_names else 'client' }}"
}

# UI disabled for clients
ui_config {
  enabled = false
}

# ACL configuration (if needed)
acl = {
  enabled = false
  default_policy = "allow"
}

# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
@@ -49,6 +49,11 @@ client {
     read_only = false
   }

+  host_volume "vault-storage" {
+    path = "/opt/nomad/data/vault-storage"
+    read_only = false
+  }
+
   # Disable the Docker driver; use Podman only
   options {
     "driver.raw_exec.enable" = "1"
@@ -2,20 +2,20 @@ datacenter = "dc1"
 data_dir = "/opt/nomad/data"
 plugin_dir = "/opt/nomad/plugins"
 log_level = "INFO"
-name = "{{ server_name }}"
+name = "{{ ansible_hostname }}"

-bind_addr = "{{ server_name }}.tailnet-68f9.ts.net"
+bind_addr = "0.0.0.0"

 addresses {
-  http = "{{ server_name }}.tailnet-68f9.ts.net"
-  rpc = "{{ server_name }}.tailnet-68f9.ts.net"
-  serf = "{{ server_name }}.tailnet-68f9.ts.net"
+  http = "{{ ansible_host }}"
+  rpc = "{{ ansible_host }}"
+  serf = "{{ ansible_host }}"
 }

 advertise {
-  http = "{{ server_name }}.tailnet-68f9.ts.net:4646"
-  rpc = "{{ server_name }}.tailnet-68f9.ts.net:4647"
-  serf = "{{ server_name }}.tailnet-68f9.ts.net:4648"
+  http = "{{ ansible_host }}:4646"
+  rpc = "{{ ansible_host }}:4647"
+  serf = "{{ ansible_host }}:4648"
 }

 ports {
@@ -26,18 +26,56 @@ ports {

 server {
   enabled = true
-  bootstrap_expect = 7
-  retry_join = [
-    {%- for server in groups['nomad_servers'] -%}
-    {%- if server != inventory_hostname -%}
-    "{{ server }}.tailnet-68f9.ts.net"{% if not loop.last %},{% endif %}
-    {%- endif -%}
-    {%- endfor -%}
-  ]
+  bootstrap_expect = 3
+  server_join {
+    retry_join = [
+      "semaphore.tailnet-68f9.ts.net:4648",
+      "ash1d.tailnet-68f9.ts.net:4648",
+      "ash2e.tailnet-68f9.ts.net:4648",
+      "ch2.tailnet-68f9.ts.net:4648",
+      "ch3.tailnet-68f9.ts.net:4648",
+      "onecloud1.tailnet-68f9.ts.net:4648",
+      "de.tailnet-68f9.ts.net:4648",
+      "hcp1.tailnet-68f9.ts.net:4648"
+    ]
+  }
 }

+{% if ansible_hostname == 'hcp1' %}
 client {
-  enabled = false
+  enabled = true
+  network_interface = "tailscale0"
+
+  servers = [
+    "semaphore.tailnet-68f9.ts.net:4647",
+    "ash1d.tailnet-68f9.ts.net:4647",
+    "ash2e.tailnet-68f9.ts.net:4647",
+    "ch2.tailnet-68f9.ts.net:4647",
+    "ch3.tailnet-68f9.ts.net:4647",
+    "onecloud1.tailnet-68f9.ts.net:4647",
+    "de.tailnet-68f9.ts.net:4647",
+    "hcp1.tailnet-68f9.ts.net:4647"
+  ]
+
+  host_volume "traefik-certs" {
+    path = "/opt/traefik/certs"
+    read_only = false
+  }
+
+  host_volume "fnsync" {
+    path = "/mnt/fnsync"
+    read_only = false
+  }
+
+  meta {
+    consul = "true"
+    consul_version = "1.21.5"
+    consul_client = "true"
+  }
+
+  gc_interval = "5m"
+  gc_disk_usage_threshold = 80
+  gc_inode_usage_threshold = 70
 }

 plugin "nomad-driver-podman" {
@@ -48,20 +86,21 @@ plugin "nomad-driver-podman" {
     }
   }
 }
+{% endif %}

 consul {
-  address = "master.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500"
+  address = "ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500"
   server_service_name = "nomad"
   client_service_name = "nomad-client"
   auto_advertise = true
-  server_auto_join = true
+  server_auto_join = false
   client_auto_join = true
 }

-vault {
-  enabled = true
-  address = "http://master.tailnet-68f9.ts.net:8200,http://ash3c.tailnet-68f9.ts.net:8200,http://warden.tailnet-68f9.ts.net:8200"
-  token = "hvs.A5Fu4E1oHyezJapVllKPFsWg"
-  create_from_role = "nomad-cluster"
-  tls_skip_verify = true
+telemetry {
+  collection_interval = "1s"
+  disable_hostname = false
+  prometheus_metrics = true
+  publish_allocation_metrics = true
+  publish_node_metrics = true
 }
@@ -64,7 +64,7 @@ plugin "nomad-driver-podman" {
 }

 consul {
-  address = "master.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500"
+  address = "ch4.tailnet-68f9.ts.net:8500,ash3c.tailnet-68f9.ts.net:8500,warden.tailnet-68f9.ts.net:8500"
   server_service_name = "nomad"
   client_service_name = "nomad-client"
   auto_advertise = true
@@ -74,7 +74,7 @@ consul {

 vault {
   enabled = true
-  address = "http://master.tailnet-68f9.ts.net:8200,http://ash3c.tailnet-68f9.ts.net:8200,http://warden.tailnet-68f9.ts.net:8200"
+  address = "http://ch4.tailnet-68f9.ts.net:8200,http://ash3c.tailnet-68f9.ts.net:8200,http://warden.tailnet-68f9.ts.net:8200"
   token = "hvs.A5Fu4E1oHyezJapVllKPFsWg"
   create_from_role = "nomad-cluster"
   tls_skip_verify = true
deployment/ansible/templates/vault.hcl.j2 (new file, 45 lines)
@@ -0,0 +1,45 @@
# Vault Configuration for {{ inventory_hostname }}

# Storage backend - Consul
storage "consul" {
  address = "127.0.0.1:8500"
  path = "vault/"

  # Consul datacenter
  datacenter = "{{ vault_datacenter }}"

  # Service registration
  service = "vault"
  service_tags = "vault-server"

  # Session TTL
  session_ttl = "15s"
  lock_wait_time = "15s"
}

# Listener configuration
listener "tcp" {
  address = "0.0.0.0:8200"
  tls_disable = 1
}

# API address - uses the Tailscale network address
api_addr = "http://{{ ansible_host }}:8200"

# Cluster address - uses the Tailscale network address
cluster_addr = "http://{{ ansible_host }}:8201"

# UI
ui = true

# Cluster name
cluster_name = "{{ vault_cluster_name }}"

# Disable mlock for development (remove in production)
disable_mlock = true

# Log level
log_level = "INFO"

# Plugin directory
plugin_directory = "/opt/vault/plugins"
deployment/ansible/templates/vault.service.j2 (new file, 34 lines)
@@ -0,0 +1,34 @@
[Unit]
Description=Vault
Documentation=https://www.vaultproject.io/docs/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/vault.d/vault.hcl
StartLimitIntervalSec=60
StartLimitBurst=3

[Service]
Type=notify
User=vault
Group=vault
ProtectSystem=full
ProtectHome=read-only
PrivateTmp=yes
PrivateDevices=yes
SecureBits=keep-caps
AmbientCapabilities=CAP_IPC_LOCK
CapabilityBoundingSet=CAP_SYSLOG CAP_IPC_LOCK
NoNewPrivileges=yes
ExecStart=/usr/bin/vault server -config=/etc/vault.d/vault.hcl
ExecReload=/bin/kill --signal HUP $MAINPID
KillMode=process
Restart=on-failure
RestartSec=5
TimeoutStopSec=30
StartLimitInterval=60
StartLimitBurst=3
LimitNOFILE=65536
LimitMEMLOCK=infinity

[Install]
WantedBy=multi-user.target
deployment/ansible/vault-cluster-init.yml (new file, 66 lines)
@@ -0,0 +1,66 @@
---
- name: Initialize Vault Cluster
  hosts: ch4 # initialize on one node only
  become: yes

  tasks:
    - name: Check if Vault is already initialized
      uri:
        url: "http://{{ ansible_host }}:8200/v1/sys/health"
        method: GET
        status_code: [200, 429, 472, 473, 501, 503]
      register: vault_health

    - name: Initialize Vault (only if not initialized)
      uri:
        url: "http://{{ ansible_host }}:8200/v1/sys/init"
        method: POST
        body_format: json
        body:
          secret_shares: 5
          secret_threshold: 3
        status_code: 200
      register: vault_init_result
      when: not vault_health.json.initialized

    - name: Save initialization results to local file
      copy:
        content: |
          # Vault Cluster Initialization Results
          Generated on: {{ ansible_date_time.iso8601 }}
          Initialized by: {{ inventory_hostname }}

          ## Root Token
          {{ vault_init_result.json.root_token }}

          ## Unseal Keys
          {# use ['keys']: dotted .keys resolves to the dict method, not the field #}
          {% for key in vault_init_result.json['keys'] %}
          Key {{ loop.index }}: {{ key }}
          {% endfor %}

          ## Base64 Unseal Keys
          {% for key in vault_init_result.json.keys_base64 %}
          Key {{ loop.index }} (base64): {{ key }}
          {% endfor %}

          ## Important Notes
          - Store these keys securely and separately
          - You need 3 out of 5 keys to unseal Vault
          - Root token provides full access to Vault
          - Consider revoking root token after initial setup
        dest: /tmp/vault-init-results.txt
      delegate_to: localhost
      when: vault_init_result is defined and vault_init_result.json is defined

    - name: Display initialization results
      debug:
        msg: |
          Vault initialized successfully!
          Root Token: {{ vault_init_result.json.root_token }}
          Unseal Keys: {{ vault_init_result.json['keys'] }}
      when: vault_init_result is defined and vault_init_result.json is defined

    - name: Display already initialized message
      debug:
        msg: "Vault is already initialized on {{ inventory_hostname }}"
      when: vault_health.json.initialized
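Initialization leaves Vault sealed; each node then needs threshold-many unseal keys (3 of 5 with the settings above). A manual sketch against the node from the play:

    export VAULT_ADDR=http://ch4.tailnet-68f9.ts.net:8200
    vault operator unseal   # repeat three times, pasting a different key each time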
deployment/ansible/vault-cluster-setup.yml (new file, 85 lines)
@@ -0,0 +1,85 @@
---
- name: Deploy Vault Cluster with Consul Integration
  hosts: ch4,ash3c,warden
  become: yes
  vars:
    vault_version: "1.15.2"
    vault_datacenter: "dc1"
    vault_cluster_name: "vault-cluster"

  tasks:
    - name: Update apt cache
      apt:
        update_cache: yes
        cache_valid_time: 3600

    - name: Add HashiCorp GPG key (if not exists)
      shell: |
        if [ ! -f /etc/apt/sources.list.d/hashicorp.list ]; then
          curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg
          echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
        fi
      args:
        creates: /etc/apt/sources.list.d/hashicorp.list

    - name: Install Vault
      apt:
        name: vault
        state: present
        update_cache: yes
        allow_downgrade: yes

    - name: Create vault directories
      block:
        - name: Create vault data directory
          file:
            path: /opt/vault/data
            state: directory
            owner: vault
            group: vault
            mode: '0755'

        - name: Create vault config directory
          file:
            path: /etc/vault.d
            state: directory
            owner: vault
            group: vault
            mode: '0755'

    - name: Generate Vault configuration
      template:
        src: vault.hcl.j2
        dest: /etc/vault.d/vault.hcl
        owner: vault
        group: vault
        mode: '0640'
      notify: restart vault

    - name: Create Vault systemd service
      template:
        src: vault.service.j2
        dest: /etc/systemd/system/vault.service
        owner: root
        group: root
        mode: '0644'
      notify:
        - reload systemd
        - restart vault

    - name: Enable and start Vault service
      systemd:
        name: vault
        enabled: yes
        state: started
        daemon_reload: yes

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: yes

    - name: restart vault
      systemd:
        name: vault
        state: restarted
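A first sanity check after the play; vault status reports the initialized/sealed state, and exit code 2 just means sealed:

    VAULT_ADDR=http://127.0.0.1:8200 vault status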
deployment/ansible/vault-cluster-verify.yml (new file, 67 lines)
@@ -0,0 +1,67 @@
---
- name: Verify Vault Cluster Status
  hosts: ch4,ash3c,warden
  become: yes

  tasks:
    - name: Check Vault service status
      systemd:
        name: vault
      register: vault_service_status

    - name: Display Vault service status
      debug:
        msg: "Vault service on {{ inventory_hostname }}: {{ vault_service_status.status.ActiveState }}"

    - name: Check Vault process
      shell: ps aux | grep vault | grep -v grep
      register: vault_process
      ignore_errors: yes

    - name: Display Vault process
      debug:
        msg: "Vault process on {{ inventory_hostname }}: {{ vault_process.stdout_lines }}"

    - name: Check Vault port 8200
      wait_for:
        port: 8200
        host: "{{ ansible_default_ipv4.address }}"
        timeout: 10
      register: vault_port_check
      ignore_errors: yes

    - name: Display port check result
      debug:
        msg: "Vault port 8200 on {{ inventory_hostname }}: {{ 'OPEN' if vault_port_check.failed == false else 'CLOSED' }}"

    - name: Get Vault status
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:8200/v1/sys/health"
        method: GET
        status_code: [200, 429, 472, 473, 501, 503]
      register: vault_health
      ignore_errors: yes

    - name: Display Vault health status
      debug:
        msg: "Vault health on {{ inventory_hostname }}: {{ vault_health.json if vault_health.json is defined else 'Connection failed' }}"

    - name: Check Consul integration
      uri:
        url: "http://127.0.0.1:8500/v1/kv/vault/?recurse"
        method: GET
      register: consul_vault_kv
      ignore_errors: yes

    - name: Display Consul Vault KV
      debug:
        msg: "Consul Vault KV on {{ inventory_hostname }}: {{ 'Found vault keys' if consul_vault_kv.status == 200 else 'No vault keys found' }}"

    - name: Check Vault logs for errors
      shell: journalctl -u vault --no-pager -n 10 | grep -i error || echo "No errors found"
      register: vault_logs
      ignore_errors: yes

    - name: Display Vault error logs
      debug:
        msg: "Vault errors on {{ inventory_hostname }}: {{ vault_logs.stdout_lines }}"