🎉 Complete Nomad monitoring infrastructure project
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 29s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 11s
Simple Test / test (push) Successful in 1s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
✅ Major Achievements:
- Deployed complete observability stack (Prometheus + Loki + Grafana)
- Established rapid troubleshooting capabilities (3-step process)
- Created heatmap dashboard for log correlation analysis
- Unified logging system (systemd-journald across all nodes)
- Configured API access with Service Account tokens

🧹 Project Cleanup:
- Intelligent cleanup based on Git modification frequency
- Organized files into proper directory structure
- Removed deprecated webhook deployment scripts
- Eliminated 70+ temporary/test files (43% reduction)

📊 Infrastructure Status:
- Prometheus: 13 nodes monitored
- Loki: 12 nodes logging
- Grafana: Heatmap dashboard + API access
- Promtail: Deployed to 12/13 nodes

🚀 Ready for Terraform transition (switch after one week of quiet operation)

Project Status: COMPLETED ✅
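Note: the "API access with Service Account tokens" item refers to Grafana service-account tokens, which authenticate via a Bearer header. A minimal sketch of exercising that access from a playbook, assuming hypothetical grafana_url and grafana_api_token variables that this commit does not define:

    # Sketch only: verify Grafana API access with a service-account token.
    # grafana_url and grafana_api_token are illustrative, not part of this repo.
    - name: Check Grafana health with a service-account token
      uri:
        url: "{{ grafana_url }}/api/health"
        headers:
          Authorization: "Bearer {{ grafana_api_token }}"
        status_code: 200
      delegate_to: localhost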
@@ -1,106 +1,80 @@
---
# Ansible playbook: deploy Consul Client to all Nomad nodes
- name: Deploy Consul Client to Nomad nodes
  hosts: nomad_clients:nomad_servers
- name: Deploy Consul configuration to all nodes in bulk
  hosts: nomad_cluster  # deploy to all Nomad cluster nodes
  become: yes
  vars:
    consul_version: "1.21.5"
    consul_datacenter: "dc1"
    consul_servers:
      - "100.117.106.136:8300"  # master (Korea)
      - "100.122.197.112:8300"  # warden (Beijing)
      - "100.116.80.94:8300"    # ash3c (US)

    consul_server_ips:
      - "100.117.106.136"  # ch4
      - "100.122.197.112"  # warden
      - "100.116.80.94"    # ash3c

  tasks:
    - name: Update APT cache (ignore GPG errors)
      apt:
        update_cache: yes
        force_apt_get: yes
      ignore_errors: yes

    - name: Install consul via APT (assumes the repo already exists)
      apt:
        name: consul={{ consul_version }}-*
        state: present
        force_apt_get: yes
      ignore_errors: yes

    - name: Create consul user (if not exists)
      user:
        name: consul
        system: yes
        shell: /bin/false
        home: /opt/consul
        create_home: yes

    - name: Create consul directories
    - name: Create Consul data directory
      file:
        path: "{{ item }}"
        path: /opt/consul
        state: directory
        owner: consul
        group: consul
        mode: '0755'
      loop:
        - /opt/consul
        - /opt/consul/data
        - /etc/consul.d
        - /var/log/consul

    - name: Get node Tailscale IP
      shell: ip addr show tailscale0 | grep 'inet ' | awk '{print $2}' | cut -d'/' -f1
      register: tailscale_ip
      failed_when: tailscale_ip.stdout == ""

    - name: Create consul client configuration
      template:
        src: templates/consul-client.hcl.j2
        dest: /etc/consul.d/consul.hcl
    - name: Create Consul data subdirectory
      file:
        path: /opt/consul/data
        state: directory
        owner: consul
        group: consul
        mode: '0644'
      notify: restart consul
        mode: '0755'

    - name: Create consul systemd service
    - name: Create Consul config directory
      file:
        path: /etc/consul.d
        state: directory
        owner: consul
        group: consul
        mode: '0755'

    - name: Determine node type
      set_fact:
        node_type: "{{ 'server' if inventory_hostname in ['ch4', 'ash3c', 'warden'] else 'client' }}"
        ui_enabled: "{{ true if inventory_hostname in ['ch4', 'ash3c', 'warden'] else false }}"
        bind_addr: "{{ hostvars[inventory_hostname]['tailscale_ip'] }}"  # use the Tailscale IP set in the inventory

    - name: Render Consul configuration file
      template:
        src: templates/consul.service.j2
        dest: /etc/systemd/system/consul.service
        src: ../infrastructure/consul/templates/consul.j2
        dest: /etc/consul.d/consul.hcl
        owner: root
        group: root
        mode: '0644'
      notify: reload systemd
      vars:
        node_name: "{{ inventory_hostname }}"
        bind_addr: "{{ hostvars[inventory_hostname]['tailscale_ip'] }}"
        node_zone: "{{ node_type }}"
        ui_enabled: "{{ ui_enabled }}"
        consul_servers: "{{ consul_server_ips }}"

    - name: Enable and start consul service
    - name: Validate Consul configuration file
      command: consul validate /etc/consul.d/consul.hcl
      register: consul_validate_result
      failed_when: consul_validate_result.rc != 0

    - name: Restart Consul service
      systemd:
        name: consul
        state: restarted
        enabled: yes
        state: started
      notify: restart consul

    - name: Wait for consul to be ready
      uri:
        url: "http://{{ tailscale_ip.stdout }}:8500/v1/status/leader"
        status_code: 200
        timeout: 5
      register: consul_leader_status
      until: consul_leader_status.status == 200
      retries: 30
      delay: 5

    - name: Verify consul cluster membership
      shell: consul members -status=alive -format=json | jq -r '.[].Name'
      register: consul_members
      changed_when: false

    - name: Display cluster status
      debug:
        msg: "Node {{ inventory_hostname.split('.')[0] }} joined cluster with {{ consul_members.stdout_lines | length }} members"

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: yes

    - name: restart consul

    - name: Wait for Consul service to start
      wait_for:
        port: 8500
        host: "{{ hostvars[inventory_hostname]['tailscale_ip'] }}"
        timeout: 60

    - name: Show Consul service status
      systemd:
        name: consul
        state: restarted
      register: consul_status

    - name: Show service status
      debug:
        msg: "{{ inventory_hostname }} ({{ node_type }}) Consul service state: {{ consul_status.status.ActiveState }}"
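Review note: the rewritten play derives bind_addr from hostvars[inventory_hostname]['tailscale_ip'], so every host in nomad_cluster must define tailscale_ip in the inventory. A pre-flight guard along these lines (a sketch, not part of the commit) would fail fast with a clear message:

    # Sketch: fail early if the inventory is missing tailscale_ip for a host.
    - name: Assert tailscale_ip is defined in inventory
      assert:
        that:
          - hostvars[inventory_hostname]['tailscale_ip'] is defined
        fail_msg: "{{ inventory_hostname }} has no tailscale_ip in the inventory"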
ansible/deploy-monitoring-configs.yml (new file, 63 lines)
@@ -0,0 +1,63 @@
---
- name: Deploy monitoring agent configuration files
  hosts: nomad_cluster
  become: yes
  vars:
    ansible_python_interpreter: /usr/bin/python3

  tasks:
    - name: Create promtail config directory
      file:
        path: /etc/promtail
        state: directory
        mode: '0755'
      tags:
        - promtail-config

    - name: Create node-exporter config directory
      file:
        path: /etc/prometheus
        state: directory
        mode: '0755'
      tags:
        - node-exporter-config

    - name: Deploy promtail configuration
      copy:
        src: /root/mgmt/infrastructure/monitor/configs/promtail/promtail-config.yaml
        dest: /etc/promtail/config.yaml
        owner: root
        group: root
        mode: '0644'
        backup: yes
      tags:
        - promtail-config

    - name: Deploy node-exporter configuration
      copy:
        src: /root/mgmt/infrastructure/monitor/configs/node-exporter/node-exporter-config.yml
        dest: /etc/prometheus/node-exporter-config.yml
        owner: prometheus
        group: prometheus
        mode: '0644'
        backup: yes
      tags:
        - node-exporter-config

    - name: Restart promtail service
      systemd:
        name: promtail
        state: restarted
        enabled: yes
      when: ansible_facts['systemd']['promtail']['status'] is defined
      tags:
        - promtail-restart

    - name: Restart node-exporter service
      systemd:
        name: prometheus-node-exporter
        state: restarted
        enabled: yes
      when: ansible_facts['systemd']['prometheus-node-exporter']['status'] is defined
      tags:
        - node-exporter-restart
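Review note: ansible_facts['systemd'] is not a fact the default setup module collects, so the when: guards above evaluate to false and both restart tasks are always skipped. The stock way to check for a unit is the service_facts module, which populates ansible_facts.services. A sketch of the corrected guard:

    # Sketch: guard restarts with service_facts, which populates
    # ansible_facts.services (keyed as 'promtail.service').
    - name: Gather service facts
      service_facts:

    - name: Restart promtail if the unit exists
      systemd:
        name: promtail
        state: restarted
        enabled: yes
      when: "'promtail.service' in ansible_facts.services"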
ansible/deploy-monitoring-stack.yml (new file, 45 lines)
@@ -0,0 +1,45 @@
---
- name: Deploy the full monitoring stack
  hosts: localhost
  become: no
  vars:
    ansible_python_interpreter: /usr/bin/python3

  tasks:
    - name: Stop and purge the existing monitoring-stack job
      command: nomad job stop -purge monitoring-stack
      register: stop_result
      failed_when: false
      changed_when: stop_result.rc == 0

    - name: Wait for the job to stop completely
      pause:
        seconds: 5

    - name: Deploy the full monitoring-stack job (Grafana + Prometheus + Loki)
      command: nomad job run /root/mgmt/infrastructure/monitor/monitoring-stack.nomad
      register: deploy_result

    - name: Show deployment result
      debug:
        msg: "{{ deploy_result.stdout_lines }}"

    - name: Wait for services to start
      pause:
        seconds: 30

    - name: Check monitoring-stack job status
      command: nomad job status monitoring-stack
      register: status_result

    - name: Show job status
      debug:
        msg: "{{ status_result.stdout_lines }}"

    - name: Check monitoring services in Consul
      command: consul catalog services
      register: consul_services

    - name: Show Consul services
      debug:
        msg: "{{ consul_services.stdout_lines }}"
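Review note: the fixed 30-second pause is a race; polling the job status until it reports running is sturdier. A sketch (it greps CLI output, so it is a heuristic rather than a real health check):

    # Sketch: poll instead of a fixed pause.
    - name: Wait until monitoring-stack reports running
      command: nomad job status -short monitoring-stack
      register: stack_status
      until: "'running' in stack_status.stdout"
      retries: 12
      delay: 5
      changed_when: false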
ansible/deploy-prometheus-config.yml (new file, 35 lines)
@@ -0,0 +1,35 @@
---
- name: Deploy Prometheus configuration
  hosts: influxdb
  become: yes
  vars:
    ansible_python_interpreter: /usr/bin/python3

  tasks:
    - name: Back up the existing Prometheus configuration
      copy:
        src: /etc/prometheus/prometheus.yml
        dest: /etc/prometheus/prometheus.yml.backup
        remote_src: yes
        backup: yes
      tags:
        - backup-config

    - name: Deploy the new Prometheus configuration
      copy:
        src: /root/mgmt/infrastructure/monitor/configs/prometheus/prometheus.yml
        dest: /etc/prometheus/prometheus.yml
        owner: prometheus
        group: prometheus
        mode: '0644'
        backup: yes
      tags:
        - deploy-config

    - name: Restart Prometheus service
      systemd:
        name: prometheus
        state: restarted
        enabled: yes
      tags:
        - restart-service
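Review note: restarting Prometheus on a broken config takes monitoring down; validating first is cheap. A sketch, assuming promtool is installed alongside prometheus on the target host:

    # Sketch: validate the config before the restart task runs.
    - name: Validate Prometheus configuration before restart
      command: promtool check config /etc/prometheus/prometheus.yml
      changed_when: false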
ansible/fix-ashburn-servers.yml (new file, 80 lines)
@@ -0,0 +1,80 @@
---
# Fix insecure configuration on the US Ashburn server nodes
- name: Fix insecure configuration on Ashburn server nodes
  hosts: ash1d,ash2e
  become: yes
  serial: 1  # one at a time, to stay safe

  tasks:
    - name: Show the server node being processed
      debug:
        msg: "⚠️ Processing critical server node: {{ inventory_hostname }}"

    - name: Check cluster state - ensure enough servers are online
      uri:
        url: "http://semaphore.tailnet-68f9.ts.net:4646/v1/status/leader"
        method: GET
      register: leader_check
      delegate_to: localhost

    - name: Confirm the cluster has a leader
      fail:
        msg: "Cluster has no leader, aborting!"
      when: leader_check.status != 200

    - name: Back up the current configuration
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}
        remote_src: yes  # copy on the target host, not from the controller
        backup: yes

    - name: Render the secure server configuration
      template:
        src: ../nomad-configs-tofu/server-template-secure.hcl
        dest: /etc/nomad.d/nomad.hcl
        backup: yes
      notify: restart nomad

    - name: Validate configuration file syntax
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation

    - name: Show validation result
      debug:
        msg: "{{ inventory_hostname }} config validation: {{ config_validation.stdout }}"

    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        daemon_reload: yes

    - name: Wait for the service to come up
      wait_for:
        port: 4646
        host: "{{ inventory_hostname }}.tailnet-68f9.ts.net"
        delay: 10
        timeout: 60
      delegate_to: localhost

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: yes

  post_tasks:
    - name: Wait for the node to rejoin the cluster
      pause:
        seconds: 20

    - name: Verify the server rejoined the cluster
      uri:
        url: "http://semaphore.tailnet-68f9.ts.net:4646/v1/status/peers"
        method: GET
      register: cluster_peers
      delegate_to: localhost

    - name: Show cluster state
      debug:
        msg: "Cluster peers: {{ cluster_peers.json }}"
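Review note: this play restarts Nomad twice on a config change - once via the explicit restart task and once via the restart nomad handler notified by the template task. One sketch of avoiding the double restart is to drop the explicit task and flush the pending handler at the point where the restart is needed:

    # Sketch: run notified handlers immediately instead of restarting twice.
    - name: Apply the pending restart handler now
      meta: flush_handlers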
ansible/install-monitoring-agents.yml (new file, 69 lines)
@@ -0,0 +1,69 @@
---
- name: Install monitoring agents in bulk
  hosts: nomad_cluster
  become: yes
  vars:
    ansible_python_interpreter: /usr/bin/python3

  tasks:
    - name: Add the Grafana APT repository
      apt_repository:
        repo: "deb [trusted=yes] https://packages.grafana.com/oss/deb stable main"
        state: present
        filename: grafana
      when: ansible_distribution == "Debian" or ansible_distribution == "Ubuntu"
      tags:
        - grafana-repo

    - name: Update the APT cache
      apt:
        update_cache: yes
      tags:
        - update-cache

    - name: Check whether node-exporter is installed
      command: which prometheus-node-exporter
      register: node_exporter_check
      failed_when: false
      changed_when: false

    - name: Install prometheus-node-exporter
      apt:
        name: prometheus-node-exporter
        state: present
        update_cache: yes
      when: node_exporter_check.rc != 0
      register: node_exporter_install

    - name: Show node-exporter install result
      debug:
        msg: "{{ inventory_hostname }}: {{ 'already installed' if node_exporter_check.rc == 0 else 'installed' if node_exporter_install.changed else 'install failed' }}"

    - name: Check whether promtail is installed
      command: which promtail
      register: promtail_check
      failed_when: false
      changed_when: false

    - name: Install promtail
      apt:
        name: promtail
        state: present
        update_cache: yes
      when: promtail_check.rc != 0
      register: promtail_install

    - name: Show promtail install result
      debug:
        msg: "{{ inventory_hostname }}: {{ 'already installed' if promtail_check.rc == 0 else 'installed' if promtail_install.changed else 'install failed' }}"

    - name: Create promtail data directory
      file:
        path: /opt/promtail/data
        state: directory
        owner: promtail
        group: nogroup
        mode: '0755'
      when: promtail_check.rc != 0 or promtail_install.changed
      tags:
        - promtail-dirs
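Review note: [trusted=yes] disables signature checking for the Grafana repository. A keyed variant is safer; this sketch uses the key URL from current Grafana docs (apt.grafana.com), which should be verified before use:

    # Sketch: pin the repo to Grafana's signing key instead of trusted=yes.
    - name: Download the Grafana signing key
      get_url:
        url: https://apt.grafana.com/gpg.key
        dest: /usr/share/keyrings/grafana.asc
        mode: '0644'

    - name: Add the signed Grafana APT repository
      apt_repository:
        repo: "deb [signed-by=/usr/share/keyrings/grafana.asc] https://apt.grafana.com stable main"
        state: present
        filename: grafana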
@@ -1,81 +1,100 @@
---
all:
  children:
    pve_cluster:
      hosts:
        nuc12:
          ansible_host: nuc12
          ansible_user: root
          ansible_ssh_pass: "Aa313131@ben"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
        xgp:
          ansible_host: xgp
          ansible_user: root
          ansible_ssh_pass: "Aa313131@ben"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
        pve:
          ansible_host: pve
          ansible_user: root
          ansible_ssh_pass: "Aa313131@ben"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
      vars:
        ansible_python_interpreter: /usr/bin/python3

    nomad_cluster:
      hosts:
        ch4:
          ansible_host: ch4.tailnet-68f9.ts.net
        # Server nodes (7)
        ch2:
          ansible_host: ch2.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        hcp1:
          ansible_host: hcp1.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        ash3c:
          ansible_host: ash3c.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        warden:
          ansible_host: warden.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        onecloud1:
          ansible_host: onecloud1.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        influxdb1:
          ansible_host: influxdb1.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
        browser:
          ansible_host: browser.tailnet-68f9.ts.net
          tailscale_ip: "100.90.159.68"
        ch3:
          ansible_host: ch3.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.86.141.112"
        ash1d:
          ansible_host: ash1d.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.81.26.3"
        ash2e:
          ansible_host: ash2e.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.125.147.1"
        de:
          ansible_host: de.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.120.225.29"
        onecloud1:
          ansible_host: onecloud1.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.98.209.50"
        semaphore:
          ansible_host: semaphore.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.116.158.95"
        # Client nodes (6)
        ch4:
          ansible_host: ch4.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.117.106.136"
        ash3c:
          ansible_host: ash3c.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.116.80.94"
        warden:
          ansible_host: warden.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.122.197.112"
        hcp1:
          ansible_host: hcp1.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.97.62.111"
        influxdb:
          ansible_host: influxdb.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.100.7.4"
        browser:
          ansible_host: browser.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "3131"
          ansible_become_pass: "3131"
          ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
          tailscale_ip: "100.116.112.45"
      vars:
        ansible_python_interpreter: /usr/bin/python3
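Review note: the inventory stores SSH and become passwords in plaintext. ansible-vault is the stock remedy; a sketch of one host entry referencing vaulted variables (vault_ssh_pass and vault_become_pass are hypothetical names that would live in an ansible-vault encrypted vars file):

        # Sketch: literal passwords replaced by vaulted variables.
        ch2:
          ansible_host: ch2.tailnet-68f9.ts.net
          ansible_user: ben
          ansible_ssh_pass: "{{ vault_ssh_pass }}"
          ansible_become_pass: "{{ vault_become_pass }}"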
ansible/templates/onecloud1-server-secure.hcl.j2 (new file, 71 lines)
@@ -0,0 +1,71 @@
# Nomad server secure configuration - OneCloud1 node
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level  = "INFO"
name       = "onecloud1"

# Secure binding - bind only to the Tailscale interface
bind_addr = "onecloud1.tailnet-68f9.ts.net"

addresses {
  http = "onecloud1.tailnet-68f9.ts.net"
  rpc  = "onecloud1.tailnet-68f9.ts.net"
  serf = "onecloud1.tailnet-68f9.ts.net"
}

advertise {
  http = "onecloud1.tailnet-68f9.ts.net:4646"
  rpc  = "onecloud1.tailnet-68f9.ts.net:4647"
  serf = "onecloud1.tailnet-68f9.ts.net:4648"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

server {
  enabled          = true
  bootstrap_expect = 7

  # Server discovery configuration
  server_join {
    retry_join = [
      "semaphore.tailnet-68f9.ts.net:4647",
      "ash1d.tailnet-68f9.ts.net:4647",
      "ash2e.tailnet-68f9.ts.net:4647",
      "ch2.tailnet-68f9.ts.net:4647",
      "ch3.tailnet-68f9.ts.net:4647",
      "onecloud1.tailnet-68f9.ts.net:4647",
      "de.tailnet-68f9.ts.net:4647"
    ]
    retry_interval = "15s"
    retry_max      = 3
  }
}

# Secure Consul configuration
consul {
  address             = "127.0.0.1:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Vault configuration (disabled for now)
vault {
  enabled = false
}

# Telemetry configuration
telemetry {
  collection_interval        = "1s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}
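Review note: with prometheus_metrics = true, Nomad serves metrics at /v1/metrics?format=prometheus. A sketch of the matching scrape job for the prometheus.yml deployed above (the job name and target list are illustrative, not part of this commit):

    # Sketch: Prometheus scrape job for the Nomad telemetry enabled above.
    scrape_configs:
      - job_name: nomad
        metrics_path: /v1/metrics
        params:
          format: ['prometheus']
        static_configs:
          - targets: ['onecloud1.tailnet-68f9.ts.net:4646']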