REMOVE: 删除不再使用的 Terraform 配置文件
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 7m45s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 2m33s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Simple Test / test (push) Failing after 2m48s
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 7m45s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 2m33s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Simple Test / test (push) Failing after 2m48s
- 移除 nomad-terraform.tf 和 test_opentofu_consul.tf 文件
- 更新 Ansible inventory,注释掉不存在的节点 hcp2
- 修改 inventory.ini,确保节点配置的准确性
- 在 nomad-config 模块中添加 null_provider 以支持新配置
- 更新 influxdb1.hcl,添加 Grafana 和 Prometheus 数据卷配置
This commit is contained in:
62
ansible/fix-all-servers.yml
Normal file
62
ansible/fix-all-servers.yml
Normal file
@@ -0,0 +1,62 @@
|
||||
---
# Bulk-repair the security configuration of the Nomad server nodes.
#
# For every targeted host the play renders the hardened server config to a
# staging file, stops Nomad, backs up the live config, installs the new one,
# removes the Raft directory so the node rejoins the cluster, then starts
# Nomad again and waits for its HTTP port.
- name: 修复所有 Nomad 服务器节点的安全配置
  hosts: ash1d,ash2e,onecloud1
  # The backup task names its file with ansible_date_time.epoch, which only
  # exists once facts have been gathered. With fact gathering disabled the
  # backup failed on an undefined variable and ignore_errors hid that, so the
  # config was replaced and Raft wiped without any backup. The minimal fact
  # subset is enough to provide the timestamp.
  gather_facts: true
  gather_subset:
    - min
  vars:
    # Not referenced by any task in this playbook; presumably consumed by
    # server-secure.hcl.j2 -- TODO confirm against the template.
    nomad_servers:
      - "semaphore.tailnet-68f9.ts.net:4647"
      - "ash1d.tailnet-68f9.ts.net:4647"
      - "ash2e.tailnet-68f9.ts.net:4647"
      - "ch2.tailnet-68f9.ts.net:4647"
      - "ch3.tailnet-68f9.ts.net:4647"
      - "onecloud1.tailnet-68f9.ts.net:4647"
      - "de.tailnet-68f9.ts.net:4647"

  tasks:
    # Render to a staging path first so the live config is only replaced
    # after the service has been stopped and backed up.
    - name: 生成安全的 Nomad 服务器配置
      template:
        src: server-secure.hcl.j2
        dest: /tmp/nomad-secure.hcl
        mode: '0644'

    - name: 停止 Nomad 服务
      systemd:
        name: nomad
        state: stopped
      become: true

    # Best-effort: a failure here (e.g. no existing config on the node) is
    # ignored and does not stop the play.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
      become: true
      ignore_errors: true

    - name: 部署安全配置
      copy:
        src: /tmp/nomad-secure.hcl
        dest: /etc/nomad.d/nomad.hcl
        remote_src: true
      become: true

    # Destructive: deletes the whole Raft directory on this server.
    - name: 清理 Raft 数据以重新加入集群
      file:
        path: /opt/nomad/data/server/raft/
        state: absent
      become: true

    - name: 启动 Nomad 服务
      systemd:
        name: nomad
        state: started
        enabled: true
      become: true

    # Not delegated, so the port check runs on the managed host itself
    # against its own tailnet name.
    - name: 等待服务启动
      wait_for:
        port: 4646
        host: "{{ inventory_hostname }}.tailnet-68f9.ts.net"
        delay: 10
        timeout: 60
|
||||
59
ansible/fix-clients-safe.yml
Normal file
59
ansible/fix-clients-safe.yml
Normal file
@@ -0,0 +1,59 @@
|
||||
---
# Safely repair the client node configuration -- clients first, servers later.
#
# Hosts are processed one at a time. For each host the play backs up the live
# config, renders the hardened client config, validates it, restarts Nomad,
# waits for the HTTP port and finally queries the agent API.
- name: 修复客户端节点不安全配置
  hosts: nomad_clients
  become: true
  serial: 1  # one host at a time, to stay safe
  tasks:
    - name: 显示当前处理的节点
      debug:
        msg: "正在处理客户端节点: {{ inventory_hostname }}"

    # remote_src is required: without it copy uploads the controller's
    # /etc/nomad.d/nomad.hcl instead of backing up the node's own file.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
        backup: true

    - name: 创建安全的客户端配置
      template:
        src: client-secure-template.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        backup: true
      notify: restart nomad

    # Read-only check, so it never reports a change. If it fails the host is
    # dropped from the play before the restart handler is flushed.
    - name: 验证配置文件语法
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: 显示验证结果
      debug:
        msg: "{{ inventory_hostname }} 配置验证: {{ config_validation.stdout }}"

    # Handlers normally run only after the whole tasks section, which meant
    # the wait below was checking the old, not-yet-restarted process. Flush
    # them here so the wait really observes the restarted service.
    - name: 立即执行重启以应用新配置
      meta: flush_handlers

    # The inventory host "influxdb" is addressed as "influxdb1" on the
    # tailnet; every other host uses its inventory name.
    - name: 等待服务重启完成
      wait_for:
        port: 4646
        host: "{% if inventory_hostname == 'influxdb' %}influxdb1.tailnet-68f9.ts.net{% else %}{{ inventory_hostname }}.tailnet-68f9.ts.net{% endif %}"
        delay: 10
        timeout: 60
      delegate_to: localhost

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true

  post_tasks:
    # Queried from the controller; uri fails the task on a non-200 response.
    - name: 验证节点重新加入集群
      uri:
        url: "http://{% if inventory_hostname == 'influxdb' %}influxdb1.tailnet-68f9.ts.net{% else %}{{ inventory_hostname }}.tailnet-68f9.ts.net{% endif %}:4646/v1/agent/self"
        method: GET
      register: node_status
      delegate_to: localhost

    - name: 显示节点状态
      debug:
        msg: "{{ inventory_hostname }} 重新加入集群成功"
      when: node_status.status == 200
|
||||
106
ansible/templates/client-secure-template.hcl.j2
Normal file
106
ansible/templates/client-secure-template.hcl.j2
Normal file
@@ -0,0 +1,106 @@
|
||||
{#
  The inventory host "influxdb" is addressed as "influxdb1" on the tailnet;
  every other host uses its inventory name. Compute the FQDN once and reuse
  it for the bind, addresses and advertise settings.
#}
{% set node_fqdn = ('influxdb1' if inventory_hostname == 'influxdb' else inventory_hostname) ~ '.tailnet-68f9.ts.net' %}
# Nomad client security configuration template
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level  = "INFO"
name       = "{{ inventory_hostname }}"

# Secure binding - bind only to the Tailscale address
bind_addr = "{{ node_fqdn }}"

addresses {
  http = "{{ node_fqdn }}"
  rpc  = "{{ node_fqdn }}"
  serf = "{{ node_fqdn }}"
}

advertise {
  http = "{{ node_fqdn }}:4646"
  rpc  = "{{ node_fqdn }}:4647"
  serf = "{{ node_fqdn }}:4648"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

# Pure client mode
server {
  enabled = false
}

client {
  enabled           = true
  network_interface = "tailscale0"

  # Connect to the currently active server nodes
  servers = [
    "ch2.tailnet-68f9.ts.net:4647",
    "ch3.tailnet-68f9.ts.net:4647",
    "de.tailnet-68f9.ts.net:4647",
    "semaphore.tailnet-68f9.ts.net:4647"
  ]

  # Basic drivers
  options {
    "driver.raw_exec.enable" = "1"
    "driver.exec.enable"     = "1"
  }

  # Aggressive garbage-collection policy
  gc_interval              = "5m"
  gc_disk_usage_threshold  = 80
  gc_inode_usage_threshold = 70
}

# Podman plugin configuration
plugin "nomad-driver-podman" {
  config {
    socket_path = "unix:///run/podman/podman.sock"
    volumes {
      enabled = true
    }
  }
}

# Secure Consul configuration - points at the local client agent
consul {
  address             = "127.0.0.1:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Vault disabled - for now
vault {
  enabled = false
}

# Telemetry configuration
telemetry {
  collection_interval        = "1s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}
|
||||
97
ansible/test-semaphore-config.yml
Normal file
97
ansible/test-semaphore-config.yml
Normal file
@@ -0,0 +1,97 @@
|
||||
---
# Trial run of the in-place config swap against the local semaphore node.
#
# Backs up the live Nomad config, writes a hardened server config for
# semaphore, validates it, and restarts Nomad through a handler once the
# tasks have finished.
- name: 测试 Ansible 偷梁换柱 - 修复 semaphore 不安全配置
  hosts: localhost
  become: true
  tasks:
    # remote_src makes the copy happen on the target under become, matching
    # the server playbook; without it the source file is read on the
    # controller side as the unprivileged invoking user.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
        backup: true

    # The whole agent config is inlined as a literal block and written
    # verbatim to the destination.
    - name: 创建安全的 semaphore 配置
      copy:
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          plugin_dir = "/opt/nomad/plugins"
          log_level = "INFO"
          name = "semaphore"

          # 安全绑定 - 只绑定到 Tailscale 接口
          bind_addr = "semaphore.tailnet-68f9.ts.net"

          addresses {
            http = "semaphore.tailnet-68f9.ts.net"
            rpc = "semaphore.tailnet-68f9.ts.net"
            serf = "semaphore.tailnet-68f9.ts.net"
          }

          advertise {
            http = "semaphore.tailnet-68f9.ts.net:4646"
            rpc = "semaphore.tailnet-68f9.ts.net:4647"
            serf = "semaphore.tailnet-68f9.ts.net:4648"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          server {
            enabled = true

            server_join {
              retry_join = [
                "semaphore.tailnet-68f9.ts.net:4647",
                "ash1d.tailnet-68f9.ts.net:4647",
                "ash2e.tailnet-68f9.ts.net:4647",
                "ch2.tailnet-68f9.ts.net:4647",
                "ch3.tailnet-68f9.ts.net:4647",
                "onecloud1.tailnet-68f9.ts.net:4647",
                "de.tailnet-68f9.ts.net:4647"
              ]
            }
          }

          # 安全的 Consul 配置
          consul {
            address = "127.0.0.1:8500"
            server_service_name = "nomad"
            client_service_name = "nomad-client"
            auto_advertise = true
            server_auto_join = true
            client_auto_join = true
          }

          vault {
            enabled = false
          }

          telemetry {
            collection_interval = "1s"
            disable_hostname = false
            prometheus_metrics = true
            publish_allocation_metrics = true
            publish_node_metrics = true
          }
        dest: /etc/nomad.d/nomad.hcl
        backup: true
      notify: restart nomad

    # Read-only check, so it never reports a change. A failure stops the
    # play before the restart handler runs.
    - name: 验证配置文件语法
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: 显示验证结果
      debug:
        msg: "配置验证结果: {{ config_validation.stdout }}"

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true
|
||||
Reference in New Issue
Block a user