REMOVE: 删除不再使用的 Terraform 配置文件
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 7m45s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 2m33s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped
Simple Test / test (push) Failing after 2m48s

- 移除 nomad-terraform.tf 和 test_opentofu_consul.tf 文件
- 更新 Ansible inventory，注释掉不存在的节点 hcp2
- 修改 inventory.ini，确保节点配置的准确性
- 在 nomad-config 模块中添加 null_provider 以支持新配置
- 更新 influxdb1.hcl,添加 Grafana 和 Prometheus 数据卷配置
This commit is contained in:
2025-10-10 13:53:41 +00:00
parent 45f93cc68c
commit eff8d3ec6d
50 changed files with 3683 additions and 239 deletions

View File

@@ -0,0 +1,62 @@
---
# Batch-fix the security configuration of the Nomad server nodes.
#
# For each listed server, one at a time: render the secure configuration to a
# staging file, validate it, stop Nomad, back up and replace the live
# configuration, wipe the local Raft state so the node rejoins the cluster
# from scratch, then start Nomad and wait for its HTTP port.
- name: 修复所有 Nomad 服务器节点的安全配置
  hosts: ash1d,ash2e,onecloud1
  # Facts must be gathered: the backup file name uses
  # ansible_date_time.epoch, which is undefined when fact gathering is off.
  gather_facts: true
  # Process one server at a time so that only a single node is ever stopped
  # with its Raft data removed.
  serial: 1
  vars:
    # Presumably consumed by server-secure.hcl.j2 (template not visible
    # here) - verify against the template.
    nomad_servers:
      - "semaphore.tailnet-68f9.ts.net:4647"
      - "ash1d.tailnet-68f9.ts.net:4647"
      - "ash2e.tailnet-68f9.ts.net:4647"
      - "ch2.tailnet-68f9.ts.net:4647"
      - "ch3.tailnet-68f9.ts.net:4647"
      - "onecloud1.tailnet-68f9.ts.net:4647"
      - "de.tailnet-68f9.ts.net:4647"
  tasks:
    - name: 生成安全的 Nomad 服务器配置
      template:
        src: server-secure.hcl.j2
        dest: /tmp/nomad-secure.hcl
        mode: '0644'

    # Validate the staged file while the old service is still running, so a
    # broken template never leads to a stopped node with wiped Raft data.
    - name: 验证新配置文件语法
      command: nomad config validate /tmp/nomad-secure.hcl
      changed_when: false
      become: true

    - name: 停止 Nomad 服务
      systemd:
        name: nomad
        state: stopped
      become: true

    # Explicit existence check instead of ignore_errors: a missing live
    # config is tolerated, but any real copy failure now stops the play
    # before the Raft data is deleted.
    - name: 检查当前配置是否存在
      stat:
        path: /etc/nomad.d/nomad.hcl
      register: current_nomad_config
      become: true

    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
      become: true
      when: current_nomad_config.stat.exists

    - name: 部署安全配置
      copy:
        src: /tmp/nomad-secure.hcl
        dest: /etc/nomad.d/nomad.hcl
        remote_src: true
      become: true

    - name: 清理 Raft 数据以重新加入集群
      file:
        path: /opt/nomad/data/server/raft/
        state: absent
      become: true

    - name: 启动 Nomad 服务
      systemd:
        name: nomad
        state: started
        enabled: true
      become: true

    # Runs on the managed node itself (no delegation) and probes the node's
    # Tailscale name on the Nomad HTTP port.
    - name: 等待服务启动
      wait_for:
        port: 4646
        host: "{{ inventory_hostname }}.tailnet-68f9.ts.net"
        delay: 10
        timeout: 60

View File

@@ -0,0 +1,59 @@
---
# Safely fix the configuration of Nomad client nodes - clients first,
# servers afterwards.
#
# Per node (one at a time): back up the live config on the node, render the
# secure client template, validate it, restart Nomad, wait for the HTTP port
# and finally query the agent API to confirm the node is reachable.
- name: 修复客户端节点不安全配置
  hosts: nomad_clients
  become: true
  serial: 1  # one node at a time, for safety
  vars:
    # Tailscale FQDN of the node being processed. The inventory host
    # "influxdb" is reachable as "influxdb1" on the tailnet.
    nomad_node_fqdn: "{{ 'influxdb1' if inventory_hostname == 'influxdb' else inventory_hostname }}.tailnet-68f9.ts.net"
  tasks:
    - name: 显示当前处理的节点
      debug:
        msg: "正在处理客户端节点: {{ inventory_hostname }}"

    # remote_src is required: without it, copy reads the source path on the
    # controller, so the "backup" would contain the controller's nomad.hcl
    # rather than this node's.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true

    - name: 创建安全的客户端配置
      template:
        src: client-secure-template.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        backup: true
      notify: restart nomad

    # Read-only check; a non-zero exit fails the play before the restart
    # handler is flushed.
    - name: 验证配置文件语法
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: 显示验证结果
      debug:
        msg: "{{ inventory_hostname }} 配置验证: {{ config_validation.stdout }}"

    # Handlers normally run only at the end of the tasks section, i.e. after
    # the wait below. Flush them here so the wait observes the restarted
    # service instead of the old process.
    - name: 立即执行重启处理器
      meta: flush_handlers

    - name: 等待服务重启完成
      wait_for:
        port: 4646
        host: "{{ nomad_node_fqdn }}"
        delay: 10
        timeout: 60
      delegate_to: localhost
      become: false  # a TCP probe from the controller needs no privileges

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true

  post_tasks:
    - name: 验证节点重新加入集群
      uri:
        url: "http://{{ nomad_node_fqdn }}:4646/v1/agent/self"
        method: GET
      register: node_status
      delegate_to: localhost
      become: false  # an HTTP GET from the controller needs no privileges

    - name: 显示节点状态
      debug:
        msg: "{{ inventory_hostname }} 重新加入集群成功"
      when: node_status.status == 200

View File

@@ -0,0 +1,106 @@
# Nomad client secure configuration template
{# The inventory host "influxdb" is named "influxdb1" on the tailnet; every other host uses its inventory name. #}
{% set node_fqdn = ('influxdb1' if inventory_hostname == 'influxdb' else inventory_hostname) ~ '.tailnet-68f9.ts.net' %}
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level  = "INFO"
name       = "{{ inventory_hostname }}"

# Secure binding - bind only to the node's Tailscale address
bind_addr = "{{ node_fqdn }}"

addresses {
  http = "{{ node_fqdn }}"
  rpc  = "{{ node_fqdn }}"
  serf = "{{ node_fqdn }}"
}

advertise {
  http = "{{ node_fqdn }}:4646"
  rpc  = "{{ node_fqdn }}:4647"
  serf = "{{ node_fqdn }}:4648"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

# Pure client mode
server {
  enabled = false
}

client {
  enabled           = true
  network_interface = "tailscale0"

  # Connect to the currently active server nodes
  servers = [
    "ch2.tailnet-68f9.ts.net:4647",
    "ch3.tailnet-68f9.ts.net:4647",
    "de.tailnet-68f9.ts.net:4647",
    "semaphore.tailnet-68f9.ts.net:4647"
  ]

  # Basic drivers
  options {
    "driver.raw_exec.enable" = "1"
    "driver.exec.enable"     = "1"
  }

  # Aggressive garbage-collection policy
  gc_interval              = "5m"
  gc_disk_usage_threshold  = 80
  gc_inode_usage_threshold = 70
}

# Podman plugin configuration
plugin "nomad-driver-podman" {
  config {
    socket_path = "unix:///run/podman/podman.sock"
    volumes {
      enabled = true
    }
  }
}

# Secure Consul configuration - points at the local Consul client
consul {
  address             = "127.0.0.1:8500"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Vault disabled - for now
vault {
  enabled = false
}

# Telemetry configuration
telemetry {
  collection_interval        = "1s"
  disable_hostname           = false
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}

View File

@@ -0,0 +1,97 @@
---
# Trial run on the local semaphore host: back up the live Nomad config,
# replace it with an inline secure server configuration, validate the
# result, and restart Nomad through a handler when the file changed.
- name: 测试 Ansible 偷梁换柱 - 修复 semaphore 不安全配置
  hosts: localhost
  become: true
  tasks:
    # The destination name is already unique per run (epoch suffix), so no
    # additional "backup" of the backup file is requested. remote_src makes
    # it explicit that the source is read on the target host.
    - name: 备份当前配置
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
        remote_src: true

    - name: 创建安全的 semaphore 配置
      copy:
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          plugin_dir = "/opt/nomad/plugins"
          log_level = "INFO"
          name = "semaphore"
          # 安全绑定 - 只绑定到 Tailscale 接口
          bind_addr = "semaphore.tailnet-68f9.ts.net"
          addresses {
            http = "semaphore.tailnet-68f9.ts.net"
            rpc = "semaphore.tailnet-68f9.ts.net"
            serf = "semaphore.tailnet-68f9.ts.net"
          }
          advertise {
            http = "semaphore.tailnet-68f9.ts.net:4646"
            rpc = "semaphore.tailnet-68f9.ts.net:4647"
            serf = "semaphore.tailnet-68f9.ts.net:4648"
          }
          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }
          server {
            enabled = true
            server_join {
              retry_join = [
                "semaphore.tailnet-68f9.ts.net:4647",
                "ash1d.tailnet-68f9.ts.net:4647",
                "ash2e.tailnet-68f9.ts.net:4647",
                "ch2.tailnet-68f9.ts.net:4647",
                "ch3.tailnet-68f9.ts.net:4647",
                "onecloud1.tailnet-68f9.ts.net:4647",
                "de.tailnet-68f9.ts.net:4647"
              ]
            }
          }
          # 安全的 Consul 配置
          consul {
            address = "127.0.0.1:8500"
            server_service_name = "nomad"
            client_service_name = "nomad-client"
            auto_advertise = true
            server_auto_join = true
            client_auto_join = true
          }
          vault {
            enabled = false
          }
          telemetry {
            collection_interval = "1s"
            disable_hostname = false
            prometheus_metrics = true
            publish_allocation_metrics = true
            publish_node_metrics = true
          }
        dest: /etc/nomad.d/nomad.hcl
        backup: true
      notify: restart nomad

    # Read-only check, so it never reports "changed". A non-zero exit fails
    # the play, and the restart handler is then not run.
    - name: 验证配置文件语法
      command: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation
      changed_when: false

    - name: 显示验证结果
      debug:
        msg: "配置验证结果: {{ config_validation.stdout }}"

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted
        daemon_reload: true