feat: 重构基础设施架构并完善Consul集群配置

主要变更:
- 重构Terraform/OpenTofu目录结构,统一迁移至infrastructure/opentofu
- 添加"7天创造世界"文档,记录基础设施建设演进逻辑
- 更新Consul集群配置管理经验,添加实际案例和解决方案
- 修正README中的Sticky Note,反映Consul集群健康状态
- 添加Ansible部署配置和inventory文件
- 完善项目文档结构,添加各组件配置指南

技术架构演进:
- 第1天: Tailscale网络连接基础
- 第2天: Ansible分布式控制
- 第3天: Nomad服务感知与任务调度
- 第4天: Consul配置集中管理
- 第5天: OpenTofu状态一致性
- 第6天: Vault密钥管理
- 第7天: Waypoint应用部署
This commit is contained in:
2025-09-30 03:46:33 +00:00
parent c0064b2cad
commit e8bfc76038
119 changed files with 1772 additions and 631 deletions

View File

@@ -0,0 +1,161 @@
---
# Installs the nomad-driver-podman package from the HashiCorp apt repository
# on every host in `nomad_clients`, regenerates /etc/nomad.d/nomad.hcl so the
# plugin is loaded, and restarts Nomad.
#
# Inputs:
#   nomad_datacenter  optional; written to nomad.hcl, falls back to "dc1".
#
# The new configuration is written and validated BEFORE the service is
# touched. If validation fails, the previous file is restored and the play
# aborts, so a bad template never leaves a client stopped.
- name: Install and Configure Nomad Podman Driver on Client Nodes
  hosts: nomad_clients
  become: true
  vars:
    nomad_plugin_dir: "/opt/nomad/plugins"
  tasks:
    - name: Create backup directory with timestamp
      set_fact:
        backup_dir: "/root/backup/{{ ansible_date_time.date }}_{{ ansible_date_time.hour }}{{ ansible_date_time.minute }}{{ ansible_date_time.second }}"

    - name: Create backup directory
      file:
        path: "{{ backup_dir }}"
        state: directory
        mode: "0755"

    # Best effort: a node without an existing config has nothing to back up.
    - name: Backup current Nomad configuration
      copy:
        src: /etc/nomad.d/nomad.hcl
        dest: "{{ backup_dir }}/nomad.hcl.backup"
        remote_src: true
      ignore_errors: true

    - name: Backup current apt sources
      shell: |
        cp -r /etc/apt/sources.list* {{ backup_dir }}/
        dpkg --get-selections > {{ backup_dir }}/installed_packages.txt
      ignore_errors: true

    # Dedicated scratch directory handed to the apt/gpg tasks through TMPDIR.
    - name: Create temporary directory for apt
      file:
        path: /tmp/apt-temp
        state: directory
        mode: "1777"

    - name: Download HashiCorp GPG key
      get_url:
        url: https://apt.releases.hashicorp.com/gpg
        dest: /tmp/hashicorp.gpg
        mode: "0644"
      environment:
        TMPDIR: /tmp/apt-temp

    - name: Install HashiCorp GPG key
      shell: |
        gpg --dearmor < /tmp/hashicorp.gpg > /usr/share/keyrings/hashicorp-archive-keyring.gpg
      environment:
        TMPDIR: /tmp/apt-temp

    - name: Add HashiCorp repository
      lineinfile:
        path: /etc/apt/sources.list.d/hashicorp.list
        line: "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com {{ ansible_distribution_release }} main"
        create: true
        mode: "0644"

    # Best effort: the install task below fails loudly if the package index
    # is unusable.
    - name: Update apt cache
      apt:
        update_cache: true
      environment:
        TMPDIR: /tmp/apt-temp
      ignore_errors: true

    - name: Install nomad-driver-podman
      apt:
        name: nomad-driver-podman
        state: present
      environment:
        TMPDIR: /tmp/apt-temp

    - name: Create Nomad plugin directory
      file:
        path: "{{ nomad_plugin_dir }}"
        state: directory
        owner: nomad
        group: nomad
        mode: "0755"

    - name: Create symlink for nomad-driver-podman in plugin directory
      file:
        src: /usr/bin/nomad-driver-podman
        dest: "{{ nomad_plugin_dir }}/nomad-driver-podman"
        state: link
        owner: nomad
        group: nomad

    # Source address the kernel would use to reach 1.1.1.1. pipefail makes an
    # `ip` failure visible, and an empty result is rejected so that
    # bind_addr is never rendered as "".
    - name: Get server IP address
      shell: |
        set -o pipefail
        ip route get 1.1.1.1 | grep -oP 'src \K\S+'
      args:
        executable: /bin/bash
      register: server_ip_result
      changed_when: false
      failed_when: >-
        server_ip_result.rc != 0
        or (server_ip_result.stdout | trim | length) == 0

    - name: Set server IP fact
      set_fact:
        server_ip: "{{ server_ip_result.stdout | trim }}"

    - name: Write and validate the Nomad client configuration
      block:
        - name: Create updated Nomad client configuration
          copy:
            content: |
              datacenter = "{{ nomad_datacenter | default('dc1') }}"
              data_dir = "/opt/nomad/data"
              log_level = "INFO"
              bind_addr = "{{ server_ip }}"
              server {
                enabled = false
              }
              client {
                enabled = true
                servers = ["100.117.106.136:4647", "100.116.80.94:4647", "100.97.62.111:4647", "100.116.112.45:4647", "100.84.197.26:4647"]
              }
              plugin_dir = "{{ nomad_plugin_dir }}"
              plugin "nomad-driver-podman" {
                config {
                  volumes {
                    enabled = true
                  }
                  recover_stopped = true
                }
              }
              consul {
                address = "127.0.0.1:8500"
              }
            dest: /etc/nomad.d/nomad.hcl
            owner: nomad
            group: nomad
            mode: "0640"
            backup: true
          register: nomad_config_result

        - name: Validate Nomad configuration
          command: nomad config validate /etc/nomad.d/nomad.hcl
          changed_when: false
      rescue:
        # backup_file is only reported when copy replaced an existing file.
        - name: Restore previous Nomad configuration
          copy:
            src: "{{ nomad_config_result.backup_file }}"
            dest: /etc/nomad.d/nomad.hcl
            remote_src: true
            owner: nomad
            group: nomad
            mode: "0640"
          when: nomad_config_result.backup_file is defined

        - name: Abort because the new Nomad configuration is invalid
          fail:
            msg: >-
              New Nomad configuration could not be written or validated on
              {{ inventory_hostname }}; the Nomad service was left untouched.

    # Restart (not stop/start around the edit) so the service is only
    # interrupted once a valid configuration is in place.
    - name: Restart Nomad service
      systemd:
        name: nomad
        state: restarted
        enabled: true

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: "{{ server_ip }}"
        delay: 5
        timeout: 60

    - name: Display backup location
      debug:
        msg: "Backup created at: {{ backup_dir }}"

View File

@@ -0,0 +1,68 @@
---
# Installs a pinned Consul release from apt on the `master` and `ash3c` hosts,
# then makes sure the consul system user and its /opt/consul directory exist.
- name: 在 master 和 ash3c 节点安装 Consul
  become: true
  hosts: master,ash3c
  vars:
    consul_version: "1.21.5"
    consul_arch: "arm64"  # both of these nodes are aarch64
  tasks:
    # Informational only: the architecture is printed, not enforced.
    - name: 检查节点架构
      register: arch_probe
      changed_when: false
      command: uname -m

    - name: 显示节点架构
      debug:
        msg: "节点 {{ inventory_hostname }} 架构: {{ arch_probe.stdout }}"

    # rc is 0 when a consul binary is already on PATH; never fails the play.
    - name: 检查是否已安装 consul
      register: consul_lookup
      changed_when: false
      failed_when: false
      command: which consul

    - name: 显示当前 consul 状态
      debug:
        msg: "Consul 状态: {{ (consul_lookup.rc == 0) | ternary('already installed', 'not installed') }}"

    # Clears a manually placed binary so the apt-managed one is the one used.
    - name: 删除错误的 consul 二进制文件(如果存在)
      when: consul_lookup.rc == 0
      file:
        state: absent
        path: /usr/local/bin/consul

    - name: 更新 APT 缓存
      ignore_errors: true
      apt:
        update_cache: true

    - name: 安装 consul 通过 APT
      apt:
        state: present
        name: "consul={{ consul_version }}-1"

    - name: 验证 consul 安装
      register: consul_version_output
      changed_when: false
      command: consul version

    - name: 显示安装的 consul 版本
      debug:
        msg: "安装的 Consul 版本: {{ consul_version_output.stdout_lines | first }}"

    - name: 确保 consul 用户存在
      user:
        name: consul
        home: /opt/consul
        create_home: false
        shell: /bin/false
        system: true

    - name: 创建 consul 数据目录
      file:
        path: /opt/consul
        state: directory
        mode: "0755"
        owner: consul
        group: consul

View File

@@ -0,0 +1,131 @@
---
# Installs Nomad from a release zip, renders the client configuration from
# templates/nomad-client.hcl.j2, installs a systemd unit and starts the agent.
#
# Required variables (inventory or -e), not defined in this play:
#   nomad_version  release version, used in the temporary file name
#   nomad_url      full download URL of the release zip
#
# NOTE(review): the temporary file name hard-codes "linux_amd64"; the actual
# architecture is whatever nomad_url points at.
- name: Install Nomad by direct download from HashiCorp
  hosts: all
  become: true
  vars:
    nomad_user: "nomad"
    nomad_group: "nomad"
    nomad_home: "/opt/nomad"
    nomad_data_dir: "/opt/nomad/data"
    nomad_config_dir: "/etc/nomad.d"
    nomad_datacenter: "dc1"
    nomad_region: "global"
    nomad_server_addresses:
      - "100.116.158.95:4647"  # semaphore server address
  tasks:
    # Fail early with a clear message instead of an undefined-variable error
    # halfway through the install.
    - name: Check that the download variables are defined
      assert:
        that:
          - nomad_version is defined
          - nomad_url is defined
        fail_msg: "nomad_version and nomad_url must be provided (inventory or -e)."

    # The user task below sets a primary group, which must already exist.
    - name: Create nomad group
      group:
        name: "{{ nomad_group }}"
        system: true

    - name: Create nomad user
      user:
        name: "{{ nomad_user }}"
        group: "{{ nomad_group }}"
        system: true
        shell: /bin/false
        home: "{{ nomad_home }}"
        create_home: true

    - name: Create nomad directories
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ nomad_user }}"
        group: "{{ nomad_group }}"
        mode: "0755"
      loop:
        - "{{ nomad_home }}"
        - "{{ nomad_data_dir }}"
        - "{{ nomad_config_dir }}"
        - /var/log/nomad

    - name: Install unzip package
      apt:
        name: unzip
        state: present
        update_cache: true

    - name: Download Nomad binary
      get_url:
        url: "{{ nomad_url }}"
        dest: "/tmp/nomad_{{ nomad_version }}_linux_amd64.zip"
        mode: "0644"
        timeout: 300

    - name: Extract Nomad binary
      unarchive:
        src: "/tmp/nomad_{{ nomad_version }}_linux_amd64.zip"
        dest: /tmp
        remote_src: true

    - name: Copy Nomad binary to /usr/local/bin
      copy:
        src: /tmp/nomad
        dest: /usr/local/bin/nomad
        mode: "0755"
        owner: root
        group: root
        remote_src: true

    - name: Create Nomad client configuration
      template:
        src: templates/nomad-client.hcl.j2
        dest: "{{ nomad_config_dir }}/nomad.hcl"
        owner: "{{ nomad_user }}"
        group: "{{ nomad_group }}"
        mode: "0640"

    - name: Create Nomad systemd service
      copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          ConditionFileNotEmpty={{ nomad_config_dir }}/nomad.hcl

          [Service]
          Type=notify
          User={{ nomad_user }}
          Group={{ nomad_group }}
          ExecStart=/usr/local/bin/nomad agent -config={{ nomad_config_dir }}
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: "0644"

    - name: Reload systemd daemon
      systemd:
        daemon_reload: true

    - name: Enable and start Nomad service
      systemd:
        name: nomad
        enabled: true
        state: started

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 5
        timeout: 60

    # Read-only check; must not report a change on every run.
    - name: Verify Nomad installation
      command: /usr/local/bin/nomad version
      register: nomad_version_output
      changed_when: false

    - name: Display Nomad version
      debug:
        msg: "{{ nomad_version_output.stdout }}"

    - name: Clean up downloaded files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - "/tmp/nomad_{{ nomad_version }}_linux_amd64.zip"
        - /tmp/nomad

View File

@@ -0,0 +1,131 @@
---
# Downloads the nomad-driver-podman release zip, installs the binary into the
# Nomad plugin directory, points nomad.hcl at that directory, restarts Nomad
# and reports whether the podman driver was detected.
#
# The driver queries use `nomad node status -self -json`: without -self the
# command lists all nodes instead of returning the local node object, so the
# `.Drivers` lookups could never succeed.
- name: Install Nomad Podman Driver Plugin
  hosts: target_nodes
  become: true
  vars:
    nomad_user: nomad
    nomad_data_dir: /opt/nomad/data
    nomad_plugins_dir: "{{ nomad_data_dir }}/plugins"
    podman_driver_version: "0.6.1"
    podman_driver_url: "https://releases.hashicorp.com/nomad-driver-podman/{{ podman_driver_version }}/nomad-driver-podman_{{ podman_driver_version }}_linux_amd64.zip"
    # Owner of the rootless Podman socket that Nomad talks to.
    podman_socket_group: ben
    podman_socket_uid: 1001
  tasks:
    - name: Stop Nomad service
      systemd:
        name: nomad
        state: stopped

    - name: Create plugins directory
      file:
        path: "{{ nomad_plugins_dir }}"
        state: directory
        owner: "{{ nomad_user }}"
        group: "{{ nomad_user }}"
        mode: "0755"

    - name: Download Nomad Podman driver
      get_url:
        url: "{{ podman_driver_url }}"
        dest: "/tmp/nomad-driver-podman_{{ podman_driver_version }}_linux_amd64.zip"
        mode: "0644"

    - name: Extract Nomad Podman driver
      unarchive:
        src: "/tmp/nomad-driver-podman_{{ podman_driver_version }}_linux_amd64.zip"
        dest: "/tmp"
        remote_src: true

    - name: Install Nomad Podman driver
      copy:
        src: "/tmp/nomad-driver-podman"
        dest: "{{ nomad_plugins_dir }}/nomad-driver-podman"
        owner: "{{ nomad_user }}"
        group: "{{ nomad_user }}"
        mode: "0755"
        remote_src: true

    - name: Update Nomad configuration for plugin directory
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} PLUGIN DIRECTORY CONFIGURATION"
        block: |
          plugin_dir = "{{ nomad_plugins_dir }}"
        insertafter: 'data_dir = "/opt/nomad/data"'

    # NOTE(review): 0666 lets every local user drive this Podman socket;
    # consider 0660 plus the group membership granted below.
    - name: Fix Podman socket permissions
      file:
        path: "/run/user/{{ podman_socket_uid }}/podman/podman.sock"
        mode: "0666"
      ignore_errors: true

    - name: Ensure nomad user can access Podman socket
      user:
        name: "{{ nomad_user }}"
        groups: "{{ podman_socket_group }}"
        append: true

    - name: Start Nomad service
      systemd:
        name: nomad
        state: started
        enabled: true

    - name: Wait for Nomad to be ready
      wait_for:
        port: 4646
        host: localhost
        delay: 10
        timeout: 60

    - name: Verify Nomad is running
      systemd:
        name: nomad
      register: nomad_service_status

    - name: Display Nomad service status
      debug:
        msg: "Nomad service is {{ nomad_service_status.status.ActiveState }}"

    # Give the agent time to fingerprint its drivers before querying them.
    - name: Wait for plugins to load
      pause:
        seconds: 15

    # The checks below are diagnostic only: they never fail the play and
    # never report a change.
    - name: Check available drivers
      shell: |
        set -o pipefail
        sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self | grep -A 20 "Driver Status"
      args:
        executable: /bin/bash
      register: driver_status
      changed_when: false
      failed_when: false

    - name: Display driver status
      debug:
        var: driver_status.stdout_lines

    - name: Test Podman driver functionality
      shell: |
        set -o pipefail
        sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self -json | jq -r '.Drivers | keys[]'
      args:
        executable: /bin/bash
      register: available_drivers
      changed_when: false
      failed_when: false

    - name: Display available drivers
      debug:
        msg: "Available drivers: {{ available_drivers.stdout_lines | join(', ') }}"

    - name: Clean up downloaded files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - "/tmp/nomad-driver-podman_{{ podman_driver_version }}_linux_amd64.zip"
        - "/tmp/nomad-driver-podman"

    - name: Final verification - Check if Podman driver is loaded
      shell: |
        set -o pipefail
        sudo -u {{ nomad_user }} /usr/local/bin/nomad node status -self -json | jq -r '.Drivers.podman.Detected'
      args:
        executable: /bin/bash
      register: podman_driver_detected
      changed_when: false
      failed_when: false

    - name: Display final result
      debug:
        msg: |
          Podman driver installation: {{ 'SUCCESS' if podman_driver_detected.stdout == 'true' else 'NEEDS VERIFICATION' }}
          Driver detected: {{ podman_driver_detected.stdout | default('unknown') }}

View File

@@ -0,0 +1,61 @@
---
# Installs Podman, Buildah, Skopeo and podman-compose on every host in
# `nomad_cluster`, then reports the installed versions.
#
# podman-compose is installed in its own apt task so that a distribution
# without that package cannot block the installation of Podman itself; pip
# is used only when the apt install of podman-compose failed.
- name: Install Podman Compose on all Nomad cluster nodes
  hosts: nomad_cluster
  become: true
  tasks:
    - name: Display target node
      debug:
        msg: "正在安装 Podman Compose 到节点: {{ inventory_hostname }}"

    - name: Update package cache
      apt:
        update_cache: true
      ignore_errors: true

    # Kept best effort; the "Verify Podman installation" task below fails
    # the play if podman is still missing.
    - name: Install Podman and related tools
      apt:
        name:
          - podman
          - buildah
          - skopeo
        state: present
      ignore_errors: true

    - name: Install additional dependencies
      apt:
        name:
          - python3-pip
          - python3-setuptools
        state: present
      ignore_errors: true

    - name: Install podman-compose from the package manager
      apt:
        name: podman-compose
        state: present
      register: podman_compose_apt
      ignore_errors: true

    - name: Install podman-compose via pip if package manager failed
      pip:
        name: podman-compose
        state: present
      when: podman_compose_apt is failed
      ignore_errors: true

    - name: Verify Podman installation
      command: podman --version
      register: podman_version
      changed_when: false

    - name: Verify Podman Compose installation
      command: podman-compose --version
      register: podman_compose_version
      changed_when: false
      ignore_errors: true

    # rc may be absent if the executable could not be launched at all.
    - name: Display installation results
      debug:
        msg: |
          ✅ 节点 {{ inventory_hostname }} 安装结果:
          📦 Podman: {{ podman_version.stdout }}
          🐳 Podman Compose: {{ podman_compose_version.stdout if (podman_compose_version.rc | default(1)) == 0 else '安装失败或不可用' }}

    - name: Ensure Podman socket is enabled
      systemd:
        name: podman.socket
        enabled: true
        state: started
      ignore_errors: true

View File

@@ -0,0 +1,115 @@
---
# Installs TigerVNC and Xfce on the `kali` hosts, configures a VNC password
# and xstartup script for `vnc_user`, and runs the server as the systemd
# instance vncserver@1 (display :1).
#
# NOTE(review): vnc_password is stored in plain text here; move it to Ansible
# Vault or pass it with -e. It is no longer logged or printed by this play.
- name: 在Kali Linux上安装和配置VNC服务器
  hosts: kali
  become: true
  vars:
    vnc_user: ben              # account that owns and runs the VNC session
    vnc_password: "3131"       # VNC connection password
    vnc_port: "5901"           # port shown in the connection info (display :1)
    vnc_geometry: "1280x1024"  # VNC resolution
    vnc_depth: "24"            # colour depth
  tasks:
    - name: 更新APT缓存
      apt:
        update_cache: true

    - name: 安装VNC服务器和客户端
      apt:
        name:
          - tigervnc-standalone-server
          - tigervnc-viewer
          - xfce4
          - xfce4-goodies
        state: present

    - name: 创建VNC配置目录
      file:
        path: "/home/{{ vnc_user }}/.vnc"
        state: directory
        owner: "{{ vnc_user }}"
        group: "{{ vnc_user }}"
        mode: "0700"

    # The password is shell-quoted and the task output is suppressed so the
    # secret does not reach the Ansible log.
    - name: 设置VNC密码
      shell: |
        printf '%s\n' {{ vnc_password | quote }} | vncpasswd -f > /home/{{ vnc_user }}/.vnc/passwd
        printf '%s\n' {{ vnc_password | quote }} | vncpasswd -f > /home/{{ vnc_user }}/.vnc/passwd2
      become_user: "{{ vnc_user }}"
      no_log: true

    - name: 设置VNC密码文件权限
      file:
        path: "/home/{{ vnc_user }}/.vnc/{{ item }}"
        owner: "{{ vnc_user }}"
        group: "{{ vnc_user }}"
        mode: "0600"
      loop:
        - passwd
        - passwd2

    - name: 创建VNC启动脚本
      copy:
        dest: "/home/{{ vnc_user }}/.vnc/xstartup"
        content: |
          #!/bin/bash
          unset SESSION_MANAGER
          unset DBUS_SESSION_BUS_ADDRESS
          exec startxfce4
        owner: "{{ vnc_user }}"
        group: "{{ vnc_user }}"
        mode: "0755"

    # systemd does not interpret shell redirection in Exec* lines, so the
    # former "> /dev/null 2>&1" was passed to vncserver as arguments. The
    # leading "-" already makes a failing pre-kill non-fatal.
    - name: 创建VNC服务文件
      copy:
        dest: /etc/systemd/system/vncserver@.service
        content: |
          [Unit]
          Description=Start TigerVNC server at startup
          After=syslog.target network.target

          [Service]
          Type=forking
          User={{ vnc_user }}
          Group={{ vnc_user }}
          WorkingDirectory=/home/{{ vnc_user }}
          PIDFile=/home/{{ vnc_user }}/.vnc/%H:%i.pid
          ExecStartPre=-/usr/bin/vncserver -kill :%i
          ExecStart=/usr/bin/vncserver -depth {{ vnc_depth }} -geometry {{ vnc_geometry }} :%i
          ExecStop=/usr/bin/vncserver -kill :%i

          [Install]
          WantedBy=multi-user.target
        mode: "0644"

    - name: 重新加载systemd配置
      systemd:
        daemon_reload: true

    - name: 启用并启动VNC服务
      systemd:
        name: vncserver@1.service
        enabled: true
        state: started

    - name: 检查VNC服务状态
      command: systemctl status vncserver@1.service
      register: vnc_status
      changed_when: false
      ignore_errors: true

    - name: 显示VNC服务状态
      debug:
        msg: "{{ vnc_status.stdout_lines }}"

    # The password is intentionally not echoed here.
    - name: 显示VNC连接信息
      debug:
        msg: |
          VNC服务器已成功配置
          连接信息:
          - 地址: {{ ansible_host }}
          - 端口: {{ vnc_port }}
          - 密码: (见 vnc_password 变量)
          - 连接命令: vnc://{{ ansible_host }}:{{ vnc_port }}
          - 使用macOS屏幕共享应用连接到上述地址

View File

@@ -0,0 +1,36 @@
---
# install_vault.yml
# Installs the vault package from apt on `vault_servers` when no vault binary
# is on PATH, ensures the Vault data directory exists, and prints the version.
#
# Inputs:
#   vault_data_dir  optional; defaults to /opt/nomad/data/vault/config
- name: Install HashiCorp Vault
  hosts: vault_servers
  become: true
  tasks:
    # Probe only: a missing binary is the expected "not installed" answer,
    # so it must not be reported as a failed task.
    - name: Check if Vault is already installed
      command: which vault
      register: vault_check
      failed_when: false
      changed_when: false

    - name: Install Vault using apt
      apt:
        name: vault
        state: present
        update_cache: true
      when: vault_check.rc != 0

    # No `recurse`: state=directory already creates missing parents, while
    # recursing would force root:root 0755 onto every existing file below
    # the directory on each run.
    - name: Create Vault data directory
      file:
        path: "{{ vault_data_dir | default('/opt/nomad/data/vault/config') }}"
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Verify Vault installation
      command: vault --version
      register: vault_version
      changed_when: false

    - name: Display Vault version
      debug:
        var: vault_version.stdout