feat(monitoring): add Telegraf monitoring config and a disk-monitoring script
refactor(containers): migrate from Docker to Podman and update the Nomad configs
fix(config): fix proxy and alias configuration problems
docs: update configuration-file and script comments
chore(cleanup): remove Consul- and Docker-related files that are no longer used
@@ -1,375 +0,0 @@
---
# ☢️ NUCLEAR NOMAD RESET ☢️
# This is the strongest fix script there is
# Warning: this will completely destroy and rebuild the Nomad cluster
- name: "☢️ NUCLEAR NOMAD RESET - nuclear-grade cluster reset ☢️"
  hosts: nomad_cluster
  become: yes
  gather_facts: yes
  serial: 1  # one node at a time, so we don't blow up every node at once
  vars:
    nomad_version: "1.10.5"
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    - name: "🚨 Warning: nuclear reset imminent"
      debug:
        msg: |
          ☢️☢️☢️ Warning: about to perform a nuclear reset on {{ inventory_hostname }} ☢️☢️☢️
          This will completely destroy all Nomad data, configuration, and processes!
          If you are not sure, press Ctrl+C now to cancel!

    - name: "⏰ Waiting 10 seconds, your last chance to cancel..."
      pause:
        seconds: 10

    # ========== Phase 1: nuclear cleanup ==========
    - name: "💀 Phase 1: nuclear process cleanup"
      debug:
        msg: "Starting nuclear process cleanup..."

    - name: "🔥 Stop the Nomad service (if it exists)"
      systemd:
        name: nomad
        state: stopped
        enabled: no
        daemon_reload: yes
      ignore_errors: yes

    - name: "💣 Force-kill all Nomad-related processes"
      shell: |
        # Kill all nomad processes
        pkill -9 -f nomad || true
        # Kill any possible child processes
        pkill -9 -f "nomad agent" || true
        pkill -9 -f "nomad server" || true
        pkill -9 -f "nomad client" || true
        # Wait for the processes to die completely
        sleep 5
        # Kill again to be sure
        ps aux | grep nomad | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
      ignore_errors: yes

    - name: "🧹 Remove all Nomad-related files and directories"
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /opt/nomad
        - /etc/nomad.d
        - /var/log/nomad
        - /etc/systemd/system/nomad.service
        - /usr/local/bin/nomad
        - /usr/bin/nomad
        - /tmp/nomad*  # note: the file module does not expand globs; this only removes a literal path
        - /var/lib/nomad
        - /run/nomad
        - /var/run/nomad.pid
      ignore_errors: yes

    - name: "🔧 Refresh the systemd cache"
      systemd:
        daemon_reload: yes

    # ========== Phase 2: reinstall Nomad ==========
    - name: "🚀 Phase 2: reinstall Nomad"
      debug:
        msg: "Reinstalling Nomad..."

    - name: "🔑 Add the HashiCorp GPG key"
      apt_key:
        url: https://apt.releases.hashicorp.com/gpg
        state: present

    - name: "📦 Add the HashiCorp APT repository"
      apt_repository:
        repo: "deb [arch={{ ansible_architecture }}] https://apt.releases.hashicorp.com {{ ansible_distribution_release }} main"
        state: present
        update_cache: yes

    - name: "🔧 Install Nomad (architecture auto-detected)"
      apt:
        name: "nomad={{ nomad_version }}-1"
        state: present
        update_cache: yes

    - name: "👤 Create the nomad group"
      group:
        name: nomad
        state: present

    - name: "👤 Create the nomad user"
      user:
        name: nomad
        group: nomad
        system: yes
        shell: /bin/false
        home: /opt/nomad
        create_home: no

    - name: "📁 Create a fresh directory structure"
      file:
        path: "{{ item.path }}"
        state: directory
        owner: "{{ item.owner | default('nomad') }}"
        group: "{{ item.group | default('nomad') }}"
        mode: "{{ item.mode | default('0755') }}"
      loop:
        - { path: "/etc/nomad.d", mode: "0755" }
        - { path: "/opt/nomad", mode: "0755" }
        - { path: "/opt/nomad/data", mode: "0755" }
        - { path: "/opt/nomad/alloc_mounts", mode: "0755" }
        - { path: "/var/log/nomad", mode: "0755" }

    # ========== Phase 3: network and firewall checks ==========
    - name: "🌐 Phase 3: validate the network configuration"
      debug:
        msg: "Validating the network configuration..."

    - name: "🔍 Check whether the Tailscale IP is bound correctly"
      shell: |
        ip addr show | grep "{{ tailscale_ips[inventory_hostname] }}" || echo "IP_NOT_FOUND"
      register: ip_check

    - name: "⚠️ IP address check result"
      debug:
        msg: |
          Node: {{ inventory_hostname }}
          Expected IP: {{ tailscale_ips[inventory_hostname] }}
          Check result: {{ ip_check.stdout }}
          {% if 'IP_NOT_FOUND' in ip_check.stdout %}
          ❌ Warning: the IP address is not bound correctly!
          {% else %}
          ✅ IP address check passed
          {% endif %}

    - name: "🔥 Ensure the firewall ports are open"
      shell: |
        # Check for and open the Nomad ports
        if command -v ufw >/dev/null 2>&1; then
          ufw allow 4646/tcp  # HTTP API
          ufw allow 4647/tcp  # RPC
          ufw allow 4648/tcp  # Serf
        elif command -v firewall-cmd >/dev/null 2>&1; then
          firewall-cmd --permanent --add-port=4646/tcp
          firewall-cmd --permanent --add-port=4647/tcp
          firewall-cmd --permanent --add-port=4648/tcp
          firewall-cmd --reload
        fi
      ignore_errors: yes

    # ========== Phase 4: create the hardened configuration ==========
    - name: "⚙️ Phase 4: create the hardened configuration file"
      debug:
        msg: "Creating the hardened configuration file..."

    - name: "📝 Write the nuclear-grade Nomad configuration"
      copy:
        content: |
          # ☢️ Nuclear-grade Nomad configuration - {{ inventory_hostname }}
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          # Use the correct Tailscale IP
          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          # Logging
          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
          log_rotate_duration = "24h"
          log_rotate_max_files = 5

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            # More aggressive retry settings
            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}:4647",
                "{{ tailscale_ips.master }}:4647",
                "{{ tailscale_ips.ash3c }}:4647"
              ]
              retry_max = 10
              retry_interval = "15s"
            }

            # More lenient heartbeat settings
            heartbeat_grace = "30s"
            min_heartbeat_ttl = "10s"
            max_heartbeats_per_second = 50.0

            # Raft tuning
            raft_protocol = 3
            raft_multiplier = 1
          }

          client {
            enabled = true

            # Network interface
            network_interface = "tailscale0"

            # More lenient kill timeout
            max_kill_timeout = "30s"

            # Host volumes
            host_volume "docker-sock" {
              path = "/var/run/docker.sock"
              read_only = false
            }
          }

          # Address and port configuration
          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          # Docker plugin configuration
          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }

              # More lenient garbage collection
              gc {
                image = true
                image_delay = "10m"
                container = true
                dangling_containers {
                  enabled = true
                  dry_run = false
                  period = "5m"
                  creation_grace = "5m"
                }
              }
            }
          }

          # Telemetry
          telemetry {
            collection_interval = "10s"
            disable_hostname = false
            prometheus_metrics = true
            publish_allocation_metrics = true
            publish_node_metrics = true
          }
        dest: "/etc/nomad.d/nomad.hcl"
        owner: nomad
        group: nomad
        mode: '0640'

    # ========== Phase 5: create the hardened systemd service ==========
    - name: "🔧 Create the hardened systemd service file"
      copy:
        content: |
          [Unit]
          Description=Nomad - Nuclear Edition
          Documentation=https://www.nomadproject.io/
          Wants=network-online.target
          After=network-online.target
          ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl

          [Service]
          Type=notify
          User=nomad
          Group=nomad
          ExecStart=/usr/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=always
          RestartSec=10
          LimitNOFILE=65536

          # Stronger restart policy
          StartLimitInterval=0
          StartLimitBurst=10

          # Environment variables
          Environment=NOMAD_DISABLE_UPDATE_CHECK=1

          [Install]
          WantedBy=multi-user.target
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'

    - name: "🔄 Reload systemd"
      systemd:
        daemon_reload: yes

    # ========== Phase 6: start and verify ==========
    - name: "🚀 Phase 6: start the service"
      debug:
        msg: "Starting the Nomad service..."

    - name: "🔥 Enable and start the Nomad service"
      systemd:
        name: nomad
        enabled: yes
        state: started
        daemon_reload: yes

    - name: "⏰ Wait for the service to start"
      pause:
        seconds: 15

    - name: "🔍 Verify the service state"
      systemd:
        name: nomad
      register: nomad_service_status

    - name: "📊 Show the service state"
      debug:
        msg: |
          ☢️ Nuclear reset finished!
          Node: {{ inventory_hostname }}
          Service state: {{ nomad_service_status.status.ActiveState }}
          IP address: {{ tailscale_ips[inventory_hostname] }}

          {% if nomad_service_status.status.ActiveState == 'active' %}
          ✅ Service started successfully!
          {% else %}
          ❌ Service failed to start, check the logs!
          {% endif %}

    - name: "🧹 Clean up temporary files"
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - "/tmp/nomad_{{ nomad_version }}_linux_amd64.zip"
        - "/tmp/nomad"
      ignore_errors: yes

    - name: "🎉 Nuclear reset completion notice"
      debug:
        msg: |
          ☢️☢️☢️ Nuclear reset complete! ☢️☢️☢️

          Node {{ inventory_hostname }} has been completely destroyed and rebuilt!

          Next steps:
          1. Wait for all nodes to finish the reset
          2. Check cluster state: nomad server members
          3. Check node state: nomad node status
          4. If problems remain after this, it truly is beyond saving... 😅
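For reference, a minimal sketch of how a playbook like this would be invoked. The inventory path matches the one used elsewhere in this repo; the playbook filename is an assumption, since the diff does not show the deleted file's path:

# Hypothetical playbook filename; the real path is not shown in this diff.
ansible-playbook -i configuration/inventories/production/nomad-cluster.ini \
  configuration/playbooks/nuclear-nomad-reset.yml

# Because the play sets serial: 1, nodes are reset one at a time; --limit
# can target a single node first as a smoke test:
ansible-playbook -i configuration/inventories/production/nomad-cluster.ini \
  configuration/playbooks/nuclear-nomad-reset.yml --limit semaphore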
@@ -1,189 +0,0 @@
|
||||
---
|
||||
- name: Complete Nomad Cluster Fix with Ansible
|
||||
hosts: nomad_cluster
|
||||
become: yes
|
||||
gather_facts: yes
|
||||
vars:
|
||||
nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
|
||||
tailscale_ips:
|
||||
semaphore: "100.116.158.95"
|
||||
master: "100.117.106.136"
|
||||
ash3c: "100.116.80.94"
|
||||
|
||||
tasks:
|
||||
- name: Stop nomad service completely
|
||||
systemd:
|
||||
name: nomad
|
||||
state: stopped
|
||||
enabled: yes
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Kill any remaining nomad processes
|
||||
shell: pkill -f nomad || true
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Reset systemd failure state
|
||||
shell: systemctl reset-failed nomad
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Create nomad user if not exists
|
||||
user:
|
||||
name: nomad
|
||||
system: yes
|
||||
shell: /bin/false
|
||||
home: /opt/nomad
|
||||
create_home: no
|
||||
|
||||
- name: Create all required directories with correct permissions
|
||||
file:
|
||||
path: "{{ item }}"
|
||||
state: directory
|
||||
owner: nomad
|
||||
group: nomad
|
||||
mode: '0755'
|
||||
loop:
|
||||
- /opt/nomad
|
||||
- /opt/nomad/data
|
||||
- /opt/nomad/alloc_mounts
|
||||
- /var/log/nomad
|
||||
- /etc/nomad.d
|
||||
|
||||
- name: Completely clean nomad data directory
|
||||
shell: rm -rf /opt/nomad/data/* /opt/nomad/data/.*
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Create correct nomad configuration
|
||||
copy:
|
||||
content: |
|
||||
datacenter = "dc1"
|
||||
region = "global"
|
||||
data_dir = "/opt/nomad/data"
|
||||
|
||||
bind_addr = "{{ tailscale_ips[inventory_hostname] }}"
|
||||
|
||||
server {
|
||||
enabled = true
|
||||
bootstrap_expect = 3
|
||||
encrypt = "{{ nomad_encrypt_key }}"
|
||||
|
||||
server_join {
|
||||
retry_join = [
|
||||
"{{ tailscale_ips.semaphore }}:4647",
|
||||
"{{ tailscale_ips.master }}:4647",
|
||||
"{{ tailscale_ips.ash3c }}:4647"
|
||||
]
|
||||
retry_interval = "15s"
|
||||
retry_max = 3
|
||||
}
|
||||
}
|
||||
|
||||
client {
|
||||
enabled = true
|
||||
alloc_dir = "/opt/nomad/alloc_mounts"
|
||||
}
|
||||
|
||||
ui {
|
||||
enabled = true
|
||||
}
|
||||
|
||||
addresses {
|
||||
http = "0.0.0.0"
|
||||
rpc = "{{ tailscale_ips[inventory_hostname] }}"
|
||||
serf = "{{ tailscale_ips[inventory_hostname] }}"
|
||||
}
|
||||
|
||||
ports {
|
||||
http = 4646
|
||||
rpc = 4647
|
||||
serf = 4648
|
||||
}
|
||||
|
||||
plugin "docker" {
|
||||
config {
|
||||
allow_privileged = true
|
||||
volumes {
|
||||
enabled = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log_level = "INFO"
|
||||
log_file = "/var/log/nomad/nomad.log"
|
||||
log_rotate_duration = "24h"
|
||||
log_rotate_max_files = 5
|
||||
dest: /etc/nomad.d/nomad.hcl
|
||||
owner: nomad
|
||||
group: nomad
|
||||
mode: '0640'
|
||||
|
||||
- name: Set correct ownership for all nomad files
|
||||
file:
|
||||
path: "{{ item }}"
|
||||
owner: nomad
|
||||
group: nomad
|
||||
recurse: yes
|
||||
loop:
|
||||
- /opt/nomad
|
||||
- /var/log/nomad
|
||||
- /etc/nomad.d
|
||||
|
||||
- name: Validate nomad configuration
|
||||
shell: nomad config validate /etc/nomad.d/nomad.hcl
|
||||
register: config_validation
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Show config validation result
|
||||
debug:
|
||||
var: config_validation
|
||||
|
||||
- name: Start nomad service on first node (semaphore)
|
||||
systemd:
|
||||
name: nomad
|
||||
state: started
|
||||
daemon_reload: yes
|
||||
when: inventory_hostname == 'semaphore'
|
||||
|
||||
- name: Wait for first node to start
|
||||
pause:
|
||||
seconds: 30
|
||||
when: inventory_hostname == 'semaphore'
|
||||
|
||||
- name: Start nomad service on remaining nodes
|
||||
systemd:
|
||||
name: nomad
|
||||
state: started
|
||||
daemon_reload: yes
|
||||
when: inventory_hostname != 'semaphore'
|
||||
|
||||
- name: Wait for all services to start
|
||||
pause:
|
||||
seconds: 20
|
||||
|
||||
- name: Check nomad service status
|
||||
shell: systemctl status nomad --no-pager -l
|
||||
register: service_status
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Show service status
|
||||
debug:
|
||||
var: service_status.stdout_lines
|
||||
|
||||
- name: Check nomad logs for errors
|
||||
shell: journalctl -u nomad -n 10 --no-pager
|
||||
register: nomad_logs
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Show recent nomad logs
|
||||
debug:
|
||||
var: nomad_logs.stdout_lines
|
||||
|
||||
- name: Test nomad connectivity
|
||||
shell: nomad server members
|
||||
register: nomad_members
|
||||
ignore_errors: yes
|
||||
when: inventory_hostname == 'semaphore'
|
||||
|
||||
- name: Show cluster members
|
||||
debug:
|
||||
var: nomad_members.stdout_lines
|
||||
when: inventory_hostname == 'semaphore'
|
||||
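For a quick manual verification pass after a fix run like the one above, the same commands the playbook registers can be run by hand from any server node; the curl check assumes the semaphore address from the vars block:

# Confirm all three servers joined and one was elected leader
nomad server members

# Confirm every client node reports "ready"
nomad node status

# The same leader check over the HTTP API (addresses.http binds 0.0.0.0:4646 above)
curl -s http://100.116.158.95:4646/v1/status/leader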
@@ -1,151 +0,0 @@
---
- name: Complete Nomad Cluster Reset and Rebuild
  hosts: nomad_cluster
  become: yes
  serial: 1  # one node at a time
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    - name: Stop nomad service completely
      systemd:
        name: nomad
        state: stopped
      ignore_errors: yes

    - name: Kill any remaining nomad processes
      shell: pkill -f nomad || true
      ignore_errors: yes

    - name: Remove all nomad data and state
      shell: |
        find /opt/nomad/data -mindepth 1 -delete  # avoids "rm -rf .*" matching . and ..
        rm -rf /var/log/nomad/*
      ignore_errors: yes

    - name: Create fresh nomad configuration with correct Tailscale IPs
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          # Use the Tailscale IP addresses
          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}",
                "{{ tailscale_ips.master }}",
                "{{ tailscale_ips.ash3c }}"
              ]
            }
          }

          client {
            enabled = true
            network_interface = "tailscale0"
          }

          ui {  # "ui" is the Nomad stanza; "ui_config" is Consul's
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Ensure log directory exists
      file:
        path: /var/log/nomad
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for nomad to start
      wait_for:
        port: 4646
        host: "{{ tailscale_ips[inventory_hostname] }}"
        delay: 5
        timeout: 30

    - name: Check nomad service status
      shell: systemctl status nomad --no-pager -l
      register: nomad_status
      ignore_errors: yes

    - name: Display nomad status
      debug:
        var: nomad_status.stdout_lines

- name: Wait for cluster to form
  hosts: localhost
  gather_facts: no
  tasks:
    - name: Wait for cluster formation
      pause:
        seconds: 30
        prompt: "Waiting for the cluster to form..."

- name: Verify cluster status
  hosts: semaphore
  become: yes
  tasks:
    - name: Check cluster members
      shell: nomad server members
      register: cluster_members
      ignore_errors: yes

    - name: Display cluster members
      debug:
        var: cluster_members.stdout_lines

    - name: Check node status
      shell: nomad node status
      register: node_status
      ignore_errors: yes

    - name: Display node status
      debug:
        var: node_status.stdout_lines
@@ -1,233 +0,0 @@
#!/bin/bash

# Consul cluster management script
# Provides cluster status checks, restart, stop, and other operations

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
INVENTORY_FILE="$PROJECT_ROOT/configuration/inventories/production/consul-cluster.ini"

# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print colored messages
print_status() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

print_header() {
    echo -e "${BLUE}=== $1 ===${NC}"
}

# Check prerequisites
check_prerequisites() {
    if [[ ! -f "$INVENTORY_FILE" ]]; then
        print_error "Inventory file not found: $INVENTORY_FILE"
        exit 1
    fi

    if ! command -v ansible &> /dev/null; then
        print_error "ansible command not found"
        exit 1
    fi
}

# Show help
show_help() {
    echo "Consul cluster management script"
    echo
    echo "Usage: $0 [command]"
    echo
    echo "Commands:"
    echo "  status   - check cluster status"
    echo "  members  - show cluster members"
    echo "  leader   - show the cluster leader"
    echo "  restart  - restart the Consul service"
    echo "  stop     - stop the Consul service"
    echo "  start    - start the Consul service"
    echo "  logs     - view service logs"
    echo "  health   - health check"
    echo "  cleanup  - wipe Consul data (dangerous)"
    echo "  help     - show this help message"
    echo
}

# Check cluster status
check_status() {
    print_header "Consul service status"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "systemctl is-active consul" -o

    echo
    print_header "Consul process status"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "ps aux | grep consul | grep -v grep" -o
}

# Show cluster members
show_members() {
    print_header "Consul cluster members"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "consul members" -o
}

# Show the cluster leader
show_leader() {
    print_header "Consul cluster leader"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "consul operator raft list-peers" -o

    echo
    print_header "Leader via the HTTP API"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "curl -s http://localhost:8500/v1/status/leader" -o
}

# Restart the service
restart_service() {
    print_header "Restart the Consul service"
    print_warning "About to restart all Consul nodes..."
    read -p "Continue? (y/N): " confirm
    if [[ $confirm != "y" && $confirm != "Y" ]]; then
        print_status "Operation cancelled"
        return
    fi

    ansible -i "$INVENTORY_FILE" consul_cluster -m systemd -a "name=consul state=restarted" -b

    print_status "Waiting for the service to start..."
    sleep 10
    check_status
}

# Stop the service
stop_service() {
    print_header "Stop the Consul service"
    print_warning "About to stop all Consul nodes..."
    read -p "Continue? (y/N): " confirm
    if [[ $confirm != "y" && $confirm != "Y" ]]; then
        print_status "Operation cancelled"
        return
    fi

    ansible -i "$INVENTORY_FILE" consul_cluster -m systemd -a "name=consul state=stopped" -b
}

# Start the service
start_service() {
    print_header "Start the Consul service"
    ansible -i "$INVENTORY_FILE" consul_cluster -m systemd -a "name=consul state=started" -b

    print_status "Waiting for the service to start..."
    sleep 10
    check_status
}

# View logs
show_logs() {
    print_header "Consul service logs"
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "journalctl -u consul --no-pager -n 20" -o
}

# Health check
health_check() {
    print_header "Consul health check"

    # Service status
    print_status "Checking service status..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "systemctl is-active consul" -o

    echo
    # Listening ports
    print_status "Checking listening ports..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "ss -tlnp | grep :8500" -o

    echo
    # Cluster members
    print_status "Checking cluster members..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "consul members | wc -l" -o

    echo
    # API response
    print_status "Checking the API response..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "curl -s -o /dev/null -w '%{http_code}' http://localhost:8500/v1/status/leader" -o
}

# Wipe data (dangerous)
cleanup_data() {
    print_header "Wipe Consul data"
    print_error "Warning: this deletes ALL Consul data, including service registrations and the KV store!"
    print_error "This operation is irreversible!"
    echo
    read -p "Really wipe all data? Type 'YES' to confirm: " confirm
    if [[ $confirm != "YES" ]]; then
        print_status "Operation cancelled"
        return
    fi

    print_status "Stopping the Consul service..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m systemd -a "name=consul state=stopped" -b

    print_status "Cleaning the data directory..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m shell -a "rm -rf /opt/consul/data/*" -b

    print_status "Starting the Consul service..."
    ansible -i "$INVENTORY_FILE" consul_cluster -m systemd -a "name=consul state=started" -b

    print_status "Data cleanup complete"
}

# Main
main() {
    check_prerequisites

    case "${1:-help}" in
        status)
            check_status
            ;;
        members)
            show_members
            ;;
        leader)
            show_leader
            ;;
        restart)
            restart_service
            ;;
        stop)
            stop_service
            ;;
        start)
            start_service
            ;;
        logs)
            show_logs
            ;;
        health)
            health_check
            ;;
        cleanup)
            cleanup_data
            ;;
        help|--help|-h)
            show_help
            ;;
        *)
            print_error "Unknown command: $1"
            echo
            show_help
            exit 1
            ;;
    esac
}

main "$@"
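A usage sketch for the management script above; the filename is an assumption, since the diff header for this deleted file is not shown:

# Hypothetical filename for the script above
./consul-manager.sh health    # one-shot health overview of the cluster
./consul-manager.sh leader    # check Raft leadership before maintenance

# Rolling maintenance: stop, start, then confirm status
./consul-manager.sh stop
./consul-manager.sh start
./consul-manager.sh status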
@@ -1,228 +0,0 @@
#!/bin/bash

# Consul secrets management script
# Safely manages sensitive configuration for Oracle Cloud and other cloud providers

set -euo pipefail

# Configuration
CONSUL_ADDR="${CONSUL_ADDR:-http://localhost:8500}"
CONSUL_TOKEN="${CONSUL_TOKEN:-}"
ENVIRONMENT="${ENVIRONMENT:-dev}"

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check the Consul connection
check_consul() {
    log_info "Checking the Consul connection..."
    if ! curl -s "${CONSUL_ADDR}/v1/status/leader" > /dev/null; then
        log_error "Cannot reach Consul at ${CONSUL_ADDR}"
        exit 1
    fi
    log_success "Consul connection OK"
}

# Store the Oracle Cloud configuration
set_oracle_config() {
    log_info "Storing the Oracle Cloud configuration..."

    echo "Enter the Oracle Cloud configuration:"

    read -p "Tenancy OCID: " tenancy_ocid
    read -p "User OCID: " user_ocid
    read -p "API key fingerprint: " fingerprint
    read -p "Private key file path: " private_key_path
    read -p "Compartment OCID: " compartment_ocid

    # Verify that the private key file exists
    if [[ ! -f "$private_key_path" ]]; then
        log_error "Private key file not found: $private_key_path"
        exit 1
    fi

    # Read the private key
    private_key_content=$(cat "$private_key_path")

    # Store in Consul
    local base_path="config/${ENVIRONMENT}/oracle"

    curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/tenancy_ocid" -d "$tenancy_ocid" > /dev/null
    curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/user_ocid" -d "$user_ocid" > /dev/null
    curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/fingerprint" -d "$fingerprint" > /dev/null
    curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/private_key" -d "$private_key_content" > /dev/null
    curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/compartment_ocid" -d "$compartment_ocid" > /dev/null

    log_success "Oracle Cloud configuration stored in Consul"
}

# Fetch the Oracle Cloud configuration
get_oracle_config() {
    log_info "Fetching the Oracle Cloud configuration from Consul..."

    local base_path="config/${ENVIRONMENT}/oracle"

    echo "Oracle Cloud configuration:"
    echo "Tenancy OCID: $(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/tenancy_ocid?raw" 2>/dev/null || echo "not set")"
    echo "User OCID: $(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/user_ocid?raw" 2>/dev/null || echo "not set")"
    echo "Fingerprint: $(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/fingerprint?raw" 2>/dev/null || echo "not set")"
    echo "Compartment OCID: $(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/compartment_ocid?raw" 2>/dev/null || echo "not set")"
    echo "Private key: $(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/private_key?raw" 2>/dev/null | head -1 || echo "not set")"
}

# Delete the Oracle Cloud configuration
delete_oracle_config() {
    log_warning "Deleting the Oracle Cloud configuration..."

    read -p "Really delete all Oracle Cloud configuration? (y/N): " confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        log_info "Operation cancelled"
        return
    fi

    local base_path="config/${ENVIRONMENT}/oracle"

    curl -s -X DELETE "${CONSUL_ADDR}/v1/kv/${base_path}?recurse" > /dev/null

    log_success "Oracle Cloud configuration deleted"
}

# Generate a Terraform variables file
generate_terraform_vars() {
    log_info "Generating the Terraform variables file..."

    local base_path="config/${ENVIRONMENT}/oracle"
    local output_file="infrastructure/environments/${ENVIRONMENT}/terraform.tfvars.consul"

    # Fetch the configuration from Consul
    local tenancy_ocid=$(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/tenancy_ocid?raw" 2>/dev/null || echo "")
    local user_ocid=$(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/user_ocid?raw" 2>/dev/null || echo "")
    local fingerprint=$(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/fingerprint?raw" 2>/dev/null || echo "")
    local compartment_ocid=$(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/compartment_ocid?raw" 2>/dev/null || echo "")

    if [[ -z "$tenancy_ocid" ]]; then
        log_error "No Oracle Cloud configuration found in Consul"
        exit 1
    fi

    # Write the private key to a temporary file
    local temp_key_file="/tmp/oci_private_key_${ENVIRONMENT}.pem"
    curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/private_key?raw" > "$temp_key_file"
    chmod 600 "$temp_key_file"

    # Generate the Terraform variables file
    cat > "$output_file" << EOF
# Oracle Cloud configuration generated from Consul
# Generated: $(date)
# Environment: ${ENVIRONMENT}

oci_config = {
  tenancy_ocid     = "$tenancy_ocid"
  user_ocid        = "$user_ocid"
  fingerprint      = "$fingerprint"
  private_key_path = "$temp_key_file"
  region           = "ap-seoul-1"
  compartment_ocid = "$compartment_ocid"
}
EOF

    log_success "Terraform variables file generated: $output_file"
    log_warning "Private key file location: $temp_key_file"
    log_warning "Delete the temporary private key file when you are done"
}

# Clean up temporary files
cleanup_temp_files() {
    log_info "Cleaning up temporary files..."

    rm -f /tmp/oci_private_key_*.pem
    rm -f infrastructure/environments/*/terraform.tfvars.consul

    log_success "Temporary files removed"
}

# Show help
show_help() {
    cat << EOF
Consul secrets management script

Usage: $0 [option]

Options:
  set-oracle     store the Oracle Cloud configuration in Consul
  get-oracle     fetch the Oracle Cloud configuration from Consul
  delete-oracle  delete the Oracle Cloud configuration from Consul
  generate-vars  generate a Terraform variables file from Consul
  cleanup        clean up temporary files
  help           show this help message

Environment variables:
  CONSUL_ADDR    Consul address (default: http://localhost:8500)
  CONSUL_TOKEN   Consul ACL token (optional)
  ENVIRONMENT    environment name (default: dev)

Examples:
  # Store the Oracle Cloud configuration
  $0 set-oracle

  # Generate the Terraform variables file
  $0 generate-vars

  # View the configuration
  $0 get-oracle

  # Clean up temporary files
  $0 cleanup
EOF
}

# Main
main() {
    case "${1:-help}" in
        "set-oracle")
            check_consul
            set_oracle_config
            ;;
        "get-oracle")
            check_consul
            get_oracle_config
            ;;
        "delete-oracle")
            check_consul
            delete_oracle_config
            ;;
        "generate-vars")
            check_consul
            generate_terraform_vars
            ;;
        "cleanup")
            cleanup_temp_files
            ;;
        "help"|*)
            show_help
            ;;
    esac
}

main "$@"
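Example invocations under the script's own defaults (Consul on localhost:8500, ENVIRONMENT=dev); the filename is an assumption, since the deleted file's path is not shown in this diff:

# Hypothetical filename for the script above
ENVIRONMENT=prod ./consul-secrets.sh set-oracle       # store prod credentials instead of dev
ENVIRONMENT=prod ./consul-secrets.sh generate-vars    # render terraform.tfvars.consul + temp key file
./consul-secrets.sh cleanup                           # remove the temp key and generated tfvars when done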
@@ -1,115 +0,0 @@
---
- name: Correct Nomad Cluster Configuration
  hosts: nomad_cluster
  become: yes
  gather_facts: yes
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    - name: Stop nomad service
      systemd:
        name: nomad
        state: stopped
      ignore_errors: yes

    - name: Clean nomad data
      file:
        path: /opt/nomad/data
        state: absent

    - name: Recreate nomad data directory
      file:
        path: /opt/nomad/data
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Create correct nomad configuration
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}:4647",
                "{{ tailscale_ips.master }}:4647",
                "{{ tailscale_ips.ash3c }}:4647"
              ]
              retry_interval = "15s"
              retry_max = 3
            }
          }

          client {
            enabled = true
            alloc_dir = "/opt/nomad/alloc_mounts"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

- name: Start nomad services in sequence
  hosts: nomad_cluster
  become: yes
  serial: 1
  vars:
    tailscale_ips:  # redeclared: play vars do not carry across plays
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"
  tasks:
    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        daemon_reload: yes

    - name: Wait for nomad to start
      wait_for:
        port: 4646
        host: "{{ tailscale_ips[inventory_hostname] }}"
        delay: 10
        timeout: 60

    - name: Wait between nodes
      pause:
        seconds: 30
@@ -1,113 +0,0 @@
---
- name: Deploy Nomad Configurations
  hosts: nomad_cluster
  become: yes
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    node_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    - name: Create nomad configuration for each node
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "{{ node_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ node_ips.semaphore }}:4647",
                "{{ node_ips.master }}:4647",
                "{{ node_ips.ash3c }}:4647"
              ]
              retry_interval = "15s"
              retry_max = 3
            }
          }

          client {
            enabled = true
            alloc_dir = "/opt/nomad/alloc_mounts"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ node_ips[inventory_hostname] }}"
            serf = "{{ node_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Validate nomad configuration
      shell: nomad config validate /etc/nomad.d/nomad.hcl
      register: config_validation

    - name: Show validation result
      debug:
        var: config_validation.stdout_lines

    - name: Start nomad service on bootstrap node first
      systemd:
        name: nomad
        state: started
        daemon_reload: yes
      when: inventory_hostname == 'semaphore'

    - name: Wait for bootstrap node
      pause:
        seconds: 15
      when: inventory_hostname == 'semaphore'

    - name: Start nomad service on other nodes
      systemd:
        name: nomad
        state: started
        daemon_reload: yes
      when: inventory_hostname != 'semaphore'

    - name: Wait for services to start
      pause:
        seconds: 10

    - name: Check service status
      shell: systemctl status nomad --no-pager
      register: service_status
      ignore_errors: yes

    - name: Show service status
      debug:
        var: service_status.stdout_lines
scripts/utilities/disk-monitor.sh (new executable file, 33 lines)
@@ -0,0 +1,33 @@
#!/bin/bash

# Disk monitoring script
# Usage: ./disk-monitor.sh [threshold]

THRESHOLD=${1:-85}  # default threshold: 85%
INVENTORY_FILE="configuration/inventories/production/nomad-cluster.ini"

echo "🔍 Starting disk space monitoring (threshold: ${THRESHOLD}%)"
echo "=================================="

# Run the disk analysis
echo "📊 Running disk analysis..."
ansible-playbook -i "$INVENTORY_FILE" configuration/playbooks/disk-analysis-ncdu.yml

echo ""
echo "⚠️  Checking for nodes with high disk usage..."

# Check disk usage on every node
ansible all -i "$INVENTORY_FILE" -m shell -a "df -h | awk 'NR>1 {gsub(/%/, \"\", \$5); if(\$5 > $THRESHOLD) print \$0}'" | while read line; do
    if [[ $line == *"=>"* ]]; then
        echo "🚨 Node: $line"
    elif [[ $line =~ ^/dev ]]; then
        echo "   High-usage disk: $line"
    fi
done

echo ""
echo "💡 To clean up, run:"
echo "   ansible-playbook -i $INVENTORY_FILE configuration/playbooks/disk-cleanup.yml"
echo ""
echo "📁 Detailed reports: /tmp/disk-analysis/"
echo "   Use ncdu -f /tmp/disk-analysis/ncdu-root-<hostname>.json to inspect them"
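Typical runs of the new script, from the repository root so the relative INVENTORY_FILE path resolves:

./scripts/utilities/disk-monitor.sh       # default 85% threshold
./scripts/utilities/disk-monitor.sh 70    # stricter sweep: flag anything above 70%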
@@ -1,190 +0,0 @@
---
- name: Final Complete Nomad Cluster Fix
  hosts: nomad_cluster
  become: yes
  gather_facts: yes
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    nomad_servers:
      - "100.116.158.95:4647"   # semaphore
      - "100.117.106.136:4647"  # master
      - "100.116.80.94:4647"    # ash3c

  tasks:
    - name: Stop nomad service
      systemd:
        name: nomad
        state: stopped
      ignore_errors: yes

    - name: Reset failed nomad service
      systemd:
        name: nomad
        daemon_reload: yes
      ignore_errors: yes

    - name: Create nomad user if not exists
      user:
        name: nomad
        system: yes
        shell: /bin/false
        home: /opt/nomad
        create_home: no

    - name: Create nomad directories with correct permissions
      file:
        path: "{{ item }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      loop:
        - /etc/nomad.d
        - /opt/nomad
        - /opt/nomad/data
        - /opt/nomad/alloc_mounts
        - /var/log/nomad

    - name: Clean old nomad data
      file:
        path: /opt/nomad/data
        state: absent

    - name: Recreate nomad data directory
      file:
        path: /opt/nomad/data
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'

    - name: Get Tailscale IP address
      shell: ip addr show tailscale0 | grep 'inet ' | awk '{print $2}' | cut -d'/' -f1
      register: tailscale_ip
      failed_when: false

    - name: Set bind address (fallback to default interface if tailscale not available)
      set_fact:
        bind_address: "{{ tailscale_ip.stdout if tailscale_ip.stdout != '' else ansible_default_ipv4.address }}"

    # NOTE: this template is only written to /tmp/nomad-server.hcl.j2 by the
    # localhost play at the bottom of this file, which runs AFTER this play,
    # so the lookup fails on a fresh run; the template play needs to come first.
    - name: Generate nomad configuration
      template:
        src: nomad-server.hcl.j2
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'
      vars:
        nomad_datacenter: "dc1"
        nomad_region: "global"
        nomad_data_dir: "/opt/nomad/data"
        nomad_bind_addr: "{{ bind_address }}"
        nomad_bootstrap_expect: 3
        nomad_encrypt: "{{ nomad_encrypt_key }}"
        nomad_retry_join: "{{ nomad_servers }}"
        nomad_alloc_dir: "/opt/nomad/alloc_mounts"
        nomad_log_file: "/var/log/nomad/nomad.log"

    - name: Create nomad systemd service
      copy:
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl

          [Service]
          Type=notify
          User=nomad
          Group=nomad
          ExecStart=/usr/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/nomad.service
        mode: '0644'

    - name: Reload systemd daemon
      systemd:
        daemon_reload: yes

    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for nomad to start
      wait_for:
        port: 4646
        host: "{{ bind_address }}"
        delay: 5
        timeout: 30
      ignore_errors: yes

- name: Create nomad configuration template
  hosts: localhost
  gather_facts: no
  tasks:
    - name: Create nomad server template
      # NOTE: copy's "content" is itself rendered by Jinja, so these {{ ... }}
      # references need {% raw %} guards (or a src file) to survive into the .j2.
      copy:
        content: |
          datacenter = "{{ nomad_datacenter }}"
          region = "{{ nomad_region }}"
          data_dir = "{{ nomad_data_dir }}"

          bind_addr = "{{ nomad_bind_addr }}"

          server {
            enabled = true
            bootstrap_expect = {{ nomad_bootstrap_expect }}
            encrypt = "{{ nomad_encrypt }}"

            server_join {
              retry_join = {{ nomad_retry_join | to_json }}
              retry_interval = "15s"
              retry_max = 3
            }
          }

          client {
            enabled = true
            alloc_dir = "{{ nomad_alloc_dir }}"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ nomad_bind_addr }}"
            serf = "{{ nomad_bind_addr }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "{{ nomad_log_file }}"
        dest: /tmp/nomad-server.hcl.j2
      delegate_to: localhost
      run_once: true
@@ -1,111 +0,0 @@
---
- name: Final Nomad Cluster Fix
  hosts: nomad_cluster
  become: yes
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="
    tailscale_ips:
      semaphore: "100.116.158.95"
      master: "100.117.106.136"
      ash3c: "100.116.80.94"

  tasks:
    - name: Stop nomad service
      systemd:
        name: nomad
        state: stopped
      ignore_errors: yes

    - name: Create required directories
      file:
        path: "{{ item }}"
        state: directory
        owner: nomad
        group: nomad
        mode: '0755'
      loop:
        - /opt/nomad/data
        - /opt/nomad/alloc_mounts
        - /var/log/nomad

    - name: Clean nomad data
      shell: rm -rf /opt/nomad/data/*
      ignore_errors: yes

    - name: Create working nomad configuration
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "{{ tailscale_ips[inventory_hostname] }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = [
                "{{ tailscale_ips.semaphore }}",
                "{{ tailscale_ips.master }}",
                "{{ tailscale_ips.ash3c }}"
              ]
            }
          }

          client {
            enabled = true
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "{{ tailscale_ips[inventory_hostname] }}"
            serf = "{{ tailscale_ips[inventory_hostname] }}"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }

          log_level = "INFO"
          log_file = "/var/log/nomad/nomad.log"
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for service to start
      pause:
        seconds: 10

    - name: Check service status
      shell: systemctl status nomad --no-pager -l
      register: service_status
      ignore_errors: yes

    - name: Show service status
      debug:
        var: service_status.stdout_lines
@@ -1,137 +0,0 @@
#!/bin/bash

# 🔧 ash3c IP address fix script

set -e

echo "🔧 ash3c IP address fix script"
echo ""

# The correct IP address
CORRECT_IP="100.116.80.94"
ASH3C_HOST="100.116.80.94"

echo "📡 Checking the network configuration on the ash3c node..."

# Check the actual IP configuration on ash3c
# NOTE: piping a hardcoded password into sudo -S is acceptable for a
# throwaway lab script, but do not reuse this pattern anywhere shared.
echo "🔍 Checking the IP address bindings on ash3c..."
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_HOST} "echo '3131' | sudo -S ip addr show" | grep -E "inet.*100\." || echo "❌ Tailscale IP not found"

echo ""
echo "🔍 Checking Tailscale status..."
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_HOST} "echo '3131' | sudo -S tailscale status" || echo "❌ Tailscale status check failed"

echo ""
echo "🔧 Fixing the Nomad configuration for ash3c..."

# Write the corrected configuration file
cat > /tmp/ash3c-nomad.hcl << EOF
# 🔧 Fixed Nomad configuration for ash3c
datacenter = "dc1"
region = "global"
data_dir = "/opt/nomad/data"

# Force the correct Tailscale IP
bind_addr = "${CORRECT_IP}"

# Logging
log_level = "INFO"
log_file = "/var/log/nomad/nomad.log"

server {
  enabled = true
  bootstrap_expect = 3
  encrypt = "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="

  server_join {
    retry_join = [
      "100.116.158.95:4647",
      "100.117.106.136:4647",
      "100.116.80.94:4647"
    ]
    retry_max = 10
    retry_interval = "15s"
  }

  # More lenient heartbeat settings
  heartbeat_grace = "30s"
  min_heartbeat_ttl = "10s"
}

client {
  enabled = true
  network_interface = "tailscale0"
}

ui {  # "ui" is the Nomad stanza; "ui_config" is Consul's
  enabled = true
}

addresses {
  http = "0.0.0.0"
  rpc = "${CORRECT_IP}"
  serf = "${CORRECT_IP}"
}

ports {
  http = 4646
  rpc = 4647
  serf = 4648
}

plugin "docker" {
  config {
    allow_privileged = true
    volumes {
      enabled = true
    }
  }
}
EOF

echo "📤 Uploading the fixed configuration to ash3c..."
scp -P 22 -i ~/.ssh/id_ed25519 /tmp/ash3c-nomad.hcl ben@${ASH3C_HOST}:/tmp/

echo "🔧 Applying the fix on ash3c..."
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_HOST} << 'REMOTE_SCRIPT'
echo '3131' | sudo -S systemctl stop nomad || true
echo '3131' | sudo -S pkill -f nomad || true
sleep 5

# Back up the old configuration
echo '3131' | sudo -S cp /etc/nomad.d/nomad.hcl /etc/nomad.d/nomad.hcl.backup.$(date +%Y%m%d_%H%M%S) || true

# Apply the new configuration
echo '3131' | sudo -S cp /tmp/ash3c-nomad.hcl /etc/nomad.d/nomad.hcl
echo '3131' | sudo -S chown nomad:nomad /etc/nomad.d/nomad.hcl
echo '3131' | sudo -S chmod 640 /etc/nomad.d/nomad.hcl

# Clean the data directory
echo '3131' | sudo -S rm -rf /opt/nomad/data/*

# Restart the service
echo '3131' | sudo -S systemctl daemon-reload
echo '3131' | sudo -S systemctl enable nomad
echo '3131' | sudo -S systemctl start nomad

echo "✅ ash3c configuration fix applied"
REMOTE_SCRIPT

echo ""
echo "⏰ Waiting for the ash3c service to start..."
sleep 15

echo ""
echo "🔍 Checking the ash3c service status..."
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_HOST} "echo '3131' | sudo -S systemctl status nomad --no-pager" || echo "❌ Service status check failed"

echo ""
echo "🧹 Cleaning up temporary files..."
rm -f /tmp/ash3c-nomad.hcl

echo ""
echo "✅ ash3c IP fix complete!"
echo ""
echo "Next steps:"
echo "1. Check cluster state: nomad server members"
echo "2. If problems remain, run the nuclear reset: ./scripts/utilities/nuclear-reset.sh"
@@ -1,151 +0,0 @@
#!/bin/bash

# Consul cluster fix script
# Resolves the "No cluster leader" problem

set -e

echo "=== Consul cluster fix script ==="
echo "Current time: $(date)"
echo

# Check the current Consul service state
echo "1. Checking the current Consul service state..."
docker service ls | grep consul || echo "no consul services found"
echo

# Show the current problem
echo "2. Checking the Consul logs for errors..."
echo "Master node logs:"
docker service logs consul-cluster_consul-master --tail 5 2>/dev/null || echo "could not fetch master logs"
echo
echo "Ash3c node logs:"
docker service logs consul-cluster_consul-ash3c --tail 5 2>/dev/null || echo "could not fetch ash3c logs"
echo

# Offer the fix options
echo "3. Fix options:"
echo "  a) use the fixed overlay network configuration (recommended)"
echo "  b) use the macvlan network configuration"
echo "  c) just restart the existing services"
echo

read -p "Choose a fix (a/b/c): " choice

case $choice in
  a)
    echo "Using the fixed overlay network configuration..."

    # Stop the existing services
    echo "Stopping the existing Consul cluster..."
    docker stack rm consul-cluster 2>/dev/null || echo "consul-cluster stack does not exist"

    # Wait for the services to stop completely
    echo "Waiting for the services to stop completely..."
    sleep 10

    # Optionally clean the data volumes
    read -p "Clean the existing data volumes? (y/n): " clean_volumes
    if [[ $clean_volumes == "y" ]]; then
        docker volume rm consul-cluster_consul_master_data 2>/dev/null || true
        docker volume rm consul-cluster_consul_ash3c_data 2>/dev/null || true
        echo "Data volumes removed"
    fi

    # Deploy the fixed configuration
    echo "Deploying the fixed Consul cluster..."
    docker stack deploy -c /root/mgmt/swarm/stacks/consul-cluster-fixed.yml consul-cluster

    echo "Waiting for the services to start..."
    sleep 15

    # Check the service state
    echo "Checking the new service state..."
    docker service ls | grep consul
    ;;

  b)
    echo "Using the macvlan network configuration..."
    echo "Note: adjust the IP addresses and network interface for your environment"

    # Show the network interfaces
    echo "Current network interfaces:"
    ip link show | grep -E "^[0-9]+:" | awk '{print $2}' | sed 's/://'
    echo

    read -p "Network interface to use (e.g. eth0): " interface
    read -p "Subnet (e.g. 192.168.1.0/24): " subnet
    read -p "Gateway (e.g. 192.168.1.1): " gateway

    # Update the macvlan stack file ("|" delimiter so the "/" in $subnet doesn't break sed)
    sed -i "s/parent: eth0/parent: $interface/" /root/mgmt/swarm/stacks/consul-cluster-macvlan.yml
    sed -i "s|192.168.1.0/24|$subnet|" /root/mgmt/swarm/stacks/consul-cluster-macvlan.yml
    sed -i "s/192.168.1.1/$gateway/" /root/mgmt/swarm/stacks/consul-cluster-macvlan.yml

    # Stop the existing services
    echo "Stopping the existing Consul cluster..."
    docker stack rm consul-cluster 2>/dev/null || echo "consul-cluster stack does not exist"

    # Wait for the services to stop completely
    echo "Waiting for the services to stop completely..."
    sleep 10

    # Deploy the macvlan configuration
    echo "Deploying the macvlan Consul cluster..."
    docker stack deploy -c /root/mgmt/swarm/stacks/consul-cluster-macvlan.yml consul-cluster

    echo "Waiting for the services to start..."
    sleep 15

    # Check the service state
    echo "Checking the new service state..."
    docker service ls | grep consul
    ;;

  c)
    echo "Restarting the existing services..."

    # Restart the services
    docker service update --force consul-cluster_consul-master
    docker service update --force consul-cluster_consul-ash3c

    echo "Waiting for the services to restart..."
    sleep 10

    # Check the service state
    echo "Checking the service state..."
    docker service ls | grep consul
    ;;

  *)
    echo "Invalid choice, exiting"
    exit 1
    ;;
esac

echo
echo "4. Verifying the fix..."
sleep 5

# Check the service state
echo "Service state:"
docker service ls | grep consul

echo
echo "Waiting 30 seconds before checking cluster state..."
sleep 30

# Try to check the cluster members
echo "Trying to check cluster member state..."
timeout 10 docker service logs consul-cluster_consul-master --tail 10 2>/dev/null || echo "could not fetch logs"

echo
echo "=== Fix complete ==="
echo "Give the cluster a few minutes to fully start, then visit:"
echo "- Master UI: http://your-master-ip:8500"
echo "- Ash3c UI: http://your-ash3c-ip:8501"
echo
echo "If the problem persists, check:"
echo "1. Network connectivity between the nodes"
echo "2. Firewall rules"
echo "3. The Docker Swarm network configuration"
@@ -1,92 +0,0 @@
---
- name: Fix Nomad Cluster Issues
  hosts: nomad_cluster
  become: yes
  vars:
    nomad_encrypt_key: "NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="

  tasks:
    - name: Stop nomad service
      systemd:
        name: nomad
        state: stopped
      ignore_errors: yes

    - name: Clean nomad data directory
      shell: rm -rf /opt/nomad/data/*
      ignore_errors: yes

    - name: Create correct nomad configuration
      copy:
        content: |
          datacenter = "dc1"
          region = "global"
          data_dir = "/opt/nomad/data"

          bind_addr = "{{ ansible_host | default(hostvars[inventory_hostname]['ansible_default_ipv4']['address']) }}"

          server {
            enabled = true
            bootstrap_expect = 3
            encrypt = "{{ nomad_encrypt_key }}"

            server_join {
              retry_join = ["100.116.158.95", "100.117.106.136", "100.116.80.94"]
            }
          }

          client {
            enabled = true
            network_interface = "{{ ansible_default_ipv4.interface | default('eth0') }}"
          }

          ui {
            enabled = true
          }

          addresses {
            http = "0.0.0.0"
            rpc = "0.0.0.0"
            serf = "0.0.0.0"
          }

          ports {
            http = 4646
            rpc = 4647
            serf = 4648
          }

          plugin "docker" {
            config {
              allow_privileged = true
              volumes {
                enabled = true
              }
            }
          }
        dest: /etc/nomad.d/nomad.hcl
        owner: nomad
        group: nomad
        mode: '0640'

    - name: Start nomad service
      systemd:
        name: nomad
        state: started
        enabled: yes

    - name: Wait for nomad to start
      wait_for:
        port: 4646
        host: "{{ ansible_host | default(hostvars[inventory_hostname]['ansible_default_ipv4']['address']) }}"
        delay: 10
        timeout: 60

    - name: Check nomad status
      shell: systemctl status nomad --no-pager -l
      register: nomad_status
      ignore_errors: yes

    - name: Display nomad status
      debug:
        var: nomad_status.stdout_lines
@@ -1,242 +0,0 @@
#!/bin/bash
# Gitea repository management script

set -e

# Configuration
GITEA_HOST="gitea"
GITEA_USER="ben"
GITEA_HTTP_URL="http://${GITEA_HOST}:3000"
GITEA_SSH_URL="git@${GITEA_HOST}"
REPO_NAME="mgmt"

# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print a colored message
print_message() {
    local color=$1
    local message=$2
    echo -e "${color}${message}${NC}"
}

# Check the SSH connection
check_ssh_connection() {
    print_message $BLUE "🔍 Checking Gitea SSH connection..."

    if ssh -o ConnectTimeout=5 -o BatchMode=yes "${GITEA_SSH_URL}" 2>&1 | grep -q "successfully authenticated"; then
        print_message $GREEN "✅ SSH connection OK"
        return 0
    else
        print_message $RED "❌ SSH connection failed"
        return 1
    fi
}

# Check repository status
check_repo_status() {
    print_message $BLUE "📊 Checking repository status..."

    if [ -d ".git" ]; then
        print_message $GREEN "✅ Git repository initialized"

        if git remote get-url origin >/dev/null 2>&1; then
            local origin_url=$(git remote get-url origin)
            print_message $GREEN "✅ Remote repository: $origin_url"
        else
            print_message $YELLOW "⚠️ No remote repository configured"
        fi

        local branch=$(git branch --show-current)
        print_message $BLUE "📍 Current branch: $branch"

        local status=$(git status --porcelain)
        if [ -z "$status" ]; then
            print_message $GREEN "✅ Working tree clean"
        else
            print_message $YELLOW "⚠️ Uncommitted changes present"
        fi
    else
        print_message $RED "❌ Not a Git repository"
    fi
}

# Initialize the repository
init_repo() {
    print_message $BLUE "📦 Initializing Git repository..."

    if [ ! -d ".git" ]; then
        git init
        git config user.name "${GITEA_USER}"
        git config user.email "${GITEA_USER}@example.com"
        print_message $GREEN "✅ Git repository initialized"
    fi

    # Configure the remote
    if ! git remote get-url origin >/dev/null 2>&1; then
        git remote add origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git"
        print_message $GREEN "✅ Remote repository configured"
    fi
}

# Sync code
sync_code() {
    print_message $BLUE "🔄 Syncing code..."

    # Check for uncommitted changes
    if ! git diff --quiet || ! git diff --staged --quiet; then
        print_message $YELLOW "⚠️ Uncommitted changes found"
        git status --short

        read -p "Commit these changes? (y/N): " -n 1 -r
        echo
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            git add .
            read -p "Enter a commit message: " commit_message
            git commit -m "$commit_message"
            print_message $GREEN "✅ Changes committed"
        else
            print_message $YELLOW "⚠️ Commit skipped"
            return 1
        fi
    fi

    # Push to the remote
    if git push origin main; then
        print_message $GREEN "✅ Push succeeded"
    else
        print_message $RED "❌ Push failed"
        return 1
    fi
}

# Pull the latest code
pull_code() {
    print_message $BLUE "⬇️ Pulling latest code..."

    if git pull origin main; then
        print_message $GREEN "✅ Pull succeeded"
    else
        print_message $RED "❌ Pull failed"
        return 1
    fi
}

# Show commit history
show_history() {
    print_message $BLUE "📜 Commit history:"
    git log --oneline --graph --decorate -10
}

# Show branch status
show_branches() {
    print_message $BLUE "🌿 Branches:"
    git branch -a
}

# Create a new branch
create_branch() {
    local branch_name=$1
    if [ -z "$branch_name" ]; then
        read -p "Enter a branch name: " branch_name
    fi

    if [ -n "$branch_name" ]; then
        git checkout -b "$branch_name"
        print_message $GREEN "✅ Branch '$branch_name' created"
    else
        print_message $RED "❌ Branch name must not be empty"
    fi
}

# Switch branches
switch_branch() {
    local branch_name=$1
    if [ -z "$branch_name" ]; then
        print_message $BLUE "Available branches:"
        git branch -a
        read -p "Enter the branch to switch to: " branch_name
    fi

    if [ -n "$branch_name" ]; then
        git checkout "$branch_name"
        print_message $GREEN "✅ Switched to branch '$branch_name'"
    else
        print_message $RED "❌ Branch name must not be empty"
    fi
}

# Show help
show_help() {
    echo "Gitea repository management script"
    echo ""
    echo "Usage: $0 [command]"
    echo ""
    echo "Commands:"
    echo "  check                 Check connection and repository status"
    echo "  init                  Initialize the repository"
    echo "  sync                  Sync code to the remote repository"
    echo "  pull                  Pull the latest code"
    echo "  history               Show commit history"
    echo "  branches              Show branch status"
    echo "  create-branch [name]  Create a new branch"
    echo "  switch-branch [name]  Switch branches"
    echo "  status                Show repository status"
    echo "  help                  Show this help"
    echo ""
    echo "Examples:"
    echo "  $0 check                     # Check status"
    echo "  $0 sync                      # Sync code"
    echo "  $0 create-branch feature-x   # Create a feature branch"
}

# Main entry point
main() {
    local command=${1:-help}

    case $command in
        check)
            check_ssh_connection
            check_repo_status
            ;;
        init)
            init_repo
            ;;
        sync)
            sync_code
            ;;
        pull)
            pull_code
            ;;
        history)
            show_history
            ;;
        branches)
            show_branches
            ;;
        create-branch)
            create_branch "$2"
            ;;
        switch-branch)
            switch_branch "$2"
            ;;
        status)
            check_repo_status
            ;;
        help|--help|-h)
            show_help
            ;;
        *)
            print_message $RED "❌ Unknown command: $command"
            show_help
            exit 1
            ;;
    esac
}

# Run main
main "$@"
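The check subcommand assumes the "gitea" host alias resolves and that key-based auth is already in place; a quick manual test of that assumption (the ~/.ssh/config entry sketched here is hypothetical):

# Hypothetical alias backing GITEA_SSH_URL, e.g. in ~/.ssh/config:
#   Host gitea
#       HostName <your-gitea-host>
#       User git
ssh -o ConnectTimeout=5 -o BatchMode=yes git@gitea 2>&1 | head -n 1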
227
scripts/utilities/nomad-cluster-manager.sh
Executable file
@@ -0,0 +1,227 @@
#!/bin/bash

# 🚀 Nomad cluster management script
# Nomad Cluster Management Script

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Logging helpers
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

log_header() {
    echo -e "${PURPLE}=== $1 ===${NC}"
}

# Show cluster status
show_cluster_status() {
    log_header "Nomad cluster status overview"

    # Check the leader
    echo -e "${CYAN}Leader status:${NC}"
    LEADER=$(curl -s http://localhost:4646/v1/status/leader 2>/dev/null || echo "unreachable")
    if [[ "$LEADER" =~ ^\".*\"$ ]]; then
        echo "  ✅ Leader: $(echo $LEADER | tr -d '\"')"
    else
        echo "  ❌ No leader, or connection failed"
        return 1
    fi

    echo ""

    # Node status (jq has no Python-style and/or selection, so use if/then/else)
    echo -e "${CYAN}Node status:${NC}"
    curl -s http://localhost:4646/v1/nodes 2>/dev/null | jq -r '.[] | "  \(if .Status == "ready" then "✅" else "❌" end) \(.Name) (\(.Address)) - \(.Status)"' 2>/dev/null || {
        log_warning "Could not fetch node status details"
        nomad node status 2>/dev/null || echo "  ❌ Command failed"
    }

    echo ""

    # Driver status, grouped per node
    echo -e "${CYAN}Driver status:${NC}"
    curl -s http://localhost:4646/v1/nodes 2>/dev/null | jq -r '
      .[] |
      "  Node: \(.Name)",
      (.Drivers | to_entries[] |
        "    \(if .value.Healthy then "✅" else "❌" end) \(.key): \(.value.HealthDescription // "unknown")")
    ' 2>/dev/null || {
        log_warning "Could not fetch driver status details"
    }
}

# Show job status
show_jobs_status() {
    log_header "Job status"

    # "|| echo []" keeps a failed curl from aborting the script under set -e
    JOBS=$(curl -s http://localhost:4646/v1/jobs 2>/dev/null || echo "[]")
    if [[ -n "$JOBS" ]] && [[ "$JOBS" != "[]" ]] && [[ "$JOBS" != "null" ]]; then
        echo "$JOBS" | jq -r '.[] | "  \(if .Status == "running" then "✅" else "❌" end) \(.Name) - \(.Status)"' 2>/dev/null
    else
        echo "  📝 No jobs are currently running"
    fi
}

# Show access information
show_access_info() {
    log_header "Access information"

    echo -e "${CYAN}Web UI:${NC}"
    echo "  🌐 http://100.116.158.95:4646"
    echo ""

    echo -e "${CYAN}API endpoint:${NC}"
    echo "  🔗 http://100.116.158.95:4646/v1/"
    echo ""

    echo -e "${CYAN}Useful commands:${NC}"
    echo "  📊 nomad status                  # Cluster overview"
    echo "  🖥️ nomad node status             # Node status"
    echo "  🔧 nomad server members          # Server members"
    echo "  📋 nomad job status <job-name>   # Job status"
    echo "  🚀 nomad job run <job-file>      # Run a job"
    echo "  📜 journalctl -u nomad -f        # Follow logs"
}

# Run diagnostics
run_diagnosis() {
    log_header "Running full diagnostics"

    if [[ -f "$PROJECT_ROOT/scripts/utilities/nomad-diagnosis.sh" ]]; then
        bash "$PROJECT_ROOT/scripts/utilities/nomad-diagnosis.sh"
    else
        log_error "Diagnostics script not found"
        return 1
    fi
}

# Configure the Podman driver
configure_podman() {
    log_header "Configuring all nodes to use the Podman driver"

    local playbook="$PROJECT_ROOT/configuration/playbooks/configure-nomad-podman-cluster.yml"
    local inventory="$PROJECT_ROOT/configuration/inventories/production/nomad-cluster.ini"

    if [[ ! -f "$playbook" ]]; then
        log_error "Playbook not found: $playbook"
        return 1
    fi

    if [[ ! -f "$inventory" ]]; then
        log_error "Inventory not found: $inventory"
        return 1
    fi

    cd "$PROJECT_ROOT/configuration"
    ansible-playbook -i "$inventory" "$playbook" -v
}

# Restart the cluster
restart_cluster() {
    log_header "Restarting the Nomad cluster"

    log_warning "This will restart the entire Nomad cluster"
    read -p "Continue? (y/N): " -n 1 -r
    echo ""

    if [[ $REPLY =~ ^[Yy]$ ]]; then
        local inventory="$PROJECT_ROOT/configuration/inventories/production/nomad-cluster.ini"
        cd "$PROJECT_ROOT/configuration"
        ansible -i "$inventory" nomad_cluster -m systemd -a "name=nomad state=restarted" --become

        log_info "Waiting for the cluster to come up..."
        sleep 15
        show_cluster_status
    else
        log_info "Operation cancelled"
    fi
}

# Main menu
show_menu() {
    echo ""
    log_header "Nomad cluster management menu"
    echo ""
    echo "1) 📊 Show cluster status"
    echo "2) 📋 Show job status"
    echo "3) 🔍 Run full diagnostics"
    echo "4) 🐳 Configure the Podman driver"
    echo "5) 🔄 Restart the cluster"
    echo "6) ℹ️ Show access information"
    echo "0) ❌ Exit"
    echo ""
}

# Main entry point
main() {
    echo ""
    echo "🚀 Nomad cluster management tool"
    echo "==================="

    while true; do
        show_menu
        read -p "Select an option (0-6): " choice

        case $choice in
            1)
                show_cluster_status
                ;;
            2)
                show_jobs_status
                ;;
            3)
                run_diagnosis
                ;;
            4)
                configure_podman
                ;;
            5)
                restart_cluster
                ;;
            6)
                show_access_info
                ;;
            0)
                log_info "Bye!"
                exit 0
                ;;
            *)
                log_error "Invalid choice, try again"
                ;;
        esac

        echo ""
        read -p "Press Enter to continue..." -r
    done
}

# Run only when executed directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi
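jq has no Python-style "X and A or B" value selection, which is why the status glyphs above use jq's if/then/else; a quick sanity check of that pattern against canned input:

echo '[{"Name":"n1","Address":"10.0.0.1","Status":"ready"}]' |
  jq -r '.[] | "\(if .Status == "ready" then "✅" else "❌" end) \(.Name) (\(.Address)) - \(.Status)"'
# prints: ✅ n1 (10.0.0.1) - ready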
@@ -1,304 +0,0 @@
#!/bin/bash

# Proxy toggle script
# One-command enable/disable for the istoreos.tailnet-68f9.ts.net:1082 proxy

set -euo pipefail

# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Proxy configuration
PROXY_HOST="istoreos.tailnet-68f9.ts.net"
PROXY_PORT="1082"
PROXY_URL="http://${PROXY_HOST}:${PROXY_PORT}"

# Config file paths
PROXY_ENV_FILE="/root/mgmt/configuration/proxy.env"
SHELL_RC_FILE="$HOME/.zshrc"
BASH_RC_FILE="$HOME/.bashrc"

# Logging helpers
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check the proxy state
check_proxy_status() {
    if [[ -n "${http_proxy:-}" ]] || [[ -n "${HTTP_PROXY:-}" ]]; then
        echo "on"
    else
        echo "off"
    fi
}

# Test the proxy connection
test_proxy() {
    log_info "Testing proxy connection..."
    if curl -s --connect-timeout 5 --proxy "$PROXY_URL" https://httpbin.org/ip >/dev/null 2>&1; then
        log_success "Proxy connection OK"
        return 0
    else
        log_error "Proxy connection failed"
        return 1
    fi
}

# Enable the proxy
enable_proxy() {
    log_info "Enabling proxy..."

    # Set environment variables
    export http_proxy="$PROXY_URL"
    export https_proxy="$PROXY_URL"
    export HTTP_PROXY="$PROXY_URL"
    export HTTPS_PROXY="$PROXY_URL"
    export no_proxy="localhost,127.0.0.1,::1,.local,.tailnet-68f9.ts.net"
    export NO_PROXY="localhost,127.0.0.1,::1,.local,.tailnet-68f9.ts.net"
    export ALL_PROXY="$PROXY_URL"
    export all_proxy="$PROXY_URL"

    # Test the connection
    if test_proxy; then
        log_success "Proxy enabled: $PROXY_URL"

        # Show the current public IP
        local current_ip=$(curl -s --connect-timeout 5 --proxy "$PROXY_URL" https://httpbin.org/ip | jq -r .origin 2>/dev/null || echo "unknown")
        log_info "Current IP: $current_ip"

        return 0
    else
        log_error "Failed to enable proxy"
        return 1
    fi
}

# Disable the proxy
disable_proxy() {
    log_info "Disabling proxy..."

    # Clear environment variables
    unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
    unset no_proxy NO_PROXY ALL_PROXY all_proxy

    log_success "Proxy disabled"

    # Show the current public IP
    local current_ip=$(curl -s --connect-timeout 5 https://httpbin.org/ip | jq -r .origin 2>/dev/null || echo "unknown")
    log_info "Current IP: $current_ip"
}

# Toggle the proxy state
toggle_proxy() {
    local current_status=$(check_proxy_status)

    if [[ "$current_status" == "on" ]]; then
        disable_proxy
    else
        enable_proxy
    fi
}

# Enable the proxy permanently (write config files)
enable_proxy_permanent() {
    log_info "Enabling proxy permanently..."

    # Create the proxy environment file
    cat > "$PROXY_ENV_FILE" << EOF
# Proxy Configuration for ${PROXY_HOST}:${PROXY_PORT}
# This file contains proxy environment variables for the management system

# HTTP/HTTPS Proxy Settings
export http_proxy=${PROXY_URL}
export https_proxy=${PROXY_URL}
export HTTP_PROXY=${PROXY_URL}
export HTTPS_PROXY=${PROXY_URL}

# No Proxy Settings (local networks and services)
export no_proxy=localhost,127.0.0.1,::1,.local,.tailnet-68f9.ts.net
export NO_PROXY=localhost,127.0.0.1,::1,.local,.tailnet-68f9.ts.net

# Additional proxy settings for various tools
export ALL_PROXY=${PROXY_URL}
export all_proxy=${PROXY_URL}

# Docker proxy settings
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain

# Git proxy settings
export GIT_HTTP_PROXY=${PROXY_URL}
export GIT_HTTPS_PROXY=${PROXY_URL}

# Curl proxy settings
export CURL_PROXY=${PROXY_URL}

# Wget proxy settings
export WGET_PROXY=${PROXY_URL}
EOF

    # Source the proxy config from the shell rc files
    local shell_files=("$SHELL_RC_FILE" "$BASH_RC_FILE")

    for shell_file in "${shell_files[@]}"; do
        if [[ -f "$shell_file" ]]; then
            # Skip if the proxy config is already sourced
            if ! grep -q "source.*proxy.env" "$shell_file"; then
                log_info "Adding proxy config loader to $shell_file..."
                echo "" >> "$shell_file"
                echo "# Load proxy configuration if exists" >> "$shell_file"
                echo "if [[ -f $PROXY_ENV_FILE ]]; then" >> "$shell_file"
                echo "    source $PROXY_ENV_FILE" >> "$shell_file"
                echo "fi" >> "$shell_file"
            fi
        fi
    done

    # Load the config immediately
    if [[ -f "$PROXY_ENV_FILE" ]]; then
        source "$PROXY_ENV_FILE"
    fi

    if test_proxy; then
        log_success "Proxy enabled permanently"
        log_info "Configuration saved to: $PROXY_ENV_FILE"
        log_info "Re-login or run: source ~/.zshrc"
    else
        log_error "Failed to enable proxy permanently"
        return 1
    fi
}

# Disable the proxy permanently (remove from config files)
disable_proxy_permanent() {
    log_info "Disabling proxy permanently..."

    # Back up the existing config
    if [[ -f "$PROXY_ENV_FILE" ]]; then
        cp "$PROXY_ENV_FILE" "${PROXY_ENV_FILE}.backup.$(date +%Y%m%d_%H%M%S)"
        rm -f "$PROXY_ENV_FILE"
    fi

    # Remove the proxy config loader from the shell rc files
    local shell_files=("$SHELL_RC_FILE" "$BASH_RC_FILE")

    for shell_file in "${shell_files[@]}"; do
        if [[ -f "$shell_file" ]]; then
            # Drop the loader block
            if grep -q "source.*proxy.env" "$shell_file"; then
                log_info "Removing proxy config loader from $shell_file..."
                sed -i '/# Load proxy configuration if exists/,/^fi$/d' "$shell_file"
            fi
        fi
    done

    # Clear the environment variables right away
    disable_proxy

    log_success "Proxy disabled permanently"
    log_info "Re-login or run: source ~/.zshrc"
}

# Show proxy status
show_status() {
    local current_status=$(check_proxy_status)

    echo ""
    log_info "=== Proxy status ==="

    if [[ "$current_status" == "on" ]]; then
        log_success "Proxy status: on"
        log_info "Proxy address: $PROXY_URL"

        # Show the current public IP
        local current_ip=$(curl -s --connect-timeout 5 --proxy "$PROXY_URL" https://httpbin.org/ip | jq -r .origin 2>/dev/null || echo "unknown")
        log_info "Current IP: $current_ip"
    else
        log_warning "Proxy status: off"

        # Show the current public IP
        local current_ip=$(curl -s --connect-timeout 5 https://httpbin.org/ip | jq -r .origin 2>/dev/null || echo "unknown")
        log_info "Current IP: $current_ip"
    fi

    # Check the config file
    if [[ -f "$PROXY_ENV_FILE" ]]; then
        log_info "Config file: present ($PROXY_ENV_FILE)"
    else
        log_info "Config file: absent"
    fi

    echo ""
}

# Show help
show_help() {
    echo "Proxy toggle script - manages the istoreos.tailnet-68f9.ts.net:1082 proxy"
    echo ""
    echo "Usage: $0 [command]"
    echo ""
    echo "Commands:"
    echo "  on       - Enable the proxy temporarily (current session only)"
    echo "  off      - Disable the proxy temporarily (current session only)"
    echo "  toggle   - Toggle the proxy state"
    echo "  enable   - Enable the proxy permanently (write config files)"
    echo "  disable  - Disable the proxy permanently (remove config files)"
    echo "  status   - Show proxy status"
    echo "  test     - Test the proxy connection"
    echo "  help     - Show this help"
    echo ""
    echo "Examples:"
    echo "  $0 on       # Enable temporarily"
    echo "  $0 enable   # Enable permanently"
    echo "  $0 status   # Show proxy status"
    echo "  $0 toggle   # Toggle the proxy state"
    echo ""
}

# Main entry point
main() {
    case "${1:-help}" in
        "on")
            enable_proxy
            ;;
        "off")
            disable_proxy
            ;;
        "toggle")
            toggle_proxy
            ;;
        "enable")
            enable_proxy_permanent
            ;;
        "disable")
            disable_proxy_permanent
            ;;
        "status")
            show_status
            ;;
        "test")
            test_proxy
            ;;
        "help"|*)
            show_help
            ;;
    esac
}

main "$@"
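Note that on/off only export or unset variables in the process running the script, so "./proxy-toggle.sh on" (script name assumed) cannot change the parent shell. For the current session it may be simpler to set the same values the script uses directly:

export http_proxy="http://istoreos.tailnet-68f9.ts.net:1082" https_proxy="$http_proxy"
export no_proxy="localhost,127.0.0.1,::1,.local,.tailnet-68f9.ts.net"
# ...and to turn it off again:
unset http_proxy https_proxy no_proxy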
@@ -1,114 +0,0 @@
#!/bin/bash
# Quick start script

set -e

echo "🚀 Welcome to the infrastructure management platform!"
echo ""

# Check required tools
check_tool() {
    if ! command -v "$1" &> /dev/null; then
        echo "❌ $1 is not installed, run 'make setup' first"
        return 1
    fi
}

echo "🔍 Checking required tools..."
check_tool "tofu" || exit 1
check_tool "ansible" || exit 1
check_tool "docker" || exit 1

echo "✅ Tool check passed"
echo ""

# Check the configuration file
CONFIG_FILE="infrastructure/environments/dev/terraform.tfvars"
if [ ! -f "$CONFIG_FILE" ]; then
    echo "⚠️ Configuration file missing, creating it..."
    cp "${CONFIG_FILE}.example" "$CONFIG_FILE"
    echo "📝 Please edit the configuration file: $CONFIG_FILE"
    echo "   Fill in your cloud provider credentials, then run this script again"
    exit 1
fi

echo "✅ Configuration file present"
echo ""

# Choose an action
echo "Choose an action to run:"
echo "1) Initialize infrastructure"
echo "2) Show the execution plan"
echo "3) Apply infrastructure changes"
echo "4) Deploy applications"
echo "5) Start the dev environment"
echo "6) View monitoring"
echo "7) Full deployment workflow"
echo ""

read -p "Enter an option (1-7): " choice

case $choice in
    1)
        echo "🏗️ Initializing infrastructure..."
        make init
        ;;
    2)
        echo "📋 Showing the execution plan..."
        make plan
        ;;
    3)
        echo "🚀 Applying infrastructure changes..."
        make apply
        ;;
    4)
        echo "📦 Deploying applications..."
        make ansible-deploy
        ;;
    5)
        echo "🐳 Starting the dev environment..."
        make docker-up
        ;;
    6)
        echo "📊 Starting monitoring..."
        make monitor
        ;;
    7)
        echo "🎯 Running the full deployment workflow..."
        echo ""
        echo "Step 1/4: Initializing infrastructure..."
        make init
        echo ""
        echo "Step 2/4: Showing the execution plan..."
        make plan
        echo ""
        read -p "Apply the infrastructure changes? (y/N): " -n 1 -r
        echo
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            echo "Step 3/4: Applying infrastructure changes..."
            make apply
            echo ""
            echo "Step 4/4: Deploying applications..."
            make ansible-deploy
            echo ""
            echo "🎉 Full deployment workflow complete!"
        else
            echo "ℹ️ Deployment cancelled"
        fi
        ;;
    *)
        echo "❌ Invalid option"
        exit 1
        ;;
esac

echo ""
echo "🎉 Done!"
echo ""
echo "📋 Useful commands:"
echo "  make help           - List all available commands"
echo "  make plan           - Show the infrastructure change plan"
echo "  make apply          - Apply infrastructure changes"
echo "  make ansible-deploy - Deploy applications"
echo "  make monitor        - Start monitoring"
echo "  make clean          - Clean up temporary files"
@@ -1,104 +0,0 @@
#!/bin/bash

echo "=== Simple Nomad cluster repair script ==="

# Tailscale IP addresses
SEMAPHORE_IP="100.116.158.95"
MASTER_IP="100.117.106.136"
ASH3C_IP="100.116.80.94"
ENCRYPT_KEY="NVOMDvXblgWfhtzFzOUIHnKEOrbXOkPrkIPbRGGf1YQ="

# Generate a config file for one node
create_config() {
    local node_name=$1
    local bind_ip=$2

    cat > /tmp/nomad-${node_name}.hcl << EOF
datacenter = "dc1"
region     = "global"
data_dir   = "/opt/nomad/data"

bind_addr = "${bind_ip}"

server {
  enabled          = true
  bootstrap_expect = 3
  encrypt          = "${ENCRYPT_KEY}"

  server_join {
    retry_join = ["${SEMAPHORE_IP}", "${MASTER_IP}", "${ASH3C_IP}"]
  }
}

client {
  enabled = true
}

# Nomad's UI stanza is "ui"; "ui_config" is Consul syntax
ui {
  enabled = true
}

addresses {
  http = "0.0.0.0"
  rpc  = "${bind_ip}"
  serf = "${bind_ip}"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

plugin "docker" {
  config {
    allow_privileged = true
    volumes {
      enabled = true
    }
  }
}

log_level = "INFO"
log_file  = "/var/log/nomad/nomad.log"
EOF
}

echo "1. Stopping all Nomad services..."
systemctl stop nomad
ssh -p 60022 -i ~/.ssh/id_ed25519 ben@${MASTER_IP} "echo '3131' | sudo -S systemctl stop nomad"
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_IP} "echo '3131' | sudo -S systemctl stop nomad"

echo "2. Wiping data directories..."
rm -rf /opt/nomad/data/*
ssh -p 60022 -i ~/.ssh/id_ed25519 ben@${MASTER_IP} "echo '3131' | sudo -S rm -rf /opt/nomad/data/*"
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_IP} "echo '3131' | sudo -S rm -rf /opt/nomad/data/*"

echo "3. Creating new config files..."
create_config "semaphore" "${SEMAPHORE_IP}"
create_config "master" "${MASTER_IP}"
create_config "ash3c" "${ASH3C_IP}"

echo "4. Deploying config files..."
cp /tmp/nomad-semaphore.hcl /etc/nomad.d/nomad.hcl
chown nomad:nomad /etc/nomad.d/nomad.hcl

scp -P 60022 -i ~/.ssh/id_ed25519 /tmp/nomad-master.hcl ben@${MASTER_IP}:/tmp/
ssh -p 60022 -i ~/.ssh/id_ed25519 ben@${MASTER_IP} "echo '3131' | sudo -S cp /tmp/nomad-master.hcl /etc/nomad.d/nomad.hcl && echo '3131' | sudo -S chown nomad:nomad /etc/nomad.d/nomad.hcl"

scp -P 22 -i ~/.ssh/id_ed25519 /tmp/nomad-ash3c.hcl ben@${ASH3C_IP}:/tmp/
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_IP} "echo '3131' | sudo -S cp /tmp/nomad-ash3c.hcl /etc/nomad.d/nomad.hcl && echo '3131' | sudo -S chown nomad:nomad /etc/nomad.d/nomad.hcl"

echo "5. Starting services..."
systemctl start nomad
ssh -p 60022 -i ~/.ssh/id_ed25519 ben@${MASTER_IP} "echo '3131' | sudo -S systemctl start nomad"
ssh -p 22 -i ~/.ssh/id_ed25519 ben@${ASH3C_IP} "echo '3131' | sudo -S systemctl start nomad"

echo "6. Waiting for the cluster to form..."
sleep 30

echo "7. Checking cluster status..."
nomad server members
nomad node status

echo "=== Repair complete ==="
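If step 7 still shows a split cluster, a likely cause is blocked RPC/serf traffic rather than bad config; a minimal reachability sketch over the same Tailscale IPs:

for ip in 100.116.158.95 100.117.106.136 100.116.80.94; do
  for port in 4646 4647 4648; do
    nc -z -w 2 "$ip" "$port" && echo "OK   $ip:$port" || echo "FAIL $ip:$port"
  done
done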
@@ -1,311 +0,0 @@
#!/bin/bash

# Terraform Consul provider setup script
# Configures Terraform to read sensitive settings from Consul

set -euo pipefail

ENVIRONMENT="${ENVIRONMENT:-dev}"
CONSUL_ADDR="${CONSUL_ADDR:-http://localhost:8500}"

# Colored output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

# Create the Terraform Consul provider configuration
create_consul_provider() {
    local tf_dir="infrastructure/environments/${ENVIRONMENT}"

    log_info "Creating Terraform Consul provider configuration..."

    cat > "${tf_dir}/consul-provider.tf" << 'EOF'
# Consul provider configuration
terraform {
  required_providers {
    consul = {
      source  = "hashicorp/consul"
      version = "~> 2.18"
    }
  }
}

provider "consul" {
  address = var.consul_config.address
  token   = lookup(var.consul_config, "token", null)
}

# Read the Oracle Cloud settings from Consul
data "consul_keys" "oracle_config" {
  key {
    name = "tenancy_ocid"
    path = "config/${var.environment}/oracle/tenancy_ocid"
  }

  key {
    name = "user_ocid"
    path = "config/${var.environment}/oracle/user_ocid"
  }

  key {
    name = "fingerprint"
    path = "config/${var.environment}/oracle/fingerprint"
  }

  key {
    name = "private_key"
    path = "config/${var.environment}/oracle/private_key"
  }

  key {
    name = "compartment_ocid"
    path = "config/${var.environment}/oracle/compartment_ocid"
  }
}

# Write a temporary private key file
resource "local_file" "oci_private_key" {
  content         = data.consul_keys.oracle_config.var.private_key
  filename        = "/tmp/oci_private_key_${var.environment}.pem"
  file_permission = "0600"

  lifecycle {
    ignore_changes = [content]
  }
}

# Locals assembling the full OCI configuration
locals {
  oci_config_from_consul = {
    tenancy_ocid     = data.consul_keys.oracle_config.var.tenancy_ocid
    user_ocid        = data.consul_keys.oracle_config.var.user_ocid
    fingerprint      = data.consul_keys.oracle_config.var.fingerprint
    private_key_path = local_file.oci_private_key.filename
    region           = var.oci_config.region
    compartment_ocid = data.consul_keys.oracle_config.var.compartment_ocid
  }
}
EOF

    log_success "Consul provider configuration created: ${tf_dir}/consul-provider.tf"
}

# Create the variable definitions file
create_variables() {
    local tf_dir="infrastructure/environments/${ENVIRONMENT}"

    log_info "Updating Terraform variable definitions..."

    cat > "${tf_dir}/variables.tf" << 'EOF'
# Basic variables
variable "environment" {
  description = "Environment name"
  type        = string
}

variable "project_name" {
  description = "Project name"
  type        = string
}

variable "owner" {
  description = "Project owner"
  type        = string
}

variable "cloud_providers" {
  description = "Cloud providers to enable"
  type        = list(string)
  default     = []
}

variable "vpc_cidr" {
  description = "VPC CIDR block"
  type        = string
}

variable "availability_zones" {
  description = "List of availability zones"
  type        = list(string)
}

variable "common_tags" {
  description = "Common tags"
  type        = map(string)
  default     = {}
}

# Consul configuration
variable "consul_config" {
  description = "Consul configuration"
  type = object({
    address = string
    token   = optional(string)
  })
}

# Oracle Cloud configuration (basic settings)
variable "oci_config" {
  description = "Oracle Cloud basic configuration"
  type = object({
    region           = string
    tenancy_ocid     = optional(string, "FROM_CONSUL")
    user_ocid        = optional(string, "FROM_CONSUL")
    fingerprint      = optional(string, "FROM_CONSUL")
    private_key_path = optional(string, "FROM_CONSUL")
    compartment_ocid = optional(string, "FROM_CONSUL")
  })
}

# Other cloud provider configurations
variable "huawei_config" {
  description = "Huawei Cloud configuration"
  type = object({
    access_key = string
    secret_key = string
    region     = string
    project_id = string
  })
  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
    project_id = ""
  }
}

variable "gcp_config" {
  description = "Google Cloud configuration"
  type = object({
    project_id       = string
    region           = string
    zone             = string
    credentials_file = string
  })
  default = {
    project_id       = ""
    region           = "asia-northeast3"
    zone             = "asia-northeast3-a"
    credentials_file = ""
  }
}

variable "aws_config" {
  description = "AWS configuration"
  type = object({
    region     = string
    access_key = string
    secret_key = string
  })
  default = {
    region     = "ap-northeast-2"
    access_key = ""
    secret_key = ""
  }
}

variable "do_config" {
  description = "DigitalOcean configuration"
  type = object({
    token  = string
    region = string
  })
  default = {
    token  = ""
    region = "sgp1"
  }
}
EOF

    log_success "Variable definitions updated: ${tf_dir}/variables.tf"
}

# Create an example main.tf
create_main_tf() {
    local tf_dir="infrastructure/environments/${ENVIRONMENT}"

    log_info "Creating example main.tf..."

    cat > "${tf_dir}/main.tf" << 'EOF'
# Main Terraform configuration

terraform {
  required_version = ">= 1.0"

  required_providers {
    oci = {
      source  = "oracle/oci"
      version = "~> 5.0"
    }
  }
}

# Oracle Cloud provider
provider "oci" {
  tenancy_ocid     = local.oci_config_from_consul.tenancy_ocid
  user_ocid        = local.oci_config_from_consul.user_ocid
  fingerprint      = local.oci_config_from_consul.fingerprint
  private_key_path = local.oci_config_from_consul.private_key_path
  region           = local.oci_config_from_consul.region
}

# Example: create a VCN
resource "oci_core_vcn" "main" {
  count          = contains(var.cloud_providers, "oracle") ? 1 : 0
  compartment_id = local.oci_config_from_consul.compartment_ocid
  cidr_block     = var.vpc_cidr
  display_name   = "${var.project_name}-${var.environment}-vcn"

  freeform_tags = var.common_tags
}

# Outputs
output "vcn_id" {
  description = "VCN ID"
  value       = try(oci_core_vcn.main[0].id, null)
}

output "oci_config_source" {
  description = "Where the OCI configuration comes from"
  value       = "consul"
}
EOF

    log_success "Example main.tf created: ${tf_dir}/main.tf"
}

# Main entry point
main() {
    case "${1:-help}" in
        "setup")
            create_consul_provider
            create_variables
            create_main_tf
            ;;
        "help"|*)
            cat << EOF
Terraform Consul provider setup script

Usage: $0 [option]

Options:
  setup    Create the Terraform Consul provider configuration
  help     Show this help

Environment variables:
  ENVIRONMENT    Environment name (default: dev)
  CONSUL_ADDR    Consul address (default: http://localhost:8500)
EOF
            ;;
    esac
}

main "$@"
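A plausible end-to-end flow for the files this script generates, assuming the Oracle secrets are already in Consul under config/dev/oracle (the setup script's own filename below is illustrative, and the remaining inputs come from terraform.tfvars):

ENVIRONMENT=dev ./setup-terraform-consul.sh setup
cd infrastructure/environments/dev
tofu init
tofu plan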
@@ -1,128 +0,0 @@
#!/bin/bash

# Simplified OpenTofu secrets upload script
set -euo pipefail

# Configuration
CONSUL_ADDR="${CONSUL_ADDR:-http://master:8500}"
ENVIRONMENT="${ENVIRONMENT:-dev}"
TFVARS_FILE="tofu/environments/${ENVIRONMENT}/terraform.tfvars"

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }

# Check the Consul connection
check_consul() {
    log_info "Checking Consul connection..."
    if ! curl -s "${CONSUL_ADDR}/v1/status/leader" > /dev/null; then
        log_error "Cannot reach Consul: ${CONSUL_ADDR}"
        exit 1
    fi
    log_success "Consul connection OK"
}

# Upload the configuration
upload_configs() {
    local uploaded_count=0

    log_info "Parsing and uploading configuration..."

    # Parse the tfvars file directly
    while IFS= read -r line; do
        # Skip comments and blank lines
        if [[ "$line" =~ ^[[:space:]]*# ]] || [[ -z "${line// }" ]]; then
            continue
        fi

        # Match variable assignments
        if [[ "$line" =~ ^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*\"([^\"]*)\"|^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*([^[:space:]#]+) ]]; then
            local var_name="${BASH_REMATCH[1]:-${BASH_REMATCH[3]}}"
            local var_value="${BASH_REMATCH[2]:-${BASH_REMATCH[4]}}"

            # Skip empty values
            if [[ -z "$var_value" || "$var_value" == "null" ]]; then
                continue
            fi

            # Work out the category and Consul path
            local consul_path=""
            if [[ "$var_name" =~ ^oci_ ]]; then
                consul_path="config/${ENVIRONMENT}/oracle/${var_name#oci_}"
            elif [[ "$var_name" =~ ^huawei_ ]]; then
                consul_path="config/${ENVIRONMENT}/huawei/${var_name#huawei_}"
            elif [[ "$var_name" =~ ^aws_ ]]; then
                consul_path="config/${ENVIRONMENT}/aws/${var_name#aws_}"
            elif [[ "$var_name" =~ ^do_ ]]; then
                consul_path="config/${ENVIRONMENT}/digitalocean/${var_name#do_}"
            elif [[ "$var_name" =~ ^gcp_ ]]; then
                consul_path="config/${ENVIRONMENT}/gcp/${var_name#gcp_}"
            else
                consul_path="config/${ENVIRONMENT}/general/${var_name}"
            fi

            # Upload to Consul
            if curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${consul_path}" -d "$var_value" > /dev/null; then
                log_info "Uploaded: ${consul_path}"
                # "((uploaded_count++))" returns status 1 on the first increment
                # and would abort the script under "set -e"
                uploaded_count=$((uploaded_count + 1))
            else
                log_error "Upload failed: ${consul_path}"
            fi
        fi
    done < "$TFVARS_FILE"

    log_success "Uploaded $uploaded_count configuration entries to Consul"
}

# List the configuration
list_configs() {
    log_info "Listing configuration stored in Consul..."

    local keys=$(curl -s "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/?keys" | jq -r '.[]' 2>/dev/null || echo "")

    if [[ -z "$keys" ]]; then
        log_error "No configuration found"
        return
    fi

    echo "=== Configuration for environment ${ENVIRONMENT} ==="
    echo "$keys" | while read -r key; do
        local value=$(curl -s "${CONSUL_ADDR}/v1/kv/${key}?raw" 2>/dev/null || echo "unreadable")
        # Hide sensitive values
        if [[ "$key" =~ (secret|key|token|password|ocid) ]]; then
            echo "$key: [hidden]"
        else
            echo "$key: $value"
        fi
    done
}

# Main entry point
main() {
    if [[ ! -f "$TFVARS_FILE" ]]; then
        log_error "Configuration file not found: $TFVARS_FILE"
        exit 1
    fi

    check_consul

    case "${1:-upload}" in
        "upload")
            upload_configs
            ;;
        "list")
            list_configs
            ;;
        *)
            echo "Usage: $0 [upload|list]"
            ;;
    esac
}

main "$@"
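To spot-check what actually landed in the KV store, Consul's ?raw read returns the bare value; for example, for a path the script above would generate:

curl -s "${CONSUL_ADDR:-http://master:8500}/v1/kv/config/dev/oracle/region?raw"; echo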
@@ -1,495 +0,0 @@
#!/bin/bash

# OpenTofu secrets upload script
# Bulk-uploads sensitive settings from terraform.tfvars to Consul

set -euo pipefail

# Configuration
CONSUL_ADDR="${CONSUL_ADDR:-http://master:8500}"
CONSUL_TOKEN="${CONSUL_TOKEN:-}"
ENVIRONMENT="${ENVIRONMENT:-dev}"
TOFU_DIR="${TOFU_DIR:-tofu/environments/${ENVIRONMENT}}"
TFVARS_FILE="${TFVARS_FILE:-${TOFU_DIR}/terraform.tfvars}"

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check dependencies
check_dependencies() {
    local deps=("curl" "jq")
    for dep in "${deps[@]}"; do
        if ! command -v "$dep" &> /dev/null; then
            log_error "Missing dependency: $dep"
            exit 1
        fi
    done
}

# Check the Consul connection
check_consul() {
    log_info "Checking Consul connection..."
    if ! curl -s "${CONSUL_ADDR}/v1/status/leader" > /dev/null; then
        log_error "Cannot reach Consul: ${CONSUL_ADDR}"
        exit 1
    fi
    log_success "Consul connection OK"
}

# Check the tfvars file
check_tfvars_file() {
    if [[ ! -f "$TFVARS_FILE" ]]; then
        log_error "terraform.tfvars file not found: $TFVARS_FILE"
        exit 1
    fi
    log_info "Found configuration file: $TFVARS_FILE"
}

# Parse the HCL configuration into JSON
parse_hcl_to_json() {
    local tfvars_file="$1"
    local temp_tf_file="/tmp/temp_config.tf"
    local temp_json_file="/tmp/temp_config.json"

    # Build a temporary .tf file that turns each assignment into an output
    log_info "Parsing HCL configuration..."

    cat > "$temp_tf_file" << 'EOF'
# Temporary configuration used to parse tfvars
EOF

    # Convert each assignment
    while IFS= read -r line; do
        # Skip comments and blank lines
        if [[ "$line" =~ ^[[:space:]]*# ]] || [[ -z "${line// }" ]]; then
            continue
        fi

        # Extract the variable name and value
        if [[ "$line" =~ ^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*(.+)$ ]]; then
            local var_name="${BASH_REMATCH[1]}"
            local var_value="${BASH_REMATCH[2]}"

            echo "output \"$var_name\" {" >> "$temp_tf_file"
            echo "  value = $var_value" >> "$temp_tf_file"
            echo "}" >> "$temp_tf_file"
        fi
    done < "$tfvars_file"

    # Let terraform evaluate the configuration if it is available
    if command -v terraform &> /dev/null; then
        cd "$(dirname "$temp_tf_file")"
        terraform init -backend=false > /dev/null 2>&1 || true
        terraform output -json > "$temp_json_file" 2>/dev/null || {
            log_warning "terraform could not parse it, falling back to manual parsing..."
            manual_parse_tfvars "$tfvars_file" "$temp_json_file"
        }
    else
        log_warning "terraform not found, using manual parsing..."
        manual_parse_tfvars "$tfvars_file" "$temp_json_file"
    fi

    echo "$temp_json_file"
}

# Manually parse the tfvars file
manual_parse_tfvars() {
    local tfvars_file="$1"
    local output_file="$2"

    log_info "Manually parsing the tfvars file..."

    # Start the JSON document
    echo "{" > "$output_file"

    local first_item=true
    local in_block=false
    local block_name=""
    local block_content=""

    while IFS= read -r line; do
        # Skip comments and blank lines
        if [[ "$line" =~ ^[[:space:]]*# ]] || [[ -z "${line// }" ]]; then
            continue
        fi

        # Detect the start of a configuration block
        if [[ "$line" =~ ^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*\{[[:space:]]*$ ]]; then
            block_name="${BASH_REMATCH[1]}"
            in_block=true
            block_content=""
            continue
        fi

        # Detect the end of a configuration block
        if [[ "$in_block" == true && "$line" =~ ^[[:space:]]*\}[[:space:]]*$ ]]; then
            if [[ "$first_item" == false ]]; then
                echo "," >> "$output_file"
            fi
            echo "  \"$block_name\": {" >> "$output_file"
            echo "$block_content" >> "$output_file"
            echo "  }" >> "$output_file"
            first_item=false
            in_block=false
            continue
        fi

        # Handle lines inside a block
        if [[ "$in_block" == true ]]; then
            if [[ "$line" =~ ^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*\"([^\"]*)\"|^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*([^[:space:]]+) ]]; then
                local key="${BASH_REMATCH[1]:-${BASH_REMATCH[3]}}"
                local value="${BASH_REMATCH[2]:-${BASH_REMATCH[4]}}"

                if [[ -n "$block_content" ]]; then
                    block_content+=","
                fi
                block_content+="\n    \"$key\": \"$value\""
            fi
            continue
        fi

        # Handle simple variables
        if [[ "$line" =~ ^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*\"([^\"]*)\"|^[[:space:]]*([a-zA-Z_][a-zA-Z0-9_]*)[[:space:]]*=[[:space:]]*([^[:space:]]+) ]]; then
            local var_name="${BASH_REMATCH[1]:-${BASH_REMATCH[3]}}"
            local var_value="${BASH_REMATCH[2]:-${BASH_REMATCH[4]}}"

            if [[ "$first_item" == false ]]; then
                echo "," >> "$output_file"
            fi
            echo "  \"$var_name\": \"$var_value\"" >> "$output_file"
            first_item=false
        fi
    done < "$tfvars_file"

    echo "}" >> "$output_file"
}

# Upload the configuration to Consul
upload_config_to_consul() {
    local config_file="$1"
    local uploaded_count=0

    log_info "Uploading configuration to Consul..."

    # Read the JSON configuration
    if [[ ! -f "$config_file" ]]; then
        log_error "Configuration file does not exist: $config_file"
        return 1
    fi

    # Upload the Oracle Cloud configuration
    local oci_tenancy=$(jq -r '.oci_tenancy_ocid // empty' "$config_file")
    local oci_user=$(jq -r '.oci_user_ocid // empty' "$config_file")
    local oci_fingerprint=$(jq -r '.oci_fingerprint // empty' "$config_file")
    local oci_private_key_path=$(jq -r '.oci_private_key_path // empty' "$config_file")
    local oci_compartment=$(jq -r '.oci_compartment_ocid // empty' "$config_file")
    local oci_region=$(jq -r '.oci_region // empty' "$config_file")

    if [[ -n "$oci_tenancy" && "$oci_tenancy" != "null" && "$oci_tenancy" != "" ]]; then
        log_info "Uploading Oracle Cloud configuration..."
        local base_path="config/${ENVIRONMENT}/oracle"

        local tenancy_ocid=$(jq -r '.oci_config.tenancy_ocid // empty' "$config_file")
        local user_ocid=$(jq -r '.oci_config.user_ocid // empty' "$config_file")
        local fingerprint=$(jq -r '.oci_config.fingerprint // empty' "$config_file")
        local private_key_path=$(jq -r '.oci_config.private_key_path // empty' "$config_file")
        local compartment_ocid=$(jq -r '.oci_config.compartment_ocid // empty' "$config_file")
        local region=$(jq -r '.oci_config.region // "ap-seoul-1"' "$config_file")

        # Upload non-empty values. Increments use "var=$((var + 1))" because
        # "((var++))" returns status 1 at zero and would trip "set -e".
        [[ -n "$tenancy_ocid" && "$tenancy_ocid" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/tenancy_ocid" -d "$tenancy_ocid" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$user_ocid" && "$user_ocid" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/user_ocid" -d "$user_ocid" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$fingerprint" && "$fingerprint" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/fingerprint" -d "$fingerprint" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$compartment_ocid" && "$compartment_ocid" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/compartment_ocid" -d "$compartment_ocid" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$region" && "$region" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/region" -d "$region" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }

        # Upload the private key file contents
        if [[ -n "$private_key_path" && "$private_key_path" != "null" && -f "$private_key_path" ]]; then
            local private_key_content=$(cat "$private_key_path")
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/private_key" -d "$private_key_content" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        fi

        log_success "Oracle Cloud configuration uploaded"
    fi

    # Upload the Huawei Cloud configuration
    if jq -e '.huawei_config' "$config_file" > /dev/null 2>&1; then
        log_info "Uploading Huawei Cloud configuration..."
        local base_path="config/${ENVIRONMENT}/huawei"

        local access_key=$(jq -r '.huawei_config.access_key // empty' "$config_file")
        local secret_key=$(jq -r '.huawei_config.secret_key // empty' "$config_file")
        local region=$(jq -r '.huawei_config.region // "cn-north-4"' "$config_file")
        local project_id=$(jq -r '.huawei_config.project_id // empty' "$config_file")

        [[ -n "$access_key" && "$access_key" != "null" && "$access_key" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/access_key" -d "$access_key" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$secret_key" && "$secret_key" != "null" && "$secret_key" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/secret_key" -d "$secret_key" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$region" && "$region" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/region" -d "$region" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$project_id" && "$project_id" != "null" && "$project_id" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/project_id" -d "$project_id" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }

        log_success "Huawei Cloud configuration uploaded"
    fi

    # Upload the AWS configuration
    if jq -e '.aws_config' "$config_file" > /dev/null 2>&1; then
        log_info "Uploading AWS configuration..."
        local base_path="config/${ENVIRONMENT}/aws"

        local access_key=$(jq -r '.aws_config.access_key // empty' "$config_file")
        local secret_key=$(jq -r '.aws_config.secret_key // empty' "$config_file")
        local region=$(jq -r '.aws_config.region // "ap-northeast-2"' "$config_file")

        [[ -n "$access_key" && "$access_key" != "null" && "$access_key" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/access_key" -d "$access_key" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$secret_key" && "$secret_key" != "null" && "$secret_key" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/secret_key" -d "$secret_key" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$region" && "$region" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/region" -d "$region" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }

        log_success "AWS configuration uploaded"
    fi

    # Upload the DigitalOcean configuration
    if jq -e '.do_config' "$config_file" > /dev/null 2>&1; then
        log_info "Uploading DigitalOcean configuration..."
        local base_path="config/${ENVIRONMENT}/digitalocean"

        local token=$(jq -r '.do_config.token // empty' "$config_file")
        local region=$(jq -r '.do_config.region // "sgp1"' "$config_file")

        [[ -n "$token" && "$token" != "null" && "$token" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/token" -d "$token" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$region" && "$region" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/region" -d "$region" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }

        log_success "DigitalOcean configuration uploaded"
    fi

    # Upload the Google Cloud configuration
    if jq -e '.gcp_config' "$config_file" > /dev/null 2>&1; then
        log_info "Uploading Google Cloud configuration..."
        local base_path="config/${ENVIRONMENT}/gcp"

        local project_id=$(jq -r '.gcp_config.project_id // empty' "$config_file")
        local region=$(jq -r '.gcp_config.region // "asia-northeast3"' "$config_file")
        local zone=$(jq -r '.gcp_config.zone // "asia-northeast3-a"' "$config_file")
        local credentials_file=$(jq -r '.gcp_config.credentials_file // empty' "$config_file")

        [[ -n "$project_id" && "$project_id" != "null" && "$project_id" != "" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/project_id" -d "$project_id" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$region" && "$region" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/region" -d "$region" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }
        [[ -n "$zone" && "$zone" != "null" ]] && {
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/zone" -d "$zone" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        }

        # Upload the credentials file contents
        if [[ -n "$credentials_file" && "$credentials_file" != "null" && -f "$credentials_file" ]]; then
            local credentials_content=$(cat "$credentials_file")
            curl -s -X PUT "${CONSUL_ADDR}/v1/kv/${base_path}/credentials" -d "$credentials_content" > /dev/null
            uploaded_count=$((uploaded_count + 1))
        fi

        log_success "Google Cloud configuration uploaded"
    fi

    log_success "Uploaded $uploaded_count configuration entries to Consul"
}

# List the configuration stored in Consul
list_consul_configs() {
    log_info "Listing configuration stored in Consul..."

    local base_path="config/${ENVIRONMENT}"

    echo "=== Configuration in Consul ==="

    # Fetch all configuration keys
    local keys=$(curl -s "${CONSUL_ADDR}/v1/kv/${base_path}/?keys" | jq -r '.[]' 2>/dev/null || echo "")

    if [[ -z "$keys" ]]; then
        log_warning "No configuration found in Consul"
        return
    fi

    echo "$keys" | while read -r key; do
        local value=$(curl -s "${CONSUL_ADDR}/v1/kv/${key}?raw" 2>/dev/null || echo "unreadable")
        # Hide sensitive values
        if [[ "$key" =~ (secret|key|token|password) ]]; then
            echo "$key: [hidden]"
        else
            echo "$key: $value"
        fi
    done
}

# Clean up the Consul configuration
cleanup_consul_configs() {
    log_warning "Cleaning up Consul configuration..."

    read -p "Really delete all configuration for environment '$ENVIRONMENT'? (y/N): " confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        log_info "Operation cancelled"
        return
    fi

    local base_path="config/${ENVIRONMENT}"
    curl -s -X DELETE "${CONSUL_ADDR}/v1/kv/${base_path}?recurse" > /dev/null

    log_success "Configuration for environment '$ENVIRONMENT' cleaned up"
}

# Show help
show_help() {
    cat << EOF
OpenTofu secrets upload script

Usage: $0 [option]

Options:
  upload     Upload the settings in terraform.tfvars to Consul
  list       List the configuration stored in Consul
  cleanup    Clean up the configuration stored in Consul
  help       Show this help

Environment variables:
  CONSUL_ADDR     Consul address (default: http://master:8500)
  CONSUL_TOKEN    Consul ACL token (optional)
  ENVIRONMENT     Environment name (default: dev)
  TOFU_DIR        OpenTofu directory (default: tofu/environments/\${ENVIRONMENT})
  TFVARS_FILE     Variables file path (default: \${TOFU_DIR}/terraform.tfvars)

Examples:
  # Upload configuration to Consul
  $0 upload

  # List the configuration stored in Consul
  $0 list

  # Clean up the configuration
  $0 cleanup

  # Target a different environment
  ENVIRONMENT=production $0 upload
EOF
}

# Main entry point
main() {
    check_dependencies

    case "${1:-help}" in
        "upload")
            check_consul
            check_tfvars_file

            log_info "Parsing configuration file: $TFVARS_FILE"
            # Call directly: capturing with $(...) would swallow the log output
            manual_parse_tfvars "$TFVARS_FILE" "/tmp/parsed_config.json"
            upload_config_to_consul "/tmp/parsed_config.json"

            # Clean up temp files
            rm -f /tmp/parsed_config.json /tmp/temp_config.tf
            ;;
        "list")
            check_consul
            list_consul_configs
            ;;
        "cleanup")
            check_consul
            cleanup_consul_configs
            ;;
        "help"|*)
            show_help
            ;;
    esac
}

main "$@"
31
scripts/utilities/verify-podman-migration.sh
Executable file
@@ -0,0 +1,31 @@
#!/bin/bash

echo "=== Nomad Podman Migration Verification ==="
echo

# Check Nomad service status
echo "1. Checking Nomad service status..."
ssh ben@100.84.197.26 "sudo systemctl status nomad --no-pager -l"
echo

# Check Nomad configuration
echo "2. Checking Nomad configuration..."
ssh ben@100.84.197.26 "sudo cat /etc/nomad.d/nomad.hcl | grep -A 10 -B 2 podman"
echo

# Check Podman socket
echo "3. Checking Podman socket..."
ssh ben@100.84.197.26 "ls -la /run/user/*/podman/podman.sock 2>/dev/null || echo 'Podman socket not found'"
echo

# Check Nomad node status
echo "4. Checking Nomad node status..."
ssh ben@100.84.197.26 "sudo -u nomad /usr/local/bin/nomad node status -self | grep -A 10 'Driver Status'" 2>/dev/null || echo "Could not get node status"
echo

# Test Podman functionality
echo "5. Testing Podman as nomad user..."
ssh ben@100.84.197.26 "sudo -u nomad podman version --format '{{.Version}}'" 2>/dev/null || echo "Podman test failed"
echo

echo "=== Verification Complete ==="
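If step 3 reports no socket, the rootless Podman API socket usually has to be enabled for the nomad user first; a sketch of the usual systemd steps on the target host (the exact user setup may differ, e.g. the nomad user may need a login shell or a machinectl session):

# Keep the nomad user's systemd instance running without a login session
sudo loginctl enable-linger nomad
# Start the user-level Podman API socket as that user
sudo -u nomad XDG_RUNTIME_DIR=/run/user/$(id -u nomad) systemctl --user enable --now podman.socket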