feat: 重构项目脚本结构和文档

docs: 添加API和部署文档
refactor: 重新组织脚本目录结构
fix: 修复Nomad配置中的region设置
chore: 移除过期节点相关配置
test: 更新MCP服务器测试脚本
build: 更新Makefile以适配新脚本路径
This commit is contained in:
2025-10-01 02:08:58 +00:00
parent 7ea230b072
commit e5aa00d6f9
119 changed files with 7847 additions and 203 deletions

View File

@@ -1,9 +1,20 @@
---
- name: 配置Nomad客户端节点
hosts: nomad_nodes:!semaphore
hosts: nomad_clients
become: yes
vars:
nomad_config_dir: /etc/nomad.d
client_ip: "{{ ansible_host }}"
# Nomad节点名称带地理位置前缀
client_name: >-
{%- if inventory_hostname == 'influxdb1' -%}us-influxdb
{%- elif inventory_hostname == 'master' -%}kr-master
{%- elif inventory_hostname == 'hcp1' -%}bj-hcp1
{%- elif inventory_hostname == 'hcp2' -%}bj-hcp2
{%- elif inventory_hostname == 'warden' -%}bj-warden
{%- else -%}{{ inventory_hostname }}
{%- endif -%}
tasks:
- name: 创建Nomad配置目录
@@ -14,42 +25,9 @@
group: root
mode: '0755'
- name: 复制Nomad客户端配置
copy:
content: |
datacenter = "dc1"
data_dir = "/opt/nomad/data"
log_level = "INFO"
bind_addr = "0.0.0.0"
server {
enabled = false
}
client {
enabled = true
servers = ["100.116.158.95:4647"]
host_volume "fnsync" {
path = "/mnt/fnsync"
read_only = false
}
}
addresses {
http = "{{ ansible_host }}"
rpc = "{{ ansible_host }}"
serf = "{{ ansible_host }}"
}
advertise {
http = "{{ ansible_host }}:4646"
rpc = "{{ ansible_host }}:4647"
serf = "{{ ansible_host }}:4648"
}
consul {
address = "100.116.158.95:8500"
}
- name: 复制Nomad客户端配置模板
template:
src: ../templates/nomad-client.hcl
dest: "{{ nomad_config_dir }}/nomad.hcl"
owner: root
group: root

View File

@@ -0,0 +1,104 @@
---
# Writes the Nomad client agent configuration to every host in `target_nodes`
# and makes sure the nomad service is enabled and running.
#
# The agent is restarted through a handler, so it is only bounced when the
# written configuration actually changed rather than on every run.
- name: 配置Nomad客户端节点
  hosts: target_nodes
  become: true
  vars:
    nomad_config_dir: /etc/nomad.d

  tasks:
    - name: 创建Nomad配置目录
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The HCL is written as-is apart from the `ansible_host` substitutions in
    # the `addresses` and `advertise` stanzas.
    # NOTE(review): the embedded comment above `options` says Docker is
    # disabled, but the stanza only enables the raw_exec and exec drivers --
    # confirm the intent.
    - name: 复制Nomad客户端配置
      copy:
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "0.0.0.0"
          server {
            enabled = false
          }
          client {
            enabled = true
            # 配置七姐妹服务器地址
            servers = [
              "100.116.158.95:4647", # bj-semaphore
              "100.81.26.3:4647", # ash1d
              "100.103.147.94:4647", # ash2e
              "100.90.159.68:4647", # ch2
              "100.86.141.112:4647", # ch3
              "100.98.209.50:4647", # bj-onecloud1
              "100.120.225.29:4647" # de
            ]
            host_volume "fnsync" {
              path = "/mnt/fnsync"
              read_only = false
            }
            # 禁用Docker驱动只使用Podman
            options {
              "driver.raw_exec.enable" = "1"
              "driver.exec.enable" = "1"
            }
          }
          # 配置Podman插件目录
          plugin_dir = "/opt/nomad/plugins"
          addresses {
            http = "{{ ansible_host }}"
            rpc = "{{ ansible_host }}"
            serf = "{{ ansible_host }}"
          }
          advertise {
            http = "{{ ansible_host }}:4646"
            rpc = "{{ ansible_host }}:4647"
            serf = "{{ ansible_host }}:4648"
          }
          consul {
            address = "100.116.158.95:8500"
          }
          # 配置Podman驱动
          plugin "podman" {
            config {
              volumes {
                enabled = true
              }
              logging {
                type = "journald"
              }
              gc {
                container = true
              }
            }
          }
        dest: "{{ nomad_config_dir }}/nomad.hcl"
        owner: root
        group: root
        mode: '0644'
      notify: restart nomad

    # Only guarantees the unit is enabled and running; restarts are left to
    # the handler so an unchanged config does not bounce the agent.
    - name: 启动Nomad服务
      systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true

    # Run a pending restart now so the status shown below reflects the new
    # configuration instead of the pre-restart process.
    - meta: flush_handlers

    # Read-only probe; never reported as a change.
    - name: 检查Nomad服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示Nomad服务状态
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,104 @@
---
# Writes the Nomad client agent configuration to every host in `target_nodes`
# and makes sure the nomad service is enabled and running.
#
# The agent is restarted through a handler, so it is only bounced when the
# written configuration actually changed rather than on every run.
- name: 配置Nomad客户端节点
  hosts: target_nodes
  become: true
  vars:
    nomad_config_dir: /etc/nomad.d

  tasks:
    - name: 创建Nomad配置目录
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The HCL is written as-is apart from the `ansible_host` substitutions in
    # the `addresses` and `advertise` stanzas.
    # NOTE(review): the embedded comment above `options` says Docker is
    # disabled, but the stanza only enables the raw_exec and exec drivers --
    # confirm the intent.
    - name: 复制Nomad客户端配置
      copy:
        content: |
          datacenter = "dc1"
          data_dir = "/opt/nomad/data"
          log_level = "INFO"
          bind_addr = "0.0.0.0"
          server {
            enabled = false
          }
          client {
            enabled = true
            # 配置七姐妹服务器地址
            servers = [
              "100.116.158.95:4647", # bj-semaphore
              "100.81.26.3:4647", # ash1d
              "100.103.147.94:4647", # ash2e
              "100.90.159.68:4647", # ch2
              "100.86.141.112:4647", # ch3
              "100.98.209.50:4647", # bj-onecloud1
              "100.120.225.29:4647" # de
            ]
            host_volume "fnsync" {
              path = "/mnt/fnsync"
              read_only = false
            }
            # 禁用Docker驱动只使用Podman
            options {
              "driver.raw_exec.enable" = "1"
              "driver.exec.enable" = "1"
            }
          }
          # 配置Podman插件目录
          plugin_dir = "/opt/nomad/plugins"
          addresses {
            http = "{{ ansible_host }}"
            rpc = "{{ ansible_host }}"
            serf = "{{ ansible_host }}"
          }
          advertise {
            http = "{{ ansible_host }}:4646"
            rpc = "{{ ansible_host }}:4647"
            serf = "{{ ansible_host }}:4648"
          }
          consul {
            address = "100.116.158.95:8500"
          }
          # 配置Podman驱动
          plugin "podman" {
            config {
              volumes {
                enabled = true
              }
              logging {
                type = "journald"
              }
              gc {
                container = true
              }
            }
          }
        dest: "{{ nomad_config_dir }}/nomad.hcl"
        owner: root
        group: root
        mode: '0644'
      notify: restart nomad

    # Only guarantees the unit is enabled and running; restarts are left to
    # the handler so an unchanged config does not bounce the agent.
    - name: 启动Nomad服务
      systemd:
        name: nomad
        state: started
        enabled: true
        daemon_reload: true

    # Run a pending restart now so the status shown below reflects the new
    # configuration instead of the pre-restart process.
    - meta: flush_handlers

    # Read-only probe; never reported as a change.
    - name: 检查Nomad服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示Nomad服务状态
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,105 @@
---
# Deploys a per-host Nomad configuration file and a systemd unit to the hosts
# `ch2` and `ch3`, then makes sure the nomad service is enabled and started.
# Changes to the config file or the unit file trigger the `restart nomad`
# handler.
- name: 部署韩国节点Nomad配置
  hosts: ch2,ch3
  become: true
  gather_facts: false
  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"
    source_config_dir: "/root/mgmt/infrastructure/configs/server"

  tasks:
    # The previous pattern (`\$`) matched only a literal dollar sign, so it
    # never stripped anything. Strip a trailing `.global` instead.
    # NOTE(review): `.global` is presumed to be the suffix meant by the task
    # name -- confirm against the inventory naming.
    - name: 获取主机名短名称（去掉后缀）
      set_fact:
        short_hostname: "{{ inventory_hostname | regex_replace('\\.global$', '') }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The source file is looked up on the control node as
    # nomad-<short_hostname>.hcl; the previous remote file is kept as a backup.
    - name: 部署 Nomad 配置文件到韩国节点
      copy:
        src: "{{ source_config_dir }}/nomad-{{ short_hostname }}.hcl"
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        backup: true
      notify: restart nomad

    # Read-only lookup of the binary path: marked as never changing so the
    # play does not report a change on every run. Fails when nothing is found.
    - name: 检查 Nomad 二进制文件位置
      shell: which nomad || find /usr -name nomad 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    # ExecStart uses the binary path discovered by the previous task.
    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536
          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "/opt/nomad/data"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # Make systemd pick up the unit file before the service is started.
    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Best effort: a timeout on port 4646 is ignored so the status is still
    # printed below.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    - name: 显示 Nomad 服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示 Nomad 服务状态信息
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,105 @@
---
# Deploys a per-host Nomad configuration file and a systemd unit to the hosts
# `ch2` and `ch3`, then makes sure the nomad service is enabled and started.
# Changes to the config file or the unit file trigger the `restart nomad`
# handler.
- name: 部署韩国节点Nomad配置
  hosts: ch2,ch3
  become: true
  gather_facts: false
  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"
    source_config_dir: "/root/mgmt/infrastructure/configs/server"

  tasks:
    # Strips a trailing `.global` from the inventory hostname; the result
    # selects the per-host source file below.
    - name: 获取主机名短名称（去掉.global后缀
      set_fact:
        short_hostname: "{{ inventory_hostname | regex_replace('\\.global$', '') }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The source file is looked up on the control node as
    # nomad-<short_hostname>.hcl; the previous remote file is kept as a backup.
    - name: 部署 Nomad 配置文件到韩国节点
      copy:
        src: "{{ source_config_dir }}/nomad-{{ short_hostname }}.hcl"
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        backup: true
      notify: restart nomad

    # Read-only lookup of the binary path: marked as never changing so the
    # play does not report a change on every run. Fails when nothing is found.
    - name: 检查 Nomad 二进制文件位置
      shell: which nomad || find /usr -name nomad 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    # ExecStart uses the binary path discovered by the previous task.
    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536
          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "/opt/nomad/data"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # Make systemd pick up the unit file before the service is started.
    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Best effort: a timeout on port 4646 is ignored so the status is still
    # printed below.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    - name: 显示 Nomad 服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示 Nomad 服务状态信息
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,105 @@
---
# Deploys a per-host Nomad configuration file and a systemd unit to the hosts
# `ch2` and `ch3`, then makes sure the nomad service is enabled and started.
# Changes to the config file or the unit file trigger the `restart nomad`
# handler.
- name: 部署韩国节点Nomad配置
  hosts: ch2,ch3
  become: true
  gather_facts: false
  vars:
    nomad_config_dir: "/etc/nomad.d"
    nomad_config_file: "{{ nomad_config_dir }}/nomad.hcl"
    source_config_dir: "/root/mgmt/infrastructure/configs/server"

  tasks:
    # The previous pattern (`\$`) matched only a literal dollar sign, so it
    # never stripped anything. Strip a trailing `.global` instead.
    # NOTE(review): `.global` is presumed to be the suffix meant by the task
    # name -- confirm against the inventory naming.
    - name: 获取主机名短名称（去掉后缀）
      set_fact:
        short_hostname: "{{ inventory_hostname | regex_replace('\\.global$', '') }}"

    - name: 确保 Nomad 配置目录存在
      file:
        path: "{{ nomad_config_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # The source file is looked up on the control node as
    # nomad-<short_hostname>.hcl; the previous remote file is kept as a backup.
    - name: 部署 Nomad 配置文件到韩国节点
      copy:
        src: "{{ source_config_dir }}/nomad-{{ short_hostname }}.hcl"
        dest: "{{ nomad_config_file }}"
        owner: root
        group: root
        mode: '0644'
        backup: true
      notify: restart nomad

    # Read-only lookup of the binary path: marked as never changing so the
    # play does not report a change on every run. Fails when nothing is found.
    - name: 检查 Nomad 二进制文件位置
      shell: which nomad || find /usr -name nomad 2>/dev/null | head -1
      register: nomad_binary_path
      changed_when: false
      failed_when: nomad_binary_path.stdout == ""

    # ExecStart uses the binary path discovered by the previous task.
    - name: 创建/更新 Nomad systemd 服务文件
      copy:
        dest: "/etc/systemd/system/nomad.service"
        owner: root
        group: root
        mode: '0644'
        content: |
          [Unit]
          Description=Nomad
          Documentation=https://www.nomadproject.io/
          Requires=network-online.target
          After=network-online.target
          [Service]
          Type=notify
          User=root
          Group=root
          ExecStart={{ nomad_binary_path.stdout }} agent -config=/etc/nomad.d/nomad.hcl
          ExecReload=/bin/kill -HUP $MAINPID
          KillMode=process
          Restart=on-failure
          LimitNOFILE=65536
          [Install]
          WantedBy=multi-user.target
      notify: restart nomad

    - name: 确保 Nomad 数据目录存在
      file:
        path: "/opt/nomad/data"
        state: directory
        owner: root
        group: root
        mode: '0755'

    # Make systemd pick up the unit file before the service is started.
    - name: 重新加载 systemd daemon
      systemd:
        daemon_reload: true

    - name: 启用并启动 Nomad 服务
      systemd:
        name: nomad
        enabled: true
        state: started

    # Best effort: a timeout on port 4646 is ignored so the status is still
    # printed below.
    - name: 等待 Nomad 服务启动
      wait_for:
        port: 4646
        host: "{{ ansible_host }}"
        delay: 5
        timeout: 30
      ignore_errors: true

    - name: 显示 Nomad 服务状态
      command: systemctl status nomad
      register: nomad_status
      changed_when: false

    - name: 显示 Nomad 服务状态信息
      debug:
        var: nomad_status.stdout_lines

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,33 @@
---
# Appends one SSH public key to the connecting user's authorized_keys file on
# every host in `nomad_clients`, then prints the resulting file.
#
# NOTE(review): all paths are built as /home/<ansible_user> and the group is
# set to the user name -- confirm this holds for every target account.
- name: 分发SSH公钥到Nomad客户端节点
  hosts: nomad_clients
  become: true
  vars:
    ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMSUUfma8FKEFvH8Nq65XM2PZ9kitfgv1q727cKV9y5Z houzhongxu@seekkey.tech"

  tasks:
    - name: 确保 .ssh 目录存在
      file:
        state: directory
        path: "/home/{{ ansible_user }}/.ssh"
        mode: '0700'
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"

    # lineinfile only adds the key when that exact line is missing; the file
    # is created if it does not exist yet.
    - name: 添加SSH公钥到 authorized_keys
      lineinfile:
        create: true
        path: "/home/{{ ansible_user }}/.ssh/authorized_keys"
        line: "{{ ssh_public_key }}"
        mode: '0600'
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"

    # Read-only check, never reported as a change.
    - name: 验证SSH公钥已添加
      command: cat "/home/{{ ansible_user }}/.ssh/authorized_keys"
      changed_when: false
      register: ssh_key_check

    - name: 显示SSH公钥内容
      debug:
        var: ssh_key_check.stdout_lines

View File

@@ -0,0 +1,32 @@
---
# Adds one SSH public key to root's authorized_keys on the listed hosts and
# prints the resulting file.
- name: 分发SSH公钥到新节点
  hosts: browser,influxdb1,hcp1,warden
  become: true
  vars:
    ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMSUUfma8FKEFvH8Nq65XM2PZ9kitfgv1q727cKV9y5Z houzhongxu@seekkey.tech"

  tasks:
    - name: 确保 .ssh 目录存在
      file:
        path: "/root/.ssh"
        state: directory
        mode: '0700'
        owner: root
        group: root

    # Append the key instead of writing the whole file: `copy` with `content`
    # replaced authorized_keys outright, removing every other key already
    # present. lineinfile adds the line only when it is missing and creates
    # the file when needed.
    - name: 添加SSH公钥到 authorized_keys
      lineinfile:
        path: "/root/.ssh/authorized_keys"
        line: "{{ ssh_public_key }}"
        create: true
        mode: '0600'
        owner: root
        group: root

    # Read-only check, never reported as a change.
    - name: 验证SSH公钥已添加
      command: cat /root/.ssh/authorized_keys
      register: ssh_key_check
      changed_when: false

    - name: 显示SSH公钥内容
      debug:
        var: ssh_key_check.stdout_lines

View File

@@ -0,0 +1,43 @@
---
# Patches the existing nomad.hcl on every host in `nomad_servers` in place:
# inserts a region setting, drops `.global` from `name = "..."` values and
# rewrites single-line `retry_join` lists. Any edit notifies `restart nomad`.
- name: 修复 Nomad 服务器 region 配置
  hosts: nomad_servers
  become: true
  vars:
    nomad_config_dir: /etc/nomad.d

  tasks:
    # Timestamped copy made on the remote host; a failure here (for example
    # when the file is missing) does not stop the play.
    - name: 备份当前 Nomad 配置
      copy:
        remote_src: true
        src: "{{ nomad_config_dir }}/nomad.hcl"
        dest: "{{ nomad_config_dir }}/nomad.hcl.backup.{{ ansible_date_time.epoch }}"
      ignore_errors: true

    # The managed block is placed after the line starting with `datacenter = `.
    - name: 更新 Nomad 配置文件以添加 region 设置
      blockinfile:
        path: "{{ nomad_config_dir }}/nomad.hcl"
        marker: "# {mark} Ansible managed region setting"
        insertafter: '^datacenter = '
        block: |
          region = "dc1"
      notify: restart nomad

    - name: 更新节点名称以移除 .global 后缀（如果存在）
      replace:
        path: "{{ nomad_config_dir }}/nomad.hcl"
        regexp: 'name = "(.*)\.global(.*)"'
        replace: 'name = "\1\2"'
      notify: restart nomad

    # NOTE(review): the replacement lists six addresses -- confirm that this
    # is the complete set of servers to join.
    - name: 确保 retry_join 使用正确的 IP 地址
      replace:
        path: "{{ nomad_config_dir }}/nomad.hcl"
        regexp: 'retry_join = \[(.*)\]'
        replace: 'retry_join = ["100.81.26.3", "100.103.147.94", "100.90.159.68", "100.116.158.95", "100.98.209.50", "100.120.225.29"]'
      notify: restart nomad

  handlers:
    - name: restart nomad
      systemd:
        name: nomad
        state: restarted

View File

@@ -0,0 +1,87 @@
---
# Enables the Podman task driver on every host in `target_nodes`: backs up
# nomad.hcl, links the driver binary into the plugin directory, installs the
# driver config file, adds a managed plugin block to nomad.hcl and restarts
# the agent.
- name: Configure Nomad Podman Driver
  hosts: target_nodes
  become: true

  tasks:
    - name: Create backup directory
      file:
        state: directory
        path: /etc/nomad.d/backup
        mode: '0755'

    # Copy is made on the remote host and named with the gathered ISO-8601
    # timestamp.
    - name: Backup current nomad.hcl
      copy:
        remote_src: true
        src: /etc/nomad.d/nomad.hcl
        dest: "/etc/nomad.d/backup/nomad.hcl.bak.{{ ansible_date_time.iso8601 }}"

    - name: Create plugin directory
      file:
        state: directory
        path: /opt/nomad/plugins
        mode: '0755'
        owner: nomad
        group: nomad

    - name: Create symlink for podman driver
      file:
        state: link
        src: /usr/bin/nomad-driver-podman
        dest: /opt/nomad/plugins/nomad-driver-podman

    - name: Copy podman driver configuration
      copy:
        src: ../../files/podman-driver.hcl
        dest: /etc/nomad.d/podman-driver.hcl
        mode: '0644'
        owner: root
        group: root

    # Only the line pointing at /opt/nomad/data/plugins is removed; any other
    # plugin_dir line is left untouched.
    - name: Remove existing plugin_dir configuration
      lineinfile:
        state: absent
        path: /etc/nomad.d/nomad.hcl
        regexp: '^plugin_dir = "/opt/nomad/data/plugins"'

    # NOTE(review): the registered result is not read by any task visible in
    # this playbook.
    - name: Configure Nomad to use Podman driver
      blockinfile:
        path: /etc/nomad.d/nomad.hcl
        marker: "# {mark} ANSIBLE MANAGED BLOCK - PODMAN DRIVER"
        block: |
          plugin_dir = "/opt/nomad/plugins"
          plugin "podman" {
            config {
              volumes {
                enabled = true
              }
              logging {
                type = "journald"
              }
              gc {
                container = true
              }
            }
          }
      register: nomad_config_result

    - name: Restart nomad service
      systemd:
        name: nomad
        enabled: true
        state: restarted

    # Waits 10 seconds, then up to 60 seconds in total for port 4646.
    - name: Wait for nomad to start
      wait_for:
        port: 4646
        timeout: 60
        delay: 10

    # Read-only query, never reported as a change.
    - name: Check nomad status
      command: nomad node status
      changed_when: false
      register: nomad_status

    - name: Display nomad status
      debug:
        var: nomad_status.stdout_lines

View File

@@ -0,0 +1,86 @@
---
# Removes from /etc/hosts, on every host in `nomad_clients`, each line that
# starts with one of the listed addresses, then prints the resulting file.
- name: 恢复客户端节点的/etc/hosts文件
  hosts: nomad_clients
  become: true

  tasks:
    # One looped task replaces twelve copy-pasted tasks that differed only in
    # the address. `regex_escape` turns each address into a literal pattern,
    # and the trailing `\s` keeps a match from extending into a longer address
    # that merely shares the prefix.
    - name: 删除添加的主机名解析条目
      lineinfile:
        path: /etc/hosts
        regexp: "^{{ item | regex_escape }}\\s"
        state: absent
      loop:
        - "100.116.158.95"
        - "100.81.26.3"
        - "100.103.147.94"
        - "100.90.159.68"
        - "100.86.141.112"
        - "100.98.209.50"
        - "100.120.225.29"
        - "100.117.106.136"
        - "100.116.80.94"
        - "100.116.112.45"
        - "100.97.62.111"
        - "100.122.197.112"

    # Read-only check, never reported as a change.
    - name: 显示恢复后的/etc/hosts文件内容
      command: cat /etc/hosts
      register: hosts_content
      changed_when: false

    - name: 显示/etc/hosts文件内容
      debug:
        var: hosts_content.stdout_lines

View File

@@ -0,0 +1,50 @@
---
# Ensures /etc/hosts on every host in `nomad_clients` has one line per entry
# in `hosts_entries` ("<ip> <hostname> [<hostname> ...]"), then prints the
# resulting file.
- name: 更新客户端节点的/etc/hosts文件
  hosts: nomad_clients
  become: true
  vars:
    # Each entry maps one address to the hostnames written on its line.
    hosts_entries:
      - ip: "100.116.158.95"
        hostnames: ["semaphore", "bj-semaphore"]
      - ip: "100.81.26.3"
        hostnames: ["ash1d"]
      - ip: "100.103.147.94"
        hostnames: ["ash2e"]
      - ip: "100.90.159.68"
        hostnames: ["ch2"]
      - ip: "100.86.141.112"
        hostnames: ["ch3"]
      - ip: "100.98.209.50"
        hostnames: ["onecloud1", "bj-onecloud1"]
      - ip: "100.120.225.29"
        hostnames: ["de"]
      - ip: "100.117.106.136"
        hostnames: ["master"]
      - ip: "100.116.80.94"
        hostnames: ["ash3c", "influxdb1"]
      - ip: "100.116.112.45"
        hostnames: ["browser"]
      - ip: "100.97.62.111"
        hostnames: ["hcp1", "bj-hcp1"]
      - ip: "100.122.197.112"
        hostnames: ["warden"]

  tasks:
    # Keyed on the address: when a line for the same IP already exists it is
    # replaced, so editing an entry's hostnames updates the line instead of
    # appending a second, conflicting one. Without `regexp`, lineinfile only
    # compares whole lines and would add a duplicate.
    - name: 添加主机名解析到/etc/hosts文件
      lineinfile:
        path: /etc/hosts
        regexp: "^{{ item.ip | regex_escape }}\\s"
        line: "{{ item.ip }} {{ item.hostnames | join(' ') }}"
        create: true
        owner: root
        group: root
        mode: '0644'
      loop: "{{ hosts_entries }}"

    # Read-only check, never reported as a change.
    - name: 显示更新后的/etc/hosts文件内容
      command: cat /etc/hosts
      register: hosts_content
      changed_when: false

    - name: 显示/etc/hosts文件内容
      debug:
        var: hosts_content.stdout_lines

View File

@@ -0,0 +1,31 @@
---
# Regenerates nomad.hcl from a template on every host in `nomad_servers`,
# restarts the agent and waits for port 4646 to accept connections.
- name: Update Nomad server configuration
  hosts: nomad_servers
  become: true

  tasks:
    # The copy is made on the remote host to a fixed name, so each run
    # replaces the previous backup.
    - name: Backup current Nomad configuration
      copy:
        remote_src: true
        src: /etc/nomad.d/nomad.hcl
        dest: /etc/nomad.d/nomad.hcl.bak

    # `server_name` and `server_ip` are passed to the template as task vars.
    # NOTE(review): no owner, group or mode is set on the generated file --
    # confirm that is intended.
    - name: Generate Nomad configuration for each server
      vars:
        server_name: "{{ inventory_hostname }}"
        server_ip: "{{ ansible_host }}"
      template:
        src: ../templates/nomad-server.hcl.j2
        dest: /etc/nomad.d/nomad.hcl

    - name: Restart Nomad service
      systemd:
        state: restarted
        name: nomad

    # Waits 10 seconds, then up to 60 seconds in total for the port.
    - name: Wait for Nomad to be ready
      wait_for:
        host: "{{ ansible_host }}"
        port: 4646
        timeout: 60
        delay: 10