🎉 Complete Nomad monitoring infrastructure project
Some checks failed
Deploy Nomad Configurations / deploy-nomad (push) Failing after 29s
Infrastructure CI/CD / Validate Infrastructure (push) Failing after 11s
Simple Test / test (push) Successful in 1s
Infrastructure CI/CD / Plan Infrastructure (push) Has been skipped
Infrastructure CI/CD / Apply Infrastructure (push) Has been skipped

Major Achievements:
- Deployed complete observability stack (Prometheus + Loki + Grafana)
- Established rapid troubleshooting capabilities (3-step process)
- Created heatmap dashboard for log correlation analysis
- Unified logging system (systemd-journald across all nodes)
- Configured API access with Service Account tokens (see the smoke test below)
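
As a quick check of the token setup above, the Grafana HTTP API can be exercised directly; a minimal sketch, where the token value is a placeholder and the URL comes from the Grafana job definition in this commit:

GRAFANA_URL="http://grafana.tailnet-68f9.ts.net:3000"
TOKEN="glsa_xxxxxxxxxxxx"  # Service Account token (placeholder)
# Verify the API is reachable and the token is accepted
curl -fsS -H "Authorization: Bearer $TOKEN" "$GRAFANA_URL/api/health"
# List dashboards visible to the service account
curl -fsS -H "Authorization: Bearer $TOKEN" "$GRAFANA_URL/api/search?type=dash-db"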

🧹 Project Cleanup:
- Intelligent cleanup based on Git modification frequency
- Organized files into proper directory structure
- Removed deprecated webhook deployment scripts
- Eliminated 70+ temporary/test files (43% reduction)

📊 Infrastructure Status:
- Prometheus: 13 nodes monitored
- Loki: 12 nodes logging
- Grafana: Heatmap dashboard + API access
- Promtail: Deployed to 12/13 nodes

🚀 Ready for Terraform transition (switch over after one week of quiet operation)

Project Status: COMPLETED 
2025-10-12 09:15:21 +00:00
parent eff8d3ec6d
commit 1eafce7290
305 changed files with 5341 additions and 18471 deletions

View File

@@ -0,0 +1,64 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "{{ node_name }}"
bind_addr = "{{ bind_addr }}"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136:8301", # ch4 (Korea)
"100.122.197.112:8301", # warden (Beijing)
"100.116.80.94:8301" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "{{ node_zone }}"
}
# UI configuration
ui_config {
enabled = {{ ui_enabled|lower }}
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
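
For reference, the template can be rendered and validated locally before any node is touched; a minimal sketch assuming the jinja2 CLI (the same tool the test script in this commit uses) is installed:

# Render the template with one node's JSON variables, then let Consul validate it
jinja2 infrastructure/consul/baseline/consul.j2 infrastructure/consul/jinja2-output/ch4-config.json > /tmp/ch4-consul.hcl
consul validate /tmp/ch4-consul.hcl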

View File

@@ -0,0 +1,84 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "{{ node_name }}"
bind_addr = "{{ bind_addr }}"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "{{ node_zone }}"
}
# UI configuration
ui_config {
enabled = {{ ui_enabled|lower }}
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "{{ node_name }}-service"
port = 8080
tags = ["{{ node_name }}", "client"]
}
# Health check
checks {
name = "{{ node_name }}-health"
tcp = "{{ bind_addr }}:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt
auto_encrypt {
allow_tls = true
}

View File

@@ -0,0 +1,58 @@
# Consul Client Configuration for ash1d
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash1d"
bind_addr = "100.81.26.3"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", "100.122.197.112", "100.116.80.94"]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,99 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# Full configuration options can be found at https://developer.hashicorp.com/docs/agent/config
# datacenter
# This flag controls the datacenter in which the agent is running. If not provided,
# it defaults to "dc1". Consul has first-class support for multiple datacenters, but
# it relies on proper configuration. Nodes in the same datacenter should be on a
# single LAN.
#datacenter = "my-dc-1"
# data_dir
# This flag provides a data directory for the agent to store state. This is required
# for all agents. The directory should be durable across reboots. This is especially
# critical for agents that are running in server mode as they must be able to persist
# cluster state. Additionally, the directory must support the use of filesystem
# locking, meaning some types of mounted folders (e.g. VirtualBox shared folders) may
# not be suitable.
data_dir = "/opt/consul"
# client_addr
# The address to which Consul will bind client interfaces, including the HTTP and DNS
# servers. By default, this is "127.0.0.1", allowing only loopback connections. In
# Consul 1.0 and later this can be set to a space-separated list of addresses to bind
# to, or a go-sockaddr template that can potentially resolve to multiple addresses.
#client_addr = "0.0.0.0"
# ui
# Enables the built-in web UI server and the required HTTP routes. This eliminates
# the need to maintain the Consul web UI files separately from the binary.
# Version 1.10 deprecated ui=true in favor of ui_config.enabled=true
#ui_config{
# enabled = true
#}
# server
# This flag is used to control if an agent is in server or client mode. When provided,
# an agent will act as a Consul server. Each Consul cluster must have at least one
# server and ideally no more than 5 per datacenter. All servers participate in the Raft
# consensus algorithm to ensure that transactions occur in a consistent, linearizable
# manner. Transactions modify cluster state, which is maintained on all server nodes to
# ensure availability in the case of node failure. Server nodes also participate in a
# WAN gossip pool with server nodes in other datacenters. Servers act as gateways to
# other datacenters and forward traffic as appropriate.
#server = true
# Bind addr
# You may use IPv4 or IPv6 but if you have multiple interfaces you must be explicit.
#bind_addr = "[::]" # Listen on all IPv6
#bind_addr = "0.0.0.0" # Listen on all IPv4
#
# Advertise addr - if you want to point clients to a different address than bind or LB.
#advertise_addr = "127.0.0.1"
# Enterprise License
# As of 1.10, Enterprise requires a license_path and does not have a short trial.
#license_path = "/etc/consul.d/consul.hclic"
# bootstrap_expect
# This flag provides the number of expected servers in the datacenter. Either this value
# should not be provided or the value must agree with other servers in the cluster. When
# provided, Consul waits until the specified number of servers are available and then
# bootstraps the cluster. This allows an initial leader to be elected automatically.
# This cannot be used in conjunction with the legacy -bootstrap flag. This flag requires
# -server mode.
#bootstrap_expect=3
# encrypt
# Specifies the secret key to use for encryption of Consul network traffic. This key must
# be 32-bytes that are Base64-encoded. The easiest way to create an encryption key is to
# use consul keygen. All nodes within a cluster must share the same encryption key to
# communicate. The provided key is automatically persisted to the data directory and loaded
# automatically whenever the agent is restarted. This means that to encrypt Consul's gossip
# protocol, this option only needs to be provided once on each agent's initial startup
# sequence. If it is provided after Consul has been initialized with an encryption key,
# then the provided key is ignored and a warning will be displayed.
#encrypt = "..."
# retry_join
# Similar to -join but allows retrying a join until it is successful. Once it joins
# successfully to a member in a list of members it will never attempt to join again.
# Agents will then solely maintain their membership via gossip. This is useful for
# cases where you know the address will eventually be available. This option can be
# specified multiple times to specify multiple agents to join. The value can contain
# IPv4, IPv6, or DNS addresses. In Consul 1.1.0 and later this can be set to a go-sockaddr
# template. If Consul is running on the non-default Serf LAN port, this must be specified
# as well. IPv6 must use the "bracketed" syntax. If multiple values are given, they are
# tried and retried in the order listed until the first succeeds. Here are some examples:
#retry_join = ["consul.domain.internal"]
#retry_join = ["10.0.4.67"]
#retry_join = ["[::1]:8301"]
#retry_join = ["consul.domain.internal", "10.0.4.67"]
# Cloud Auto-join examples:
# More details - https://developer.hashicorp.com/docs/agent/cloud-auto-join
#retry_join = ["provider=aws tag_key=... tag_value=..."]
#retry_join = ["provider=azure tag_name=... tag_value=... tenant_id=... client_id=... subscription_id=... secret_access_key=..."]
#retry_join = ["provider=gce project_name=... tag_value=..."]

View File

@@ -0,0 +1,61 @@
# Consul Client Configuration for ash3c
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash3c"
bind_addr = "100.116.80.94"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", # master (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-server"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1 @@
# No Consul configuration present

View File

@@ -0,0 +1,58 @@
# Consul Client Configuration for ch2
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch2"
bind_addr = "100.90.159.68"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", "100.122.197.112", "100.116.80.94"]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,58 @@
# Consul Client Configuration for ch3
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch3"
bind_addr = "100.86.141.112"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", "100.122.197.112", "100.116.80.94"]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,61 @@
# Consul Client Configuration for master
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch4"
bind_addr = "100.117.106.136"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", # master (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-server"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,58 @@
# Consul Client Configuration for de
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "de"
bind_addr = "100.120.225.29"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", "100.122.197.112", "100.116.80.94"]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,61 @@
# Consul Client Configuration for hcp1
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "hcp1"
bind_addr = "100.97.62.111"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1 @@
# No Consul configuration present

View File

@@ -0,0 +1,65 @@
# Consul Server Configuration for onecloud1
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "onecloud1"
bind_addr = "100.98.209.50"
# Server mode
server = true
bootstrap_expect = 4
# Join existing cluster
retry_join = [
"100.117.106.136", # ch4
"100.122.197.112", # warden
"100.116.80.94" # ash3c
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
server = 8300
serf_lan = 8301
serf_wan = 8302
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-client"
}
# UI enabled for servers
ui_config {
enabled = true
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
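
Once onecloud1 joins, membership and Raft peers can be verified from any server node; note that bootstrap_expect only affects the initial bootstrap of a brand-new cluster, not joining an existing one:

consul members -status=alive
consul operator raft list-peers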

View File

@@ -0,0 +1,62 @@
# Consul Client Configuration for semaphore
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "semaphore"
bind_addr = "100.116.158.95"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", # master (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-server"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,61 @@
# Consul Client Configuration for warden
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "warden"
bind_addr = "100.122.197.112"
# Client mode (not server)
server = false
# Connect to Consul servers (the three-node cluster)
retry_join = [
"100.117.106.136", # master (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance optimization
performance {
raft_multiplier = 5
}
# Ports configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable Connect for service mesh
connect {
enabled = true
}
# Cache configuration for performance
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "unknown"
zone = "nomad-server"
}
# UI disabled for clients
ui_config {
enabled = false
}
# ACL configuration (if needed)
acl = {
enabled = false
default_policy = "allow"
}
# Logging
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,56 @@
---
- name: Deploy Consul configuration to all nodes in batches
  hosts: all
  become: yes
  serial: 8  # process 8 nodes in parallel
  vars:
    consul_config_dir: "/etc/consul.d"
    consul_service_name: "consul"
  tasks:
    - name: Determine node type
      set_fact:
        node_type: "{{ 'server' if inventory_hostname in ['ch4', 'ash3c', 'warden'] else 'client' }}"
        ui_enabled: "{{ 'true' if inventory_hostname in ['ch4', 'ash3c', 'warden'] else 'false' }}"
        node_zone: "{{ 'server' if inventory_hostname in ['ch4', 'ash3c', 'warden'] else 'client' }}"
    - name: Generate Consul configuration file
      template:
        src: consul.j2
        dest: "{{ consul_config_dir }}/consul.hcl"
        owner: consul
        group: consul
        mode: '0644'
        backup: yes
      vars:
        node_name: "{{ inventory_hostname }}"
        bind_addr: "{{ ansible_host }}"
        node_zone: "{{ node_zone }}"
        ui_enabled: "{{ ui_enabled }}"
    - name: Validate Consul configuration file
      command: consul validate {{ consul_config_dir }}/consul.hcl
      register: consul_validate
      failed_when: consul_validate.rc != 0
    - name: Restart Consul service
      systemd:
        name: "{{ consul_service_name }}"
        state: restarted
        enabled: yes
    - name: Wait for Consul to start
      wait_for:
        port: 8500
        host: "{{ ansible_host }}"
        timeout: 30
    - name: Check Consul service status
      systemd:
        name: "{{ consul_service_name }}"
      register: consul_status
    - name: Show deployment result
      debug:
        msg: "{{ inventory_hostname }} ({{ node_type }}) Consul service status: {{ consul_status.status.ActiveState }}"

View File

@@ -0,0 +1,200 @@
#!/bin/bash
# Batch deployment script for Consul configurations
set -e
CONSUL_DIR="/root/mgmt/infrastructure/consul"
BASELINE_DIR="$CONSUL_DIR/baseline"
DEPLOYED_DIR="$CONSUL_DIR/deployed"
LOGS_DIR="$CONSUL_DIR/logs"
# Node-to-IP address mapping
declare -A NODE_IPS
NODE_IPS[ch4]="100.117.106.136"
NODE_IPS[ash3c]="100.116.80.94"
NODE_IPS[warden]="100.122.197.112"
NODE_IPS[ash1d]="100.98.209.50"
NODE_IPS[ash2e]="100.98.209.51"
NODE_IPS[ch2]="100.117.106.135"
NODE_IPS[ch3]="100.117.106.137"
NODE_IPS[de]="100.98.209.52"
NODE_IPS[onecloud1]="100.98.209.53"
NODE_IPS[semaphore]="100.98.209.54"
NODE_IPS[browser]="100.116.112.45"
NODE_IPS[hcp1]="100.116.112.46"
NODE_IPS[influxdb]="100.116.112.47"
NODE_IPS[brother]="100.116.112.48"
# Server node list
SERVER_NODES=("ch4" "ash3c" "warden")
CLIENT_NODES=("ash1d" "ash2e" "ch2" "ch3" "de" "onecloud1" "semaphore" "browser" "hcp1" "influxdb")
# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log() {
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
}
error() {
echo -e "${RED}[ERROR]${NC} $1" >&2
}
success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
# Create required directories
create_dirs() {
mkdir -p "$LOGS_DIR" "$DEPLOYED_DIR"
}
# Generate a node's configuration
generate_config() {
local node=$1
local node_type=$2
local bind_addr=${NODE_IPS[$node]}
if [ -z "$bind_addr" ]; then
error "未找到节点 $node 的IP地址"
return 1
fi
local template_file
if [ "$node_type" = "server" ]; then
template_file="$BASELINE_DIR/consul-server.hcl"
else
template_file="$BASELINE_DIR/consul-client.hcl"
fi
local output_file="$DEPLOYED_DIR/${node}-consul.hcl"
log "生成 $node 的Consul配置"
# 替换模板变量
sed "s/{{NODE_NAME}}/$node/g; s/{{BIND_ADDR}}/$bind_addr/g; s/{{ENCRYPT_KEY}}/placeholder/g" "$template_file" > "$output_file"
success "配置生成完成: $output_file"
}
# Deploy configuration to a node
deploy_config() {
local node=$1
local config_file="$DEPLOYED_DIR/${node}-consul.hcl"
log "部署 $node 的Consul配置"
# 备份现有配置
sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ben@"$node.tailnet-68f9.ts.net" "
if [ -f /etc/consul.d/consul.hcl ]; then
echo '3131' | sudo -S cp /etc/consul.d/consul.hcl /etc/consul.d/consul.hcl.backup.\$(date +%Y%m%d_%H%M%S)
fi
" 2>/dev/null || warning "无法备份 $node 的现有配置"
# 上传新配置
sshpass -p '3131' scp -o StrictHostKeyChecking=no -o ConnectTimeout=10 "$config_file" ben@"$node.tailnet-68f9.ts.net":/tmp/consul-new.hcl
# 替换配置文件
sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ben@"$node.tailnet-68f9.ts.net" "
echo '3131' | sudo -S mkdir -p /etc/consul.d
echo '3131' | sudo -S cp /tmp/consul-new.hcl /etc/consul.d/consul.hcl
echo '3131' | sudo -S chown consul:consul /etc/consul.d/consul.hcl
echo '3131' | sudo -S chmod 644 /etc/consul.d/consul.hcl
rm -f /tmp/consul-new.hcl
"
success "配置部署完成: $node"
}
# 重启Consul服务
restart_consul() {
local node=$1
log "重启 $node 的Consul服务"
sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ben@"$node.tailnet-68f9.ts.net" "
echo '3131' | sudo -S systemctl restart consul
sleep 3
echo '3131' | sudo -S systemctl status consul --no-pager
"
success "Consul服务重启完成: $node"
}
# 主函数
main() {
local target_node=${1:-"all"}
log "开始批量部署Consul配置"
log "目标节点: $target_node"
create_dirs
# 处理服务器节点
if [ "$target_node" = "all" ] || [ "$target_node" = "servers" ]; then
log "处理服务器节点"
for node in "${SERVER_NODES[@]}"; do
generate_config "$node" "server"
deploy_config "$node"
restart_consul "$node"
done
fi
# Handle client nodes
if [ "$target_node" = "all" ] || [ "$target_node" = "clients" ]; then
log "Processing client nodes"
for node in "${CLIENT_NODES[@]}"; do
generate_config "$node" "client"
deploy_config "$node"
restart_consul "$node"
done
fi
# Handle a specific node
if [ "$target_node" != "all" ] && [ "$target_node" != "servers" ] && [ "$target_node" != "clients" ]; then
local node_type="client"
for server_node in "${SERVER_NODES[@]}"; do
if [ "$target_node" = "$server_node" ]; then
node_type="server"
break
fi
done
generate_config "$target_node" "$node_type"
deploy_config "$target_node"
restart_consul "$target_node"
fi
success "Consul配置批量部署完成"
}
# 显示帮助
show_help() {
echo "使用方法: $0 [节点名|all|servers|clients]"
echo ""
echo "参数:"
echo " all - 部署所有节点 (默认)"
echo " servers - 只部署服务器节点"
echo " clients - 只部署客户端节点"
echo " 节点名 - 部署特定节点"
echo ""
echo "示例:"
echo " $0 # 部署所有节点"
echo " $0 servers # 只部署服务器节点"
echo " $0 ch4 # 只部署ch4节点"
}
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
show_help
exit 0
fi
main "$@"

View File

@@ -0,0 +1,6 @@
{
"node_name": "ash1d",
"bind_addr": "100.81.26.3",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash1d"
bind_addr = "100.81.26.3"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ash1d-service"
port = 8080
tags = ["ash1d", "client"]
}
# Health check
checks {
name = "ash1d-health"
tcp = "100.81.26.3:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "ash2e",
"bind_addr": "100.81.26.4",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash2e"
bind_addr = "100.81.26.4"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ash2e-service"
port = 8080
tags = ["ash2e", "client"]
}
# Health check
checks {
name = "ash2e-health"
tcp = "100.81.26.4:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "ash3c",
"bind_addr": "100.116.80.94",
"node_zone": "server",
"ui_enabled": true
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash3c"
bind_addr = "100.116.80.94"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "server"
}
# UI configuration
ui_config {
enabled = true
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ash3c-service"
port = 8080
tags = ["ash3c", "client"]
}
# Health check
checks {
name = "ash3c-health"
tcp = "100.116.80.94:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "browser",
"bind_addr": "100.116.112.45",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "browser"
bind_addr = "100.116.112.45"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "browser-service"
port = 8080
tags = ["browser", "client"]
}
# Health check
checks {
name = "browser-health"
tcp = "100.116.112.45:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "ch2",
"bind_addr": "100.117.106.135",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch2"
bind_addr = "100.117.106.135"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ch2-service"
port = 8080
tags = ["ch2", "client"]
}
# Health check
checks {
name = "ch2-health"
tcp = "100.117.106.135:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "ch3",
"bind_addr": "100.117.106.137",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch3"
bind_addr = "100.117.106.137"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ch3-service"
port = 8080
tags = ["ch3", "client"]
}
# Health check
checks {
name = "ch3-health"
tcp = "100.117.106.137:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "ch4",
"bind_addr": "100.117.106.134",
"node_zone": "server",
"ui_enabled": true
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch4"
bind_addr = "100.117.106.134"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "server"
}
# UI configuration
ui_config {
enabled = true
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ch4-service"
port = 8080
tags = ["ch4", "client"]
}
# Health check
checks {
name = "ch4-health"
tcp = "100.117.106.134:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "de",
"bind_addr": "100.98.209.52",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "de"
bind_addr = "100.98.209.52"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "de-service"
port = 8080
tags = ["de", "client"]
}
# Health check
checks {
name = "de-health"
tcp = "100.98.209.52:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "hcp1",
"bind_addr": "100.116.112.46",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "hcp1"
bind_addr = "100.116.112.46"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "hcp1-service"
port = 8080
tags = ["hcp1", "client"]
}
# Health check
checks {
name = "hcp1-health"
tcp = "100.116.112.46:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "influxdb",
"bind_addr": "100.116.112.47",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "influxdb"
bind_addr = "100.116.112.47"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "influxdb-service"
port = 8080
tags = ["influxdb", "client"]
}
# Health check
checks {
name = "influxdb-health"
tcp = "100.116.112.47:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "onecloud1",
"bind_addr": "100.98.209.53",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "onecloud1"
bind_addr = "100.98.209.53"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "onecloud1-service"
port = 8080
tags = ["onecloud1", "client"]
}
# Health check
checks {
name = "onecloud1-health"
tcp = "100.98.209.53:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "semaphore",
"bind_addr": "100.98.209.54",
"node_zone": "client",
"ui_enabled": false
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "semaphore"
bind_addr = "100.98.209.54"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "semaphore-service"
port = 8080
tags = ["semaphore", "client"]
}
# Health check
checks {
name = "semaphore-health"
tcp = "100.98.209.54:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,6 @@
{
"node_name": "warden",
"bind_addr": "100.122.197.112",
"node_zone": "server",
"ui_enabled": true
}

View File

@@ -0,0 +1,81 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "warden"
bind_addr = "100.122.197.112"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "server"
}
# UI configuration
ui_config {
enabled = true
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "warden-service"
port = 8080
tags = ["warden", "client"]
}
# Health check
checks {
name = "warden-health"
tcp = "100.122.197.112:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt

View File

@@ -0,0 +1,64 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "{{ node_name }}"
bind_addr = "{{ bind_addr }}"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136:8301", # ch4 (Korea)
"100.122.197.112:8301", # warden (Beijing)
"100.116.80.94:8301" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "{{ node_zone }}"
}
# UI configuration
ui_config {
enabled = {{ ui_enabled|lower }}
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7

View File

@@ -0,0 +1,142 @@
#!/bin/bash
# Jinja2 template test script
set -e
TEMPLATE_FILE="infrastructure/consul/baseline/consul.j2"
OUTPUT_DIR="infrastructure/consul/jinja2-output"
# Node configuration
declare -A NODE_CONFIGS
NODE_CONFIGS[ch4]="100.117.106.134:server:true"
NODE_CONFIGS[ash3c]="100.116.80.94:server:true"
NODE_CONFIGS[warden]="100.122.197.112:server:true"
NODE_CONFIGS[ash1d]="100.81.26.3:client:false"
NODE_CONFIGS[ash2e]="100.81.26.4:client:false"
NODE_CONFIGS[ch2]="100.117.106.135:client:false"
NODE_CONFIGS[ch3]="100.117.106.137:client:false"
NODE_CONFIGS[de]="100.98.209.52:client:false"
NODE_CONFIGS[onecloud1]="100.98.209.53:client:false"
NODE_CONFIGS[semaphore]="100.98.209.54:client:false"
NODE_CONFIGS[browser]="100.116.112.45:client:false"
NODE_CONFIGS[hcp1]="100.116.112.46:client:false"
NODE_CONFIGS[influxdb]="100.116.112.47:client:false"
# Colored output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m'
log() {
echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1"
}
success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Create the output directory
mkdir -p "$OUTPUT_DIR"
# Test the Jinja2 template
test_jinja2_template() {
local node=$1
local config=${NODE_CONFIGS[$node]}
if [ -z "$config" ]; then
error "未找到节点 $node 的配置"
return 1
fi
# Parse the configuration
IFS=':' read -r bind_addr node_zone ui_enabled <<< "$config"
log "Testing node: $node"
log "Bind address: $bind_addr"
log "Node zone: $node_zone"
log "UI enabled: $ui_enabled"
# Create the JSON configuration file
local json_file="$OUTPUT_DIR/${node}-config.json"
cat > "$json_file" << JSON
{
"node_name": "$node",
"bind_addr": "$bind_addr",
"node_zone": "$node_zone",
"ui_enabled": $ui_enabled
}
JSON
# Render the template with Jinja2
local output_file="$OUTPUT_DIR/${node}-consul.hcl"
if command -v jinja2 >/dev/null 2>&1; then
jinja2 "$TEMPLATE_FILE" "$json_file" > "$output_file"
else
# Fall back to a Python one-liner
python3 -c "
import json
from jinja2 import Template
with open('$json_file', 'r') as f:
data = json.load(f)
with open('$TEMPLATE_FILE', 'r') as f:
template = Template(f.read())
with open('$output_file', 'w') as f:
f.write(template.render(**data))
"
fi
success "Jinja2模板渲染完成: $output_file"
# 显示前10行验证
echo "--- 验证前10行 ---"
head -10 "$output_file"
echo "--- 验证完成 ---"
echo ""
}
# Main entry point
main() {
local target_node=${1:-"ch4"}
log "Starting Jinja2 template test"
log "Target: $target_node"
if [ "$target_node" = "all" ]; then
log "Testing all nodes"
for node in "${!NODE_CONFIGS[@]}"; do
test_jinja2_template "$node"
done
else
test_jinja2_template "$target_node"
fi
success "Jinja2模板测试完成"
log "输出目录: $OUTPUT_DIR"
}
# Show help
show_help() {
echo "Usage: $0 [node|all]"
echo ""
echo "Examples:"
echo " $0 ch4 # test the ch4 node"
echo " $0 all # test all nodes"
echo ""
echo "Supported nodes: ${!NODE_CONFIGS[@]}"
}
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
show_help
exit 0
fi
main "$@"

View File

@@ -0,0 +1,84 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ash1d"
bind_addr = "100.81.26.3"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "client"
}
# UI configuration
ui_config {
enabled = false
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ash1d-service"
port = 8080
tags = ["ash1d", "client"]
}
# Health check
checks {
name = "ash1d-health"
tcp = "100.81.26.3:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt
auto_encrypt {
allow_tls = true
}

View File

@@ -0,0 +1,84 @@
# Consul client configuration template
# Applies to all 13 nodes; the servers are managed by Nomad
# Basic configuration
datacenter = "pacific"
data_dir = "/opt/consul/data"
log_level = "INFO"
node_name = "ch4"
bind_addr = "100.117.106.134"
# Client mode (the servers are managed by Nomad)
server = false
# Connect to the Consul server cluster
retry_join = [
"100.117.106.136", # ch4 (Korea)
"100.122.197.112", # warden (Beijing)
"100.116.80.94" # ash3c (US)
]
# Performance tuning
performance {
raft_multiplier = 5
}
# Port configuration
ports {
grpc = 8502
http = 8500
dns = 8600
}
# Enable the Connect service mesh
connect {
enabled = true
}
# Cache configuration
cache {
entry_fetch_max_burst = 42
entry_fetch_rate = 30
}
# Node metadata
node_meta = {
region = "pacific"
zone = "server"
}
# UI configuration
ui_config {
enabled = true
}
# ACL configuration
acl = {
enabled = false
default_policy = "allow"
}
# Logging configuration
log_file = "/var/log/consul/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 7
# Service discovery
services {
name = "ch4-service"
port = 8080
tags = ["ch4", "client"]
}
# Health check
checks {
name = "ch4-health"
tcp = "100.117.106.134:8080"
interval = "10s"
timeout = "3s"
}
# Auto encrypt
auto_encrypt {
allow_tls = true
}

View File

@@ -0,0 +1,109 @@
#!/bin/bash
# Consul template variable substitution test script
set -e
TEMPLATE_FILE="infrastructure/consul/baseline/consul.hcl"
OUTPUT_DIR="infrastructure/consul/test-output"
# Node configuration
declare -A NODE_CONFIGS
NODE_CONFIGS[ch4]="100.117.106.134:server:true"
NODE_CONFIGS[ash3c]="100.116.80.94:server:true"
NODE_CONFIGS[warden]="100.122.197.112:server:true"
NODE_CONFIGS[ash1d]="100.81.26.3:client:false"
NODE_CONFIGS[ash2e]="100.81.26.4:client:false"
NODE_CONFIGS[ch2]="100.117.106.135:client:false"
NODE_CONFIGS[ch3]="100.117.106.137:client:false"
NODE_CONFIGS[de]="100.98.209.52:client:false"
NODE_CONFIGS[onecloud1]="100.98.209.53:client:false"
NODE_CONFIGS[semaphore]="100.98.209.54:client:false"
NODE_CONFIGS[browser]="100.116.112.45:client:false"
NODE_CONFIGS[hcp1]="100.116.112.46:client:false"
NODE_CONFIGS[influxdb]="100.116.112.47:client:false"
# Colored output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'
log() {
echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1"
}
success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
# Create the output directory
mkdir -p "$OUTPUT_DIR"
# Test template substitution
test_template_replacement() {
local node=$1
local config=${NODE_CONFIGS[$node]}
if [ -z "$config" ]; then
echo "错误: 未找到节点 $node 的配置"
return 1
fi
# Parse the configuration
IFS=':' read -r bind_addr node_zone ui_enabled <<< "$config"
log "Testing node: $node"
log "Bind address: $bind_addr"
log "Node zone: $node_zone"
log "UI enabled: $ui_enabled"
# Substitute template variables
local output_file="$OUTPUT_DIR/${node}-consul.hcl"
sed "s/{{NODE_NAME}}/$node/g; s/{{BIND_ADDR}}/$bind_addr/g; s/{{NODE_ZONE}}/$node_zone/g; s/{{UI_ENABLED}}/$ui_enabled/g" "$TEMPLATE_FILE" > "$output_file"
success "模板替换完成: $output_file"
# 显示前10行验证
echo "--- 验证前10行 ---"
head -10 "$output_file"
echo "--- 验证完成 ---"
echo ""
}
# Main entry point
main() {
local target_node=${1:-"ch4"}
log "Starting template variable substitution test"
log "Target: $target_node"
if [ "$target_node" = "all" ]; then
log "Testing all nodes"
for node in "${!NODE_CONFIGS[@]}"; do
test_template_replacement "$node"
done
else
test_template_replacement "$target_node"
fi
success "模板测试完成!"
log "输出目录: $OUTPUT_DIR"
}
# Show help
show_help() {
echo "Usage: $0 [node|all]"
echo ""
echo "Examples:"
echo " $0 ch4 # test the ch4 node"
echo " $0 all # test all nodes"
echo ""
echo "Supported nodes: ${!NODE_CONFIGS[@]}"
}
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
show_help
exit 0
fi
main "$@"

View File

@@ -0,0 +1,39 @@
auth_enabled: false
server:
  http_listen_port: 3100
  grpc_listen_port: 9096
common:
  path_prefix: /var/lib/loki
  storage:
    filesystem:
      chunks_directory: /var/lib/loki/chunks
      rules_directory: /var/lib/loki/rules
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory
query_scheduler:
  max_outstanding_requests_per_tenant: 2048
schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v12
      index:
        prefix: index_
        period: 24h
limits_config:
  allow_structured_metadata: false
ruler:
  alertmanager_url: http://localhost:9093
analytics:
  reporting_enabled: false
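
A quick way to confirm this instance is up and receiving logs; the host is taken from the Promtail client URL used elsewhere in this commit:

curl -fsS http://influxdb.tailnet-68f9.ts.net:3100/ready
# List hostnames that have shipped logs so far
curl -fsS "http://influxdb.tailnet-68f9.ts.net:3100/loki/api/v1/label/hostname/values"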

View File

@@ -0,0 +1,5 @@
# Node Exporter configuration file
# The defaults are sufficient; key parameters are passed on the command line
# Add custom configuration here if needed
# Currently: defaults plus command-line flags

View File

@@ -0,0 +1,61 @@
# Prometheus configuration - monitoring the Nomad cluster
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'nomad-cluster'
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    scrape_interval: 5s
    scrape_timeout: 5s
    static_configs:
      - targets: ['localhost:9090']
  # Node Exporter - client nodes
  - job_name: 'node-clients'
    static_configs:
      - targets:
          - 'ch4.tailnet-68f9.ts.net:9100'
          - 'ash3c.tailnet-68f9.ts.net:9100'
          - 'warden.tailnet-68f9.ts.net:9100'
          - 'hcp1.tailnet-68f9.ts.net:9100'
          - 'browser.tailnet-68f9.ts.net:9100'
  # Node Exporter - server nodes
  - job_name: 'node-servers'
    static_configs:
      - targets:
          - 'ash2e.tailnet-68f9.ts.net:9100'
          - 'ch2.tailnet-68f9.ts.net:9100'
          - 'ch3.tailnet-68f9.ts.net:9100'
          - 'onecloud1.tailnet-68f9.ts.net:9100'
  # Nomad cluster monitoring
  - job_name: 'nomad'
    static_configs:
      - targets:
          - 'ash1.tailnet-68f9.ts.net:4646'
          - 'ash2.tailnet-68f9.ts.net:4646'
          - 'onecloud1.tailnet-68f9.ts.net:4646'
  # Consul cluster monitoring
  - job_name: 'consul'
    static_configs:
      - targets:
          - 'ash1.tailnet-68f9.ts.net:8500'
          - 'ash2.tailnet-68f9.ts.net:8500'
          - 'onecloud1.tailnet-68f9.ts.net:8500'
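
Before reloading, the file can be checked with promtool; since the Nomad job below starts Prometheus with --web.enable-lifecycle, a running server can also be reloaded over HTTP:

promtool check config /etc/prometheus/prometheus.yml
curl -X POST http://localhost:9090/-/reload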

View File

@@ -0,0 +1,39 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /opt/promtail/data/positions.yaml
clients:
- url: http://influxdb.tailnet-68f9.ts.net:3100/loki/api/v1/push
scrape_configs:
- job_name: journal
journal:
max_age: 12h
labels:
job: systemd-journal
relabel_configs:
- source_labels: ['__journal__systemd_unit']
target_label: 'unit'
- source_labels: ['__journal_priority_keyword']
target_label: 'level'
- source_labels: ['__journal__hostname']
target_label: 'hostname'
- job_name: syslog
static_configs:
- targets:
- localhost
labels:
job: syslog
__path__: /var/log/syslog
- job_name: daemon
static_configs:
- targets:
- localhost
labels:
job: daemon
__path__: /var/log/daemon.log
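
On any node, Promtail's own HTTP endpoints (on the http_listen_port above) confirm that the journal and file targets are being tailed:

curl -fsS http://localhost:9080/ready
curl -fsS http://localhost:9080/targets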

View File

@@ -0,0 +1,23 @@
server:
http_listen_port: 9082
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://influxdb.tailnet-68f9.ts.net:3100/loki/api/v1/push
scrape_configs:
- job_name: journal
journal:
max_age: 12h
labels:
job: systemd-journal
relabel_configs:
- source_labels: ['__journal__systemd_unit']
target_label: 'unit'
- source_labels: ['__journal_priority_keyword']
target_label: 'level'
- source_labels: ['__journal__hostname']
target_label: 'hostname'

View File

@@ -0,0 +1,392 @@
{
"dashboard": {
"id": null,
"title": "Loki 日志热点图 Demo",
"tags": ["loki", "heatmap", "demo"],
"style": "dark",
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "日志级别热点图 (类似GitHub贡献图)",
"type": "heatmap",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"expr": "sum by (level) (rate({job=\"systemd-journal\"}[5m]))",
"refId": "A",
"legendFormat": "{{level}}"
}
],
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"scaleDistribution": {
"type": "linear"
}
},
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "red",
"value": 10
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"options": {
"calculate": false,
"cellGap": 2,
"cellValues": {
"unit": "short"
},
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "spectrum",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"show": true,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "short"
}
}
},
{
"id": 2,
"title": "节点日志密度热点图",
"type": "heatmap",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"expr": "sum by (hostname) (rate({job=\"systemd-journal\"}[5m]))",
"refId": "A",
"legendFormat": "{{hostname}}"
}
],
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
}
},
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 5
},
{
"color": "red",
"value": 20
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"options": {
"calculate": false,
"cellGap": 2,
"cellValues": {
"unit": "short"
},
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "spectrum",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"show": true,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "short"
}
}
},
{
"id": 3,
"title": "关键服务日志热点图 (Nomad/Consul/Traefik)",
"type": "heatmap",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"expr": "sum by (unit) (rate({job=\"systemd-journal\", unit=~\"nomad|consul|traefik\"}[5m]))",
"refId": "A",
"legendFormat": "{{unit}}"
}
],
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
}
},
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "red",
"value": 5
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 8
},
"options": {
"calculate": false,
"cellGap": 2,
"cellValues": {
"unit": "short"
},
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "spectrum",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"show": true,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "short"
}
}
},
{
"id": 4,
"title": "ERROR/CRIT 级别日志热点图 (黑匣子重点)",
"type": "heatmap",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"expr": "sum by (hostname) (rate({job=\"systemd-journal\", level=~\"error|crit\"}[5m]))",
"refId": "A",
"legendFormat": "{{hostname}} - {{level}}"
}
],
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
}
},
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 0.1
},
{
"color": "red",
"value": 1
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"options": {
"calculate": false,
"cellGap": 2,
"cellValues": {
"unit": "short"
},
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "spectrum",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"show": true,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "short"
}
}
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"templating": {
"list": []
},
"annotations": {
"list": []
},
"refresh": "30s",
"schemaVersion": 27,
"version": 1
}
}
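
The JSON above is already wrapped in the {"dashboard": ...} envelope that Grafana's import endpoint expects, so it can be pushed via the API; the file name and token below are placeholders:

curl -fsS -X POST "http://grafana.tailnet-68f9.ts.net:3000/api/dashboards/db" \
  -H "Authorization: Bearer $TOKEN" \
  -H "Content-Type: application/json" \
  -d @loki-heatmap-demo.json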

View File

@@ -0,0 +1,59 @@
---
- name: Deploy Promtail to all nodes
  hosts: all
  become: yes
  vars:
    promtail_config_path: /etc/promtail/promtail.yml
    promtail_data_path: /opt/promtail/data
  tasks:
    - name: Install promtail
      apt:
        name: promtail
        state: present
        update_cache: yes
      ignore_errors: yes
    - name: Create promtail user and group
      user:
        name: promtail
        system: yes
        shell: /bin/false
        home: /opt/promtail
        create_home: yes
    - name: Create promtail data directory
      file:
        path: "{{ promtail_data_path }}"
        state: directory
        owner: promtail
        group: promtail
        mode: '0755'
    - name: Copy promtail configuration
      template:
        src: promtail-config.yaml
        dest: "{{ promtail_config_path }}"
        owner: promtail
        group: promtail
        mode: '0644'
      notify: restart promtail
    - name: Add promtail user to adm group (for syslog access)
      user:
        name: promtail
        groups: adm
        append: yes
    - name: Enable and start promtail service
      systemd:
        name: promtail
        enabled: yes
        state: started
        daemon_reload: yes
  handlers:
    - name: restart promtail
      systemd:
        name: promtail
        state: restarted
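
A hypothetical post-run check (inventory path is an assumption): confirm the promtail unit is active everywhere the playbook ran:

ansible all -i inventory.ini -b -a "systemctl is-active promtail"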

View File

@@ -0,0 +1,258 @@
job "monitoring-stack" {
datacenters = ["dc1"]
type = "service"
# Grafana service group
group "grafana" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "influxdb"
}
volume "grafana-data" {
type = "host"
read_only = false
source = "grafana-data"
}
network {
port "http" {
static = 3000
to = 3000
}
}
task "grafana" {
driver = "exec"
volume_mount {
volume = "grafana-data"
destination = "/opt/grafana/data"
read_only = false
}
config {
command = "/usr/sbin/grafana-server"
args = [
"--config", "/etc/grafana/grafana.ini",
"--homepath", "/usr/share/grafana",
"cfg:default.paths.data=/opt/grafana/data",
"cfg:default.paths.logs=/var/log/grafana",
"cfg:default.paths.plugins=/var/lib/grafana/plugins",
"cfg:default.paths.provisioning=/etc/grafana/provisioning"
]
}
resources {
cpu = 300
memory = 512
}
env {
GF_SECURITY_ADMIN_PASSWORD = "admin123"
GF_INSTALL_PLUGINS = "grafana-piechart-panel"
GF_SERVER_DOMAIN = "grafana.tailnet-68f9.ts.net"
GF_SERVER_ROOT_URL = "http://grafana.tailnet-68f9.ts.net:3000"
}
service {
name = "grafana"
port = "http"
tags = [
"grafana",
"monitoring",
"dashboard"
]
check {
type = "http"
path = "/api/health"
interval = "30s"
timeout = "5s"
}
}
}
}
# Prometheus service group
group "prometheus" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "influxdb"
}
volume "prometheus-data" {
type = "host"
read_only = false
source = "prometheus-data"
}
network {
port "http" {
static = 9090
to = 9090
}
}
task "prometheus" {
driver = "exec"
volume_mount {
volume = "prometheus-data"
destination = "/opt/prometheus/data"
read_only = false
}
config {
command = "prometheus"
args = [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/opt/prometheus/data",
"--web.console.libraries=/usr/share/prometheus/console_libraries",
"--web.console.templates=/usr/share/prometheus/consoles",
"--storage.tsdb.retention.time=15d",
"--web.enable-lifecycle"
]
}
resources {
cpu = 300
memory = 512
}
service {
name = "prometheus"
port = "http"
tags = [
"prometheus",
"monitoring",
"metrics"
]
check {
type = "http"
path = "/-/healthy"
interval = "30s"
timeout = "5s"
}
}
}
}
# Loki service group
group "loki" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "influxdb"
}
volume "loki-data" {
type = "host"
read_only = false
source = "loki-data"
}
network {
port "http" {
static = 3100
to = 3100
}
}
task "loki" {
driver = "exec"
volume_mount {
volume = "loki-data"
destination = "/opt/loki/data"
read_only = false
}
template {
data = <<EOF
auth_enabled: false
server:
http_listen_port: 3100
grpc_listen_port: 9096
common:
path_prefix: /opt/loki/data
storage:
filesystem:
chunks_directory: /opt/loki/data/chunks
rules_directory: /opt/loki/data/rules
replication_factor: 1
ring:
instance_addr: 127.0.0.1
kvstore:
store: inmemory
query_scheduler:
max_outstanding_requests_per_tenant: 2048
schema_config:
configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v12
index:
prefix: index_
period: 24h
limits_config:
allow_structured_metadata: false
ruler:
alertmanager_url: http://localhost:9093
analytics:
reporting_enabled: false
EOF
destination = "local/config.yml"
}
config {
command = "loki"
args = [
"-config.file=local/config.yml"
]
}
resources {
cpu = 300
memory = 512
}
service {
name = "loki"
port = "http"
tags = [
"loki",
"monitoring",
"logs"
]
check {
type = "http"
path = "/ready"
interval = "30s"
timeout = "5s"
}
}
}
}
}
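
The job can be validated and submitted with the standard Nomad CLI; a minimal sketch (the .nomad filename is an assumption):

# Validate, plan, and deploy the monitoring stack.
nomad job validate monitoring-stack.nomad
nomad job plan monitoring-stack.nomad
nomad job run monitoring-stack.nomad
nomad job status monitoring-stack   # all three groups should land on the influxdb node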

View File

@@ -7,32 +7,63 @@ rule_files:
# - "second_rules.yml"
scrape_configs:
# Prometheus self-monitoring
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'openfaas'
static_configs:
- targets: ['gateway:8080']
metrics_path: /metrics
scrape_interval: 15s
scrape_timeout: 10s
- job_name: 'nats'
static_configs:
- targets: ['nats:8222']
metrics_path: /metrics
scrape_interval: 15s
scrape_timeout: 10s
# Node Exporter monitoring - all nodes
- job_name: 'node-exporter'
static_configs:
- targets: ['node-exporter:9100']
scrape_interval: 15s
scrape_timeout: 10s
- targets:
- 'semaphore.tailnet-68f9.ts.net:9100'
- 'ash1d.tailnet-68f9.ts.net:9100'
- 'ash2e.tailnet-68f9.ts.net:9100'
- 'ash3c.tailnet-68f9.ts.net:9100'
- 'ch2.tailnet-68f9.ts.net:9100'
- 'ch3.tailnet-68f9.ts.net:9100'
- 'ch4.tailnet-68f9.ts.net:9100'
- 'de.tailnet-68f9.ts.net:9100'
- 'hcp1.tailnet-68f9.ts.net:9100'
- 'influxdb.tailnet-68f9.ts.net:9100'
- 'onecloud1.tailnet-68f9.ts.net:9100'
- 'warden.tailnet-68f9.ts.net:9100'
- 'browser.tailnet-68f9.ts.net:9100'
- job_name: 'cadvisor'
# Consul monitoring
- job_name: 'consul'
static_configs:
- targets: ['cadvisor:8080']
scrape_interval: 15s
scrape_timeout: 10s
- targets:
- 'ch4.tailnet-68f9.ts.net:8500'
- 'ash3c.tailnet-68f9.ts.net:8500'
- 'warden.tailnet-68f9.ts.net:8500'
# Nomad monitoring
- job_name: 'nomad'
static_configs:
- targets:
- 'semaphore.tailnet-68f9.ts.net:4646'
- 'ash1d.tailnet-68f9.ts.net:4646'
- 'ash2e.tailnet-68f9.ts.net:4646'
- 'ch2.tailnet-68f9.ts.net:4646'
- 'ch3.tailnet-68f9.ts.net:4646'
- 'onecloud1.tailnet-68f9.ts.net:4646'
- 'de.tailnet-68f9.ts.net:4646'
# Vault monitoring
- job_name: 'vault'
static_configs:
- targets:
- 'master.tailnet-68f9.ts.net:8200'
- 'ash3c.tailnet-68f9.ts.net:8200'
- 'hcp1.tailnet-68f9.ts.net:8200'
# InfluxDB monitoring
- job_name: 'influxdb'
static_configs:
- targets: ['influxdb1.tailnet-68f9.ts.net:8086']
# Traefik monitoring
- job_name: 'traefik'
static_configs:
- targets: ['hcp1.tailnet-68f9.ts.net:8080']
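
Since the Prometheus task above starts with --web.enable-lifecycle, the merged scrape config can be checked and hot-reloaded without restarting the allocation. A sketch, assuming the rendered config sits at /etc/prometheus/prometheus.yml:

# Validate the scrape config, trigger a live reload, then spot-check target health.
promtool check config /etc/prometheus/prometheus.yml
curl -X POST http://influxdb.tailnet-68f9.ts.net:9090/-/reload
curl -s http://influxdb.tailnet-68f9.ts.net:9090/api/v1/targets | head -c 500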

View File

@@ -0,0 +1,89 @@
# Nomad client configuration - ash3c
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ash3c"
bind_addr = "ash3c.tailnet-68f9.ts.net"
addresses {
http = "ash3c.tailnet-68f9.ts.net"
rpc = "ash3c.tailnet-68f9.ts.net"
serf = "ash3c.tailnet-68f9.ts.net"
}
advertise {
http = "ash3c.tailnet-68f9.ts.net:4646"
rpc = "ash3c.tailnet-68f9.ts.net:4647"
serf = "ash3c.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
host_volume "vault-storage" {
path = "/opt/nomad/data/vault-storage"
read_only = false
}
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "100.116.80.94:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
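
After installing this file as /etc/nomad.d/nomad.hcl on ash3c, registration can be confirmed from the node itself; a sketch:

# Restart the agent with the new client config and verify it joins the cluster.
sudo systemctl restart nomad
nomad node status                                   # ash3c should show up as ready
nomad node status -self -verbose | grep -i driver   # exec, raw_exec, and podman should be detected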

View File

@@ -0,0 +1,84 @@
# Nomad client configuration - brother
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "brother"
bind_addr = "brother.tailnet-68f9.ts.net"
addresses {
http = "brother.tailnet-68f9.ts.net"
rpc = "brother.tailnet-68f9.ts.net"
serf = "brother.tailnet-68f9.ts.net"
}
advertise {
http = "brother.tailnet-68f9.ts.net:4646"
rpc = "brother.tailnet-68f9.ts.net:4647"
serf = "brother.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,84 @@
# Nomad client configuration - browser
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "browser"
bind_addr = "browser.tailnet-68f9.ts.net"
addresses {
http = "browser.tailnet-68f9.ts.net"
rpc = "browser.tailnet-68f9.ts.net"
serf = "browser.tailnet-68f9.ts.net"
}
advertise {
http = "browser.tailnet-68f9.ts.net:4646"
rpc = "browser.tailnet-68f9.ts.net:4647"
serf = "browser.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,89 @@
# Nomad client configuration - ch4
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ch4"
bind_addr = "ch4.tailnet-68f9.ts.net"
addresses {
http = "ch4.tailnet-68f9.ts.net"
rpc = "ch4.tailnet-68f9.ts.net"
serf = "ch4.tailnet-68f9.ts.net"
}
advertise {
http = "ch4.tailnet-68f9.ts.net:4646"
rpc = "ch4.tailnet-68f9.ts.net:4647"
serf = "ch4.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
host_volume "vault-storage" {
path = "/opt/nomad/data/vault-storage"
read_only = false
}
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "100.117.106.136:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,89 @@
# Nomad client configuration - hcp1
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "hcp1"
bind_addr = "hcp1.tailnet-68f9.ts.net"
addresses {
http = "hcp1.tailnet-68f9.ts.net"
rpc = "hcp1.tailnet-68f9.ts.net"
serf = "hcp1.tailnet-68f9.ts.net"
}
advertise {
http = "hcp1.tailnet-68f9.ts.net:4646"
rpc = "hcp1.tailnet-68f9.ts.net:4647"
serf = "hcp1.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
host_volume "traefik-certs" {
path = "/opt/traefik/certs"
read_only = false
}
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,99 @@
# Nomad client configuration - influxdb
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "influxdb"
bind_addr = "influxdb.tailnet-68f9.ts.net"
addresses {
http = "influxdb.tailnet-68f9.ts.net"
rpc = "influxdb.tailnet-68f9.ts.net"
serf = "influxdb.tailnet-68f9.ts.net"
}
advertise {
http = "influxdb.tailnet-68f9.ts.net:4646"
rpc = "influxdb.tailnet-68f9.ts.net:4647"
serf = "influxdb.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
host_volume "grafana-data" {
path = "/opt/grafana/data"
read_only = false
}
host_volume "prometheus-data" {
path = "/opt/prometheus/data"
read_only = false
}
host_volume "loki-data" {
path = "/opt/loki/data"
read_only = false
}
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,89 @@
# Nomad client configuration - warden
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "warden"
bind_addr = "warden.tailnet-68f9.ts.net"
addresses {
http = "warden.tailnet-68f9.ts.net"
rpc = "warden.tailnet-68f9.ts.net"
serf = "warden.tailnet-68f9.ts.net"
}
advertise {
http = "warden.tailnet-68f9.ts.net:4646"
rpc = "warden.tailnet-68f9.ts.net:4647"
serf = "warden.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = false
}
client {
enabled = true
network_interface = "tailscale0"
servers = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
host_volume "vault-storage" {
path = "/opt/nomad/data/vault-storage"
read_only = false
}
options {
"driver.raw_exec.enable" = "1"
"driver.exec.enable" = "1"
}
gc_interval = "5m"
gc_disk_usage_threshold = 80
gc_inode_usage_threshold = 70
}
plugin "nomad-driver-podman" {
config {
socket_path = "unix:///run/podman/podman.sock"
volumes {
enabled = true
}
}
}
consul {
address = "100.122.197.112:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - ash1d
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ash1d"
bind_addr = "ash1d.tailnet-68f9.ts.net"
addresses {
http = "ash1d.tailnet-68f9.ts.net"
rpc = "ash1d.tailnet-68f9.ts.net"
serf = "ash1d.tailnet-68f9.ts.net"
}
advertise {
http = "ash1d.tailnet-68f9.ts.net:4646"
rpc = "ash1d.tailnet-68f9.ts.net:4647"
serf = "ash1d.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch2.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
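
Once the agent restarts with this server config, quorum and leadership can be verified; a sketch:

# Confirm the server set is healthy after the restart.
nomad server members              # every server should report alive
nomad operator raft list-peers    # exactly one leader expected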

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - ash2e
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ash2e"
bind_addr = "ash2e.tailnet-68f9.ts.net"
addresses {
http = "ash2e.tailnet-68f9.ts.net"
rpc = "ash2e.tailnet-68f9.ts.net"
serf = "ash2e.tailnet-68f9.ts.net"
}
advertise {
http = "ash2e.tailnet-68f9.ts.net:4646"
rpc = "ash2e.tailnet-68f9.ts.net:4647"
serf = "ash2e.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch2.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - ch2
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ch2"
bind_addr = "ch2.tailnet-68f9.ts.net"
addresses {
http = "ch2.tailnet-68f9.ts.net"
rpc = "ch2.tailnet-68f9.ts.net"
serf = "ch2.tailnet-68f9.ts.net"
}
advertise {
http = "ch2.tailnet-68f9.ts.net:4646"
rpc = "ch2.tailnet-68f9.ts.net:4647"
serf = "ch2.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch2.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - ch3
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "ch3"
bind_addr = "ch3.tailnet-68f9.ts.net"
addresses {
http = "ch3.tailnet-68f9.ts.net"
rpc = "ch3.tailnet-68f9.ts.net"
serf = "ch3.tailnet-68f9.ts.net"
}
advertise {
http = "ch3.tailnet-68f9.ts.net:4646"
rpc = "ch3.tailnet-68f9.ts.net:4647"
serf = "ch3.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch2.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - de
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "de"
bind_addr = "de.tailnet-68f9.ts.net"
addresses {
http = "de.tailnet-68f9.ts.net"
rpc = "de.tailnet-68f9.ts.net"
serf = "de.tailnet-68f9.ts.net"
}
advertise {
http = "de.tailnet-68f9.ts.net:4646"
rpc = "de.tailnet-68f9.ts.net:4647"
serf = "de.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch2.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,58 @@
# Nomad server configuration - onecloud1
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "onecloud1"
bind_addr = "onecloud1.tailnet-68f9.ts.net"
addresses {
http = "onecloud1.tailnet-68f9.ts.net"
rpc = "onecloud1.tailnet-68f9.ts.net"
serf = "onecloud1.tailnet-68f9.ts.net"
}
advertise {
http = "onecloud1.tailnet-68f9.ts.net:4646"
rpc = "onecloud1.tailnet-68f9.ts.net:4647"
serf = "onecloud1.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4648",
"ash1d.tailnet-68f9.ts.net:4648",
"ash2e.tailnet-68f9.ts.net:4648",
"ch3.tailnet-68f9.ts.net:4648",
"onecloud1.tailnet-68f9.ts.net:4648",
"de.tailnet-68f9.ts.net:4648"
]
}
}
client {
enabled = false
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,64 @@
# Nomad server configuration - semaphore
datacenter = "dc1"
data_dir = "/opt/nomad/data"
plugin_dir = "/opt/nomad/plugins"
log_level = "INFO"
name = "semaphore"
bind_addr = "semaphore.tailnet-68f9.ts.net"
addresses {
http = "semaphore.tailnet-68f9.ts.net"
rpc = "semaphore.tailnet-68f9.ts.net"
serf = "semaphore.tailnet-68f9.ts.net"
}
advertise {
http = "semaphore.tailnet-68f9.ts.net:4646"
rpc = "semaphore.tailnet-68f9.ts.net:4647"
serf = "semaphore.tailnet-68f9.ts.net:4648"
}
ports {
http = 4646
rpc = 4647
serf = 4648
}
server {
enabled = true
bootstrap_expect = 3
server_join {
retry_join = [
"semaphore.tailnet-68f9.ts.net:4647",
"ash1d.tailnet-68f9.ts.net:4647",
"ash2e.tailnet-68f9.ts.net:4647",
"ch2.tailnet-68f9.ts.net:4647",
"ch3.tailnet-68f9.ts.net:4647",
"onecloud1.tailnet-68f9.ts.net:4647",
"de.tailnet-68f9.ts.net:4647"
]
}
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
vault {
enabled = false
}
telemetry {
collection_interval = "1s"
disable_hostname = false
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -0,0 +1,159 @@
job "consul-cluster-nomad" {
datacenters = ["dc1"]
type = "service"
group "consul-ch4" {
constraint {
attribute = "${node.unique.name}"
value = "ch4"
}
network {
port "http" {
static = 8500
}
port "server" {
static = 8300
}
port "serf-lan" {
static = 8301
}
port "serf-wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-bootstrap-expect=3",
"-data-dir=/opt/nomad/data/consul",
"-client=100.117.106.136",
"-bind=100.117.106.136",
"-advertise=100.117.106.136",
"-retry-join=ash3c.tailnet-68f9.ts.net:8301",
"-retry-join=warden.tailnet-68f9.ts.net:8301",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
group "consul-ash3c" {
constraint {
attribute = "${node.unique.name}"
value = "ash3c"
}
network {
port "http" {
static = 8500
}
port "server" {
static = 8300
}
port "serf-lan" {
static = 8301
}
port "serf-wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-data-dir=/opt/nomad/data/consul",
"-client=100.116.80.94",
"-bind=100.116.80.94",
"-advertise=100.116.80.94",
"-retry-join=ch4.tailnet-68f9.ts.net:8301",
"-retry-join=warden.tailnet-68f9.ts.net:8301",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
group "consul-warden" {
constraint {
attribute = "${node.unique.name}"
value = "warden"
}
network {
port "http" {
static = 8500
}
port "server" {
static = 8300
}
port "serf-lan" {
static = 8301
}
port "serf-wan" {
static = 8302
}
}
task "consul" {
driver = "exec"
config {
command = "consul"
args = [
"agent",
"-server",
"-data-dir=/opt/nomad/data/consul",
"-client=100.122.197.112",
"-bind=100.122.197.112",
"-advertise=100.122.197.112",
"-retry-join=ch4.tailnet-68f9.ts.net:8301",
"-retry-join=ash3c.tailnet-68f9.ts.net:8301",
"-ui",
"-http-port=8500",
"-server-port=8300",
"-serf-lan-port=8301",
"-serf-wan-port=8302"
]
}
resources {
cpu = 300
memory = 512
}
}
}
}
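
After the three allocations come up, cluster formation can be checked against any of the HTTP addresses above; a sketch:

# Verify the 3-node Consul server cluster (addresses taken from the job above).
consul members -http-addr=http://100.117.106.136:8500
curl -s http://100.117.106.136:8500/v1/status/leader   # prints the elected leader
curl -s http://100.117.106.136:8500/v1/status/peers    # should list all three servers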

View File

@@ -0,0 +1,249 @@
job "traefik-cloudflare-v3" {
datacenters = ["dc1"]
type = "service"
group "traefik" {
count = 1
constraint {
attribute = "${node.unique.name}"
value = "hcp1"
}
volume "traefik-certs" {
type = "host"
read_only = false
source = "traefik-certs"
}
network {
mode = "host"
port "http" {
static = 80
}
port "https" {
static = 443
}
port "traefik" {
static = 8080
}
}
task "traefik" {
driver = "exec"
config {
command = "/usr/local/bin/traefik"
args = [
"--configfile=/local/traefik.yml"
]
}
env {
CLOUDFLARE_EMAIL = "locksmithknight@gmail.com"
CLOUDFLARE_DNS_API_TOKEN = "0aPWoLaQ59l0nyL1jIVzZaEx2e41Gjgcfhn3ztJr"
CLOUDFLARE_ZONE_API_TOKEN = "0aPWoLaQ59l0nyL1jIVzZaEx2e41Gjgcfhn3ztJr"
}
volume_mount {
volume = "traefik-certs"
destination = "/opt/traefik/certs"
read_only = false
}
template {
data = <<EOF
api:
dashboard: true
insecure: true
entryPoints:
web:
address: "0.0.0.0:80"
http:
redirections:
entrypoint:
to: websecure
scheme: https
permanent: true
websecure:
address: "0.0.0.0:443"
traefik:
address: "0.0.0.0:8080"
providers:
consulCatalog:
endpoint:
address: "warden.tailnet-68f9.ts.net:8500"
scheme: "http"
watch: true
exposedByDefault: false
prefix: "traefik"
defaultRule: "Host(`{{ "{{ .Name }}" }}.git-4ta.live`)"  # escaped so Nomad's template engine passes the Traefik placeholder through unchanged
file:
filename: /local/dynamic.yml
watch: true
certificatesResolvers:
cloudflare:
acme:
email: {{ env "CLOUDFLARE_EMAIL" }}
storage: /opt/traefik/certs/acme.json
dnsChallenge:
provider: cloudflare
delayBeforeCheck: 30s
log:
level: DEBUG
EOF
destination = "local/traefik.yml"
}
template {
data = <<EOF
http:
serversTransports:
waypoint-insecure:
insecureSkipVerify: true
authentik-insecure:
insecureSkipVerify: true
middlewares:
consul-stripprefix:
stripPrefix:
prefixes:
- "/consul"
waypoint-auth:
replacePathRegex:
regex: "^/auth/token(.*)$"
replacement: "/auth/token$1"
services:
consul-cluster:
loadBalancer:
servers:
- url: "http://ch4.tailnet-68f9.ts.net:8500" # 韩国Leader
- url: "http://warden.tailnet-68f9.ts.net:8500" # 北京Follower
- url: "http://ash3c.tailnet-68f9.ts.net:8500" # 美国Follower
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
nomad-cluster:
loadBalancer:
servers:
- url: "http://ch2.tailnet-68f9.ts.net:4646" # 韩国Leader
- url: "http://ash3c.tailnet-68f9.ts.net:4646" # 美国Follower
healthCheck:
path: "/v1/status/leader"
interval: "30s"
timeout: "15s"
waypoint-cluster:
loadBalancer:
servers:
- url: "https://hcp1.tailnet-68f9.ts.net:9701" # hcp1 节点 HTTPS API
serversTransport: waypoint-insecure
vault-cluster:
loadBalancer:
servers:
- url: "http://warden.tailnet-68f9.ts.net:8200" # 北京,单节点
healthCheck:
path: "/ui/"
interval: "30s"
timeout: "15s"
authentik-cluster:
loadBalancer:
servers:
- url: "https://authentik.tailnet-68f9.ts.net:9443" # Authentik容器HTTPS端口
serversTransport: authentik-insecure
healthCheck:
path: "/flows/-/default/authentication/"
interval: "30s"
timeout: "15s"
routers:
consul-api:
rule: "Host(`consul.git-4ta.live`)"
service: consul-cluster
middlewares:
- consul-stripprefix
entryPoints:
- websecure
tls:
certResolver: cloudflare
traefik-dashboard:
rule: "Host(`traefik.git-4ta.live`)"
service: dashboard@internal
middlewares:
- dashboard_redirect@internal
- dashboard_stripprefix@internal
entryPoints:
- websecure
tls:
certResolver: cloudflare
traefik-api:
rule: "Host(`traefik.git-4ta.live`) && PathPrefix(`/api`)"
service: api@internal
entryPoints:
- websecure
tls:
certResolver: cloudflare
nomad-ui:
rule: "Host(`nomad.git-4ta.live`)"
service: nomad-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
waypoint-ui:
rule: "Host(`waypoint.git-4ta.live`)"
service: waypoint-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
vault-ui:
rule: "Host(`vault.git-4ta.live`)"
service: vault-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
authentik-ui:
rule: "Host(`authentik1.git-4ta.live`)"
service: authentik-cluster
entryPoints:
- websecure
tls:
certResolver: cloudflare
EOF
destination = "local/dynamic.yml"
}
template {
data = <<EOF
CLOUDFLARE_EMAIL=locksmithknight@gmail.com
CLOUDFLARE_DNS_API_TOKEN=0aPWoLaQ59l0nyL1jIVzZaEx2e41Gjgcfhn3ztJr
CLOUDFLARE_ZONE_API_TOKEN=0aPWoLaQ59l0nyL1jIVzZaEx2e41Gjgcfhn3ztJr
EOF
destination = "local/cloudflare.env"
env = true
}
resources {
cpu = 500
memory = 512
}
}
}
}
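
A quick smoke test for the deployed proxy, following the entrypoints and routers above; the job and task names come from this file:

# Smoke-test Traefik and watch the ACME DNS challenge.
curl -s http://hcp1.tailnet-68f9.ts.net:8080/api/overview | head -c 300   # insecure API/dashboard
curl -sI https://nomad.git-4ta.live | head -n 5                           # router defined in dynamic.yml
nomad alloc logs -job traefik-cloudflare-v3 traefik | grep -i acme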

View File

@@ -0,0 +1,427 @@
job "vault-single-nomad" {
datacenters = ["dc1"]
type = "service"
group "vault-warden" {
count = 1
volume "vault-storage" {
type = "host"
read_only = false
source = "vault-storage"
}
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "warden"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
volume_mount {
volume = "vault-storage"
destination = "/opt/nomad/data/vault-storage"
read_only = false
}
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
service {
name = "vault"
port = "http"
tags = ["vault-server"]
check {
type = "http"
path = "/v1/sys/health"
interval = "30s"
timeout = "5s"
}
}
# Vault configuration - Consul storage backend
template {
data = <<EOF
ui = true
disable_mlock = true
# Use Consul as the storage backend
storage "consul" {
address = "100.122.197.112:8500"
path = "vault/"
# Cluster settings
datacenter = "dc1"
service = "vault"
service_tags = "vault-server"
# Session settings
session_ttl = "15s"
lock_wait_time = "15s"
}
listener "tcp" {
address = "100.122.197.112:8200"
tls_disable = 1
}
# API address - over the Tailscale network
api_addr = "http://warden.tailnet-68f9.ts.net:8200"
# Cluster name
cluster_name = "vault-cluster"
# Logging
log_level = "INFO"
EOF
destination = "local/vault.hcl"
perms = "644"
}
# Auto-unseal script
template {
data = <<EOF
#!/bin/bash
# Start Vault
vault server -config=/local/vault.hcl &
VAULT_PID=$!
# Wait for Vault to come up
sleep 10
# Auto-unseal Vault - uses the local address; peers are discovered via Consul
echo "Auto-unsealing Vault..."
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
echo "Vault auto-unsealed successfully"
wait $VAULT_PID
EOF
destination = "local/start-vault.sh"
perms = "755"
}
config {
command = "/bin/bash"
args = [
"/local/start-vault.sh"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-ch4" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ch4"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
service {
name = "vault"
port = "http"
tags = ["vault-server"]
check {
type = "http"
path = "/v1/sys/health"
interval = "30s"
timeout = "5s"
}
}
# Vault configuration - Consul storage backend
template {
data = <<EOF
ui = true
disable_mlock = true
# Use Consul as the storage backend
storage "consul" {
address = "100.117.106.136:8500"
path = "vault/"
# Cluster settings
datacenter = "dc1"
service = "vault"
service_tags = "vault-server"
# Session settings
session_ttl = "15s"
lock_wait_time = "15s"
}
listener "tcp" {
address = "100.117.106.136:8200"
tls_disable = 1
}
# API address - over the Tailscale network
api_addr = "http://ch4.tailnet-68f9.ts.net:8200"
# Cluster name
cluster_name = "vault-cluster"
# Logging
log_level = "INFO"
EOF
destination = "local/vault.hcl"
perms = "644"
}
# Auto-unseal script
template {
data = <<EOF
#!/bin/bash
# Start Vault
vault server -config=/local/vault.hcl &
VAULT_PID=$!
# Wait for Vault to come up
sleep 10
# Auto-unseal Vault - uses the local address; peers are discovered via Consul
echo "Auto-unsealing Vault..."
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
echo "Vault auto-unsealed successfully"
wait $VAULT_PID
EOF
destination = "local/start-vault.sh"
perms = "755"
}
config {
command = "/bin/bash"
args = [
"/local/start-vault.sh"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
group "vault-ash3c" {
count = 1
constraint {
attribute = "${node.unique.name}"
operator = "="
value = "ash3c"
}
network {
port "http" {
static = 8200
to = 8200
}
}
task "vault" {
driver = "exec"
resources {
cpu = 500
memory = 1024
}
env {
VAULT_ADDR = "http://127.0.0.1:8200"
}
service {
name = "vault"
port = "http"
tags = ["vault-server"]
check {
type = "http"
path = "/v1/sys/health"
interval = "30s"
timeout = "5s"
}
}
# Vault configuration - Consul storage backend
template {
data = <<EOF
ui = true
disable_mlock = true
# Use Consul as the storage backend
storage "consul" {
address = "100.116.80.94:8500"
path = "vault/"
# Cluster settings
datacenter = "dc1"
service = "vault"
service_tags = "vault-server"
# Session settings
session_ttl = "15s"
lock_wait_time = "15s"
}
listener "tcp" {
address = "100.116.80.94:8200"
tls_disable = 1
}
# API address - over the Tailscale network
api_addr = "http://ash3c.tailnet-68f9.ts.net:8200"
# Cluster name
cluster_name = "vault-cluster"
# Logging
log_level = "INFO"
EOF
destination = "local/vault.hcl"
perms = "644"
}
# Auto-unseal script
template {
data = <<EOF
#!/bin/bash
# Start Vault
vault server -config=/local/vault.hcl &
VAULT_PID=$!
# Wait for Vault to come up
sleep 10
# Auto-unseal Vault - uses the local address; peers are discovered via Consul
echo "Auto-unsealing Vault..."
vault operator unseal -address=http://127.0.0.1:8200 nlmbQbNU7pZaeHUgT+ynOFDS37JbEGOjmcvQ1fSgYaQp
vault operator unseal -address=http://127.0.0.1:8200 a7lJqKNr2tJ+J84EnRM6u5fKBwe90nVe8NY/mJngVROn
vault operator unseal -address=http://127.0.0.1:8200 /YcUlgI3fclb13h/ybz0TjhlcedNkfmlWbQm3RxGyo+h
echo "Vault auto-unsealed successfully"
wait $VAULT_PID
EOF
destination = "local/start-vault.sh"
perms = "755"
}
config {
command = "/bin/bash"
args = [
"/local/start-vault.sh"
]
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
}
}
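
With Consul as the storage backend, seal state should be confirmed on each node once the job is running; a sketch:

# Check seal status on all three Vault nodes (vault status exits non-zero while sealed).
for host in warden ch4 ash3c; do
  echo "--- $host ---"
  VAULT_ADDR="http://$host.tailnet-68f9.ts.net:8200" vault status || true
done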

View File

@@ -0,0 +1,87 @@
# OpenTofu "Xiaowang" - client configuration deployment
terraform {
required_providers {
null = {
source = "registry.opentofu.org/hashicorp/null"
version = "3.2.4"
}
# local provider declared explicitly because local_file resources are used below
local = {
source = "registry.opentofu.org/hashicorp/local"
}
}
}
# List of the 6 client nodes
variable "client_nodes" {
type = list(string)
default = [
"hcp1",
"influxdb",
"ash3c",
"ch4",
"warden",
"browser"
]
}
# Generate a configuration file for each client node
resource "local_file" "client_configs" {
for_each = toset(var.client_nodes)
filename = "${path.module}/generated/${each.key}-client.hcl"
content = replace(
file("${path.module}/../nomad-configs-tofu/client-template.hcl"),
"NODE_NAME",
each.key
)
}
# Deploy the configuration to each client node
resource "null_resource" "client_deploy" {
for_each = toset(var.client_nodes)
depends_on = [local_file.client_configs]
provisioner "local-exec" {
command = <<EOF
echo "=== 部署客户端配置到 ${each.key} ==="
echo "开始时间: $(date)"
echo "1. 测试连接 ${each.key}..."
ping -c 1 ${each.key}.tailnet-68f9.ts.net || echo " - ${each.key} ping 失败"
echo "2. 上传配置文件..."
sshpass -p '3131' scp -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
${path.module}/generated/${each.key}-client.hcl \
ben@${each.key}.tailnet-68f9.ts.net:/tmp/nomad-new.hcl && echo " - 文件上传成功" || echo " - 文件上传失败"
echo "3. 部署配置并重启服务..."
sshpass -p '3131' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
ben@${each.key}.tailnet-68f9.ts.net \
"echo '=== ${each.key} 客户端配置部署开始 ==='; \
echo '3131' | sudo -S systemctl stop nomad; \
echo '备份旧配置...'; \
echo '3131' | sudo -S cp /etc/nomad.d/nomad.hcl /etc/nomad.d/nomad.hcl.backup.\$(date +%Y%m%d_%H%M%S) 2>/dev/null || true; \
echo '替换配置文件...'; \
echo '3131' | sudo -S cp /tmp/nomad-new.hcl /etc/nomad.d/nomad.hcl; \
echo '启动服务...'; \
echo '3131' | sudo -S systemctl start nomad; \
sleep 5; \
echo '检查服务状态...'; \
echo '3131' | sudo -S systemctl status nomad --no-pager; \
echo '=== ${each.key} 部署完成 ==='" && echo " - ${each.key} 部署成功" || echo " - ${each.key} 部署失败"
echo "=== ${each.key} 配置部署完成!时间: $(date) ==="
EOF
}
triggers = {
config_hash = local_file.client_configs[each.key].content_md5
deploy_time = timestamp()
}
}
output "deployment_summary" {
value = {
client_nodes = var.client_nodes
config_files = [for node in var.client_nodes : "${node}-client.hcl"]
deploy_time = timestamp()
}
}
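
Typical invocation for this rollout module; the directory name is an assumption:

# Initialize, plan, and apply the client-config deployment.
cd opentofu-client-deploy/
tofu init
tofu plan -out=clients.plan
tofu apply clients.plan
# Re-run a single node by targeting its resource instance:
tofu apply -target='null_resource.client_deploy["hcp1"]'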