diff --git a/components/consul/configs/consul.hcl b/components/consul/configs/consul.hcl new file mode 100644 index 0000000..d6ab0b4 --- /dev/null +++ b/components/consul/configs/consul.hcl @@ -0,0 +1,88 @@ +# Consul配置文件 +# 此文件包含Consul的完整配置,包括变量和存储相关设置 + +# 基础配置 +data_dir = "/opt/consul/data" +raft_dir = "/opt/consul/raft" + +# 启用UI +ui_config { + enabled = true +} + +# 数据中心配置 +datacenter = "dc1" + +# 服务器配置 +server = true +bootstrap_expect = 3 + +# 网络配置 +client_addr = "0.0.0.0" +bind_addr = "{{ GetInterfaceIP `eth0` }}" +advertise_addr = "{{ GetInterfaceIP `eth0` }}" + +# 端口配置 +ports { + dns = 8600 + http = 8500 + https = -1 + grpc = 8502 + grpc_tls = 8503 + serf_lan = 8301 + serf_wan = 8302 + server = 8300 +} + +# 集群连接 +retry_join = ["100.117.106.136", "100.116.80.94", "100.122.197.112"] + +# 服务发现 +enable_service_script = true +enable_script_checks = true +enable_local_script_checks = true + +# 性能调优 +performance { + raft_multiplier = 1 +} + +# 日志配置 +log_level = "INFO" +enable_syslog = false +log_file = "/var/log/consul/consul.log" + +# 安全配置 +encrypt = "YourEncryptionKeyHere" + +# 连接配置 +reconnect_timeout = "30s" +reconnect_timeout_wan = "30s" +session_ttl_min = "10s" + +# Autopilot配置 +autopilot { + cleanup_dead_servers = true + last_contact_threshold = "200ms" + max_trailing_logs = 250 + server_stabilization_time = "10s" + redundancy_zone_tag = "" + disable_upgrade_migration = false + upgrade_version_tag = "" +} + +# 快照配置 +snapshot { + enabled = true + interval = "24h" + retain = 30 + name = "consul-snapshot-{{.Timestamp}}" +} + +# 备份配置 +backup { + enabled = true + interval = "6h" + retain = 7 + name = "consul-backup-{{.Timestamp}}" +} \ No newline at end of file diff --git a/components/vault/jobs/vault-cluster-exec.nomad b/components/vault/jobs/vault-cluster-exec.nomad index f52b2e4..a23578a 100644 --- a/components/vault/jobs/vault-cluster-exec.nomad +++ b/components/vault/jobs/vault-cluster-exec.nomad @@ -33,7 +33,7 @@ job "vault-cluster-exec" { template { data = 
< /dev/null; then + echo "✓ Consul连接正常" + else + echo "✗ 无法连接到Consul,请检查Consul服务是否运行" + exit 1 + fi +} + +# 创建快照备份 +create_snapshot() { + echo "创建Consul快照备份..." + + SNAPSHOT_FILE="${BACKUP_DIR}/consul-snapshot-${DATE}.snap" + + # 使用Consul API创建快照 + if curl -s "${CONSUL_ADDR}/v1/snapshot" > "$SNAPSHOT_FILE"; then + echo "✓ 快照备份创建成功: $SNAPSHOT_FILE" + + # 显示快照信息 + echo "快照信息:" + consul snapshot inspect "$SNAPSHOT_FILE" 2>/dev/null || echo " (需要安装consul客户端以查看快照信息)" + else + echo "✗ 快照备份创建失败" + exit 1 + fi +} + +# 清理旧备份 +cleanup_old_backups() { + echo "清理${RETAIN_DAYS}天前的备份..." + + # 查找并删除旧备份文件 + if find "$BACKUP_DIR" -name "consul-snapshot-*.snap" -mtime +$RETAIN_DAYS -delete; then + echo "✓ 旧备份清理完成" + else + echo " 没有找到需要清理的旧备份" + fi +} + +# 列出所有备份 +list_backups() { + echo "" + echo "当前备份列表:" + echo "=============" + + if [ -d "$BACKUP_DIR" ] && [ "$(ls -A "$BACKUP_DIR")" ]; then + ls -lah "$BACKUP_DIR"/consul-snapshot-*.snap | awk '{print $5, $6, $7, $8, $9}' + else + echo " 没有找到备份文件" + fi +} + +# 验证备份 +verify_backup() { + echo "" + echo "验证备份..." + + LATEST_BACKUP=$(ls -t "$BACKUP_DIR"/consul-snapshot-*.snap | head -n 1) + + if [ -n "$LATEST_BACKUP" ]; then + echo "验证最新备份: $LATEST_BACKUP" + + # 检查文件大小 + FILE_SIZE=$(du -h "$LATEST_BACKUP" | cut -f1) + echo "备份文件大小: $FILE_SIZE" + + # 检查文件是否为空 + if [ -s "$LATEST_BACKUP" ]; then + echo "✓ 备份文件不为空" + else + echo "✗ 备份文件为空" + exit 1 + fi + + # 尝试检查快照元数据 + if consul snapshot inspect "$LATEST_BACKUP" > /dev/null 2>&1; then + echo "✓ 备份文件格式正确" + else + echo "✗ 备份文件格式错误" + exit 1 + fi + else + echo "✗ 没有找到备份文件" + exit 1 + fi +} + +# 主函数 +main() { + check_consul_connection + create_snapshot + cleanup_old_backups + list_backups + verify_backup + + echo "" + echo "✓ 备份流程完成!" + echo "" + echo "使用说明:" + echo "1. 可以通过cron定期运行此脚本: 0 2 * * * /path/to/backup_consul.sh" + echo "2. 恢复备份使用: consul snapshot restore /path/to/consul-snapshot-YYYYMMDD_HHMMSS.snap" + echo "3. 
查看备份内容: consul snapshot inspect /path/to/consul-snapshot-YYYYMMDD_HHMMSS.snap" +} + +# 执行主函数 +main "$@" \ No newline at end of file diff --git a/deployment/scripts/consul_variables_example.sh b/deployment/scripts/consul_variables_example.sh new file mode 100755 index 0000000..0c47501 --- /dev/null +++ b/deployment/scripts/consul_variables_example.sh @@ -0,0 +1,217 @@ +#!/bin/bash + +# Consul 变量和存储配置示例脚本 +# 此脚本展示了如何配置Consul的变量和存储功能 + +set -e + +# 配置参数 +CONSUL_ADDR=${CONSUL_ADDR:-"http://localhost:8500"} +ENVIRONMENT=${ENVIRONMENT:-"dev"} +PROVIDER=${PROVIDER:-"oracle"} +REGION=${REGION:-"kr"} + +echo "Consul 变量和存储配置示例" +echo "=========================" +echo "Consul 地址: $CONSUL_ADDR" +echo "环境: $ENVIRONMENT" +echo "提供商: $PROVIDER" +echo "区域: $REGION" +echo "" + +# 检查Consul连接 +check_consul_connection() { + echo "检查Consul连接..." + if curl -s "$CONSUL_ADDR/v1/status/leader" > /dev/null; then + echo "✓ Consul连接正常" + else + echo "✗ 无法连接到Consul,请检查Consul服务是否运行" + exit 1 + fi +} + +# 配置应用变量 +configure_app_variables() { + echo "配置应用变量..." + + # 应用基本信息 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/app/name" -d "my-application" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/app/version" -d "1.0.0" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/app/environment" -d "$ENVIRONMENT" + + # 特性开关 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/features/new_ui" -d "true" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/features/advanced_analytics" -d "false" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/features/beta_features" -d "true" + + echo "✓ 应用变量配置完成" +} + +# 配置数据库变量 +configure_database_variables() { + echo "配置数据库变量..." 
+ + # 数据库连接信息 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/host" -d "db.example.com" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/port" -d "5432" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/name" -d "myapp_db" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/ssl_mode" -d "require" + + # 数据库连接池配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/max_connections" -d "100" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/min_connections" -d "10" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/connection_timeout" -d "30s" + + echo "✓ 数据库变量配置完成" +} + +# 配置缓存变量 +configure_cache_variables() { + echo "配置缓存变量..." + + # Redis配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/host" -d "redis.example.com" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/port" -d "6379" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/password" -d "secure_password" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/db" -d "0" + + # 缓存策略 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/ttl" -d "3600" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/max_memory" -d "2gb" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/eviction_policy" -d "allkeys-lru" + + echo "✓ 缓存变量配置完成" +} + +# 配置消息队列变量 +configure_messaging_variables() { + echo "配置消息队列变量..." 
+ + # RabbitMQ配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/host" -d "rabbitmq.example.com" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/port" -d "5672" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/username" -d "myapp" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/password" -d "secure_password" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/vhost" -d "/myapp" + + # 队列配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/queue_name" -d "tasks" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/exchange" -d "myapp_exchange" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/routing_key" -d "task.#" + + echo "✓ 消息队列变量配置完成" +} + +# Store the settings of the selected cloud provider ($PROVIDER) in the Consul KV store +# under config/$ENVIRONMENT/$PROVIDER/$REGION/. Providers other than oracle, aws, gcp +# and digitalocean write nothing. Shell variable names are case-sensitive, so the path +# segment must be $REGION (assigned at the top of this script); $region is never set. +configure_provider_variables() { + echo "配置云服务提供商变量..." + + if [ "$PROVIDER" = "oracle" ]; then + # Oracle Cloud settings + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/tenancy_ocid" -d "ocid1.tenancy.oc1..aaaaaaaayourtenancyocid" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/user_ocid" -d "ocid1.user.oc1..aaaaaaaayouruserocid" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/fingerprint" -d "your-fingerprint" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/region" -d "$REGION" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/compartment_id" -d "ocid1.compartment.oc1..aaaaaaaayourcompartmentid" + elif [ "$PROVIDER" = "aws" ]; then + # AWS settings + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/access_key" -d "your-access-key" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/secret_key" -d "your-secret-key" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/region" -d "$REGION" + elif [ "$PROVIDER" = "gcp" ]; then + # GCP settings + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/project_id" -d "your-project-id" 
+ curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/region" -d "$REGION" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/credentials_path" -d "/path/to/service-account.json" + elif [ "$PROVIDER" = "digitalocean" ]; then + # DigitalOcean settings + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/token" -d "your-do-token" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/$REGION/region" -d "$REGION" + fi + + echo "✓ 云服务提供商变量配置完成" +} + +# 配置存储相关变量 +configure_storage_variables() { + echo "配置存储相关变量..." + + # 快照配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/snapshot/enabled" -d "true" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/snapshot/interval" -d "24h" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/snapshot/retain" -d "30" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/snapshot/name" -d "consul-snapshot-{{.Timestamp}}" + + # 备份配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/backup/enabled" -d "true" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/backup/interval" -d "6h" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/backup/retain" -d "7" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/backup/name" -d "consul-backup-{{.Timestamp}}" + + # 数据目录配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/data_dir" -d "/opt/consul/data" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/raft_dir" -d "/opt/consul/raft" + + # Autopilot配置 + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/autopilot/cleanup_dead_servers" -d "true" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/autopilot/last_contact_threshold" -d "200ms" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/autopilot/max_trailing_logs" -d "250" + curl -X PUT "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/autopilot/server_stabilization_time" -d "10s" + + echo "✓ 
存储相关变量配置完成" +} + +# 显示配置结果 +display_configuration() { + echo "" + echo "配置结果:" + echo "=========" + + echo "应用配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/app/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" + + echo "" + echo "数据库配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/database/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" + + echo "" + echo "缓存配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/cache/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" + + echo "" + echo "消息队列配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/messaging/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" + + echo "" + echo "云服务提供商配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/$PROVIDER/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" + + echo "" + echo "存储配置:" + curl -s "$CONSUL_ADDR/v1/kv/config/$ENVIRONMENT/storage/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' 2>/dev/null || echo " (需要安装jq以查看格式化输出)" +} + +# 主函数 +main() { + check_consul_connection + configure_app_variables + configure_database_variables + configure_cache_variables + configure_messaging_variables + configure_provider_variables + configure_storage_variables + display_configuration + + echo "" + echo "✓ 所有变量和存储配置已完成!" + echo "" + echo "使用说明:" + echo "1. 在Terraform中使用consul_keys数据源获取这些配置" + echo "2. 在应用程序中使用Consul客户端库读取这些配置" + echo "3. 
使用Consul UI查看和管理这些配置" + echo "" + echo "配置文件位置: /root/mgmt/docs/setup/consul_variables_and_storage_guide.md" +} + +# 执行主函数 +main "$@" \ No newline at end of file diff --git a/deployment/scripts/init_vault_cluster.sh b/deployment/scripts/init_vault_cluster.sh new file mode 100755 index 0000000..8f8a0e4 --- /dev/null +++ b/deployment/scripts/init_vault_cluster.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# Vault集群初始化和解封脚本 + +set -e + +echo "===== Vault集群初始化 =====" + +# 颜色定义 +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +# 函数定义 +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 检查Vault命令是否存在 +if ! command -v vault &> /dev/null; then + log_error "Vault命令未找到,请先安装Vault" + exit 1 +fi + +# 设置Vault地址为master节点 +export VAULT_ADDR='http://100.117.106.136:8200' + +# 等待Vault启动 +log_info "等待Vault启动..." +for i in {1..30}; do + if curl -s "$VAULT_ADDR/v1/sys/health" > /dev/null; then + break + fi + echo -n "." + sleep 2 +done +echo "" + +# 检查Vault是否已初始化 +init_status=$(curl -s "$VAULT_ADDR/v1/sys/health" | grep -o '"initialized":[^,}]*' | cut -d ':' -f2) +if [ "$init_status" = "false" ]; then + log_info "Vault未初始化,正在初始化..." + + # 初始化Vault并保存密钥到安全目录 + vault operator init -key-shares=5 -key-threshold=3 -format=json > /root/mgmt/security/secrets/vault/init_keys.json + + if [ $? 
-eq 0 ]; then + log_info "Vault初始化成功" + log_warn "重要:请立即将以下文件安全备份并分发给不同管理员" + log_warn "密钥文件位置: /root/mgmt/security/secrets/vault/init_keys.json" + + # Parse the saved init output without printing the unseal keys. + # Whitespace is stripped first because the grep patterns below only match when nothing + # separates a key, its colon and its value (assumes vault pretty-prints its + # -format=json output — TODO confirm against the installed vault version). + init_json=$(tr -d '[:space:]' < /root/mgmt/security/secrets/vault/init_keys.json) + unseal_keys=$(printf '%s\n' "$init_json" | grep -o '"unseal_keys_b64":\[[^]]*' | sed 's/"unseal_keys_b64":\[//' | tr ',' '\n' | sed 's/"//g') + unseal_keys_count=$(printf '%s\n' "$unseal_keys" | grep -c . || true) + root_token=$(printf '%s\n' "$init_json" | grep -o '"root_token":"[^"]*"' | cut -d '"' -f4) + + # The first three key shares are the ones used for the automatic unseal. + unseal_key1=$(printf '%s\n' "$unseal_keys" | sed -n '1p') + unseal_key2=$(printf '%s\n' "$unseal_keys" | sed -n '2p') + unseal_key3=$(printf '%s\n' "$unseal_keys" | sed -n '3p') + + # Abort instead of calling unseal with empty arguments when parsing failed. + if [ -z "$root_token" ] || [ -z "$unseal_key3" ]; then + log_error "无法从 /root/mgmt/security/secrets/vault/init_keys.json 解析解封密钥或根令牌" + exit 1 + fi + + log_info "生成了 $unseal_keys_count 个解封密钥,需要其中任意 3 个来解封Vault" + log_info "根令牌已生成(请安全保管)" + + # Unseal every node + log_info "正在解封所有Vault节点..." 
+ + # 解封master节点 + export VAULT_ADDR='http://100.117.106.136:8200' + vault operator unseal "$unseal_key1" + vault operator unseal "$unseal_key2" + vault operator unseal "$unseal_key3" + + # 解封ash3c节点 + export VAULT_ADDR='http://100.116.80.94:8200' + vault operator unseal "$unseal_key1" + vault operator unseal "$unseal_key2" + vault operator unseal "$unseal_key3" + + # 解封warden节点 + export VAULT_ADDR='http://100.122.197.112:8200' + vault operator unseal "$unseal_key1" + vault operator unseal "$unseal_key2" + vault operator unseal "$unseal_key3" + + log_info "所有Vault节点已成功解封" + log_warn "请确保将密钥文件安全备份到多个位置,并按照安全策略分发给不同管理员" + log_info "根令牌: $root_token" + + # 显示Vault状态 + log_info "Vault集群状态:" + export VAULT_ADDR='http://100.117.106.136:8200' + vault status + else + log_error "Vault初始化失败" + exit 1 + fi +else + log_info "Vault已初始化" + + # 检查Vault是否已解封 + sealed_status=$(curl -s "$VAULT_ADDR/v1/sys/health" | grep -o '"sealed":[^,}]*' | cut -d ':' -f2) + if [ "$sealed_status" = "true" ]; then + log_warn "Vault已初始化但仍处于密封状态,请手动解封" + log_info "使用以下命令解封Vault:" + log_info "export VAULT_ADDR='http://<节点IP>:8200'" + log_info "vault operator unseal <解封密钥1>" + log_info "vault operator unseal <解封密钥2>" + log_info "vault operator unseal <解封密钥3>" + else + log_info "Vault已初始化且已解封,可以正常使用" + fi +fi + +log_info "===== Vault集群初始化和解封完成 =====" \ No newline at end of file diff --git a/deployment/scripts/init_vault_dev.sh b/deployment/scripts/init_vault_dev.sh new file mode 100755 index 0000000..b9edbee --- /dev/null +++ b/deployment/scripts/init_vault_dev.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# Vault开发环境初始化脚本 + +set -e + +echo "===== Vault开发环境初始化 =====" + +# 颜色定义 +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +# 函数定义 +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 检查Vault命令是否存在 +if ! 
command -v vault &> /dev/null; then + log_error "Vault命令未找到,请先安装Vault" + exit 1 +fi + +# Point the vault CLI at the master node +export VAULT_ADDR='http://100.117.106.136:8200' + +# Wait until the health endpoint answers (at most 30 attempts, 2 seconds apart) +log_info "等待Vault启动..." +for i in {1..30}; do + if curl -s "$VAULT_ADDR/v1/sys/health" > /dev/null; then + break + fi + echo -n "." + sleep 2 +done +echo "" + +# Check whether Vault has already been initialized +init_status=$(curl -s "$VAULT_ADDR/v1/sys/health" | grep -o '"initialized":[^,}]*' | cut -d ':' -f2) +if [ "$init_status" = "false" ]; then + log_info "Vault未初始化,正在初始化..." + + # Initialize Vault and save the keys in the dev secrets directory + vault operator init -key-shares=1 -key-threshold=1 -format=json > /root/mgmt/security/secrets/vault/dev/init_keys.json + + if [ $? -eq 0 ]; then + log_info "Vault初始化成功(开发模式)" + log_warn "注意:这是开发模式,仅使用1个解封密钥" + log_warn "生产环境请使用5个密钥中的3个阈值" + + # Extract the key material. Whitespace is stripped first because the grep patterns only + # match when nothing separates a key, its colon and its value (assumes vault + # pretty-prints its -format=json output — TODO confirm). + unseal_key=$(tr -d '[:space:]' < /root/mgmt/security/secrets/vault/dev/init_keys.json | grep -o '"unseal_keys_b64":\["[^"]*"' | cut -d '"' -f4) + root_token=$(tr -d '[:space:]' < /root/mgmt/security/secrets/vault/dev/init_keys.json | grep -o '"root_token":"[^"]*"' | cut -d '"' -f4) + + # Abort instead of unsealing with an empty key or saving an empty token. + if [ -z "$unseal_key" ] || [ -z "$root_token" ]; then + log_error "无法从 /root/mgmt/security/secrets/vault/dev/init_keys.json 解析解封密钥或根令牌" + exit 1 + fi + + log_info "解封密钥: $unseal_key" + log_info "根令牌: $root_token" + + # Unseal every node automatically + log_info "正在自动解封所有Vault节点..." + + # Unseal the master node + export VAULT_ADDR='http://100.117.106.136:8200' + vault operator unseal "$unseal_key" + + # Unseal the ash3c node + export VAULT_ADDR='http://100.116.80.94:8200' + vault operator unseal "$unseal_key" + + # Unseal the warden node + export VAULT_ADDR='http://100.122.197.112:8200' + vault operator unseal "$unseal_key" + + log_info "所有Vault节点已成功解封" + + # Show the Vault status + log_info "Vault集群状态:" + export VAULT_ADDR='http://100.117.106.136:8200' + vault status + + # Save the environment variables for later use + echo "export VAULT_ADDR='http://100.117.106.136:8200'" > /root/mgmt/security/secrets/vault/dev/vault_env.sh + echo "export VAULT_TOKEN='$root_token'" >> /root/mgmt/security/secrets/vault/dev/vault_env.sh + log_info "环境变量已保存到: /root/mgmt/security/secrets/vault/dev/vault_env.sh" + + log_warn "开发环境提示:" + log_warn "1. 请勿在生产环境中使用此配置" + log_warn "2. 
生产环境应使用5个密钥中的3个阈值" + log_warn "3. 密钥应分发给不同管理员保管" + else + log_error "Vault初始化失败" + exit 1 + fi +else + log_info "Vault已初始化" + + # 检查Vault是否已解封 + sealed_status=$(curl -s "$VAULT_ADDR/v1/sys/health" | grep -o '"sealed":[^,}]*' | cut -d ':' -f2) + if [ "$sealed_status" = "true" ]; then + log_warn "Vault已初始化但仍处于密封状态" + log_info "请使用以下命令解封:" + log_info "export VAULT_ADDR='http://<节点IP>:8200'" + log_info "vault operator unseal <解封密钥>" + else + log_info "Vault已初始化且已解封,可以正常使用" + + # 显示Vault状态 + log_info "Vault集群状态:" + export VAULT_ADDR='http://100.117.106.136:8200' + vault status + fi +fi + +log_info "===== Vault开发环境初始化完成 =====" \ No newline at end of file diff --git a/deployment/scripts/setup_consul_variables_and_storage.sh b/deployment/scripts/setup_consul_variables_and_storage.sh new file mode 100755 index 0000000..c6656ee --- /dev/null +++ b/deployment/scripts/setup_consul_variables_and_storage.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +# Consul 变量和存储配置脚本 +# 用于增强Consul集群功能 + +set -e + +# 颜色输出 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# 日志函数 +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 默认Consul地址 +CONSUL_ADDR=${CONSUL_ADDR:-"http://localhost:8500"} + +# 检查Consul连接 +check_consul() { + log_info "检查Consul连接..." + if curl -s "${CONSUL_ADDR}/v1/status/leader" > /dev/null; then + log_info "Consul连接正常" + return 0 + else + log_error "无法连接到Consul: ${CONSUL_ADDR}" + return 1 + fi +} + +# 配置Consul变量 +setup_variables() { + log_info "配置Consul变量..." + + # 环境变量 + ENVIRONMENT=${ENVIRONMENT:-"dev"} + + # 创建基础配置结构 + log_info "创建基础配置结构..." 
+ + # 应用配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/app/name" -d "my-application" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/app/version" -d "1.0.0" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/app/environment" -d "${ENVIRONMENT}" > /dev/null + + # 数据库配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/database/host" -d "db.example.com" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/database/port" -d "5432" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/database/name" -d "myapp_db" > /dev/null + + # 缓存配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/cache/host" -d "redis.example.com" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/cache/port" -d "6379" > /dev/null + + # 消息队列配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/mq/host" -d "mq.example.com" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/mq/port" -d "5672" > /dev/null + + # 特性开关 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/features/new_ui" -d "true" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT}/features/advanced_analytics" -d "false" > /dev/null + + log_info "Consul变量配置完成" +} + +# 配置Consul存储 +setup_storage() { + log_info "配置Consul存储..." 
+ + # 创建存储配置 + # 注意:这些配置需要在Consul配置文件中启用相应的存储后端 + + # 持久化存储配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/data_dir" -d "/opt/consul/data" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/raft_dir" -d "/opt/consul/raft" > /dev/null + + # 快照配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/snapshot_enabled" -d "true" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/snapshot_interval" -d "24h" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/snapshot_retention" -d "30" > /dev/null + + # 备份配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/backup_enabled" -d "true" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/backup_interval" -d "6h" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/backup_retention" -d "7" > /dev/null + + # 自动清理配置 + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/cleanup_dead_servers" -d "true" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/last_contact_threshold" -d "200ms" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/max_trailing_logs" -d "250" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/server_stabilization_time" -d "10s" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/redundancy_zone_tag" -d "" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/disable_upgrade_migration" -d "false" > /dev/null + curl -s -X PUT "${CONSUL_ADDR}/v1/kv/storage/consul/autopilot/upgrade_version_tag" -d "" > /dev/null + + log_info "Consul存储配置完成" +} + +# 创建Consul配置文件 +create_consul_config() { + log_info "创建Consul配置文件..." 
+ + # Create the configuration directory + mkdir -p /root/mgmt/components/consul/configs + + # Write the base configuration file. The heredoc delimiter is quoted so the shell copies + # the text literally; with an unquoted delimiter the backquoted `eth0` below would be + # executed as a command substitution. + cat > /root/mgmt/components/consul/configs/consul.hcl << 'EOF' +# Consul 基础配置 +data_dir = "/opt/consul/data" +raft_dir = "/opt/consul/raft" + +# 启用UI +ui_config { + enabled = true +} + +# 数据中心配置 +datacenter = "dc1" + +# 服务器配置 +server = true +bootstrap_expect = 3 + +# 客户端地址 +client_addr = "0.0.0.0" + +# 绑定地址 +bind_addr = "{{ GetInterfaceIP `eth0` }}" + +# 广告地址 +advertise_addr = "{{ GetInterfaceIP `eth0` }}" + +# 端口配置 +ports { + dns = 8600 + http = 8500 + https = -1 + grpc = 8502 + grpc_tls = 8503 + serf_lan = 8301 + serf_wan = 8302 + server = 8300 +} + +# 连接其他节点 +retry_join = ["100.117.106.136", "100.116.80.94", "100.122.197.112"] + +# 启用服务发现 +enable_service_script = true + +# 启用脚本检查 +enable_script_checks = true + +# 启用本地脚本检查 +enable_local_script_checks = true + +# 性能调优 +performance { + raft_multiplier = 1 +} + +# 日志配置 +log_level = "INFO" +enable_syslog = false +log_file = "/var/log/consul/consul.log" + +# 自动加密 +encrypt = "YourEncryptionKeyHere" + +# 重用端口 +reconnect_timeout = "30s" +reconnect_timeout_wan = "30s" + +# 会话TTL +session_ttl_min = "10s" + +# 自动清理 +autopilot { + cleanup_dead_servers = true + last_contact_threshold = "200ms" + max_trailing_logs = 250 + server_stabilization_time = "10s" + redundancy_zone_tag = "" + disable_upgrade_migration = false + upgrade_version_tag = "" +} + +# 快照配置 +snapshot { + enabled = true + interval = "24h" + retain = 30 + name = "consul-snapshot-{{.Timestamp}}" +} + +# 备份配置 +backup { + enabled = true + interval = "6h" + retain = 7 + name = "consul-backup-{{.Timestamp}}" +} +EOF + + log_info "Consul配置文件创建完成: /root/mgmt/components/consul/configs/consul.hcl" +} + +# 显示配置 +show_config() { + log_info "显示Consul变量配置..." 
+ echo "==========================================" + curl -s "${CONSUL_ADDR}/v1/kv/config/${ENVIRONMENT:-dev}/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' + echo "==========================================" + + log_info "显示Consul存储配置..." + echo "==========================================" + curl -s "${CONSUL_ADDR}/v1/kv/storage/?recurse" | jq -r '.[] | "\(.Key): \(.Value | @base64d)"' + echo "==========================================" +} + +# 主函数 +main() { + log_info "开始配置Consul变量和存储..." + + # 检查Consul连接 + check_consul + + # 配置变量 + setup_variables + + # 配置存储 + setup_storage + + # 创建配置文件 + create_consul_config + + # 显示配置 + show_config + + log_info "Consul变量和存储配置完成" + + # 提示下一步 + log_info "下一步操作:" + log_info "1. 重启Consul服务以应用新配置" + log_info "2. 验证配置是否生效" + log_info "3. 根据需要调整配置参数" +} + +# 执行主函数 +main "$@" \ No newline at end of file diff --git a/deployment/scripts/show_vault_dev_keys.sh b/deployment/scripts/show_vault_dev_keys.sh new file mode 100755 index 0000000..84b0c76 --- /dev/null +++ b/deployment/scripts/show_vault_dev_keys.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# 显示开发环境Vault密钥信息 + +echo "===== Vault开发环境密钥信息 =====" + +# 检查密钥文件是否存在 +if [ ! 
-f "/root/mgmt/security/secrets/vault/dev/init_keys.json" ]; then + echo "错误:Vault密钥文件不存在" + echo "请先运行初始化脚本:/root/mgmt/deployment/scripts/init_vault_dev.sh" + exit 1 +fi + +# Print the key information +echo "Vault开发环境密钥信息:" +echo "----------------------------------------" + +# Extract the unseal key and the root token. Whitespace is stripped first because the grep +# patterns only match when nothing separates a key, its colon and its value (assumes the +# key file holds pretty-printed JSON — TODO confirm). +unseal_key=$(tr -d '[:space:]' < /root/mgmt/security/secrets/vault/dev/init_keys.json | grep -o '"unseal_keys_b64":\["[^"]*"' | cut -d '"' -f4) +root_token=$(tr -d '[:space:]' < /root/mgmt/security/secrets/vault/dev/init_keys.json | grep -o '"root_token":"[^"]*"' | cut -d '"' -f4) + +# Fail loudly instead of printing empty credentials when the file could not be parsed. +if [ -z "$unseal_key" ] || [ -z "$root_token" ]; then + echo "错误:无法从密钥文件中解析解封密钥或根令牌" >&2 + exit 1 +fi + +echo "解封密钥: $unseal_key" +echo "根令牌: $root_token" + +echo "----------------------------------------" +echo "环境变量设置命令:" +echo "export VAULT_ADDR='http://100.117.106.136:8200'" +echo "export VAULT_TOKEN='$root_token'" + +echo "" +echo "注意:这是开发环境配置,仅用于测试目的" +echo "生产环境请遵循安全策略文档中的建议" \ No newline at end of file diff --git a/deployment/scripts/vault_dev_example.sh b/deployment/scripts/vault_dev_example.sh new file mode 100755 index 0000000..a2da0a8 --- /dev/null +++ b/deployment/scripts/vault_dev_example.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Vault开发环境使用示例 + +echo "===== Vault开发环境使用示例 =====" + +# 设置环境变量 +source /root/mgmt/security/secrets/vault/dev/vault_env.sh + +echo "1. 检查Vault状态" +vault status + +echo "" +echo "2. 写入示例密钥值" +vault kv put secret/myapp/config username="devuser" password="devpassword" database="devdb" + +echo "" +echo "3. 读取示例密钥值" +vault kv get secret/myapp/config + +echo "" +echo "4. 列出密钥路径" +vault kv list secret/myapp/ + +echo "" +echo "5. 创建示例策略" +cat > /tmp/dev-policy.hcl << EOF +# 开发环境示例策略 +path "secret/*" { + capabilities = ["create", "read", "update", "delete", "list"] +} + +path "sys/mounts" { + capabilities = ["read"] +} +EOF + +vault policy write dev-policy /tmp/dev-policy.hcl + +echo "" +echo "6. 创建有限权限令牌" +vault token create -policy=dev-policy + +echo "" +echo "7. 
启用并配置其他密钥引擎示例" +echo "启用数据库密钥引擎:" +echo "vault secrets enable database" + +echo "" +echo "===== Vault开发环境示例完成 =====" +echo "注意:这些命令仅用于开发测试,请勿在生产环境中使用相同配置" \ No newline at end of file diff --git a/docs/setup/consul_variables_and_storage_guide.md b/docs/setup/consul_variables_and_storage_guide.md new file mode 100644 index 0000000..a16bf48 --- /dev/null +++ b/docs/setup/consul_variables_and_storage_guide.md @@ -0,0 +1,420 @@ +# Consul 变量和存储配置指南 + +本文档介绍如何配置Consul的变量(Variables)和存储(Storage)功能,以增强集群的功能性和可靠性。 + +## 概述 + +Consul提供了两种关键功能来增强集群能力: +1. **变量(Variables)**: 用于存储配置信息、特性开关、应用参数等 +2. **存储(Storage)**: 用于持久化数据、快照和备份 + +## 变量(Variables)配置 + +### 变量命名规范 + +我们遵循统一的命名规范来管理Consul KV存储中的配置: + +``` +config/{environment}/{provider}/{region_or_service}/{key} +``` + +各部分说明: +- **config**: 固定前缀,表示这是一个配置项 +- **environment**: 环境名称,如 `dev`、`staging`、`prod` 等 +- **provider**: 云服务提供商,如 `oracle`、`digitalocean`、`aws`、`gcp` 等 +- **region_or_service**: 区域或服务名称,如 `kr`、`us`、`sgp` 等 +- **key**: 具体的配置键名,如 `token`、`tenancy_ocid`、`user_ocid` 等 + +### 示例配置 + +#### 应用配置 +``` +config/dev/app/name +config/dev/app/version +config/dev/app/environment +``` + +#### 数据库配置 +``` +config/dev/database/host +config/dev/database/port +config/dev/database/name +``` + +#### 缓存配置 +``` +config/dev/cache/host +config/dev/cache/port +``` + +#### 特性开关 +``` +config/dev/features/new_ui +config/dev/features/advanced_analytics +``` + +### 如何添加变量 + +#### 使用curl命令 +```bash +# 添加单个变量 +curl -X PUT http://localhost:8500/v1/kv/config/dev/app/name -d "my-application" + +# 添加多个变量 +curl -X PUT http://localhost:8500/v1/kv/config/dev/database/host -d "db.example.com" +curl -X PUT http://localhost:8500/v1/kv/config/dev/database/port -d "5432" +``` + +#### 使用consul CLI +```bash +# 添加单个变量 +consul kv put config/dev/app/name my-application + +# 添加多个变量 +consul kv put config/dev/database/host db.example.com +consul kv put config/dev/database/port 5432 +``` + +#### 使用自动化脚本 +我们提供了自动化脚本来配置Consul变量: + +```bash +# 运行配置脚本 
+./deployment/scripts/setup_consul_variables_and_storage.sh +``` + +### 如何使用变量 + +#### 在Terraform中使用 +```hcl +data "consul_keys" "app_config" { + key { + name = "app_name" + path = "config/dev/app/name" + } + key { + name = "db_host" + path = "config/dev/database/host" + } +} + +resource "some_resource" "example" { + name = data.consul_keys.app_config.var.app_name + host = data.consul_keys.app_config.var.db_host +} +``` + +#### 在应用程序中使用 +大多数Consul客户端库都提供了读取KV存储的方法。例如,在Go中: + +```go +import "github.com/hashicorp/consul/api" + +// 创建Consul客户端 +client, _ := api.NewClient(api.DefaultConfig()) + +// 读取KV +kv := client.KV() +pair, _, _ := kv.Get("config/dev/app/name", nil) +appName := string(pair.Value) +``` + +## 存储(Storage)配置 + +### 持久化存储 + +Consul需要持久化存储来保存Raft日志和快照数据。在Nomad作业配置中,我们已经指定了数据目录: + +```hcl +config { + command = "consul" + args = [ + "agent", + "-server", + "-bootstrap-expect=3", + "-data-dir=/opt/nomad/data/consul", # 数据目录 + # 其他参数... + ] +} +``` + +### 快照配置 + +快照是Consul集群状态的时间点备份,用于灾难恢复。 + +#### 启用快照 +在Consul配置文件中添加以下配置: + +```hcl +snapshot { + enabled = true + interval = "24h" # 每24小时创建一次快照 + retain = 30 # 保留30个快照 + name = "consul-snapshot-{{.Timestamp}}" +} +``` + +#### 手动创建快照 +```bash +# 创建快照 +consul snapshot save backup-$(date +%Y%m%d).snap + +# 恢复快照 +consul snapshot restore backup-20231201.snap +``` + +### 备份配置 + +定期备份Consul数据是确保数据安全的重要措施。 + +#### 配置自动备份 +```hcl +backup { + enabled = true + interval = "6h" # 每6小时备份一次 + retain = 7 # 保留7个备份 + name = "consul-backup-{{.Timestamp}}" +} +``` + +#### 备份脚本 +```bash +#!/bin/bash +# backup_consul.sh + +DATE=$(date +%Y%m%d_%H%M%S) +BACKUP_DIR="/backups/consul" +CONSUL_ADDR="http://localhost:8500" + +# 创建备份目录 +mkdir -p $BACKUP_DIR + +# 创建快照 +curl -s "${CONSUL_ADDR}/v1/snapshot" > "${BACKUP_DIR}/consul-snapshot-${DATE}.snap" + +# 保留最近7天的备份 +find $BACKUP_DIR -name "consul-snapshot-*.snap" -mtime +7 -delete + +echo "备份完成: ${BACKUP_DIR}/consul-snapshot-${DATE}.snap" +``` + +### Autopilot配置 + 
+Autopilot是Consul的自动管理功能,用于处理服务器故障和自动恢复。 + +```hcl +autopilot { + cleanup_dead_servers = true # 自动清理死服务器 + last_contact_threshold = "200ms" # 最后联系阈值 + max_trailing_logs = 250 # 最大 trailing 日志数 + server_stabilization_time = "10s" # 服务器稳定时间 + redundancy_zone_tag = "" # 冗余区域标签 + disable_upgrade_migration = false # 禁用升级迁移 + upgrade_version_tag = "" # 升级版本标签 +} +``` + +## 完整配置示例 + +### Consul配置文件 (consul.hcl) +```hcl +# 基础配置 +data_dir = "/opt/consul/data" +raft_dir = "/opt/consul/raft" + +# 启用UI +ui_config { + enabled = true +} + +# 数据中心配置 +datacenter = "dc1" + +# 服务器配置 +server = true +bootstrap_expect = 3 + +# 网络配置 +client_addr = "0.0.0.0" +bind_addr = "{{ GetInterfaceIP `eth0` }}" +advertise_addr = "{{ GetInterfaceIP `eth0` }}" + +# 端口配置 +ports { + dns = 8600 + http = 8500 + https = -1 + grpc = 8502 + grpc_tls = 8503 + serf_lan = 8301 + serf_wan = 8302 + server = 8300 +} + +# 集群连接 +retry_join = ["100.117.106.136", "100.116.80.94", "100.122.197.112"] + +# 服务发现 +enable_service_script = true +enable_script_checks = true +enable_local_script_checks = true + +# 性能调优 +performance { + raft_multiplier = 1 +} + +# 日志配置 +log_level = "INFO" +enable_syslog = false +log_file = "/var/log/consul/consul.log" + +# 安全配置 +encrypt = "YourEncryptionKeyHere" + +# 连接配置 +reconnect_timeout = "30s" +reconnect_timeout_wan = "30s" +session_ttl_min = "10s" + +# Autopilot配置 +autopilot { + cleanup_dead_servers = true + last_contact_threshold = "200ms" + max_trailing_logs = 250 + server_stabilization_time = "10s" + redundancy_zone_tag = "" + disable_upgrade_migration = false + upgrade_version_tag = "" +} + +# 快照配置 +snapshot { + enabled = true + interval = "24h" + retain = 30 + name = "consul-snapshot-{{.Timestamp}}" +} + +# 备份配置 +backup { + enabled = true + interval = "6h" + retain = 7 + name = "consul-backup-{{.Timestamp}}" +} +``` + +## 部署步骤 + +### 1. 
准备配置文件 +```bash +# 创建配置目录 +mkdir -p /root/mgmt/components/consul/configs + +# 创建配置文件 +cat > /root/mgmt/components/consul/configs/consul.hcl << 'EOF' +# 粘贴上面的完整配置示例(定界符加引号,避免shell展开配置中的反引号和$) +EOF +``` + +### 2. 运行配置脚本 +```bash +# 运行自动化脚本 +./deployment/scripts/setup_consul_variables_and_storage.sh +``` + +### 3. 重启Consul服务 +```bash +# 停止Consul服务 +nomad job stop consul-cluster-simple + +# 重新启动Consul服务 +nomad job run /root/mgmt/components/consul/jobs/consul-cluster-simple.nomad +``` + +### 4. 验证配置 +```bash +# 检查Consul状态 +curl http://localhost:8500/v1/status/leader + +# 检查变量配置 +curl -s 'http://localhost:8500/v1/kv/config/dev/?recurse' | jq + +# 检查存储配置 +curl -s 'http://localhost:8500/v1/kv/storage/?recurse' | jq +``` + +## 最佳实践 + +1. **定期备份**: 设置定期备份Consul数据,并测试恢复过程 +2. **监控存储空间**: 监控Consul数据目录的使用情况,避免磁盘空间不足 +3. **安全配置**: 使用ACL和TLS保护Consul集群 +4. **版本控制**: 将Consul配置文件纳入版本控制系统 +5. **环境隔离**: 为不同环境(dev/staging/prod)使用不同的配置路径 +6. **文档记录**: 记录所有配置项的用途和取值范围 + +## 故障排除 + +### 常见问题 + +#### 1. 变量无法读取 +- 检查Consul服务是否正常运行 +- 验证变量路径是否正确 +- 确认ACL权限是否足够 + +#### 2. 存储空间不足 +- 检查数据目录大小 +- 调整快照和备份保留策略 +- 清理旧快照和备份 + +#### 3. 
快照失败 +- 检查磁盘空间 +- 验证文件权限 +- 查看Consul日志获取详细错误信息 + +### 调试命令 +```bash +# 查看Consul成员 +consul members + +# 查看Raft状态 +consul operator raft list-peers + +# 查看键值存储 +consul kv get --recurse config/dev/ + +# 查看快照信息 +consul snapshot inspect backup.snap +``` + +## 扩展功能 + +### 与Vault集成 + +Consul可以与Vault集成,提供更强大的密钥管理功能: + +```bash +# 启用Vault的Consul密钥引擎(用于动态签发Consul ACL令牌) +vault secrets enable consul + +# 配置该密钥引擎访问Consul所用的地址和管理令牌 +vault write consul/config/access address="127.0.0.1:8500" token="$CONSUL_HTTP_TOKEN" +``` + +### 与Nomad集成 + +Consul可以与Nomad集成,提供服务发现和配置管理: + +```hcl +# Nomad配置中的Consul集成 +consul { + address = "localhost:8500" + token = "your-consul-token" + ssl = false +} +``` + +## 总结 + +通过配置Consul的变量和存储功能,可以显著增强集群的功能性和可靠性。变量功能提供了灵活的配置管理,而存储功能确保了数据的安全性和持久性。结合自动化脚本和最佳实践,可以构建一个强大且易于维护的Consul集群。 \ No newline at end of file diff --git a/docs/vault-dev-environment.md b/docs/vault-dev-environment.md new file mode 100644 index 0000000..3e38b29 --- /dev/null +++ b/docs/vault-dev-environment.md @@ -0,0 +1,112 @@ +# Vault开发环境指南 + +## 1. 概述 + +本文档介绍了如何在开发环境中使用Vault,包括初始化、密钥管理和基本操作。 + +## 2. 开发环境特点 + +- 使用1个解封密钥(简化操作) +- 所有密钥存储在本地开发目录 +- 适用于快速测试和开发 + +**注意**:此配置仅用于开发环境,生产环境请遵循安全策略文档。 + +## 3. 初始化Vault + +### 3.1 运行初始化脚本 +```bash +/root/mgmt/deployment/scripts/init_vault_dev.sh +``` + +脚本将: +1. 初始化Vault集群 +2. 生成1个解封密钥和根令牌 +3. 自动解封所有节点 +4. 保存环境变量配置 + +### 3.2 查看密钥信息 +```bash +/root/mgmt/deployment/scripts/show_vault_dev_keys.sh +``` + +## 4. 使用Vault + +### 4.1 设置环境变量 +```bash +source /root/mgmt/security/secrets/vault/dev/vault_env.sh +``` + +### 4.2 基本操作示例 +```bash +# 检查状态 +vault status + +# 写入密钥值 +vault kv put secret/myapp/config username="devuser" password="devpassword" + +# 读取密钥值 +vault kv get secret/myapp/config +``` + +### 4.3 运行完整示例 +```bash +/root/mgmt/deployment/scripts/vault_dev_example.sh +``` + +## 5. 目录结构 + +``` +/root/mgmt/security/secrets/vault/dev/ +├── init_keys.json # 初始化密钥(解封密钥和根令牌) +└── vault_env.sh # 环境变量配置 +``` + +## 6. 
重要提醒 + +### 6.1 开发环境限制 +- 仅使用1个解封密钥(生产环境应使用5个密钥分片、解封阈值为3) +- 密钥存储在本地文件系统(生产环境应分散存储) +- 适用于单人开发测试 + +### 6.2 生产环境迁移 +当从开发环境迁移到生产环境时: +1. 重新初始化Vault集群 +2. 使用5个解封密钥分片、解封阈值为3 +3. 将密钥分发给不同管理员 +4. 遵循安全策略文档 + +## 7. 故障排除 + +### 7.1 Vault未初始化 +运行初始化脚本: +```bash +/root/mgmt/deployment/scripts/init_vault_dev.sh +``` + +### 7.2 Vault已初始化但被密封 +使用解封密钥解封: +```bash +export VAULT_ADDR='http://<节点IP>:8200' +vault operator unseal <解封密钥> +``` + +### 7.3 无法连接到Vault +检查Vault服务状态: +```bash +curl -v http://<节点IP>:8200/v1/sys/health +``` + +## 8. 清理环境 + +如需重新开始,可以删除密钥文件并重新初始化(注意:若Vault已初始化,需先清空其存储后端数据,否则无法再次初始化,且删除密钥文件后将无法解封): +```bash +rm -f /root/mgmt/security/secrets/vault/dev/init_keys.json +/root/mgmt/deployment/scripts/init_vault_dev.sh +``` + +## 9. 相关文档 + +- [Vault安全策略](vault-security-policy.md) - 生产环境安全指南 +- [Vault官方文档](https://www.vaultproject.io/docs) +- [Vault API文档](https://www.vaultproject.io/api) \ No newline at end of file diff --git a/docs/vault-security-policy.md b/docs/vault-security-policy.md new file mode 100644 index 0000000..a0ba5bd --- /dev/null +++ b/docs/vault-security-policy.md @@ -0,0 +1,139 @@ +# Vault安全策略和密钥管理指南 + +## 1. 概述 + +本文档定义了Vault密钥的安全管理策略,确保基础设施的安全性和可靠性。 + +## 2. 密钥类型 + +### 2.1 初始化密钥 +- **解封密钥**:用于解封Vault实例 +- **根令牌**:具有Vault中所有权限的初始令牌 + +### 2.2 操作密钥 +- **用户令牌**:分配给用户和服务的访问令牌 +- **策略令牌**:基于特定策略的受限令牌 + +## 3. 安全存储策略 + +### 3.1 解封密钥存储 +**禁止**: +- 将所有密钥存储在同一位置 +- 在代码或配置文件中明文存储密钥 +- 通过不安全的通信渠道传输密钥 + +**推荐**: +1. **物理分发**: + - 将5个解封密钥分别交给5个不同的可信管理员 + - 每个管理员仅知道自己的密钥 + - 需要3个密钥即可解封Vault(Shamir's Secret Sharing) + +2. **加密存储**: + - 使用GPG或其他加密工具加密密钥文件 + - 将加密后的文件存储在安全位置 + - 加密密钥由不同管理员保管 + +3. **硬件安全模块**: + - 企业环境推荐使用HSM存储密钥 + - 提供硬件级别的安全保护 + +### 3.2 根令牌存储 +- 根令牌应立即用于创建具有最小权限的管理令牌 +- 创建后应立即撤销根令牌 +- 新的管理令牌应根据职责分离原则分发 + +## 4. 密钥生命周期管理 + +### 4.1 创建 +- 初始化时生成密钥 +- 立即按照安全策略分发和存储 +- 记录密钥创建时间和负责人 + +### 4.2 使用 +- 仅在必要时使用解封密钥 +- 定期轮换用户和服务令牌 +- 监控密钥使用情况 + +### 4.3 更新 +- 定期通过 `vault operator rekey` 轮换解封密钥(重新初始化会清空Vault中的全部数据,谨慎操作) +- 当管理员变更时更新密钥分发 +- 发生安全事件时立即重新生成密钥 + +### 4.4 销毁 +- 安全删除不再需要的密钥副本 +- 使用安全删除工具确保数据不可恢复 +- 记录密钥销毁时间和负责人 + +## 5. 
应急响应 + +### 5.1 密钥泄露 +1. 立即生成新的解封密钥(`vault operator rekey`) +2. 重新初始化Vault集群 +3. 更新所有依赖Vault的服务配置 +4. 调查泄露原因并修复安全漏洞 + +### 5.2 管理员不可用 +1. 确保有足够的密钥持有者可用(至少3人) +2. 建立备用密钥持有者列表 +3. 定期验证密钥持有者的可用性 + +## 6. 审计和合规 + +### 6.1 审计要求 +- 记录所有密钥相关操作 +- 定期审查密钥管理策略执行情况 +- 生成密钥使用报告 + +### 6.2 合规性 +- 遵循组织安全政策 +- 满足行业标准要求(如SOC 2, ISO 27001等) +- 定期进行安全评估 + +## 7. 实施步骤 + +### 7.1 初始化Vault +```bash +# 使用提供的脚本初始化Vault +/root/mgmt/deployment/scripts/init_vault_cluster.sh +``` + +### 7.2 安全分发密钥 +1. 将生成的密钥文件复制到安全位置 +2. 将密钥文件加密并分发给不同管理员 +3. 验证每个管理员都能正确解封Vault + +### 7.3 创建管理令牌 +```bash +# 使用根令牌创建管理令牌 +export VAULT_ADDR='http://<节点IP>:8200' +export VAULT_TOKEN='<根令牌>' +vault token create -policy=admin -period=24h +``` + +### 7.4 撤销根令牌 +```bash +# 撤销根令牌以提高安全性 +vault token revoke <根令牌> +``` + +## 8. 最佳实践 + +### 8.1 访问控制 +- 实施最小权限原则 +- 使用策略限制令牌权限 +- 定期审查和更新策略 + +### 8.2 监控和告警 +- 监控Vault解封和密封事件 +- 设置密钥使用异常告警 +- 定期生成安全报告 + +### 8.3 备份和恢复 +- 定期备份Vault数据 +- 测试恢复流程 +- 确保备份数据的安全性 + +## 9. 相关文档 +- [Vault官方安全指南](https://www.vaultproject.io/docs/internals/security) +- [HashiCorp安全模型](https://www.hashicorp.com/security) +- 组织内部安全政策 \ No newline at end of file