diff --git a/.gitea/settings.yml b/.gitea/settings.yml
new file mode 100644
index 0000000..02eb3fc
--- /dev/null
+++ b/.gitea/settings.yml
@@ -0,0 +1,42 @@
+# Gitea 仓库设置
+repository:
+  name: mgmt
+  description: "基础设施管理项目 - OpenTofu + Ansible + Docker Swarm"
+  website: ""
+  default_branch: main
+
+  # 功能开关
+  has_issues: true
+  has_wiki: true
+  has_projects: true
+  has_actions: true
+
+  # 权限设置
+  private: false
+  allow_merge_commits: true
+  allow_squash_merge: true
+  allow_rebase_merge: true
+  delete_branch_on_merge: true
+
+# Actions 设置
+actions:
+  enabled: true
+  allow_fork_pull_request_run: true
+  default_actions_url: "https://gitea.com"
+
+# 分支保护
+branch_protection:
+  main:
+    enable_push: false
+    enable_push_whitelist: true
+    push_whitelist_usernames: ["ben"]
+    require_signed_commits: false
+    enable_merge_whitelist: true
+    merge_whitelist_usernames: ["ben"]
+    enable_status_check: true
+    status_check_contexts: ["validate", "plan"]
+    enable_approvals_whitelist: false
+    approvals_whitelist_usernames: []
+    block_on_rejected_reviews: true
+    dismiss_stale_approvals: true
+    # NOTE(review): removed duplicate "require_signed_commits" key (already set above)
diff --git a/.gitea/workflows/ansible-deploy.yml b/.gitea/workflows/ansible-deploy.yml
new file mode 100644
index 0000000..325e323
--- /dev/null
+++ b/.gitea/workflows/ansible-deploy.yml
@@ -0,0 +1,136 @@
+name: Ansible Deploy
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: '部署环境'
+        required: true
+        default: 'dev'
+        type: choice
+        options:
+          - dev
+          - staging
+          - production
+      provider:
+        description: '云服务商'
+        required: true
+        default: 'oracle-cloud'
+        type: choice
+        options:
+          - oracle-cloud
+          - huawei-cloud
+          - google-cloud
+          - digitalocean
+          - aws
+      playbook:
+        description: 'Playbook 类型'
+        required: true
+        default: 'bootstrap'
+        type: choice
+        options:
+          - bootstrap
+          - security
+          - applications
+          - monitoring
+          - maintenance
+
+env:
+  ANSIBLE_VERSION: "8.0.0"
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment: ${{ github.event.inputs.environment }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Install Ansible
+        run: |
+          pip install ansible==${{ env.ANSIBLE_VERSION }}
+          pip install ansible-core
+          ansible-galaxy collection install community.general
+          ansible-galaxy collection install ansible.posix
+
+      - name: Setup SSH key
+        run: |
+          mkdir -p ~/.ssh
+          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
+          chmod 600 ~/.ssh/id_rsa
+          ssh-keyscan -H ${{ secrets.SSH_HOST }} >> ~/.ssh/known_hosts
+
+      - name: Create dynamic inventory
+        run: |
+          ENV="${{ github.event.inputs.environment }}"
+          PROVIDER="${{ github.event.inputs.provider }}"
+
+          # 从 OpenTofu 输出创建动态清单
+          if [ -f "configuration/inventories/$ENV/$PROVIDER-inventory.json" ]; then
+            echo "Using existing inventory from OpenTofu output"
+            cp configuration/inventories/$ENV/$PROVIDER-inventory.json /tmp/inventory.json
+          else
+            echo "Creating static inventory"
+            # heredoc body/terminator kept at the run-block base indent so that,
+            # after YAML strips the common indentation, "EOF" starts at column 0
+            # and bash actually terminates the here-document
+            cat > /tmp/inventory.ini << EOF
+          [$ENV]
+          ${{ secrets.TARGET_HOST }} ansible_host=${{ secrets.TARGET_HOST }} ansible_user=${{ secrets.SSH_USER }} ansible_become=yes ansible_become_pass=${{ secrets.SUDO_PASSWORD }}
+
+          [all:vars]
+          ansible_ssh_common_args='-o StrictHostKeyChecking=no'
+          EOF
+          fi
+
+      - name: Run Ansible Playbook
+        run: |
+          ENV="${{ github.event.inputs.environment }}"
+          PLAYBOOK="${{ github.event.inputs.playbook }}"
+
+          cd configuration
+
+          # 选择正确的清单文件
+          if [ -f "/tmp/inventory.json" ]; then
+            INVENTORY="/tmp/inventory.json"
+          else
+            INVENTORY="/tmp/inventory.ini"
+          fi
+
+          # 运行对应的 playbook
+          case "$PLAYBOOK" in
+            "bootstrap")
+              ansible-playbook -i $INVENTORY playbooks/bootstrap/main.yml -e "environment=$ENV"
+              ;;
+            "security")
+              ansible-playbook -i $INVENTORY playbooks/security/main.yml -e "environment=$ENV"
+              ;;
+            "applications")
+              ansible-playbook -i $INVENTORY playbooks/applications/main.yml -e "environment=$ENV"
+              ;;
+            "monitoring")
+              ansible-playbook -i $INVENTORY
playbooks/monitoring/main.yml -e "environment=$ENV" + ;; + "maintenance") + ansible-playbook -i $INVENTORY playbooks/maintenance/main.yml -e "environment=$ENV" + ;; + esac + + - name: Generate deployment report + run: | + echo "## 部署报告" > deployment-report.md + echo "" >> deployment-report.md + echo "**环境**: ${{ github.event.inputs.environment }}" >> deployment-report.md + echo "**云服务商**: ${{ github.event.inputs.provider }}" >> deployment-report.md + echo "**Playbook**: ${{ github.event.inputs.playbook }}" >> deployment-report.md + echo "**时间**: $(date)" >> deployment-report.md + echo "**状态**: ✅ 部署成功" >> deployment-report.md + + - name: Upload deployment report + uses: actions/upload-artifact@v4 + with: + name: deployment-report-${{ github.event.inputs.environment }}-${{ github.event.inputs.provider }} + path: deployment-report.md + retention-days: 30 \ No newline at end of file diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml new file mode 100644 index 0000000..f8beabc --- /dev/null +++ b/.gitea/workflows/deploy.yml @@ -0,0 +1,78 @@ +name: Application Deployment + +on: + push: + branches: [ main ] + paths: + - 'configuration/**' + - 'containers/**' + - '.gitea/workflows/deploy.yml' + workflow_dispatch: + inputs: + environment: + description: 'Target environment' + required: true + default: 'dev' + type: choice + options: + - dev + - staging + - production + +jobs: + ansible-check: + runs-on: ubuntu-latest + name: Ansible Syntax Check + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Ansible + run: | + pip install ansible ansible-core + ansible-galaxy collection install community.general + ansible-galaxy collection install ansible.posix + ansible-galaxy collection install community.docker + + - name: Ansible syntax check + run: | + cd configuration + for playbook in playbooks/*/*.yml; do + if [ -f "$playbook" ]; then + echo 
"Checking $playbook" + ansible-playbook --syntax-check "$playbook" + fi + done + + deploy: + runs-on: ubuntu-latest + name: Deploy Applications + needs: ansible-check + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Ansible + run: | + pip install ansible ansible-core + ansible-galaxy collection install community.general + ansible-galaxy collection install ansible.posix + ansible-galaxy collection install community.docker + + - name: Deploy applications + run: | + cd configuration + ENV="${{ github.event.inputs.environment || 'dev' }}" + ansible-playbook -i "inventories/${ENV}/inventory.ini" playbooks/bootstrap/main.yml + env: + ANSIBLE_HOST_KEY_CHECKING: False diff --git a/.gitea/workflows/docker.yml b/.gitea/workflows/docker.yml new file mode 100644 index 0000000..7855c92 --- /dev/null +++ b/.gitea/workflows/docker.yml @@ -0,0 +1,52 @@ +name: Docker Build and Deploy + +on: + push: + branches: [ main ] + paths: + - 'containers/**' + - 'Dockerfile*' + - '.gitea/workflows/docker.yml' + +jobs: + build: + runs-on: ubuntu-latest + name: Build Docker Images + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ secrets.REGISTRY_URL }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Build and push images + run: | + # 构建应用镜像 + for dockerfile in containers/applications/*/Dockerfile; do + if [ -f "$dockerfile" ]; then + app_name=$(basename $(dirname "$dockerfile")) + echo "Building $app_name" + docker build -t "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}" -f "$dockerfile" . 
+ docker push "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}" + fi + done + + deploy-swarm: + runs-on: ubuntu-latest + name: Deploy to Docker Swarm + needs: build + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Deploy to Swarm + run: | + # 这里可以通过 SSH 连接到 Swarm 管理节点进行部署 + echo "Deploy to Swarm placeholder" diff --git a/.gitea/workflows/infrastructure.yml b/.gitea/workflows/infrastructure.yml new file mode 100644 index 0000000..a2fc1bb --- /dev/null +++ b/.gitea/workflows/infrastructure.yml @@ -0,0 +1,91 @@ +name: Infrastructure CI/CD + +on: + push: + branches: [ main, develop ] + paths: + - 'infrastructure/**' + - '.gitea/workflows/infrastructure.yml' + pull_request: + branches: [ main ] + paths: + - 'infrastructure/**' + +jobs: + validate: + runs-on: ubuntu-latest + name: Validate Infrastructure + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Validate OpenTofu configurations + run: | + for dir in infrastructure/providers/*/; do + if [ -d "$dir" ]; then + echo "Validating $dir" + cd "$dir" + tofu init -backend=false + tofu validate + cd - > /dev/null + fi + done + + - name: Check formatting + run: | + tofu fmt -check -recursive infrastructure/ + + - name: Security scan + run: | + # 这里可以添加 tfsec 或 checkov 扫描 + echo "Security scan placeholder" + + plan: + runs-on: ubuntu-latest + name: Plan Infrastructure + needs: validate + if: github.event_name == 'pull_request' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Plan infrastructure changes + run: | + cd infrastructure/environments/dev + tofu init + tofu plan -var-file="terraform.tfvars" -out=tfplan + env: + # 这里需要配置云服务商的环境变量 + TF_VAR_environment: dev + + apply: + runs-on: ubuntu-latest + name: Apply Infrastructure + needs: validate + if: github.ref == 
'refs/heads/main' && github.event_name == 'push' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Apply infrastructure changes + run: | + cd infrastructure/environments/dev + tofu init + tofu apply -var-file="terraform.tfvars" -auto-approve + env: + TF_VAR_environment: dev diff --git a/.gitea/workflows/terraform-apply.yml b/.gitea/workflows/terraform-apply.yml new file mode 100644 index 0000000..3f6bd42 --- /dev/null +++ b/.gitea/workflows/terraform-apply.yml @@ -0,0 +1,175 @@ +name: OpenTofu Apply +on: + push: + branches: [main] + paths: + - 'infrastructure/**' + workflow_dispatch: + inputs: + environment: + description: '部署环境' + required: true + default: 'dev' + type: choice + options: + - dev + - staging + - production + provider: + description: '云服务商' + required: true + default: 'oracle-cloud' + type: choice + options: + - oracle-cloud + - huawei-cloud + - google-cloud + - digitalocean + - aws + +env: + TOFU_VERSION: "1.10.6" + +jobs: + apply: + runs-on: ubuntu-latest + environment: ${{ github.event.inputs.environment || 'dev' }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: ${{ env.TOFU_VERSION }} + + - name: Configure credentials + run: | + PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}" + echo "Setting up credentials for $PROVIDER" + + case "$PROVIDER" in + "oracle-cloud") + mkdir -p ~/.oci + echo "${{ secrets.OCI_PRIVATE_KEY }}" > ~/.oci/oci_api_key.pem + chmod 600 ~/.oci/oci_api_key.pem + ;; + "huawei-cloud") + export HW_ACCESS_KEY="${{ secrets.HW_ACCESS_KEY }}" + export HW_SECRET_KEY="${{ secrets.HW_SECRET_KEY }}" + ;; + "google-cloud") + echo "${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}" > /tmp/gcp-key.json + export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp-key.json" + ;; + "digitalocean") + export DIGITALOCEAN_TOKEN="${{ 
secrets.DO_TOKEN }}" + ;; + "aws") + export AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}" + export AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}" + ;; + esac + + - name: Create terraform.tfvars + run: | + ENV="${{ github.event.inputs.environment || 'dev' }}" + cd infrastructure/environments/$ENV + cat > terraform.tfvars << EOF + environment = "$ENV" + project_name = "mgmt" + owner = "ben" + + # Oracle Cloud 配置 + oci_config = { + tenancy_ocid = "${{ secrets.OCI_TENANCY_OCID }}" + user_ocid = "${{ secrets.OCI_USER_OCID }}" + fingerprint = "${{ secrets.OCI_FINGERPRINT }}" + private_key_path = "~/.oci/oci_api_key.pem" + region = "ap-seoul-1" + } + + # 华为云配置 + huawei_config = { + access_key = "${{ secrets.HW_ACCESS_KEY }}" + secret_key = "${{ secrets.HW_SECRET_KEY }}" + region = "cn-north-4" + } + + # Google Cloud 配置 + gcp_config = { + project_id = "${{ secrets.GCP_PROJECT_ID }}" + region = "asia-northeast3" + zone = "asia-northeast3-a" + credentials = "/tmp/gcp-key.json" + } + + # DigitalOcean 配置 + do_config = { + token = "${{ secrets.DO_TOKEN }}" + region = "sgp1" + } + + # AWS 配置 + aws_config = { + access_key = "${{ secrets.AWS_ACCESS_KEY_ID }}" + secret_key = "${{ secrets.AWS_SECRET_ACCESS_KEY }}" + region = "ap-northeast-1" + } + EOF + + - name: OpenTofu Init + run: | + PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}" + cd infrastructure/providers/$PROVIDER + tofu init + + - name: OpenTofu Plan + run: | + ENV="${{ github.event.inputs.environment || 'dev' }}" + PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}" + cd infrastructure/providers/$PROVIDER + tofu plan \ + -var-file="../../../environments/$ENV/terraform.tfvars" \ + -out=tfplan + + - name: OpenTofu Apply + run: | + PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}" + cd infrastructure/providers/$PROVIDER + tofu apply -auto-approve tfplan + + - name: Save State + run: | + ENV="${{ github.event.inputs.environment || 'dev' }}" + PROVIDER="${{ 
github.event.inputs.provider || 'oracle-cloud' }}" + cd infrastructure/providers/$PROVIDER + + # 这里可以配置远程状态存储 + # 例如上传到 S3, GCS, 或其他存储 + echo "State saved locally for now" + + - name: Generate Inventory + run: | + ENV="${{ github.event.inputs.environment || 'dev' }}" + PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}" + cd infrastructure/providers/$PROVIDER + + # 生成 Ansible 动态清单 + tofu output -json > ../../../configuration/inventories/$ENV/$PROVIDER-inventory.json + + - name: Trigger Ansible Deployment + uses: actions/github-script@v7 + with: + script: | + github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'ansible-deploy.yml', + ref: 'main', + inputs: { + environment: '${{ github.event.inputs.environment || "dev" }}', + provider: '${{ github.event.inputs.provider || "oracle-cloud" }}' + } + }); \ No newline at end of file diff --git a/.gitea/workflows/terraform-plan.yml b/.gitea/workflows/terraform-plan.yml new file mode 100644 index 0000000..a27793d --- /dev/null +++ b/.gitea/workflows/terraform-plan.yml @@ -0,0 +1,148 @@ +name: OpenTofu Plan +on: + pull_request: + branches: [main, develop] + paths: + - 'infrastructure/**' + - '.gitea/workflows/terraform-plan.yml' + +env: + TOFU_VERSION: "1.10.6" + +jobs: + plan: + runs-on: ubuntu-latest + strategy: + matrix: + environment: [dev, staging, production] + provider: [oracle-cloud, huawei-cloud, google-cloud, digitalocean, aws] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: ${{ env.TOFU_VERSION }} + + - name: Configure credentials + run: | + # 设置各云服务商的认证信息 + echo "Setting up credentials for ${{ matrix.provider }}" + + case "${{ matrix.provider }}" in + "oracle-cloud") + mkdir -p ~/.oci + echo "${{ secrets.OCI_PRIVATE_KEY }}" > ~/.oci/oci_api_key.pem + chmod 600 ~/.oci/oci_api_key.pem + ;; + "huawei-cloud") + export HW_ACCESS_KEY="${{ 
secrets.HW_ACCESS_KEY }}" + export HW_SECRET_KEY="${{ secrets.HW_SECRET_KEY }}" + ;; + "google-cloud") + echo "${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}" > /tmp/gcp-key.json + export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp-key.json" + ;; + "digitalocean") + export DIGITALOCEAN_TOKEN="${{ secrets.DO_TOKEN }}" + ;; + "aws") + export AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}" + export AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}" + ;; + esac + + - name: Create terraform.tfvars + run: | + cd infrastructure/environments/${{ matrix.environment }} + cat > terraform.tfvars << EOF + environment = "${{ matrix.environment }}" + project_name = "mgmt" + owner = "ben" + + # Oracle Cloud 配置 + oci_config = { + tenancy_ocid = "${{ secrets.OCI_TENANCY_OCID }}" + user_ocid = "${{ secrets.OCI_USER_OCID }}" + fingerprint = "${{ secrets.OCI_FINGERPRINT }}" + private_key_path = "~/.oci/oci_api_key.pem" + region = "ap-seoul-1" + } + + # 华为云配置 + huawei_config = { + access_key = "${{ secrets.HW_ACCESS_KEY }}" + secret_key = "${{ secrets.HW_SECRET_KEY }}" + region = "cn-north-4" + } + + # Google Cloud 配置 + gcp_config = { + project_id = "${{ secrets.GCP_PROJECT_ID }}" + region = "asia-northeast3" + zone = "asia-northeast3-a" + credentials = "/tmp/gcp-key.json" + } + + # DigitalOcean 配置 + do_config = { + token = "${{ secrets.DO_TOKEN }}" + region = "sgp1" + } + + # AWS 配置 + aws_config = { + access_key = "${{ secrets.AWS_ACCESS_KEY_ID }}" + secret_key = "${{ secrets.AWS_SECRET_ACCESS_KEY }}" + region = "ap-northeast-1" + } + EOF + + - name: OpenTofu Init + run: | + cd infrastructure/providers/${{ matrix.provider }} + tofu init + + - name: OpenTofu Validate + run: | + cd infrastructure/providers/${{ matrix.provider }} + tofu validate + + - name: OpenTofu Plan + run: | + cd infrastructure/providers/${{ matrix.provider }} + tofu plan \ + -var-file="../../../environments/${{ matrix.environment }}/terraform.tfvars" \ + -out=tfplan-${{ matrix.environment }}-${{ matrix.provider 
}} + + - name: Upload Plan + uses: actions/upload-artifact@v4 + with: + name: tfplan-${{ matrix.environment }}-${{ matrix.provider }} + path: infrastructure/providers/${{ matrix.provider }}/tfplan-${{ matrix.environment }}-${{ matrix.provider }} + retention-days: 30 + + - name: Comment PR + uses: actions/github-script@v7 + if: github.event_name == 'pull_request' + with: + script: | + const fs = require('fs'); + const path = 'infrastructure/providers/${{ matrix.provider }}/tfplan-${{ matrix.environment }}-${{ matrix.provider }}'; + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `## OpenTofu Plan Results + + **Environment:** ${{ matrix.environment }} + **Provider:** ${{ matrix.provider }} + **Status:** ✅ Plan generated successfully + + Plan artifact uploaded: \`tfplan-${{ matrix.environment }}-${{ matrix.provider }}\` + + Please review the plan before merging.` + }); \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..900c2ce --- /dev/null +++ b/.gitignore @@ -0,0 +1,81 @@ +# OpenTofu/Terraform +*.tfstate +*.tfstate.* +*.tfvars +!*.tfvars.example +.terraform/ +.terraform.lock.hcl +crash.log +crash.*.log + +# Ansible +*.retry +.vault_pass +host_vars/*/vault.yml +group_vars/*/vault.yml + +# Docker +.env +docker-compose.override.yml + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +tmp/ +temp/ +.tmp/ + +# Backup files +backup-*/ +*.bak + +# Secrets +secrets/ +*.pem +*.key +*.crt +!*.example.* + +# Node modules (if any) +node_modules/ + +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.mypy_cache +.pytest_cache +.hypothesis + +# Local development +.local/ +local-* diff --git a/Makefile b/Makefile new file 
mode 100644 index 0000000..4daede3 --- /dev/null +++ b/Makefile @@ -0,0 +1,88 @@ +# 项目管理 Makefile + +.PHONY: help setup init plan apply destroy clean test lint docs + +# 默认目标 +help: ## 显示帮助信息 + @echo "可用的命令:" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + +# 环境设置 +setup: ## 设置开发环境 + @echo "🚀 设置开发环境..." + @bash scripts/setup/setup-environment.sh + +# OpenTofu 操作 +init: ## 初始化 OpenTofu + @echo "🏗️ 初始化 OpenTofu..." + @cd infrastructure/environments/dev && tofu init + +plan: ## 生成执行计划 + @echo "📋 生成执行计划..." + @cd infrastructure/environments/dev && tofu plan -var-file="terraform.tfvars" + +apply: ## 应用基础设施变更 + @echo "🚀 应用基础设施变更..." + @cd infrastructure/environments/dev && tofu apply -var-file="terraform.tfvars" + +destroy: ## 销毁基础设施 + @echo "💥 销毁基础设施..." + @cd infrastructure/environments/dev && tofu destroy -var-file="terraform.tfvars" + +# Ansible 操作 +ansible-check: ## 检查 Ansible 配置 + @echo "🔍 检查 Ansible 配置..." + @cd configuration && ansible-playbook --syntax-check playbooks/bootstrap/main.yml + +ansible-deploy: ## 部署应用 + @echo "📦 部署应用..." + @cd configuration && ansible-playbook -i inventories/production/inventory.ini playbooks/bootstrap/main.yml + +# Docker 操作 +docker-build: ## 构建 Docker 镜像 + @echo "🐳 构建 Docker 镜像..." + @docker-compose -f containers/compose/development/docker-compose.yml build + +docker-up: ## 启动开发环境 + @echo "🚀 启动开发环境..." + @docker-compose -f containers/compose/development/docker-compose.yml up -d + +docker-down: ## 停止开发环境 + @echo "🛑 停止开发环境..." + @docker-compose -f containers/compose/development/docker-compose.yml down + +# 测试 +test: ## 运行测试 + @echo "🧪 运行测试..." + @bash scripts/utilities/run-tests.sh + +lint: ## 代码检查 + @echo "🔍 代码检查..." + @bash scripts/utilities/lint.sh + +# 文档 +docs: ## 生成文档 + @echo "📚 生成文档..." + @bash scripts/utilities/generate-docs.sh + +# 清理 +clean: ## 清理临时文件 + @echo "🧹 清理临时文件..." + @find . -name "*.tfstate*" -delete + @find . 
-name ".terraform" -type d -exec rm -rf {} + 2>/dev/null || true + @docker system prune -f + +# 备份 +backup: ## 创建备份 + @echo "💾 创建备份..." + @bash scripts/utilities/backup.sh + +# 监控 +monitor: ## 启动监控 + @echo "📊 启动监控..." + @docker-compose -f containers/compose/production/monitoring.yml up -d + +# 安全扫描 +security-scan: ## 安全扫描 + @echo "🔒 安全扫描..." + @bash scripts/utilities/security-scan.sh \ No newline at end of file diff --git a/README-traefik-swarm.md b/README-traefik-swarm.md deleted file mode 100644 index d24f294..0000000 --- a/README-traefik-swarm.md +++ /dev/null @@ -1,253 +0,0 @@ -# Traefik + Docker Swarm 集成 - -## 📋 概述 - -本项目实现了 Traefik 与 Docker Swarm 的完整集成,提供统一的入口点管理所有 Swarm 服务。 - -## 🏗️ 架构设计 - -``` -Internet - ↓ -Traefik (Load Balancer) - ↓ -Docker Swarm Services - ├── Web App (app.local) - ├── API Service (api.local) - ├── Monitor Service (monitor.local) - └── Other Services... -``` - -## 📁 文件结构 - -``` -/root/mgmt/ -├── traefik-swarm-stack.yml # Traefik 主服务配置 -├── demo-services-stack.yml # 示例服务配置 -├── monitoring-stack.yml # 监控服务配置 -├── swarm-traefik-manager.sh # 管理脚本 -└── README-traefik-swarm.md # 说明文档 -``` - -## 🚀 快速开始 - -### 1. 初始化环境 - -```bash -# 确保 Docker Swarm 已激活 -docker swarm init - -# 初始化 Traefik 环境 -./swarm-traefik-manager.sh init -``` - -### 2. 部署所有服务 - -```bash -# 一键部署所有服务 -./swarm-traefik-manager.sh deploy-all - -# 或分步部署 -./swarm-traefik-manager.sh deploy # 仅部署 Traefik -./swarm-traefik-manager.sh deploy-demo # 部署示例服务 -./swarm-traefik-manager.sh deploy-monitoring # 部署监控服务 -``` - -### 3. 
更新 hosts 文件 - -```bash -# 自动更新 hosts 文件 -./swarm-traefik-manager.sh update-hosts - -# 或手动添加到 /etc/hosts -echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" >> /etc/hosts -``` - -## 🌐 访问地址 - -| 服务 | 地址 | 说明 | -|------|------|------| -| Traefik Dashboard | http://traefik.local:8080 | 管理界面 | -| Web App | http://app.local | 示例 Web 应用 | -| API Service | http://api.local | 示例 API 服务 | -| Monitor Service | http://monitor.local | 监控服务 | -| Prometheus | http://prometheus.local | 指标收集 | -| Grafana | http://grafana.local | 可视化面板 | - -## 🛠️ 管理命令 - -### 查看服务状态 -```bash -./swarm-traefik-manager.sh status -``` - -### 查看服务日志 -```bash -./swarm-traefik-manager.sh logs traefik_traefik -./swarm-traefik-manager.sh logs demo_webapp -``` - -### 扩缩容服务 -```bash -# 扩容 webapp 到 3 个副本 -./swarm-traefik-manager.sh scale demo webapp 3 - -# 扩容 API 服务到 2 个副本 -./swarm-traefik-manager.sh scale demo api 2 -``` - -### 清理环境 -```bash -./swarm-traefik-manager.sh cleanup -``` - -## 📊 监控配置 - -### Prometheus 指标 -- Traefik 指标: http://traefik:8080/metrics -- Node Exporter: 系统指标 -- cAdvisor: 容器指标 - -### Grafana 配置 -- 默认用户: admin -- 默认密码: admin123 -- 数据源: Prometheus (http://prometheus:9090) - -## 🔧 服务配置 - -### 为新服务添加 Traefik 路由 - -在 Docker Compose 文件中添加以下标签: - -```yaml -services: - your-service: - image: your-image - networks: - - traefik-public - deploy: - labels: - - traefik.enable=true - - traefik.http.routers.your-service.rule=Host(`your-domain.local`) - - traefik.http.routers.your-service.entrypoints=web - - traefik.http.services.your-service.loadbalancer.server.port=80 -``` - -### 高级路由配置 - -```yaml -# 路径前缀路由 -- traefik.http.routers.api-path.rule=Host(`app.local`) && PathPrefix(`/api`) - -# HTTPS 重定向 -- traefik.http.routers.your-service.entrypoints=websecure -- traefik.http.routers.your-service.tls.certresolver=letsencrypt - -# 中间件配置 -- traefik.http.routers.your-service.middlewares=auth -- traefik.http.middlewares.auth.basicauth.users=user:password -``` - -## 🔒 
安全配置 - -### 基本认证 -```yaml -labels: - - traefik.http.middlewares.auth.basicauth.users=admin:$$2y$$10$$... - - traefik.http.routers.service.middlewares=auth -``` - -### HTTPS 配置 -```yaml -labels: - - traefik.http.routers.service.tls.certresolver=letsencrypt - - traefik.http.routers.service.entrypoints=websecure -``` - -## 🐛 故障排除 - -### 常见问题 - -1. **服务无法访问** - ```bash - # 检查服务状态 - docker stack services traefik - - # 检查网络连接 - docker network ls | grep traefik-public - ``` - -2. **路由不生效** - ```bash - # 查看 Traefik 日志 - ./swarm-traefik-manager.sh logs traefik_traefik - - # 检查服务标签 - docker service inspect demo_webapp - ``` - -3. **DNS 解析问题** - ```bash - # 检查 hosts 文件 - cat /etc/hosts | grep local - - # 更新 hosts 文件 - ./swarm-traefik-manager.sh update-hosts - ``` - -### 调试命令 - -```bash -# 查看所有 Swarm 服务 -docker service ls - -# 查看特定服务详情 -docker service inspect traefik_traefik - -# 查看服务任务 -docker service ps traefik_traefik - -# 进入容器调试 -docker exec -it $(docker ps -q -f name=traefik) sh -``` - -## 📈 性能优化 - -### 负载均衡配置 -```yaml -labels: - - traefik.http.services.service.loadbalancer.sticky.cookie=true - - traefik.http.services.service.loadbalancer.healthcheck.path=/health -``` - -### 缓存配置 -```yaml -labels: - - traefik.http.middlewares.cache.headers.customrequestheaders.Cache-Control=max-age=3600 -``` - -## 🔄 备份与恢复 - -### 备份配置 -```bash -# 备份 Docker 配置 -docker config ls -docker config inspect config_name - -# 备份 Swarm 状态 -docker node ls -docker service ls -``` - -### 恢复服务 -```bash -# 重新部署服务 -./swarm-traefik-manager.sh deploy-all -``` - -## 📚 参考资料 - -- [Traefik 官方文档](https://doc.traefik.io/traefik/) -- [Docker Swarm 文档](https://docs.docker.com/engine/swarm/) -- [Prometheus 配置](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) -- [Grafana 文档](https://grafana.com/docs/) \ No newline at end of file diff --git a/README.md b/README.md index 21cda4d..87cf4da 100644 --- a/README.md +++ b/README.md @@ -1,364 +1,217 @@ -# 🛠️ 服务器管理自动化项目 +# 🏗️ 基础设施管理项目 -这是一个基于 Ansible 
的服务器管理自动化项目,用于管理多台服务器的系统更新、配置和维护。 +这是一个现代化的多云基础设施管理平台,集成 OpenTofu、Ansible、Docker Swarm 和 Gitea CI/CD。 + +## 🎯 项目特性 + +- **🌩️ 多云支持**: Oracle Cloud, 华为云, Google Cloud, AWS, DigitalOcean +- **🏗️ 基础设施即代码**: 使用 OpenTofu 管理云资源 +- **⚙️ 配置管理**: 使用 Ansible 自动化配置和部署 +- **🐳 容器编排**: Docker Swarm 集群管理 +- **🔄 CI/CD**: Gitea Actions 自动化流水线 +- **📊 监控**: Prometheus + Grafana 监控体系 +- **🔐 安全**: 多层安全防护和合规性 ## 📁 项目结构 ``` mgmt/ -├── ansible/ -│ ├── inventory.ini # 服务器清单 -│ ├── ansible.cfg # Ansible 配置 -│ ├── system-update.yml # 系统更新 playbook -│ ├── cloud-providers-update.yml # 云服务商更新 playbook -│ ├── system-cleanup.yml # 系统清理和维护 -│ ├── service-health-check.yml # 服务健康检查 -│ ├── security-hardening.yml # 安全加固和备份 -│ ├── docker-management.yml # Docker 容器管理 -│ ├── network-connectivity.yml # 网络连通性检查 -│ ├── certificate-management.yml # SSL 证书管理 -│ ├── ops-toolkit.yml # 运维工具包 -│ ├── cron-setup.yml # 定时任务配置 -│ └── run.sh # 执行脚本 -├── scripts/ -│ └── ops-manager.sh # 运维管理脚本 -├── config.json # Semaphore 配置 -├── keys-info.md # SSH 密钥信息 -├── semaphore-setup-guide.md # Semaphore 设置指南 -└── README.md # 项目说明 +├── .gitea/workflows/ # CI/CD 工作流 +├── infrastructure/ # OpenTofu 基础设施代码 +│ ├── environments/ # 环境配置 (dev/staging/prod) +│ ├── modules/ # 可复用模块 +│ ├── providers/ # 云服务商配置 +│ └── shared/ # 共享配置 +├── configuration/ # Ansible 配置管理 +│ ├── inventories/ # 主机清单 +│ ├── playbooks/ # 剧本 +│ ├── roles/ # 角色 +│ └── group_vars/ # 组变量 +├── containers/ # 容器化应用 +│ ├── applications/ # 应用容器 +│ ├── infrastructure/ # 基础设施容器 +│ └── compose/ # Docker Compose 文件 +├── monitoring/ # 监控配置 +├── scripts/ # 自动化脚本 +├── docs/ # 文档 +└── Makefile # 项目管理命令 ``` ## 🚀 快速开始 ### 1. 环境准备 -确保已安装 Ansible: ```bash -# Ubuntu/Debian -sudo apt update && sudo apt install ansible +# 克隆项目 +git clone +cd mgmt -# CentOS/RHEL -sudo yum install ansible +# 设置开发环境 +make setup ``` -### 2. 
配置服务器清单 - -编辑 `ansible/inventory.ini` 文件,服务器已按功能分组: - -- **lxc**: Debian/Ubuntu 容器 -- **alpine**: Alpine Linux 容器 -- **proxmox**: Proxmox VE 物理机 -- **armbian**: ARM 设备 -- **hcp**: HCP 云服务器 -- **feiniu**: 飞牛服务器 -- **germany**: 德国服务器 -- 以及各种云服务商组 - -### 3. 使用运维管理脚本 +### 2. 配置云服务商 ```bash -# 给脚本执行权限 -chmod +x scripts/ops-manager.sh +# 复制配置模板 +cp infrastructure/environments/dev/terraform.tfvars.example infrastructure/environments/dev/terraform.tfvars -# 交互式模式 -./scripts/ops-manager.sh - -# 直接执行 -./scripts/ops-manager.sh update lxc # 更新 LXC 容器 -./scripts/ops-manager.sh cleanup all # 清理所有服务器 -./scripts/ops-manager.sh health proxmox # 检查 Proxmox 健康状态 -./scripts/ops-manager.sh docker lxc # 管理 LXC 中的 Docker -./scripts/ops-manager.sh toolkit germany # 运行德国服务器工具包 - -# 检查模式(不做实际更改) -./scripts/ops-manager.sh update all --check +# 编辑配置文件,填入你的云服务商凭据 +vim infrastructure/environments/dev/terraform.tfvars ``` -## 🛠️ 可用的运维脚本 +### 3. 初始化基础设施 -### 核心功能 -- **system-update.yml**: 系统包更新 -- **system-cleanup.yml**: 磁盘清理、日志清理、缓存清理 -- **service-health-check.yml**: 服务状态监控 -- **security-hardening.yml**: 安全加固和备份 - -### 专业工具 -- **docker-management.yml**: Docker 容器和镜像管理 -- **network-connectivity.yml**: 网络连通性和性能测试 -- **certificate-management.yml**: SSL 证书监控和管理 -- **ops-toolkit.yml**: 统一运维仪表板 - -### 自动化 -- **cron-setup.yml**: 配置定时任务自动化 -- **ops-manager.sh**: 便捷的命令行管理工具 - -## 🤖 自动化定时任务 - -设置自动化定时任务: ```bash -ansible-playbook -i ansible/inventory.ini ansible/cron-setup.yml +# 初始化 OpenTofu +make init + +# 查看执行计划 +make plan + +# 应用基础设施变更 +make apply ``` -配置的定时任务: -- **每日 08:00**: 系统健康检查 -- **每日 01:00**: Docker 清理 (LXC 组) -- **每周日 02:00**: 系统清理 -- **每周一 04:30**: 证书检查 -- **每周二 06:00**: 网络连通性检查 -- **每月1日 03:00**: 安全检查 +### 4. 
部署应用 -查看自动化状态: ```bash -automation-status +# 检查 Ansible 配置 +make ansible-check + +# 部署应用 +make ansible-deploy ``` -## 📊 使用 Semaphore Web UI +## 🛠️ 常用命令 -参考 `semaphore-setup-guide.md` 文件设置 Semaphore Web 界面管理。 +| 命令 | 描述 | +|------|------| +| `make help` | 显示所有可用命令 | +| `make setup` | 设置开发环境 | +| `make init` | 初始化 OpenTofu | +| `make plan` | 生成基础设施执行计划 | +| `make apply` | 应用基础设施变更 | +| `make ansible-deploy` | 部署应用 | +| `make docker-up` | 启动开发环境 | +| `make test` | 运行测试 | +| `make clean` | 清理临时文件 | -推送到 Gitea 后,Semaphore 可以: -- ✅ 直接识别 Ansible 项目结构 -- ✅ 使用现有的 inventory 分组 -- ✅ 运行预定义的 playbooks -- ✅ 支持按组选择性更新 -- ✅ 提供 Web 界面管理和监控 +## 🌩️ 支持的云服务商 -## 💡 最佳实践 +### Oracle Cloud Infrastructure (OCI) +- ✅ 计算实例 +- ✅ 网络配置 (VCN, 子网, 安全组) +- ✅ 存储 (块存储, 对象存储) +- ✅ 负载均衡器 -### 日常运维 -```bash -# 每日快速检查 -./scripts/ops-manager.sh toolkit all +### 华为云 +- ✅ 弹性云服务器 (ECS) +- ✅ 虚拟私有云 (VPC) +- ✅ 弹性负载均衡 (ELB) +- ✅ 云硬盘 (EVS) -# 每周系统维护 -./scripts/ops-manager.sh cleanup all -./scripts/ops-manager.sh health all +### Google Cloud Platform +- ✅ Compute Engine +- ✅ VPC 网络 +- ✅ Cloud Load Balancing +- ✅ Persistent Disk -# 每月安全检查 -./scripts/ops-manager.sh security all --check -./scripts/ops-manager.sh cert all -``` +### Amazon Web Services +- ✅ EC2 实例 +- ✅ VPC 网络 +- ✅ Application Load Balancer +- ✅ EBS 存储 -### 紧急情况 -```bash -# 紧急安全更新 -./scripts/ops-manager.sh update all +### DigitalOcean +- ✅ Droplets +- ✅ VPC 网络 +- ✅ Load Balancers +- ✅ Block Storage -# 网络问题诊断 -./scripts/ops-manager.sh network all +## 🔄 CI/CD 流程 -# 服务状态检查 -./scripts/ops-manager.sh health all -``` +### 基础设施部署流程 +1. **代码提交** → 触发 Gitea Actions +2. **OpenTofu Plan** → 生成执行计划 +3. **人工审核** → 确认变更 +4. **OpenTofu Apply** → 应用基础设施变更 +5. **Ansible 部署** → 配置和部署应用 -### 容器管理 -```bash -# LXC 容器管理 -./scripts/ops-manager.sh docker lxc -./scripts/ops-manager.sh cleanup lxc +### 应用部署流程 +1. **应用代码更新** → 构建 Docker 镜像 +2. **镜像推送** → 推送到镜像仓库 +3. **Compose 更新** → 更新服务定义 +4. **Swarm 部署** → 滚动更新服务 +5. 
**健康检查** → 验证部署状态 -# Alpine 容器更新 -./scripts/ops-manager.sh update alpine -``` +## 📊 监控和可观测性 -## 🔧 高级用法 +### 监控组件 +- **Prometheus**: 指标收集和存储 +- **Grafana**: 可视化仪表板 +- **AlertManager**: 告警管理 +- **Node Exporter**: 系统指标导出 -### 按组管理 -```bash -# 物理机维护 -./scripts/ops-manager.sh cleanup proxmox -./scripts/ops-manager.sh health armbian +### 日志管理 +- **ELK Stack**: Elasticsearch + Logstash + Kibana +- **Fluentd**: 日志收集和转发 +- **结构化日志**: JSON 格式标准化 -# 云服务商管理 -./scripts/ops-manager.sh update huawei -./scripts/ops-manager.sh network google +## 🔐 安全最佳实践 -# 容器管理 -./scripts/ops-manager.sh docker lxc -./scripts/ops-manager.sh update alpine -``` +### 基础设施安全 +- **网络隔离**: VPC, 安全组, 防火墙 +- **访问控制**: IAM 角色和策略 +- **数据加密**: 传输和静态加密 +- **密钥管理**: 云服务商密钥管理服务 -### 检查模式 -```bash -# 检查更新但不执行 -./scripts/ops-manager.sh update all --check +### 应用安全 +- **容器安全**: 镜像扫描, 最小权限 +- **网络安全**: 服务网格, TLS 终止 +- **秘密管理**: Docker Secrets, Ansible Vault +- **安全审计**: 日志监控和审计 -# 详细输出 -./scripts/ops-manager.sh health all --verbose +## 🧪 测试策略 -# 仅显示命令 -./scripts/ops-manager.sh cleanup all --dry-run -``` +### 基础设施测试 +- **语法检查**: OpenTofu validate +- **安全扫描**: Checkov, tfsec +- **合规检查**: OPA (Open Policy Agent) -## 📋 服务器组说明 +### 应用测试 +- **单元测试**: 应用代码测试 +- **集成测试**: 服务间集成测试 +- **端到端测试**: 完整流程测试 -- **lxc**: Debian/Ubuntu 容器 (warden, gitea, mysql, postgresql, influxdb) -- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb) -- **proxmox**: Proxmox VE 物理机 (pve, xgp, nuc12) -- **armbian**: ARM 设备 (onecloud1) -- **hcp**: HCP 云服务器 (hcp1, hcp2) -- **feiniu**: 飞牛服务器 (snail) -- **germany**: 德国服务器 (de) -- **dev**: 开发服务器 (dev1, dev2) -- **oci_kr/oci_us**: Oracle 云服务器 -- **huawei/google/aws**: 各云服务商 +## 📚 文档 -## 📝 注意事项 +- [架构概览](docs/architecture/project-overview.md) +- [部署指南](docs/runbooks/deployment-guide.md) +- [运维手册](docs/runbooks/operations-guide.md) +- [故障排除](docs/runbooks/troubleshooting.md) +- [API 文档](docs/api/README.md) -- 确保 SSH 密钥已正确配置 -- LXC 组更新需要顺序执行,避免同时更新 -- Alpine 容器使用 `apk` 包管理器 -- 建议先在测试环境验证 -- 定期备份重要数据 
-- 监控自动化日志:`tail -f /var/log/daily-health-check.log` +## 🤝 贡献指南 -## 🆘 故障排除 +1. Fork 项目 +2. 创建特性分支 (`git checkout -b feature/amazing-feature`) +3. 提交变更 (`git commit -m 'Add amazing feature'`) +4. 推送到分支 (`git push origin feature/amazing-feature`) +5. 创建 Pull Request -### 连接问题 -```bash -# 测试连接 -ansible all -i ansible/inventory.ini -m ping +## 📄 许可证 -# 检查特定组 -ansible lxc -i ansible/inventory.ini -m ping -e "ansible_ssh_pass=313131" -``` +本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情。 -### 权限问题 -```bash -# 检查 sudo 权限 -ansible all -i ansible/inventory.ini -m shell -a "whoami" --become -``` +## 🆘 支持 -### 日志查看 -```bash -# 查看自动化日志 -ls -la /var/log/*-*.log -tail -f /var/log/daily-health-check.log -``` +如果你遇到问题或有疑问: -## 🎯 运维脚本使用示例 +1. 查看 [文档](docs/) +2. 搜索 [Issues](../../issues) +3. 创建新的 [Issue](../../issues/new) -### 系统更新 -```bash -# 更新所有服务器 -./scripts/ops-manager.sh update all +## 🎉 致谢 -# 更新特定组 -./scripts/ops-manager.sh update lxc -./scripts/ops-manager.sh update alpine -./scripts/ops-manager.sh update proxmox -``` - -### 系统清理 -```bash -# 清理所有服务器 -./scripts/ops-manager.sh cleanup all - -# 清理特定组 -./scripts/ops-manager.sh cleanup lxc -``` - -### 健康检查 -```bash -# 检查所有服务器健康状态 -./scripts/ops-manager.sh health all - -# 检查特定组 -./scripts/ops-manager.sh health proxmox -``` - -### Docker 管理 -```bash -# 管理 LXC 组的 Docker -./scripts/ops-manager.sh docker lxc - -# 检查 Docker 状态 -./scripts/ops-manager.sh docker all -``` - -### 网络诊断 -```bash -# 检查网络连通性 -./scripts/ops-manager.sh network all - -# 检查特定组网络 -./scripts/ops-manager.sh network germany -``` - -### 证书管理 -```bash -# 检查所有证书 -./scripts/ops-manager.sh cert all - -# 检查特定组证书 -./scripts/ops-manager.sh cert proxmox -``` - -### 安全检查 -```bash -# 安全检查(检查模式) -./scripts/ops-manager.sh security all --check - -# 执行安全加固 -./scripts/ops-manager.sh security all -``` - -### 运维工具包 -```bash -# 运行完整的运维工具包 -./scripts/ops-manager.sh toolkit all - -# 检查特定服务器 -./scripts/ops-manager.sh toolkit germany -``` - -## 📈 监控和日志 - -### 自动化监控 -```bash -# 查看自动化状态 
-automation-status - -# 查看定时任务 -crontab -l - -# 查看最近的健康检查 -tail -20 /var/log/daily-health-check.log -``` - -### 手动日志查看 -```bash -# 查看所有自动化日志 -ls -la /var/log/*-*.log - -# 实时监控日志 -tail -f /var/log/daily-health-check.log - -# 查看清理日志 -cat /var/log/weekly-cleanup.log -``` - -## 🔄 定期维护建议 - -### 每日 -- 运行 `./scripts/ops-manager.sh toolkit all` 快速检查 -- 查看 `automation-status` 了解自动化状态 - -### 每周 -- 运行 `./scripts/ops-manager.sh cleanup all` 系统清理 -- 运行 `./scripts/ops-manager.sh health all` 健康检查 -- 检查 `/var/log/` 下的日志文件 - -### 每月 -- 运行 `./scripts/ops-manager.sh security all --check` 安全检查 -- 运行 `./scripts/ops-manager.sh cert all` 证书检查 -- 运行 `./scripts/ops-manager.sh network all` 网络检查 - -### 按需 -- 系统更新:`./scripts/ops-manager.sh update [group]` -- Docker 清理:`./scripts/ops-manager.sh docker lxc` -- 网络诊断:`./scripts/ops-manager.sh network all` - -## 许可证 - -MIT License \ No newline at end of file +感谢所有为这个项目做出贡献的开发者和社区成员! \ No newline at end of file diff --git a/ansible/README.md b/ansible/README.md deleted file mode 100644 index e09d6ff..0000000 --- a/ansible/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# Ansible Playbooks 管理文档 - -## 📁 目录结构 - -``` -ansible/ -├── playbooks/ # 主要 playbooks 目录 -│ ├── 01-system/ # 系统管理类 -│ ├── 02-security/ # 安全管理类 -│ ├── 03-services/ # 服务管理类 -│ ├── 04-monitoring/ # 监控检查类 -│ ├── 05-cloud/ # 云服务商专用 -│ └── 99-tools/ # 工具和集成类 -├── inventory.ini # 主机清单 -├── ansible.cfg # Ansible 配置 -├── run.sh # 原始运行脚本 -└── run-playbook.sh # 新的分类运行脚本 -``` - -## 🎯 分类说明 - -### 01-system (系统管理) -负责基础系统的维护和管理任务。 - -| Playbook | 功能描述 | 适用主机 | -|----------|----------|----------| -| `system-update.yml` | 系统包更新和升级 | 所有 Linux 主机 | -| `system-cleanup.yml` | 系统清理和维护 | 所有主机 | -| `cron-setup.yml` | 定时任务配置 | 需要定时任务的主机 | - -### 02-security (安全管理) -处理安全相关的配置和监控。 - -| Playbook | 功能描述 | 适用主机 | -|----------|----------|----------| -| `security-hardening.yml` | SSH 安全加固和备份 | 所有主机 | -| `certificate-management.yml` | SSL 证书管理和监控 | Web 服务器和 SSL 服务 | - -### 03-services (服务管理) -管理各种服务和容器。 - -| Playbook | 
功能描述 | 适用主机 | -|----------|----------|----------| -| `docker-management.yml` | Docker 容器管理 | Docker 主机 | -| `docker-status-check.yml` | Docker 状态检查 | Docker Swarm 节点 | - -### 04-monitoring (监控检查) -系统和服务的健康检查。 - -| Playbook | 功能描述 | 适用主机 | -|----------|----------|----------| -| `service-health-check.yml` | 服务健康状态监控 | 所有主机 | -| `network-connectivity.yml` | 网络连接性能检查 | 所有主机 | - -### 05-cloud (云服务商专用) -针对特定云服务商的优化脚本。 - -| Playbook | 功能描述 | 适用主机 | -|----------|----------|----------| -| `cloud-providers-update.yml` | 云服务商系统更新 | huawei, google, digitalocean, aws | - -### 99-tools (工具和集成) -运维工具和集成脚本。 - -| Playbook | 功能描述 | 适用主机 | -|----------|----------|----------| -| `ops-toolkit.yml` | 统一运维管理面板 | 所有主机 | - -## 🚀 使用方法 - -### 1. 使用新的分类运行脚本 - -```bash -# 查看帮助 -./run-playbook.sh help - -# 列出所有可用的 playbooks -./run-playbook.sh list - -# 运行特定分类的 playbook -./run-playbook.sh 01-system system-update.yml all -./run-playbook.sh 03-services docker-status-check.yml hcp -./run-playbook.sh 04-monitoring network-connectivity.yml dev1 -``` - -### 2. 
直接使用 ansible-playbook - -```bash -# 运行系统更新 -ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml - -# 检查 Docker 状态 -ansible-playbook -i inventory.ini playbooks/03-services/docker-status-check.yml --limit hcp - -# 网络连接检查 -ansible-playbook -i inventory.ini playbooks/04-monitoring/network-connectivity.yml --limit dev1 -``` - -## 📋 主机组说明 - -根据 `inventory.ini` 配置的主机组: - -- **dev**: 开发环境 (dev1, dev2) -- **hcp**: HCP 节点 (hcp1, hcp2) - Docker Swarm 集群 -- **oci_kr**: Oracle Cloud Korea (ch2, ch3, master) -- **oci_us**: Oracle Cloud US (ash1d, ash2e, ash3c) -- **huawei**: 华为云 (hcs) -- **google**: Google Cloud (benwork) -- **digitalocean**: DigitalOcean (syd) -- **aws**: Amazon Web Services (awsirish) -- **proxmox**: Proxmox 虚拟化 (pve, xgp, nuc12) -- **lxc**: LXC 容器 (warden, gitea, influxdb, mysql, postgresql) -- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb) -- **vm**: 虚拟机 (kali) - -## 🔧 配置文件 - -### ansible.cfg -已更新支持新的目录结构,包含: -- 新的 playbooks 路径配置 -- SSH 连接优化 -- 动态 inventory 支持 - -### inventory.ini -包含所有主机的连接信息和分组配置。 - -## 📝 最佳实践 - -1. **按功能分类运行**: 根据需要选择合适的分类目录 -2. **使用主机组**: 利用 inventory 中的主机组进行批量操作 -3. **测试先行**: 在开发环境先测试,再应用到生产环境 -4. **日志记录**: 重要操作建议记录执行日志 -5. **定期维护**: 定期运行系统清理和更新脚本 - -## 🆘 故障排除 - -### 常见问题 - -1. **SSH 连接失败** - - 检查主机是否可达 - - 验证 SSH 密钥或密码 - - 确认用户权限 - -2. **Playbook 执行失败** - - 检查目标主机的系统类型 - - 验证所需的软件包是否安装 - - 查看详细错误日志 - -3. 
**权限问题** - - 确认 `ansible_become` 配置正确 - - 验证 sudo 权限 - -### 调试命令 - -```bash -# 测试连接 -ansible all -i inventory.ini -m ping - -# 详细输出 -ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml -vvv - -# 检查语法 -ansible-playbook --syntax-check playbooks/01-system/system-update.yml -``` - ---- - -*最后更新: $(date '+%Y-%m-%d %H:%M:%S')* \ No newline at end of file diff --git a/ansible/run-playbook.sh b/ansible/run-playbook.sh deleted file mode 100755 index 70ea39f..0000000 --- a/ansible/run-playbook.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash - -# Ansible Playbooks 分类运行脚本 -# 使用方法: ./run-playbook.sh [category] [playbook] [hosts] - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PLAYBOOKS_DIR="$SCRIPT_DIR/playbooks" - -# 颜色定义 -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# 显示使用帮助 -show_help() { - echo -e "${BLUE}Ansible Playbooks 分类运行脚本${NC}" - echo "" - echo "使用方法:" - echo " $0 [category] [playbook] [hosts]" - echo "" - echo "可用分类:" - echo -e " ${GREEN}01-system${NC} - 系统管理 (更新、清理、定时任务)" - echo -e " ${GREEN}02-security${NC} - 安全管理 (安全加固、证书管理)" - echo -e " ${GREEN}03-services${NC} - 服务管理 (Docker、容器服务)" - echo -e " ${GREEN}04-monitoring${NC} - 监控检查 (健康检查、网络连接)" - echo -e " ${GREEN}05-cloud${NC} - 云服务商专用" - echo -e " ${GREEN}99-tools${NC} - 工具和集成" - echo "" - echo "示例:" - echo " $0 list # 列出所有可用的 playbooks" - echo " $0 01-system system-update.yml all # 在所有主机上运行系统更新" - echo " $0 03-services docker-status-check.yml hcp # 在 hcp 组上检查 Docker 状态" - echo " $0 04-monitoring network-connectivity.yml dev1 # 在 dev1 主机上检查网络连接" -} - -# 列出所有可用的 playbooks -list_playbooks() { - echo -e "${BLUE}可用的 Ansible Playbooks:${NC}" - echo "" - - for category in $(ls -1 "$PLAYBOOKS_DIR" | sort); do - if [ -d "$PLAYBOOKS_DIR/$category" ]; then - echo -e "${GREEN}📁 $category${NC}" - for playbook in $(ls -1 "$PLAYBOOKS_DIR/$category"/*.yml 2>/dev/null | sort); do - if [ -f "$playbook" ]; then - 
basename_playbook=$(basename "$playbook") - echo -e " └── ${YELLOW}$basename_playbook${NC}" - fi - done - echo "" - fi - done -} - -# 运行指定的 playbook -run_playbook() { - local category="$1" - local playbook="$2" - local hosts="$3" - - local playbook_path="$PLAYBOOKS_DIR/$category/$playbook" - - if [ ! -f "$playbook_path" ]; then - echo -e "${RED}错误: Playbook 文件不存在: $playbook_path${NC}" - exit 1 - fi - - echo -e "${GREEN}运行 Playbook:${NC} $category/$playbook" - echo -e "${GREEN}目标主机:${NC} $hosts" - echo "" - - # 运行 ansible-playbook - ansible-playbook -i inventory.ini "$playbook_path" --limit "$hosts" -} - -# 主逻辑 -case "${1:-}" in - "help"|"-h"|"--help"|"") - show_help - ;; - "list"|"ls") - list_playbooks - ;; - *) - if [ $# -lt 3 ]; then - echo -e "${RED}错误: 参数不足${NC}" - echo "" - show_help - exit 1 - fi - - category="$1" - playbook="$2" - hosts="$3" - - if [ ! -d "$PLAYBOOKS_DIR/$category" ]; then - echo -e "${RED}错误: 分类目录不存在: $category${NC}" - echo "" - list_playbooks - exit 1 - fi - - run_playbook "$category" "$playbook" "$hosts" - ;; -esac \ No newline at end of file diff --git a/ansible/run.sh b/ansible/run.sh deleted file mode 100755 index dc12746..0000000 --- a/ansible/run.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash - -# Ansible Playbook Runner Script -# Usage: ./run.sh -dev (or any group name) - -# Set script directory -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -INVENTORY_FILE="$SCRIPT_DIR/inventory.ini" -PLAYBOOK_FILE="$SCRIPT_DIR/system-update.yml" - -# Function to display usage -show_usage() { - echo "Usage: $0 -" - echo "" - echo "Examples:" - echo " $0 -dev # Run on dev group (dev1, dev2)" - echo " $0 -prod # Run on prod group" - echo " $0 -all # Run on all hosts" - echo "" - echo "Available groups in inventory:" - grep '^\[' "$INVENTORY_FILE" | grep -v ':vars' | sed 's/\[//g' | sed 's/\]//g' | sort -} - -# Function to check if group exists in inventory -check_group_exists() { - local group_name="$1" - if [ "$group_name" = "all" ]; 
then - return 0 - fi - - if grep -q "^\[$group_name\]" "$INVENTORY_FILE"; then - return 0 - else - return 1 - fi -} - -# Function to run ansible playbook -run_playbook() { - local group_name="$1" - - echo "=========================================" - echo "Running Ansible Playbook on group: $group_name" - echo "=========================================" - echo "Inventory: $INVENTORY_FILE" - echo "Playbook: $PLAYBOOK_FILE" - echo "Target: $group_name" - echo "=========================================" - echo "" - - # Set environment variables for better output - export LANG=C - export ANSIBLE_HOST_KEY_CHECKING=False - - # Run the playbook - cd "$SCRIPT_DIR" - ansible-playbook -i "$INVENTORY_FILE" "$PLAYBOOK_FILE" --limit "$group_name" -v - - local exit_code=$? - echo "" - echo "=========================================" - if [ $exit_code -eq 0 ]; then - echo "✅ Playbook execution completed successfully!" - else - echo "❌ Playbook execution failed with exit code: $exit_code" - fi - echo "=========================================" - - return $exit_code -} - -# Main script logic -main() { - # Check if argument is provided - if [ $# -eq 0 ]; then - echo "❌ Error: No group specified" - echo "" - show_usage - exit 1 - fi - - # Parse argument - local arg="$1" - if [[ "$arg" =~ ^-(.+)$ ]]; then - local group_name="${BASH_REMATCH[1]}" - else - echo "❌ Error: Invalid argument format. Use -" - echo "" - show_usage - exit 1 - fi - - # Check if files exist - if [ ! -f "$INVENTORY_FILE" ]; then - echo "❌ Error: Inventory file not found: $INVENTORY_FILE" - exit 1 - fi - - if [ ! -f "$PLAYBOOK_FILE" ]; then - echo "❌ Error: Playbook file not found: $PLAYBOOK_FILE" - exit 1 - fi - - # Check if group exists - if ! 
check_group_exists "$group_name"; then - echo "❌ Error: Group '$group_name' not found in inventory" - echo "" - show_usage - exit 1 - fi - - # Run the playbook - run_playbook "$group_name" -} - -# Handle help argument -if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then - show_usage - exit 0 -fi - -# Run main function -main "$@" \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index a964117..0000000 --- a/config.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "postgres": { - "host": "postgresql:5432", - "user": "postgres", - "pass": "Ccie#15544", - "name": "semaphore", - "options": { - "sslmode": "disable" - } - }, - "dialect": "postgres", - "tmp_path": "/tmp/semaphore", - "cookie_hash": "DlY3h3sXjiJV04u4F1eF6ZuLEQOw+jlXe6Qj4Fxn2m8=", - "cookie_encryption": "4BZst4BOkCobGLDQn00WuSVFH8oA4dcgTgbZf7rtkyo=", - "access_key_encryption": "SqHu6FvyjMkFfjJ/8apw5HN26XZaXNg32Yqp0p1tFs8=" - } \ No newline at end of file diff --git a/ansible/ansible.cfg b/configuration/ansible.cfg similarity index 100% rename from ansible/ansible.cfg rename to configuration/ansible.cfg diff --git a/ansible/inventory.ini b/configuration/inventories/production/inventory.ini similarity index 100% rename from ansible/inventory.ini rename to configuration/inventories/production/inventory.ini diff --git a/ansible/playbooks/05-cloud/cloud-providers-update.yml b/configuration/playbooks/applications/cloud-providers-update.yml similarity index 100% rename from ansible/playbooks/05-cloud/cloud-providers-update.yml rename to configuration/playbooks/applications/cloud-providers-update.yml diff --git a/ansible/playbooks/03-services/docker-management.yml b/configuration/playbooks/applications/docker-management.yml similarity index 100% rename from ansible/playbooks/03-services/docker-management.yml rename to configuration/playbooks/applications/docker-management.yml diff --git a/ansible/playbooks/03-services/docker-status-check.yml 
b/configuration/playbooks/applications/docker-status-check.yml similarity index 100% rename from ansible/playbooks/03-services/docker-status-check.yml rename to configuration/playbooks/applications/docker-status-check.yml diff --git a/configuration/playbooks/applications/docker-swarm-analysis-simple.yml b/configuration/playbooks/applications/docker-swarm-analysis-simple.yml new file mode 100644 index 0000000..98eeb5c --- /dev/null +++ b/configuration/playbooks/applications/docker-swarm-analysis-simple.yml @@ -0,0 +1,210 @@ +--- +- name: Simple Docker Swarm Analysis for ash3c + hosts: ash3c + become: yes + gather_facts: yes + + tasks: + # 基础检查 + - name: Check if Docker is installed + command: which docker + register: docker_installed + failed_when: false + changed_when: false + + - name: Fail if Docker not installed + fail: + msg: "Docker is not installed on {{ inventory_hostname }}" + when: docker_installed.rc != 0 + + # 检查当前 Swarm 状态 + - name: Check Docker Swarm status + shell: docker info | grep "Swarm:" -A 1 + register: swarm_status + + - name: Display current Swarm status + debug: + msg: "🔍 Current Swarm Status: {{ swarm_status.stdout_lines }}" + + # 获取运行中的容器 + - name: Get running containers + shell: docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" + register: running_containers + + - name: Display running containers + debug: + msg: "🏃 Running Containers: {{ running_containers.stdout_lines }}" + + # 获取所有容器(包括停止的) + - name: Get all containers + shell: docker ps -a --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}" + register: all_containers + + - name: Display all containers + debug: + msg: "📦 All Containers: {{ all_containers.stdout_lines }}" + + # 检查每个容器的详细信息 + - name: Get container names only + shell: docker ps -a --format "{{ '{{' }}.Names{{ '}}' }}" + register: container_names + + - name: Inspect each container + shell: 
| + echo "=== Container: {{ item }} ===" + echo "Image: $(docker inspect {{ item }} --format '{{ '{{' }}.Config.Image{{ '}}' }}')" + echo "Status: $(docker inspect {{ item }} --format '{{ '{{' }}.State.Status{{ '}}' }}')" + echo "Restart Policy: $(docker inspect {{ item }} --format '{{ '{{' }}.HostConfig.RestartPolicy.Name{{ '}}' }}')" + echo "Network Mode: $(docker inspect {{ item }} --format '{{ '{{' }}.HostConfig.NetworkMode{{ '}}' }}')" + echo "Published Ports: $(docker port {{ item }} 2>/dev/null || echo 'None')" + echo "Volumes/Mounts:" + docker inspect {{ item }} --format '{{ '{{' }}range .Mounts{{ '}}' }} {{ '{{' }}.Source{{ '}}' }}:{{ '{{' }}.Destination{{ '}}' }} ({{ '{{' }}.Mode{{ '}}' }}){{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' || echo " None" + echo "Environment Variables:" + docker inspect {{ item }} --format '{{ '{{' }}range .Config.Env{{ '}}' }} {{ '{{' }}.{{ '}}' }}{{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' | head -10 + echo "Labels:" + docker inspect {{ item }} --format '{{ '{{' }}range $key, $value := .Config.Labels{{ '}}' }} {{ '{{' }}$key{{ '}}' }}={{ '{{' }}$value{{ '}}' }}{{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' | head -5 + echo "---" + register: container_inspect + loop: "{{ container_names.stdout_lines }}" + when: container_names.stdout_lines | length > 0 + + - name: Display container inspection results + debug: + msg: "{{ item.stdout }}" + loop: "{{ container_inspect.results }}" + when: container_inspect is defined + + # 检查 Docker Compose 文件 + - name: Find docker-compose files + find: + paths: + - /root + - /home + - /opt + patterns: + - "docker-compose.yml" + - "docker-compose.yaml" + - "compose.yml" + - "compose.yaml" + recurse: yes + depth: 3 + register: compose_files + + - name: Display found compose files + debug: + msg: "📄 Found compose files: {{ item.path }}" + loop: "{{ compose_files.files }}" + when: compose_files.files | length > 0 + + # 分析网络配置 + - name: Get Docker networks + shell: docker network ls + 
register: docker_networks + + - name: Display Docker networks + debug: + msg: "🌐 Docker Networks: {{ docker_networks.stdout_lines }}" + + # 检查卷使用情况 + - name: Get Docker volumes + shell: docker volume ls + register: docker_volumes + + - name: Display Docker volumes + debug: + msg: "💾 Docker Volumes: {{ docker_volumes.stdout_lines }}" + + # 检查容器资源使用 + - name: Get container resource usage + shell: docker stats --no-stream + register: container_stats + when: container_names.stdout_lines | length > 0 + + - name: Display container stats + debug: + msg: "📊 Container Resource Usage: {{ container_stats.stdout_lines }}" + when: container_stats is defined + + # 生成 Swarm 适用性分析 + - name: Generate Swarm suitability analysis + debug: + msg: | + 🔍 DOCKER SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }} + ================================================================ + + 📋 SUMMARY: + - Current Swarm Status: {{ 'Active' if 'active' in swarm_status.stdout else 'Inactive' }} + - Total Containers: {{ container_names.stdout_lines | length }} + - Running Containers: {{ (running_containers.stdout_lines | length) - 1 }} + - Compose Files Found: {{ compose_files.files | length }} + + 💡 GENERAL RECOMMENDATIONS: + + ✅ SUITABLE FOR SWARM (typically): + - Web applications (nginx, apache, etc.) + - API services + - Databases (with proper volume management) + - Monitoring tools (prometheus, grafana, etc.) + - Load balancers + + ❌ NOT SUITABLE FOR SWARM: + - Containers using Docker socket (/var/run/docker.sock) + - Containers with --privileged flag + - Containers requiring specific host access + - Development/testing containers + + ⚠️ NEEDS MODIFICATION: + - Containers using bind mounts (convert to volumes) + - Containers without restart policies + - Containers using host networking + + 🚀 NEXT STEPS: + 1. Review each container's configuration above + 2. Identify services that can benefit from scaling + 3. Convert suitable containers to Docker services + 4. Set up overlay networks + 5. 
Configure secrets and configs management + + 📝 MIGRATION CHECKLIST: + □ Initialize Swarm (already done: {{ 'Yes' if 'active' in swarm_status.stdout else 'No' }}) + □ Create overlay networks + □ Convert containers to services + □ Set up service discovery + □ Configure load balancing + □ Test service scaling + □ Set up monitoring + when: container_names is defined + + # 保存分析结果 + - name: Save analysis summary + copy: + content: | + Docker Swarm Analysis for {{ inventory_hostname }} + Generated: {{ ansible_date_time.iso8601 }} + + Current Swarm Status: {{ swarm_status.stdout }} + Total Containers: {{ container_names.stdout_lines | length }} + + Container List: + {{ container_names.stdout_lines | join('\n') }} + + Networks: + {{ docker_networks.stdout }} + + Volumes: + {{ docker_volumes.stdout }} + + Compose Files Found: + {% for file in compose_files.files %} + - {{ file.path }} + {% endfor %} + dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt" + + - name: Analysis complete + debug: + msg: | + 🎉 Analysis complete! + Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt + + Review the container details above to determine which services + are suitable for Swarm migration. 
\ No newline at end of file diff --git a/configuration/playbooks/applications/docker-swarm-analysis.yml b/configuration/playbooks/applications/docker-swarm-analysis.yml new file mode 100644 index 0000000..12a1fd6 --- /dev/null +++ b/configuration/playbooks/applications/docker-swarm-analysis.yml @@ -0,0 +1,246 @@ +--- +- name: Docker Swarm Migration Analysis for ash3c + hosts: ash3c + become: yes + gather_facts: yes + + vars: + analysis_results: [] + + tasks: + # 基础检查 + - name: Check if Docker is installed + command: which docker + register: docker_installed + failed_when: false + changed_when: false + + - name: Fail if Docker not installed + fail: + msg: "Docker is not installed on {{ inventory_hostname }}" + when: docker_installed.rc != 0 + + # 检查当前 Swarm 状态 + - name: Check Docker Swarm status + shell: docker info --format "{{ '{{' }}.Swarm.LocalNodeState{{ '}}' }}" + register: swarm_status + + - name: Display current Swarm status + debug: + msg: "🔍 Current Swarm Status: {{ swarm_status.stdout }}" + + # 获取所有容器的详细信息 + - name: Get all containers (running and stopped) + shell: docker ps -a --format "{{ '{{' }}.Names{{ '}}' }}" + register: all_containers + + - name: Get basic container information + shell: | + echo "=== Container: {{ item }} ===" + docker inspect {{ item }} | jq -r ' + .[0] | + "Image: " + .Config.Image, + "Status: " + .State.Status, + "RestartPolicy: " + .HostConfig.RestartPolicy.Name, + "NetworkMode: " + .HostConfig.NetworkMode, + "Ports: " + (.NetworkSettings.Ports | keys | join(", ")), + "Volumes: " + ([.Mounts[]? 
| .Source + ":" + .Destination + ":" + .Mode] | join(" ")), + "Memory: " + (.HostConfig.Memory | tostring), + "CPUs: " + (.HostConfig.NanoCpus | tostring) + ' + echo "---" + register: container_details + loop: "{{ all_containers.stdout_lines }}" + when: all_containers.stdout_lines | length > 0 + + - name: Display container details + debug: + msg: "{{ item.stdout }}" + loop: "{{ container_details.results }}" + when: container_details is defined + + # 检查 Docker Compose 文件 + - name: Find docker-compose files + find: + paths: + - /root + - /home + - /opt + patterns: + - "docker-compose.yml" + - "docker-compose.yaml" + - "compose.yml" + - "compose.yaml" + recurse: yes + register: compose_files + + - name: Display found compose files + debug: + msg: "📄 Found compose files: {{ item.path }}" + loop: "{{ compose_files.files }}" + when: compose_files.files | length > 0 + + # 分析网络配置 + - name: Get Docker networks + shell: docker network ls --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.Driver{{ '}}' }}\t{{ '{{' }}.Scope{{ '}}' }}" + register: docker_networks + + - name: Display Docker networks + debug: + msg: "🌐 Docker Networks: {{ docker_networks.stdout_lines }}" + + # 检查卷使用情况 + - name: Get Docker volumes + shell: docker volume ls --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.Driver{{ '}}' }}" + register: docker_volumes + + - name: Display Docker volumes + debug: + msg: "💾 Docker Volumes: {{ docker_volumes.stdout_lines }}" + + # 检查容器资源使用 + - name: Get container resource usage + shell: docker stats --no-stream --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" + register: container_stats + when: all_containers.stdout_lines | length > 0 + + - name: Display container stats + debug: + msg: "📊 Container Resource Usage: {{ container_stats.stdout_lines }}" + when: container_stats is defined + + # 分析服务类型和 Swarm 适用性 + - name: Analyze containers for Swarm suitability + set_fact: 
+ swarm_analysis: | + 🔍 SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }} + ================================================ + + Current Swarm Status: {{ swarm_status.stdout }} + Total Containers: {{ all_containers.stdout_lines | length }} + + 📋 CONTAINER ANALYSIS: + {% for container in container_details.results %} + + Container: {{ container.item }} + {% set details = container.stdout.split('\n') %} + {% for line in details %} + {{ line }} + {% endfor %} + + SWARM SUITABILITY ASSESSMENT: + {% if 'restart=always' in container.stdout or 'restart=unless-stopped' in container.stdout %} + ✅ Good restart policy for Swarm + {% else %} + ⚠️ Consider adding restart policy + {% endif %} + + {% if 'NetworkMode: bridge' in container.stdout or 'NetworkMode: host' in container.stdout %} + ⚠️ May need network configuration for Swarm + {% else %} + ✅ Custom network - good for Swarm + {% endif %} + + {% if '/var/run/docker.sock' in container.stdout %} + ❌ Uses Docker socket - NOT suitable for Swarm + {% elif 'bind' in container.stdout %} + ⚠️ Uses bind mounts - consider using volumes + {% else %} + ✅ Good volume configuration + {% endif %} + + {% endfor %} + + 💡 RECOMMENDATIONS: + + SUITABLE FOR SWARM: + {% for container in container_details.results %} + {% if '/var/run/docker.sock' not in container.stdout %} + - {{ container.item }}: Ready for Swarm migration + {% endif %} + {% endfor %} + + NEEDS MODIFICATION: + {% for container in container_details.results %} + {% if '/var/run/docker.sock' in container.stdout %} + - {{ container.item }}: Uses Docker socket - keep as standalone + {% elif 'bind' in container.stdout %} + - {{ container.item }}: Convert bind mounts to volumes + {% endif %} + {% endfor %} + + NEXT STEPS: + 1. Initialize Swarm: docker swarm init + 2. Create overlay networks for services + 3. Convert suitable containers to services + 4. Set up service discovery and load balancing + 5. 
Configure secrets and configs management + when: container_details is defined + + - name: Display Swarm analysis + debug: + msg: "{{ swarm_analysis }}" + when: swarm_analysis is defined + + # 生成迁移脚本建议 + - name: Generate migration script suggestions + set_fact: + migration_script: | + #!/bin/bash + # Docker Swarm Migration Script for {{ inventory_hostname }} + # Generated on {{ ansible_date_time.iso8601 }} + + echo "🚀 Starting Docker Swarm migration..." + + # Initialize Swarm (if not already done) + if [ "{{ swarm_status.stdout }}" != "active" ]; then + echo "Initializing Docker Swarm..." + docker swarm init + fi + + # Create overlay networks + echo "Creating overlay networks..." + docker network create -d overlay --attachable app-network + + # Example service creation (modify as needed) + {% for container in container_details.results if container_details is defined %} + {% if '/var/run/docker.sock' not in container.stdout %} + echo "Converting {{ container.item }} to Swarm service..." + # docker service create --name {{ container.item }}-svc \ + # --network app-network \ + # --replicas 1 \ + # [ADD_YOUR_SPECIFIC_OPTIONS] \ + # [IMAGE_NAME] + {% endif %} + {% endfor %} + + echo "✅ Migration script template generated!" + echo "Please review and customize before running." + when: container_details is defined + + - name: Display migration script + debug: + msg: "{{ migration_script }}" + when: migration_script is defined + + # 保存分析结果到文件 + - name: Save analysis results to file + copy: + content: | + {{ swarm_analysis }} + + MIGRATION SCRIPT: + {{ migration_script }} + dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt" + when: swarm_analysis is defined and migration_script is defined + + - name: Analysis complete + debug: + msg: | + 🎉 Analysis complete! 
+ Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt + + Summary: + - Total containers analyzed: {{ all_containers.stdout_lines | length }} + - Compose files found: {{ compose_files.files | length }} + - Current Swarm status: {{ swarm_status.stdout }} \ No newline at end of file diff --git a/configuration/playbooks/applications/docker-swarm-check.yml b/configuration/playbooks/applications/docker-swarm-check.yml new file mode 100644 index 0000000..6f2303d --- /dev/null +++ b/configuration/playbooks/applications/docker-swarm-check.yml @@ -0,0 +1,236 @@ +--- +- name: Docker Swarm Check for ash3c + hosts: ash3c + become: yes + gather_facts: yes + + tasks: + # 基础检查 + - name: Check if Docker is installed + command: which docker + register: docker_installed + failed_when: false + changed_when: false + + - name: Fail if Docker not installed + fail: + msg: "Docker is not installed on {{ inventory_hostname }}" + when: docker_installed.rc != 0 + + # 检查当前 Swarm 状态 + - name: Check Docker Swarm status + shell: docker info | grep "Swarm:" -A 1 + register: swarm_status + + - name: Display current Swarm status + debug: + msg: "🔍 Current Swarm Status: {{ swarm_status.stdout_lines }}" + + # 获取运行中的容器 - 使用简单格式 + - name: Get running containers + shell: docker ps + register: running_containers + + - name: Display running containers + debug: + msg: "🏃 Running Containers:\n{{ running_containers.stdout }}" + + # 获取所有容器(包括停止的) + - name: Get all containers + shell: docker ps -a + register: all_containers + + - name: Display all containers + debug: + msg: "📦 All Containers:\n{{ all_containers.stdout }}" + + # 获取容器名称列表 + - name: Get container names + shell: docker ps -a | awk 'NR>1 {print $NF}' | head -20 + register: container_names + + - name: Display container names + debug: + msg: "Container names: {{ container_names.stdout_lines }}" + + # 检查每个容器的基本信息 + - name: Get basic container info + shell: | + echo "=== Container: {{ item }} ===" + docker 
inspect {{ item }} | jq -r '.[0] | { + "Image": .Config.Image, + "Status": .State.Status, + "RestartPolicy": .HostConfig.RestartPolicy.Name, + "NetworkMode": .HostConfig.NetworkMode + }' + echo "Ports:" + docker port {{ item }} 2>/dev/null || echo "No published ports" + echo "Mounts:" + docker inspect {{ item }} | jq -r '.[0].Mounts[]? | " \(.Source):\(.Destination) (\(.Mode))"' + echo "---" + register: container_info + loop: "{{ container_names.stdout_lines[:10] }}" # 限制前10个容器 + when: container_names.stdout_lines | length > 0 + + - name: Display container info + debug: + msg: "{{ item.stdout }}" + loop: "{{ container_info.results }}" + when: container_info is defined + + # 检查 Docker Compose 文件 + - name: Find docker-compose files in common locations + find: + paths: + - /root + - /home + - /opt + - /var/lib/docker + patterns: + - "docker-compose.yml" + - "docker-compose.yaml" + - "compose.yml" + - "compose.yaml" + recurse: yes + depth: 3 + register: compose_files + ignore_errors: yes + + - name: Display found compose files + debug: + msg: "📄 Found compose files: {{ compose_files.files | map(attribute='path') | list }}" + when: compose_files.files | length > 0 + + # 分析网络配置 + - name: Get Docker networks + shell: docker network ls + register: docker_networks + + - name: Display Docker networks + debug: + msg: "🌐 Docker Networks:\n{{ docker_networks.stdout }}" + + # 检查卷使用情况 + - name: Get Docker volumes + shell: docker volume ls + register: docker_volumes + + - name: Display Docker volumes + debug: + msg: "💾 Docker Volumes:\n{{ docker_volumes.stdout }}" + + # 检查容器资源使用 + - name: Get container resource usage + shell: docker stats --no-stream + register: container_stats + when: container_names.stdout_lines | length > 0 + + - name: Display container stats + debug: + msg: "📊 Container Resource Usage:\n{{ container_stats.stdout }}" + when: container_stats is defined + + # 检查 Docker 镜像 + - name: Get Docker images + shell: docker images + register: docker_images + + - name: 
Display Docker images + debug: + msg: "🖼️ Docker Images:\n{{ docker_images.stdout }}" + + # 生成 Swarm 适用性分析 + - name: Generate Swarm suitability analysis + debug: + msg: | + + 🔍 DOCKER SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }} + ================================================================ + + 📋 SUMMARY: + - Current Swarm Status: {{ 'Active' if 'active' in swarm_status.stdout else 'Inactive' }} + - Total Containers: {{ container_names.stdout_lines | length }} + - Running Containers: {{ running_containers.stdout_lines | length - 1 }} + - Compose Files Found: {{ compose_files.files | length if compose_files.files is defined else 0 }} + + 💡 SWARM MIGRATION RECOMMENDATIONS: + + ✅ TYPICALLY SUITABLE FOR SWARM: + - Web servers (nginx, apache, caddy) + - API services and microservices + - Application servers + - Load balancers (traefik, haproxy) + - Monitoring tools (prometheus, grafana) + - Databases (with proper volume strategy) + + ❌ NOT SUITABLE FOR SWARM: + - Containers using Docker socket (/var/run/docker.sock) + - Containers with --privileged flag + - Development/testing containers + - Containers requiring specific host hardware access + + ⚠️ NEEDS MODIFICATION FOR SWARM: + - Containers using bind mounts → convert to volumes + - Containers without restart policies → add restart policies + - Containers using host networking → use overlay networks + - Containers with hardcoded IPs → use service discovery + + 🚀 MIGRATION STEPS: + 1. ✅ Swarm is already initialized + 2. Create overlay networks for service communication + 3. Convert suitable containers to Docker services + 4. Set up service discovery and load balancing + 5. Configure secrets and configs management + 6. 
Test service scaling and failover + + 📝 NEXT ACTIONS: + - Review each container above for Swarm suitability + - Identify services that would benefit from scaling + - Plan network topology for services + - Prepare volume migration strategy + when: container_names is defined + + # 保存分析结果 + - name: Save analysis summary to file + copy: + content: | + Docker Swarm Analysis for {{ inventory_hostname }} + Generated: {{ ansible_date_time.iso8601 }} + + SWARM STATUS: + {{ swarm_status.stdout }} + + CONTAINERS ({{ container_names.stdout_lines | length }} total): + {{ container_names.stdout_lines | join('\n') }} + + NETWORKS: + {{ docker_networks.stdout }} + + VOLUMES: + {{ docker_volumes.stdout }} + + IMAGES: + {{ docker_images.stdout }} + + {% if compose_files.files is defined and compose_files.files | length > 0 %} + COMPOSE FILES FOUND: + {% for file in compose_files.files %} + - {{ file.path }} + {% endfor %} + {% endif %} + dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt" + + - name: Analysis complete + debug: + msg: | + + 🎉 ANALYSIS COMPLETE! + + 📄 Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt + + 🔍 Review the container details above to identify: + - Which services are suitable for Swarm + - Which containers need modification + - Migration priority and strategy + + 💡 TIP: Focus on stateless services first for easier migration! 
\ No newline at end of file diff --git a/configuration/playbooks/applications/swarm-migration-plan.yml b/configuration/playbooks/applications/swarm-migration-plan.yml new file mode 100644 index 0000000..c9eff47 --- /dev/null +++ b/configuration/playbooks/applications/swarm-migration-plan.yml @@ -0,0 +1,194 @@ +--- +- name: Docker Swarm Migration Plan for ash3c + hosts: ash3c + become: yes + gather_facts: yes + + vars: + # 定义服务迁移计划 + swarm_services: + high_priority: + - name: ghproxy + image: wjqserver/ghproxy:latest + ports: "8046:8080" + replicas: 2 + networks: ["app-network"] + + - name: redis + image: redis:latest + ports: "63789:6379" + replicas: 1 + networks: ["app-network"] + volumes: ["redis-data:/data"] + + medium_priority: + - name: consul + image: bitnami/consul:latest + ports: + - "8310:8300" + - "8311:8301" + - "8312:8302" + - "8501:8500" + - "8601:8600/udp" + replicas: 1 + networks: ["consul-network"] + + - name: discourse-app + image: bitnami/discourse:3.4.1 + ports: "31080:3000" + replicas: 1 + networks: ["app-network"] + depends_on: ["postgres", "redis"] + + - name: discourse-sidekiq + image: bitnami/discourse:3.4.1 + replicas: 1 + networks: ["app-network"] + depends_on: ["postgres", "redis"] + + low_priority: + - name: elasticsearch + image: bitnami/elasticsearch:8.17.2 + ports: "59200:9200" + replicas: 1 + networks: ["elastic-network"] + volumes: ["elastic-data:/bitnami/elasticsearch/data"] + constraints: ["node.role==manager"] + + - name: postgres + image: postgres:17.2 + ports: "54322:5432" + replicas: 1 + networks: ["db-network"] + volumes: ["postgres-data:/var/lib/postgresql/data"] + constraints: ["node.role==manager"] + secrets: ["postgres_password"] + + tasks: + - name: Display migration plan + debug: + msg: | + 🚀 DOCKER SWARM MIGRATION PLAN FOR {{ inventory_hostname }} + ========================================================= + + 📋 PHASE 1 - HIGH PRIORITY (Low Risk) + {% for service in swarm_services.high_priority %} + ✅ {{ service.name 
}}: + - Image: {{ service.image }} + - Replicas: {{ service.replicas }} + - Networks: {{ service.networks | join(', ') }} + - Migration: Safe, stateless service + {% endfor %} + + 📋 PHASE 2 - MEDIUM PRIORITY (Medium Risk) + {% for service in swarm_services.medium_priority %} + ⚠️ {{ service.name }}: + - Image: {{ service.image }} + - Replicas: {{ service.replicas }} + - Networks: {{ service.networks | join(', ') }} + - Migration: Requires coordination + {% endfor %} + + 📋 PHASE 3 - LOW PRIORITY (High Risk) + {% for service in swarm_services.low_priority %} + 🔴 {{ service.name }}: + - Image: {{ service.image }} + - Replicas: {{ service.replicas }} + - Networks: {{ service.networks | join(', ') }} + - Migration: Requires careful planning + {% endfor %} + + - name: Create migration script + copy: + content: | + #!/bin/bash + # Docker Swarm Migration Script for {{ inventory_hostname }} + # Generated: {{ ansible_date_time.iso8601 }} + + set -e + + echo "🚀 Starting Docker Swarm Migration..." + + # Create networks + echo "📡 Creating overlay networks..." + docker network create -d overlay --attachable app-network || true + docker network create -d overlay --attachable db-network || true + docker network create -d overlay --attachable consul-network || true + docker network create -d overlay --attachable elastic-network || true + + # Create volumes + echo "💾 Creating volumes..." + docker volume create redis-data || true + docker volume create postgres-data || true + docker volume create elastic-data || true + + # Create secrets (example) + echo "🔐 Creating secrets..." + echo "your_postgres_password" | docker secret create postgres_password - || true + + echo "✅ Infrastructure setup complete!" 
+ echo "" + echo "🔄 PHASE 1 - Migrate high priority services:" + echo "docker service create --name ghproxy-svc --replicas 2 --network app-network -p 8046:8080 wjqserver/ghproxy:latest" + echo "docker service create --name redis-svc --replicas 1 --network app-network -p 63789:6379 --mount type=volume,source=redis-data,target=/data redis:latest" + echo "" + echo "🔄 PHASE 2 - Migrate medium priority services:" + echo "docker service create --name consul-svc --replicas 1 --network consul-network -p 8310:8300 -p 8311:8301 -p 8312:8302 -p 8501:8500 -p 8601:8600/udp bitnami/consul:latest" + echo "docker service create --name discourse-app-svc --replicas 1 --network app-network -p 31080:3000 bitnami/discourse:3.4.1" + echo "docker service create --name discourse-sidekiq-svc --replicas 1 --network app-network bitnami/discourse:3.4.1" + echo "" + echo "🔄 PHASE 3 - Migrate low priority services (CAREFUL!):" + echo "docker service create --name postgres-svc --replicas 1 --network db-network -p 54322:5432 --mount type=volume,source=postgres-data,target=/var/lib/postgresql/data --secret postgres_password --constraint 'node.role==manager' postgres:17.2" + echo "docker service create --name elasticsearch-svc --replicas 1 --network elastic-network -p 59200:9200 --mount type=volume,source=elastic-data,target=/bitnami/elasticsearch/data --constraint 'node.role==manager' bitnami/elasticsearch:8.17.2" + echo "" + echo "📊 Monitor services:" + echo "docker service ls" + echo "docker service ps " + echo "" + echo "⚠️ IMPORTANT NOTES:" + echo "1. Stop original containers before creating services" + echo "2. Backup data before migrating databases" + echo "3. Test each phase before proceeding" + echo "4. Monitor logs: docker service logs " + dest: "/tmp/swarm-migration-{{ inventory_hostname }}.sh" + mode: '0755' + + - name: Create rollback script + copy: + content: | + #!/bin/bash + # Docker Swarm Rollback Script for {{ inventory_hostname }} + + echo "🔄 Rolling back Swarm services..." 
+ + # Remove services + docker service rm ghproxy-svc redis-svc consul-svc discourse-app-svc discourse-sidekiq-svc postgres-svc elasticsearch-svc 2>/dev/null || true + + # Remove networks (optional) + # docker network rm app-network db-network consul-network elastic-network 2>/dev/null || true + + echo "✅ Rollback complete. Original containers should be restarted manually." + dest: "/tmp/swarm-rollback-{{ inventory_hostname }}.sh" + mode: '0755' + + - name: Migration plan complete + debug: + msg: | + 🎉 MIGRATION PLAN GENERATED! + + 📄 Files created: + - /tmp/swarm-migration-{{ inventory_hostname }}.sh (Migration script) + - /tmp/swarm-rollback-{{ inventory_hostname }}.sh (Rollback script) + + 🚀 RECOMMENDED APPROACH: + 1. Backup all data first + 2. Test migration in phases + 3. Start with Phase 1 (low risk services) + 4. Monitor each service before proceeding + 5. Keep rollback script ready + + 💡 NEXT STEPS: + 1. Review and customize the migration script + 2. Plan maintenance window + 3. Execute phase by phase + 4. 
Monitor and validate each service \ No newline at end of file diff --git a/ansible/playbooks/01-system/cron-setup.yml b/configuration/playbooks/bootstrap/cron-setup.yml similarity index 100% rename from ansible/playbooks/01-system/cron-setup.yml rename to configuration/playbooks/bootstrap/cron-setup.yml diff --git a/configuration/playbooks/bootstrap/main.yml b/configuration/playbooks/bootstrap/main.yml new file mode 100644 index 0000000..250e45e --- /dev/null +++ b/configuration/playbooks/bootstrap/main.yml @@ -0,0 +1,175 @@ +--- +- name: Bootstrap Infrastructure + hosts: all + become: yes + gather_facts: yes + + vars: + # 基础软件包 + base_packages: + - curl + - wget + - git + - vim + - htop + - tree + - unzip + - jq + - python3 + - python3-pip + - apt-transport-https + - ca-certificates + - gnupg + - lsb-release + + # Docker 配置 + docker_users: + - "{{ ansible_user }}" + + # 系统配置 + timezone: "Asia/Shanghai" + + tasks: + - name: Update package cache + apt: + update_cache: yes + cache_valid_time: 3600 + when: ansible_os_family == "Debian" + + - name: Install base packages + package: + name: "{{ base_packages }}" + state: present + + - name: Set timezone + timezone: + name: "{{ timezone }}" + + - name: Create system users + user: + name: "{{ ansible_user }}" + groups: sudo + shell: /bin/bash + create_home: yes + when: ansible_user != "root" + + - name: Configure SSH + lineinfile: + path: /etc/ssh/sshd_config + regexp: "{{ item.regexp }}" + line: "{{ item.line }}" + backup: yes + loop: + - { regexp: '^#?PermitRootLogin', line: 'PermitRootLogin no' } + - { regexp: '^#?PasswordAuthentication', line: 'PasswordAuthentication no' } + - { regexp: '^#?PubkeyAuthentication', line: 'PubkeyAuthentication yes' } + notify: restart ssh + when: ansible_user != "root" + + - name: Install Docker + block: + - name: Add Docker GPG key + apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Add Docker repository + apt_repository: + repo: "deb [arch=amd64] 
https://download.docker.com/linux/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable" + state: present + + - name: Install Docker + package: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-compose-plugin + state: present + + - name: Add users to docker group + user: + name: "{{ item }}" + groups: docker + append: yes + loop: "{{ docker_users }}" + + - name: Start and enable Docker + systemd: + name: docker + state: started + enabled: yes + + - name: Install Docker Compose (standalone) + get_url: + url: "https://github.com/docker/compose/releases/latest/download/docker-compose-linux-x86_64" + dest: /usr/local/bin/docker-compose + mode: '0755' + + - name: Configure firewall + ufw: + rule: "{{ item.rule }}" + port: "{{ item.port }}" + proto: "{{ item.proto | default('tcp') }}" + loop: + - { rule: 'allow', port: '22' } + - { rule: 'allow', port: '80' } + - { rule: 'allow', port: '443' } + notify: enable ufw + + - name: Create application directories + file: + path: "{{ item }}" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + loop: + - /opt/apps + - /opt/data + - /opt/logs + - /opt/backups + - /opt/scripts + + - name: Install monitoring tools + package: + name: + - htop + - iotop + - nethogs + - ncdu + - tmux + state: present + + - name: Configure system limits + pam_limits: + domain: '*' + limit_type: "{{ item.type }}" + limit_item: "{{ item.item }}" + value: "{{ item.value }}" + loop: + - { type: 'soft', item: 'nofile', value: '65536' } + - { type: 'hard', item: 'nofile', value: '65536' } + - { type: 'soft', item: 'nproc', value: '32768' } + - { type: 'hard', item: 'nproc', value: '32768' } + + - name: Configure sysctl + sysctl: + name: "{{ item.name }}" + value: "{{ item.value }}" + state: present + reload: yes + loop: + - { name: 'vm.max_map_count', value: '262144' } + - { name: 'fs.file-max', value: '2097152' } + - { name: 'net.core.somaxconn', value: '32768' } + + handlers: + - name: restart ssh + 
systemd: + name: ssh + state: restarted + + - name: enable ufw + ufw: + state: enabled \ No newline at end of file diff --git a/ansible/playbooks/01-system/system-cleanup.yml b/configuration/playbooks/bootstrap/system-cleanup.yml similarity index 100% rename from ansible/playbooks/01-system/system-cleanup.yml rename to configuration/playbooks/bootstrap/system-cleanup.yml diff --git a/ansible/playbooks/01-system/system-update.yml b/configuration/playbooks/bootstrap/system-update.yml similarity index 100% rename from ansible/playbooks/01-system/system-update.yml rename to configuration/playbooks/bootstrap/system-update.yml diff --git a/ansible/playbooks/99-tools/ops-toolkit.yml b/configuration/playbooks/maintenance/ops-toolkit.yml similarity index 100% rename from ansible/playbooks/99-tools/ops-toolkit.yml rename to configuration/playbooks/maintenance/ops-toolkit.yml diff --git a/ansible/playbooks/04-monitoring/network-connectivity.yml b/configuration/playbooks/monitoring/network-connectivity.yml similarity index 100% rename from ansible/playbooks/04-monitoring/network-connectivity.yml rename to configuration/playbooks/monitoring/network-connectivity.yml diff --git a/ansible/playbooks/04-monitoring/service-health-check.yml b/configuration/playbooks/monitoring/service-health-check.yml similarity index 100% rename from ansible/playbooks/04-monitoring/service-health-check.yml rename to configuration/playbooks/monitoring/service-health-check.yml diff --git a/ansible/playbooks/02-security/certificate-management.yml b/configuration/playbooks/security/certificate-management.yml similarity index 100% rename from ansible/playbooks/02-security/certificate-management.yml rename to configuration/playbooks/security/certificate-management.yml diff --git a/ansible/playbooks/02-security/security-hardening.yml b/configuration/playbooks/security/security-hardening.yml similarity index 100% rename from ansible/playbooks/02-security/security-hardening.yml rename to 
configuration/playbooks/security/security-hardening.yml diff --git a/consul-demo.sh b/consul-demo.sh deleted file mode 100755 index 75f43ea..0000000 --- a/consul-demo.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -echo "🚀 Consul 集群演示脚本" - -# 检查 Consul 集群状态 -check_cluster() { - echo "📊 检查集群状态..." - - for node in consul1 consul2 consul3; do - echo "检查节点: $node" - curl -s http://$node:8500/v1/status/leader 2>/dev/null && echo " - Leader: $(curl -s http://$node:8500/v1/status/leader 2>/dev/null)" || echo " - 节点不可达" - curl -s http://$node:8500/v1/status/peers 2>/dev/null && echo " - 集群节点: $(curl -s http://$node:8500/v1/status/peers 2>/dev/null)" || echo " - 无法获取集群信息" - echo "" - done -} - -# 测试配置读写 -test_config() { - echo "🔧 测试配置读写..." - - # 写入配置到不同节点 - echo "写入配置到 consul1..." - curl -X PUT http://consul1:8500/v1/kv/test/config "value-from-consul1" 2>/dev/null - - echo "从 consul2 读取配置..." - value=$(curl -s http://consul2:8500/v1/kv/test/config?raw 2>/dev/null) - echo "读取到的值: $value" - - echo "从 consul3 读取配置..." - value=$(curl -s http://consul3:8500/v1/kv/test/config?raw 2>/dev/null) - echo "读取到的值: $value" -} - -# 模拟故障转移 -simulate_failure() { - echo "💥 模拟 Leader 故障..." 
- - # 获取当前 Leader - leader=$(curl -s http://consul1:8500/v1/status/leader 2>/dev/null | tr -d '"') - echo "当前 Leader: $leader" - - # 这里只是演示,实际环境中你可以停止 Leader 节点 - echo "在实际环境中,你可以:" - echo "docker stop consul-leader-container" - echo "然后观察其他节点自动选举新 Leader" -} - -case "$1" in - "status") - check_cluster - ;; - "test") - test_config - ;; - "failure") - simulate_failure - ;; - *) - echo "用法: $0 {status|test|failure}" - echo " status - 检查集群状态" - echo " test - 测试配置同步" - echo " failure - 模拟故障转移" - ;; -esac \ No newline at end of file diff --git a/demo-services-stack.yml b/containers/compose/production/demo-services-stack.yml similarity index 100% rename from demo-services-stack.yml rename to containers/compose/production/demo-services-stack.yml diff --git a/traefik-swarm-stack.yml b/containers/compose/production/traefik-swarm-stack.yml similarity index 100% rename from traefik-swarm-stack.yml rename to containers/compose/production/traefik-swarm-stack.yml diff --git a/traefik-consul-setup.yml b/containers/infrastructure/traefik-consul-setup.yml similarity index 100% rename from traefik-consul-setup.yml rename to containers/infrastructure/traefik-consul-setup.yml diff --git a/traefik.yml b/containers/infrastructure/traefik.yml similarity index 100% rename from traefik.yml rename to containers/infrastructure/traefik.yml diff --git a/deploy-traefik-consul.sh b/deploy-traefik-consul.sh deleted file mode 100644 index bb888e8..0000000 --- a/deploy-traefik-consul.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -echo "🚀 部署 Traefik + Consul 集群" - -# 创建必要的目录 -mkdir -p {certs,web-content,api,logs} - -# 创建示例网页 -cat > web-content/index.html << 'EOF' - - - - Traefik + Consul Demo - - -

🎉 Traefik + Consul 集群运行成功!

-

当前时间:

- - - -EOF - -# 创建示例 API -cat > api/server.js << 'EOF' -const express = require('express'); -const consul = require('consul')(); -const app = express(); -const port = 3000; - -app.use(express.json()); - -// 健康检查 -app.get('/health', (req, res) => { - res.json({ status: 'healthy', timestamp: new Date().toISOString() }); -}); - -// API 路由 -app.get('/api/config', async (req, res) => { - try { - const result = await consul.kv.get('config/api/message'); - res.json({ - message: result ? result.Value : 'Hello from API!', - source: 'consul' - }); - } catch (error) { - res.json({ - message: 'Hello from API!', - source: 'default' - }); - } -}); - -app.post('/api/config', async (req, res) => { - try { - await consul.kv.set('config/api/message', req.body.message); - res.json({ success: true }); - } catch (error) { - res.status(500).json({ error: error.message }); - } -}); - -app.listen(port, () => { - console.log(`API server running on port ${port}`); -}); -EOF - -# 创建 API package.json -cat > api/package.json << 'EOF' -{ - "name": "demo-api", - "version": "1.0.0", - "dependencies": { - "express": "^4.18.0", - "consul": "^0.40.0" - } -} -EOF - -# 设置 hosts 文件(用于本地测试) -echo "📝 请添加以下内容到 /etc/hosts 文件:" -echo "127.0.0.1 traefik.local" -echo "127.0.0.1 consul.local" -echo "127.0.0.1 app.local" -echo "127.0.0.1 api.local" - -# 启动服务 -echo "🚀 启动 Traefik + Consul 集群..." -docker-compose -f traefik-consul-setup.yml up -d - -# 等待服务启动 -echo "⏳ 等待服务启动..." -sleep 10 - -# 检查服务状态 -echo "📊 检查服务状态..." 
-docker-compose -f traefik-consul-setup.yml ps - -# 显示访问地址 -echo "" -echo "🎉 部署完成!访问地址:" -echo " Traefik Dashboard: http://traefik.local:8080" -echo " Consul UI: http://consul.local:8500" -echo " Web App: http://app.local" -echo " API: http://api.local/api/config" -echo "" -echo "📝 测试命令:" -echo " curl http://api.local/api/config" -echo " curl -X POST http://api.local/api/config -H 'Content-Type: application/json' -d '{\"message\":\"Hello Consul!\"}'" \ No newline at end of file diff --git a/infrastructure/environments/dev/main.tf b/infrastructure/environments/dev/main.tf new file mode 100644 index 0000000..ba916fc --- /dev/null +++ b/infrastructure/environments/dev/main.tf @@ -0,0 +1,49 @@ +# 开发环境主配置文件 + +# 引用共享配置 +module "shared" { + source = "../../shared" +} + +# Oracle Cloud 基础设施 +module "oracle_cloud" { + source = "../../providers/oracle-cloud" + + # 传递变量 + environment = var.environment + project_name = var.project_name + owner = var.owner + vpc_cidr = var.vpc_cidr + availability_zones = var.availability_zones + common_tags = var.common_tags + oci_config = var.oci_config + + # 开发环境特定配置 + instance_count = 1 + instance_size = "VM.Standard.E2.1.Micro" # 免费层 +} + +# 华为云基础设施 (可选) +module "huawei_cloud" { + source = "../../providers/huawei-cloud" + count = contains(var.cloud_providers, "huawei") ? 1 : 0 + + environment = var.environment + project_name = var.project_name + owner = var.owner + vpc_cidr = "10.1.0.0/16" # 不同的 CIDR 避免冲突 + availability_zones = var.availability_zones + common_tags = var.common_tags + huawei_config = var.huawei_config +} + +# 输出 +output "oracle_cloud_outputs" { + description = "Oracle Cloud 基础设施输出" + value = module.oracle_cloud +} + +output "huawei_cloud_outputs" { + description = "华为云基础设施输出" + value = length(module.huawei_cloud) > 0 ? 
module.huawei_cloud[0] : null +} \ No newline at end of file diff --git a/infrastructure/environments/dev/terraform.tfvars.example b/infrastructure/environments/dev/terraform.tfvars.example new file mode 100644 index 0000000..c060882 --- /dev/null +++ b/infrastructure/environments/dev/terraform.tfvars.example @@ -0,0 +1,61 @@ +# 开发环境配置示例 +# 复制此文件为 terraform.tfvars 并填入实际值 + +# 基本配置 +environment = "dev" +project_name = "mgmt" +owner = "ben" + +# 要启用的云服务商 +cloud_providers = ["oracle", "huawei"] + +# 网络配置 +vpc_cidr = "10.0.0.0/16" +availability_zones = ["a", "b"] + +# 通用标签 +common_tags = { + Environment = "dev" + Project = "mgmt" + Owner = "ben" + ManagedBy = "opentofu" +} + +# Oracle Cloud 配置 +oci_config = { + tenancy_ocid = "ocid1.tenancy.oc1..your-tenancy-id" + user_ocid = "ocid1.user.oc1..your-user-id" + fingerprint = "your-key-fingerprint" + private_key_path = "~/.oci/oci_api_key.pem" + region = "ap-seoul-1" + compartment_ocid = "ocid1.compartment.oc1..your-compartment-id" +} + +# 华为云配置 +huawei_config = { + access_key = "your-access-key" + secret_key = "your-secret-key" + region = "cn-north-4" + project_id = "your-project-id" +} + +# Google Cloud 配置 (可选) +gcp_config = { + project_id = "your-project-id" + region = "asia-northeast3" + zone = "asia-northeast3-a" + credentials_file = "~/.gcp/service-account.json" +} + +# AWS 配置 (可选) +aws_config = { + region = "ap-northeast-2" + access_key = "your-access-key" + secret_key = "your-secret-key" +} + +# DigitalOcean 配置 (可选) +do_config = { + token = "your-do-token" + region = "sgp1" +} \ No newline at end of file diff --git a/infrastructure/environments/dev/variables.tf b/infrastructure/environments/dev/variables.tf new file mode 100644 index 0000000..21de37d --- /dev/null +++ b/infrastructure/environments/dev/variables.tf @@ -0,0 +1,133 @@ +# 开发环境变量定义 + +variable "environment" { + description = "环境名称" + type = string + default = "dev" +} + +variable "project_name" { + description = "项目名称" + type = string + default = "mgmt" 
+} + +variable "owner" { + description = "项目所有者" + type = string + default = "ben" +} + +variable "cloud_providers" { + description = "要启用的云服务商列表" + type = list(string) + default = ["oracle"] +} + +variable "vpc_cidr" { + description = "VPC CIDR 块" + type = string + default = "10.0.0.0/16" +} + +variable "availability_zones" { + description = "可用区列表" + type = list(string) + default = ["a", "b"] +} + +variable "common_tags" { + description = "通用标签" + type = map(string) + default = { + Environment = "dev" + Project = "mgmt" + ManagedBy = "opentofu" + } +} + +# Oracle Cloud 配置 +variable "oci_config" { + description = "Oracle Cloud 配置" + type = object({ + tenancy_ocid = string + user_ocid = string + fingerprint = string + private_key_path = string + region = string + compartment_ocid = optional(string) + }) + default = { + tenancy_ocid = "" + user_ocid = "" + fingerprint = "" + private_key_path = "" + region = "ap-seoul-1" + compartment_ocid = "" + } +} + +# 华为云配置 +variable "huawei_config" { + description = "华为云配置" + type = object({ + access_key = string + secret_key = string + region = string + project_id = optional(string) + }) + default = { + access_key = "" + secret_key = "" + region = "cn-north-4" + project_id = "" + } + sensitive = true +} + +# Google Cloud 配置 +variable "gcp_config" { + description = "Google Cloud 配置" + type = object({ + project_id = string + region = string + zone = string + credentials_file = string + }) + default = { + project_id = "" + region = "asia-northeast3" + zone = "asia-northeast3-a" + credentials_file = "" + } +} + +# AWS 配置 +variable "aws_config" { + description = "AWS 配置" + type = object({ + region = string + access_key = string + secret_key = string + }) + default = { + region = "ap-northeast-2" + access_key = "" + secret_key = "" + } + sensitive = true +} + +# DigitalOcean 配置 +variable "do_config" { + description = "DigitalOcean 配置" + type = object({ + token = string + region = string + }) + default = { + token = "" + region = 
"sgp1" + } + sensitive = true +} \ No newline at end of file diff --git a/infrastructure/providers/huawei-cloud/main.tf b/infrastructure/providers/huawei-cloud/main.tf new file mode 100644 index 0000000..a719ef5 --- /dev/null +++ b/infrastructure/providers/huawei-cloud/main.tf @@ -0,0 +1,144 @@ +# 华为云提供商配置 + +terraform { + required_providers { + huaweicloud = { + source = "huaweicloud/huaweicloud" + version = "~> 1.60" + } + } +} + +# 华为云提供商配置 +provider "huaweicloud" { + access_key = var.huawei_config.access_key + secret_key = var.huawei_config.secret_key + region = var.huawei_config.region +} + +# 获取可用区 +data "huaweicloud_availability_zones" "zones" {} + +# 获取镜像 +data "huaweicloud_images_image" "ubuntu" { + name = "Ubuntu 22.04 server 64bit" + most_recent = true +} + +# VPC +resource "huaweicloud_vpc" "main" { + name = "${var.project_name}-${var.environment}-vpc" + cidr = var.vpc_cidr + + tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-vpc" + }) +} + +# 子网 +resource "huaweicloud_vpc_subnet" "public" { + count = length(var.availability_zones) + name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}" + cidr = cidrsubnet(var.vpc_cidr, 8, count.index) + gateway_ip = cidrhost(cidrsubnet(var.vpc_cidr, 8, count.index), 1) + vpc_id = huaweicloud_vpc.main.id + + tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}" + Type = "public" + }) +} + +# 安全组 +resource "huaweicloud_networking_secgroup" "main" { + name = "${var.project_name}-${var.environment}-sg" + description = "Security group for ${var.project_name} ${var.environment}" + + tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-sg" + }) +} + +# 安全组规则 - SSH +resource "huaweicloud_networking_secgroup_rule" "ssh" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = 
"0.0.0.0/0" + security_group_id = huaweicloud_networking_secgroup.main.id +} + +# 安全组规则 - HTTP +resource "huaweicloud_networking_secgroup_rule" "http" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 80 + port_range_max = 80 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = huaweicloud_networking_secgroup.main.id +} + +# 安全组规则 - HTTPS +resource "huaweicloud_networking_secgroup_rule" "https" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 443 + port_range_max = 443 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = huaweicloud_networking_secgroup.main.id +} + +# 弹性IP +resource "huaweicloud_vpc_eip" "main" { + count = var.environment == "production" ? 2 : 1 + + publicip { + type = "5_bgp" + } + + bandwidth { + name = "${var.project_name}-${var.environment}-bandwidth-${count.index}" + size = var.environment == "production" ? 10 : 5 + share_type = "PER" + charge_mode = "traffic" + } + + tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-eip-${count.index}" + }) +} + +# 输出 +output "vpc_id" { + description = "VPC ID" + value = huaweicloud_vpc.main.id +} + +output "subnet_ids" { + description = "子网 ID 列表" + value = huaweicloud_vpc_subnet.public[*].id +} + +output "security_group_id" { + description = "安全组 ID" + value = huaweicloud_networking_secgroup.main.id +} + +output "availability_zones" { + description = "可用区列表" + value = data.huaweicloud_availability_zones.zones.names +} + +output "ubuntu_image_id" { + description = "Ubuntu 镜像 ID" + value = data.huaweicloud_images_image.ubuntu.id +} + +output "eip_addresses" { + description = "弹性IP地址列表" + value = huaweicloud_vpc_eip.main[*].address +} \ No newline at end of file diff --git a/infrastructure/providers/oracle-cloud/main.tf b/infrastructure/providers/oracle-cloud/main.tf new file mode 100644 index 0000000..55a644e --- /dev/null +++ b/infrastructure/providers/oracle-cloud/main.tf @@ -0,0 +1,160 @@ +# Oracle 
Cloud Infrastructure 提供商配置 + +terraform { + required_providers { + oci = { + source = "oracle/oci" + version = "~> 5.0" + } + } +} + +# OCI 提供商配置 +provider "oci" { + tenancy_ocid = var.oci_config.tenancy_ocid + user_ocid = var.oci_config.user_ocid + fingerprint = var.oci_config.fingerprint + private_key_path = var.oci_config.private_key_path + region = var.oci_config.region +} + +# 获取可用域 +data "oci_identity_availability_domains" "ads" { + compartment_id = var.oci_config.tenancy_ocid +} + +# 获取镜像 +data "oci_core_images" "ubuntu_images" { + compartment_id = var.oci_config.tenancy_ocid + operating_system = "Canonical Ubuntu" + operating_system_version = "22.04" + shape = "VM.Standard.E2.1.Micro" + sort_by = "TIMECREATED" + sort_order = "DESC" +} + +# VCN (虚拟云网络) +resource "oci_core_vcn" "main" { + compartment_id = var.oci_config.tenancy_ocid + cidr_blocks = [var.vpc_cidr] + display_name = "${var.project_name}-${var.environment}-vcn" + dns_label = "${var.project_name}${var.environment}" + + freeform_tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-vcn" + }) +} + +# 互联网网关 +resource "oci_core_internet_gateway" "main" { + compartment_id = var.oci_config.tenancy_ocid + vcn_id = oci_core_vcn.main.id + display_name = "${var.project_name}-${var.environment}-igw" + enabled = true + + freeform_tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-igw" + }) +} + +# 路由表 +resource "oci_core_route_table" "main" { + compartment_id = var.oci_config.tenancy_ocid + vcn_id = oci_core_vcn.main.id + display_name = "${var.project_name}-${var.environment}-rt" + + route_rules { + destination = "0.0.0.0/0" + destination_type = "CIDR_BLOCK" + network_entity_id = oci_core_internet_gateway.main.id + } + + freeform_tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-rt" + }) +} + +# 安全列表 +resource "oci_core_security_list" "main" { + compartment_id = var.oci_config.tenancy_ocid + vcn_id = oci_core_vcn.main.id + 
display_name = "${var.project_name}-${var.environment}-sl" + + # 出站规则 + egress_security_rules { + destination = "0.0.0.0/0" + protocol = "all" + } + + # 入站规则 - SSH + ingress_security_rules { + protocol = "6" # TCP + source = "0.0.0.0/0" + tcp_options { + min = 22 + max = 22 + } + } + + # 入站规则 - HTTP + ingress_security_rules { + protocol = "6" # TCP + source = "0.0.0.0/0" + tcp_options { + min = 80 + max = 80 + } + } + + # 入站规则 - HTTPS + ingress_security_rules { + protocol = "6" # TCP + source = "0.0.0.0/0" + tcp_options { + min = 443 + max = 443 + } + } + + freeform_tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-sl" + }) +} + +# 子网 +resource "oci_core_subnet" "public" { + count = length(var.availability_zones) + compartment_id = var.oci_config.tenancy_ocid + vcn_id = oci_core_vcn.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index) + display_name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}" + dns_label = "public${var.availability_zones[count.index]}" + route_table_id = oci_core_route_table.main.id + security_list_ids = [oci_core_security_list.main.id] + + freeform_tags = merge(var.common_tags, { + Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}" + Type = "public" + }) +} + +# 输出 +output "vcn_id" { + description = "VCN ID" + value = oci_core_vcn.main.id +} + +output "subnet_ids" { + description = "子网 ID 列表" + value = oci_core_subnet.public[*].id +} + +output "availability_domains" { + description = "可用域列表" + value = data.oci_identity_availability_domains.ads.availability_domains[*].name +} + +output "ubuntu_image_id" { + description = "Ubuntu 镜像 ID" + value = data.oci_core_images.ubuntu_images.images[0].id +} \ No newline at end of file diff --git a/infrastructure/shared/outputs.tf b/infrastructure/shared/outputs.tf new file mode 100644 index 0000000..0c30ee9 --- /dev/null +++ b/infrastructure/shared/outputs.tf @@ -0,0 +1,39 @@ +# 
全局输出定义 + +# 环境信息 +output "environment" { + description = "当前部署环境" + value = var.environment +} + +output "project_name" { + description = "项目名称" + value = var.project_name +} + +# 网络信息 +output "vpc_cidr" { + description = "VPC CIDR 块" + value = var.vpc_cidr +} + +# 通用标签 +output "common_tags" { + description = "通用资源标签" + value = merge(var.common_tags, { + Environment = var.environment + Timestamp = timestamp() + }) +} + +# 云服务商配置状态 +output "enabled_providers" { + description = "启用的云服务商列表" + value = var.cloud_providers +} + +# 实例类型配置 +output "instance_types" { + description = "当前环境的实例类型配置" + value = var.instance_types[var.environment] +} \ No newline at end of file diff --git a/infrastructure/shared/variables.tf b/infrastructure/shared/variables.tf new file mode 100644 index 0000000..4c98e3a --- /dev/null +++ b/infrastructure/shared/variables.tf @@ -0,0 +1,169 @@ +# 全局变量定义 + +# 环境配置 +variable "environment" { + description = "部署环境 (dev, staging, production)" + type = string + validation { + condition = contains(["dev", "staging", "production"], var.environment) + error_message = "环境必须是 dev, staging, 或 production 之一。" + } +} + +variable "project_name" { + description = "项目名称" + type = string + default = "mgmt" +} + +variable "owner" { + description = "资源所有者" + type = string + default = "ben" +} + +# 网络配置 +variable "vpc_cidr" { + description = "VPC CIDR 块" + type = string + default = "10.0.0.0/16" +} + +variable "availability_zones" { + description = "可用区列表" + type = list(string) + default = ["a", "b", "c"] +} + +# 计算资源配置 +variable "instance_types" { + description = "不同环境的实例类型" + type = map(object({ + web = string + app = string + db = string + cache = string + })) + default = { + dev = { + web = "t3.micro" + app = "t3.small" + db = "t3.micro" + cache = "t3.micro" + } + staging = { + web = "t3.small" + app = "t3.medium" + db = "t3.small" + cache = "t3.small" + } + production = { + web = "t3.medium" + app = "t3.large" + db = "t3.medium" + cache = "t3.medium" + } + } +} + 
+# 标签配置 +variable "common_tags" { + description = "通用标签" + type = map(string) + default = { + Project = "mgmt" + ManagedBy = "opentofu" + Owner = "ben" + } +} + +# 云服务商特定配置 +variable "cloud_providers" { + description = "启用的云服务商" + type = list(string) + default = ["oracle", "huawei", "google", "digitalocean", "aws"] +} + +# Oracle Cloud 配置 +variable "oci_config" { + description = "Oracle Cloud 配置" + type = object({ + tenancy_ocid = string + user_ocid = string + fingerprint = string + private_key_path = string + region = string + }) + default = { + tenancy_ocid = "" + user_ocid = "" + fingerprint = "" + private_key_path = "~/.oci/oci_api_key.pem" + region = "ap-seoul-1" + } + sensitive = true +} + +# 华为云配置 +variable "huawei_config" { + description = "华为云配置" + type = object({ + access_key = string + secret_key = string + region = string + }) + default = { + access_key = "" + secret_key = "" + region = "cn-north-4" + } + sensitive = true +} + +# Google Cloud 配置 +variable "gcp_config" { + description = "Google Cloud 配置" + type = object({ + project_id = string + region = string + zone = string + credentials = string + }) + default = { + project_id = "" + region = "asia-northeast3" + zone = "asia-northeast3-a" + credentials = "" + } + sensitive = true +} + +# DigitalOcean 配置 +variable "do_config" { + description = "DigitalOcean 配置" + type = object({ + token = string + region = string + }) + default = { + token = "" + region = "sgp1" + } + sensitive = true +} + +# AWS 配置 +variable "aws_config" { + description = "AWS 配置" + type = object({ + access_key = string + secret_key = string + region = string + }) + default = { + access_key = "" + secret_key = "" + region = "ap-northeast-1" + } + sensitive = true +} \ No newline at end of file diff --git a/infrastructure/shared/versions.tf b/infrastructure/shared/versions.tf new file mode 100644 index 0000000..145d54c --- /dev/null +++ b/infrastructure/shared/versions.tf @@ -0,0 +1,57 @@ +# OpenTofu 版本和提供商配置 +terraform { + 
required_version = ">= 1.6" + + required_providers { + # Oracle Cloud Infrastructure + oci = { + source = "oracle/oci" + version = "~> 5.0" + } + + # 华为云 + huaweicloud = { + source = "huaweicloud/huaweicloud" + version = "~> 1.60" + } + + # Google Cloud Platform + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + + # DigitalOcean + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } + + # Amazon Web Services + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + + # 其他常用提供商 + random = { + source = "hashicorp/random" + version = "~> 3.1" + } + + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + + local = { + source = "hashicorp/local" + version = "~> 2.1" + } + } + + # 后端配置 - 可以使用 S3, GCS, 或本地 + backend "local" { + path = "terraform.tfstate" + } +} \ No newline at end of file diff --git a/keys-info.md b/keys-info.md deleted file mode 100644 index 2693ce0..0000000 --- a/keys-info.md +++ /dev/null @@ -1,45 +0,0 @@ -# 密钥信息 - -## SSH 公钥 (Ed25519) -``` -ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMSUUfma8FKEFvH8Nq65XM2PZ9kitfgv1q727cKV9y5Z houzhongxu@seekkey.tech -``` - -## GPG 公钥 (Ed25519/Curve25519) -``` ------BEGIN PGP PUBLIC KEY BLOCK----- - -mDMEaMt8oxYJKwYBBAHaRw8BAQdA12gQlBUibUxlktq4lg2WE4aQYF+lcBPMhcZY -Dgi4rEC0JUhvdSBaaG9uZ3h1IDxob3V6aG9uZ3h1QHNlZWtrZXkudGVjaD6IjwQT -FggAOBYhBDKnNH301qf6d+MeO7RL6xQ48bRvBQJoy3yjAhsjBQsJCAcCBhUKCQgL -AgQWAgMBAh4BAheAAAoJELRL6xQ48bRv+vIBAO8T89J4o+jT+gyXjnSlcYLLTX8J -2h4Pjn+WSD6JSMd6APjGuQOplQEGsK6FYPLQWQJnBEYaP1uZXOqnHo3tybEPuDgE -aMt8oxIKKwYBBAGXVQEFAQEHQG0IjWKNJ+KhGFz7Jav8kgzv2Y/o0w/LAN+wwI/a -rGpPAwEIB4h4BBgWCAAgFiEEMqc0ffTWp/p34x47tEvrFDjxtG8FAmjLfKMCGwwA -CgkQtEvrFDjxtG9s3QD+JyeAHkoDIkVsc5wrRztZYc+HGNzGV6X0GWAqWSOW/Q8B -AOhtF5Xgf4j0pKkuqKbGrpiPtBuK5T7Q+QtOz3jOSDwG -=7qIa ------END PGP PUBLIC KEY BLOCK----- -``` - -## 密钥信息 -- **用户名**: Houzhong Xu -- **邮箱**: houzhongxu@seekkey.tech -- **GPG Key ID**: 32A7347DF4D6A7FA77E31E3BB44BEB1438F1B46F -- **SSH 指纹**: 
SHA256:/IbwMngZcfNrlMqeFbyeAH8KTrhc43+E1Z22IEqIKss - -## 在 Gitea 中添加密钥 - -### SSH 密钥 -1. 登录 Gitea: https://gitea.tailnet-68f9.ts.net/ -2. Settings → SSH / GPG Keys → Add Key -3. 粘贴上面的 SSH 公钥 - -### GPG 密钥 -1. 在同一页面点击 "Add GPG Key" -2. 粘贴上面的 GPG 公钥 - -## Git 配置 -- 已配置 Git 使用 GPG 签名提交 -- 用户名: Houzhong Xu -- 邮箱: houzhongxu@seekkey.tech \ No newline at end of file diff --git a/monitoring-stack.yml b/monitoring-stack.yml deleted file mode 100644 index b673d7a..0000000 --- a/monitoring-stack.yml +++ /dev/null @@ -1,131 +0,0 @@ -version: '3.8' - -services: - # Prometheus 监控 - prometheus: - image: prom/prometheus:latest - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.console.libraries=/etc/prometheus/console_libraries' - - '--web.console.templates=/etc/prometheus/consoles' - - '--storage.tsdb.retention.time=200h' - - '--web.enable-lifecycle' - networks: - - traefik-public - - monitoring - configs: - - source: prometheus-config - target: /etc/prometheus/prometheus.yml - volumes: - - prometheus-data:/prometheus - deploy: - replicas: 1 - labels: - - traefik.enable=true - - traefik.http.routers.prometheus.rule=Host(`prometheus.local`) - - traefik.http.routers.prometheus.entrypoints=web - - traefik.http.services.prometheus.loadbalancer.server.port=9090 - restart_policy: - condition: on-failure - - # Grafana 可视化 - grafana: - image: grafana/grafana:latest - environment: - - GF_SECURITY_ADMIN_PASSWORD=admin123 - - GF_USERS_ALLOW_SIGN_UP=false - networks: - - traefik-public - - monitoring - volumes: - - grafana-data:/var/lib/grafana - deploy: - replicas: 1 - labels: - - traefik.enable=true - - traefik.http.routers.grafana.rule=Host(`grafana.local`) - - traefik.http.routers.grafana.entrypoints=web - - traefik.http.services.grafana.loadbalancer.server.port=3000 - restart_policy: - condition: on-failure - - # Node Exporter (系统指标) - node-exporter: - image: prom/node-exporter:latest - command: - - '--path.procfs=/host/proc' 
- - '--path.rootfs=/rootfs' - - '--path.sysfs=/host/sys' - - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' - volumes: - - /proc:/host/proc:ro - - /sys:/host/sys:ro - - /:/rootfs:ro - networks: - - monitoring - deploy: - mode: global - restart_policy: - condition: on-failure - - # cAdvisor (容器指标) - cadvisor: - image: gcr.io/cadvisor/cadvisor:latest - volumes: - - /:/rootfs:ro - - /var/run:/var/run:rw - - /sys:/sys:ro - - /var/lib/docker/:/var/lib/docker:ro - - /dev/disk/:/dev/disk:ro - networks: - - monitoring - deploy: - mode: global - restart_policy: - condition: on-failure - -networks: - traefik-public: - external: true - monitoring: - driver: overlay - -volumes: - prometheus-data: - grafana-data: - -configs: - prometheus-config: - content: | - global: - scrape_interval: 15s - evaluation_interval: 15s - - scrape_configs: - # Traefik 指标 - - job_name: 'traefik' - static_configs: - - targets: ['traefik:8080'] - metrics_path: /metrics - - # Prometheus 自身 - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - # Node Exporter - - job_name: 'node-exporter' - dns_sd_configs: - - names: - - 'tasks.node-exporter' - type: 'A' - port: 9100 - - # cAdvisor - - job_name: 'cadvisor' - dns_sd_configs: - - names: - - 'tasks.cadvisor' - type: 'A' - port: 8080 \ No newline at end of file diff --git a/scripts/ops-manager.sh b/scripts/ops-manager.sh deleted file mode 100644 index 31be6f3..0000000 --- a/scripts/ops-manager.sh +++ /dev/null @@ -1,260 +0,0 @@ -#!/bin/bash - -# Operations Manager - 便捷的运维脚本管理工具 -# 使用方法: ./ops-manager.sh [action] [target] [options] - -set -e - -# 颜色定义 -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -PURPLE='\033[0;35m' -CYAN='\033[0;36m' -NC='\033[0m' # No Color - -# 配置 -ANSIBLE_DIR="$(dirname "$0")/../ansible" -INVENTORY="$ANSIBLE_DIR/inventory.ini" - -# 可用的操作 -declare -A OPERATIONS=( - ["update"]="system-update.yml" - ["cleanup"]="system-cleanup.yml" - 
["health"]="service-health-check.yml" - ["security"]="security-hardening.yml" - ["docker"]="docker-management.yml" - ["network"]="network-connectivity.yml" - ["cert"]="certificate-management.yml" - ["toolkit"]="ops-toolkit.yml" - ["cloud"]="cloud-providers-update.yml" -) - -# 可用的目标组 -declare -A TARGETS=( - ["all"]="all" - ["lxc"]="lxc" - ["alpine"]="alpine" - ["proxmox"]="proxmox" - ["armbian"]="armbian" - ["hcp"]="hcp" - ["feiniu"]="feiniu" - ["dev"]="dev" - ["oci-kr"]="oci_kr" - ["oci-us"]="oci_us" - ["huawei"]="huawei" - ["google"]="google" - ["aws"]="aws" - ["germany"]="germany" -) - -# 显示帮助信息 -show_help() { - echo -e "${CYAN}🛠️ Operations Manager - 运维脚本管理工具${NC}" - echo "" - echo -e "${YELLOW}使用方法:${NC}" - echo " $0 [操作] [目标] [选项]" - echo "" - echo -e "${YELLOW}可用操作:${NC}" - for op in "${!OPERATIONS[@]}"; do - echo -e " ${GREEN}$op${NC} - ${OPERATIONS[$op]}" - done - echo "" - echo -e "${YELLOW}可用目标:${NC}" - for target in "${!TARGETS[@]}"; do - echo -e " ${BLUE}$target${NC} - ${TARGETS[$target]}" - done - echo "" - echo -e "${YELLOW}示例:${NC}" - echo -e " $0 ${GREEN}update${NC} ${BLUE}lxc${NC} # 更新 LXC 容器" - echo -e " $0 ${GREEN}cleanup${NC} ${BLUE}all${NC} # 清理所有服务器" - echo -e " $0 ${GREEN}health${NC} ${BLUE}proxmox${NC} # 检查 Proxmox 健康状态" - echo -e " $0 ${GREEN}docker${NC} ${BLUE}lxc${NC} # 管理 LXC 中的 Docker" - echo -e " $0 ${GREEN}toolkit${NC} ${BLUE}germany${NC} # 运行德国服务器工具包" - echo "" - echo -e "${YELLOW}选项:${NC}" - echo -e " ${PURPLE}--dry-run${NC} 仅显示将要执行的命令" - echo -e " ${PURPLE}--verbose${NC} 显示详细输出" - echo -e " ${PURPLE}--check${NC} 检查模式(不做实际更改)" - echo -e " ${PURPLE}--help${NC} 显示此帮助信息" -} - -# 显示状态信息 -show_status() { - echo -e "${CYAN}📊 系统状态概览${NC}" - echo "" - - # 检查 Ansible 是否可用 - if command -v ansible >/dev/null 2>&1; then - echo -e "${GREEN}✅ Ansible 已安装${NC}" - else - echo -e "${RED}❌ Ansible 未安装${NC}" - exit 1 - fi - - # 检查 inventory 文件 - if [ -f "$INVENTORY" ]; then - echo -e "${GREEN}✅ Inventory 文件存在${NC}" - echo -e " 📁 路径: $INVENTORY" - else 
- echo -e "${RED}❌ Inventory 文件不存在${NC}" - exit 1 - fi - - # 显示可用的主机组 - echo "" - echo -e "${YELLOW}📋 可用主机组:${NC}" - ansible-inventory -i "$INVENTORY" --list | jq -r 'keys[]' | grep -v "_meta" | sort | while read group; do - count=$(ansible-inventory -i "$INVENTORY" --list | jq -r ".[\"$group\"].hosts // [] | length") - echo -e " ${BLUE}$group${NC}: $count 台主机" - done -} - -# 执行 Ansible 命令 -run_ansible() { - local operation=$1 - local target=$2 - local options=$3 - - local playbook="${OPERATIONS[$operation]}" - local host_pattern="${TARGETS[$target]}" - - if [ -z "$playbook" ]; then - echo -e "${RED}❌ 未知操作: $operation${NC}" - show_help - exit 1 - fi - - if [ -z "$host_pattern" ]; then - echo -e "${RED}❌ 未知目标: $target${NC}" - show_help - exit 1 - fi - - local ansible_cmd="ansible-playbook -i $INVENTORY $ANSIBLE_DIR/$playbook --limit $host_pattern" - - # 添加选项 - if [[ "$options" == *"--check"* ]]; then - ansible_cmd="$ansible_cmd --check" - fi - - if [[ "$options" == *"--verbose"* ]]; then - ansible_cmd="$ansible_cmd -v" - fi - - echo -e "${CYAN}🚀 执行操作${NC}" - echo -e "操作: ${GREEN}$operation${NC} ($playbook)" - echo -e "目标: ${BLUE}$target${NC} ($host_pattern)" - echo -e "命令: ${PURPLE}$ansible_cmd${NC}" - echo "" - - if [[ "$options" == *"--dry-run"* ]]; then - echo -e "${YELLOW}🔍 DRY RUN 模式 - 仅显示命令,不执行${NC}" - return 0 - fi - - # 确认执行 - read -p "确认执行? (y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo -e "${YELLOW}⏹️ 操作已取消${NC}" - exit 0 - fi - - echo -e "${GREEN}▶️ 开始执行...${NC}" - eval $ansible_cmd -} - -# 快速操作菜单 -interactive_mode() { - echo -e "${CYAN}🎯 交互式运维管理${NC}" - echo "" - - # 选择操作 - echo -e "${YELLOW}选择操作:${NC}" - local ops=($(printf '%s\n' "${!OPERATIONS[@]}" | sort)) - for i in "${!ops[@]}"; do - echo -e " $((i+1)). ${GREEN}${ops[i]}${NC} - ${OPERATIONS[${ops[i]}]}" - done - - read -p "请选择操作 (1-${#ops[@]}): " op_choice - if [[ ! 
"$op_choice" =~ ^[0-9]+$ ]] || [ "$op_choice" -lt 1 ] || [ "$op_choice" -gt "${#ops[@]}" ]; then - echo -e "${RED}❌ 无效选择${NC}" - exit 1 - fi - - local selected_op="${ops[$((op_choice-1))]}" - - # 选择目标 - echo "" - echo -e "${YELLOW}选择目标:${NC}" - local targets=($(printf '%s\n' "${!TARGETS[@]}" | sort)) - for i in "${!targets[@]}"; do - echo -e " $((i+1)). ${BLUE}${targets[i]}${NC} - ${TARGETS[${targets[i]}]}" - done - - read -p "请选择目标 (1-${#targets[@]}): " target_choice - if [[ ! "$target_choice" =~ ^[0-9]+$ ]] || [ "$target_choice" -lt 1 ] || [ "$target_choice" -gt "${#targets[@]}" ]; then - echo -e "${RED}❌ 无效选择${NC}" - exit 1 - fi - - local selected_target="${targets[$((target_choice-1))]}" - - # 选择选项 - echo "" - echo -e "${YELLOW}选择执行选项:${NC}" - echo -e " 1. ${GREEN}正常执行${NC}" - echo -e " 2. ${PURPLE}检查模式${NC} (--check)" - echo -e " 3. ${PURPLE}详细输出${NC} (--verbose)" - echo -e " 4. ${PURPLE}仅显示命令${NC} (--dry-run)" - - read -p "请选择选项 (1-4): " option_choice - - local options="" - case $option_choice in - 2) options="--check" ;; - 3) options="--verbose" ;; - 4) options="--dry-run" ;; - esac - - run_ansible "$selected_op" "$selected_target" "$options" -} - -# 主程序 -main() { - # 检查参数 - if [ $# -eq 0 ]; then - interactive_mode - exit 0 - fi - - case "$1" in - --help|-h|help) - show_help - ;; - --status|-s|status) - show_status - ;; - --interactive|-i|interactive) - interactive_mode - ;; - *) - if [ $# -lt 2 ]; then - echo -e "${RED}❌ 参数不足${NC}" - show_help - exit 1 - fi - - local operation=$1 - local target=$2 - local options="${@:3}" - - run_ansible "$operation" "$target" "$options" - ;; - esac -} - -# 执行主程序 -main "$@" \ No newline at end of file diff --git a/scripts/setup/setup-gitea-integration.sh b/scripts/setup/setup-gitea-integration.sh new file mode 100755 index 0000000..7526a6e --- /dev/null +++ b/scripts/setup/setup-gitea-integration.sh @@ -0,0 +1,467 @@ +#!/bin/bash +# Gitea 集成设置脚本 + +set -e + +echo "🔗 设置 Gitea 集成..." 
+ +# 配置变量 +GITEA_HOST="gitea" +GITEA_USER="ben" +GITEA_SSH_URL="git@${GITEA_HOST}" +REPO_NAME="mgmt" +GITEA_HTTP_URL="http://${GITEA_HOST}:3000" + +# 检查 SSH 连接 +echo "🔍 检查 Gitea SSH 连接..." +if ssh -o ConnectTimeout=5 -o BatchMode=yes "${GITEA_SSH_URL}" 2>&1 | grep -q "successfully authenticated"; then + echo "✅ SSH 连接正常" +else + echo "❌ SSH 连接失败,请检查:" + echo " 1. Gitea 服务是否运行" + echo " 2. SSH 密钥是否已添加到 Gitea" + echo " 3. 网络连接是否正常" + exit 1 +fi + +# 检查是否已经是 Git 仓库 +if [ ! -d ".git" ]; then + echo "📦 初始化 Git 仓库..." + git init + git config user.name "${GITEA_USER}" + git config user.email "${GITEA_USER}@example.com" +else + echo "✅ Git 仓库已存在" +fi + +# 检查远程仓库配置 +if git remote get-url origin >/dev/null 2>&1; then + CURRENT_ORIGIN=$(git remote get-url origin) + echo "ℹ️ 当前远程仓库: $CURRENT_ORIGIN" + + if [[ "$CURRENT_ORIGIN" != *"${GITEA_HOST}"* ]]; then + echo "🔄 更新远程仓库地址..." + git remote set-url origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git" + fi +else + echo "➕ 添加远程仓库..." + git remote add origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git" +fi + +# 创建 .gitignore +echo "📝 创建 .gitignore..." 
+cat > .gitignore << 'EOF' +# OpenTofu/Terraform +*.tfstate +*.tfstate.* +*.tfvars +!*.tfvars.example +.terraform/ +.terraform.lock.hcl +crash.log +crash.*.log + +# Ansible +*.retry +.vault_pass +host_vars/*/vault.yml +group_vars/*/vault.yml + +# Docker +.env +docker-compose.override.yml + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +tmp/ +temp/ +.tmp/ + +# Backup files +backup-*/ +*.bak + +# Secrets +secrets/ +*.pem +*.key +*.crt +!*.example.* + +# Node modules (if any) +node_modules/ + +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.mypy_cache +.pytest_cache +.hypothesis + +# Local development +.local/ +local-* +EOF + +# 创建 Gitea Actions 工作流 +echo "🔄 创建 Gitea Actions 工作流..." + +# 基础设施 CI/CD +cat > .gitea/workflows/infrastructure.yml << 'EOF' +name: Infrastructure CI/CD + +on: + push: + branches: [ main, develop ] + paths: + - 'infrastructure/**' + - '.gitea/workflows/infrastructure.yml' + pull_request: + branches: [ main ] + paths: + - 'infrastructure/**' + +jobs: + validate: + runs-on: ubuntu-latest + name: Validate Infrastructure + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Validate OpenTofu configurations + run: | + for dir in infrastructure/providers/*/; do + if [ -d "$dir" ]; then + echo "Validating $dir" + cd "$dir" + tofu init -backend=false + tofu validate + cd - > /dev/null + fi + done + + - name: Check formatting + run: | + tofu fmt -check -recursive infrastructure/ + + - name: Security scan + run: | + # 这里可以添加 tfsec 或 checkov 扫描 + echo "Security scan placeholder" + + plan: + runs-on: ubuntu-latest + name: Plan Infrastructure + needs: validate + if: github.event_name == 'pull_request' + steps: + - name: 
Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Plan infrastructure changes + run: | + cd infrastructure/environments/dev + tofu init + tofu plan -var-file="terraform.tfvars" -out=tfplan + env: + # 这里需要配置云服务商的环境变量 + TF_VAR_environment: dev + + apply: + runs-on: ubuntu-latest + name: Apply Infrastructure + needs: validate + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_version: 1.10.6 + + - name: Apply infrastructure changes + run: | + cd infrastructure/environments/dev + tofu init + tofu apply -var-file="terraform.tfvars" -auto-approve + env: + TF_VAR_environment: dev +EOF + +# 应用部署工作流 +cat > .gitea/workflows/deploy.yml << 'EOF' +name: Application Deployment + +on: + push: + branches: [ main ] + paths: + - 'configuration/**' + - 'containers/**' + - '.gitea/workflows/deploy.yml' + workflow_dispatch: + inputs: + environment: + description: 'Target environment' + required: true + default: 'dev' + type: choice + options: + - dev + - staging + - production + +jobs: + ansible-check: + runs-on: ubuntu-latest + name: Ansible Syntax Check + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Ansible + run: | + pip install ansible ansible-core + ansible-galaxy collection install community.general + ansible-galaxy collection install ansible.posix + ansible-galaxy collection install community.docker + + - name: Ansible syntax check + run: | + cd configuration + for playbook in playbooks/*/*.yml; do + if [ -f "$playbook" ]; then + echo "Checking $playbook" + ansible-playbook --syntax-check "$playbook" + fi + done + + deploy: + runs-on: ubuntu-latest + name: Deploy Applications + needs: ansible-check + steps: + - name: 
Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Ansible + run: | + pip install ansible ansible-core + ansible-galaxy collection install community.general + ansible-galaxy collection install ansible.posix + ansible-galaxy collection install community.docker + + - name: Deploy applications + run: | + cd configuration + ENV="${{ github.event.inputs.environment || 'dev' }}" + ansible-playbook -i "inventories/${ENV}/inventory.ini" playbooks/bootstrap/main.yml + env: + ANSIBLE_HOST_KEY_CHECKING: False +EOF + +# Docker 构建工作流 +cat > .gitea/workflows/docker.yml << 'EOF' +name: Docker Build and Deploy + +on: + push: + branches: [ main ] + paths: + - 'containers/**' + - 'Dockerfile*' + - '.gitea/workflows/docker.yml' + +jobs: + build: + runs-on: ubuntu-latest + name: Build Docker Images + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ secrets.REGISTRY_URL }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Build and push images + run: | + # 构建应用镜像 + for dockerfile in containers/applications/*/Dockerfile; do + if [ -f "$dockerfile" ]; then + app_name=$(basename $(dirname "$dockerfile")) + echo "Building $app_name" + docker build -t "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}" -f "$dockerfile" . + docker push "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}" + fi + done + + deploy-swarm: + runs-on: ubuntu-latest + name: Deploy to Docker Swarm + needs: build + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Deploy to Swarm + run: | + # 这里可以通过 SSH 连接到 Swarm 管理节点进行部署 + echo "Deploy to Swarm placeholder" +EOF + +# 创建项目配置文件 +echo "⚙️ 创建项目配置文件..." 
+ +# Gitea 仓库配置 +cat > .gitea/settings.yml << 'EOF' +# Gitea 仓库设置 +repository: + name: mgmt + description: "基础设施管理项目 - OpenTofu + Ansible + Docker Swarm" + website: "" + default_branch: main + + # 功能开关 + has_issues: true + has_wiki: true + has_projects: true + has_actions: true + + # 权限设置 + private: false + allow_merge_commits: true + allow_squash_merge: true + allow_rebase_merge: true + delete_branch_on_merge: true + +# Actions 设置 +actions: + enabled: true + allow_fork_pull_request_run: true + default_actions_url: "https://gitea.com" + +# 分支保护 +branch_protection: + main: + enable_push: false + enable_push_whitelist: true + push_whitelist_usernames: ["ben"] + require_signed_commits: false + enable_merge_whitelist: true + merge_whitelist_usernames: ["ben"] + enable_status_check: true + status_check_contexts: ["validate", "plan"] + enable_approvals_whitelist: false + approvals_whitelist_usernames: [] + block_on_rejected_reviews: true + dismiss_stale_approvals: true + require_signed_commits: false +EOF + +# 添加所有文件到 Git +echo "📦 添加文件到 Git..." +git add . + +# 检查是否有变更需要提交 +if git diff --staged --quiet; then + echo "ℹ️ 没有新的变更需要提交" +else + echo "💾 提交变更..." + git commit -m "feat: 集成 OpenTofu + Ansible + Gitea CI/CD + +- 重构项目目录结构 +- 添加 OpenTofu 多云支持 +- 配置 Ansible 自动化部署 +- 集成 Gitea Actions CI/CD 流水线 +- 添加 Docker Swarm 管理 +- 完善监控和安全配置" +fi + +# 推送到远程仓库 +echo "🚀 推送到 Gitea..." +if git push -u origin main; then + echo "✅ 成功推送到 Gitea" +else + echo "⚠️ 推送失败,可能需要先在 Gitea 创建仓库" + echo " 请访问: ${GITEA_HTTP_URL}/repo/create" + echo " 创建名为 '${REPO_NAME}' 的仓库" +fi + +echo "" +echo "🎉 Gitea 集成设置完成!" +echo "" +echo "📋 下一步操作:" +echo "1. 访问 Gitea: ${GITEA_HTTP_URL}/${GITEA_USER}/${REPO_NAME}" +echo "2. 配置 Actions Secrets (如果需要):" +echo " - REGISTRY_URL: 容器镜像仓库地址" +echo " - REGISTRY_USERNAME: 仓库用户名" +echo " - REGISTRY_PASSWORD: 仓库密码" +echo "3. 配置云服务商凭据 (通过 Secrets 或环境变量)" +echo "4. 
测试 CI/CD 流水线" +echo "" +echo "🔗 有用的命令:" +echo " git status - 查看仓库状态" +echo " git log --oneline - 查看提交历史" +echo " git push - 推送变更" +echo " make help - 查看项目命令" \ No newline at end of file diff --git a/scripts/utilities/gitea-repo-manager.sh b/scripts/utilities/gitea-repo-manager.sh new file mode 100755 index 0000000..29e49b6 --- /dev/null +++ b/scripts/utilities/gitea-repo-manager.sh @@ -0,0 +1,242 @@ +#!/bin/bash +# Gitea 仓库管理脚本 + +set -e + +# 配置 +GITEA_HOST="gitea" +GITEA_USER="ben" +GITEA_HTTP_URL="http://${GITEA_HOST}:3000" +GITEA_SSH_URL="git@${GITEA_HOST}" +REPO_NAME="mgmt" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# 打印带颜色的消息 +print_message() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# 检查 SSH 连接 +check_ssh_connection() { + print_message $BLUE "🔍 检查 Gitea SSH 连接..." + + if ssh -o ConnectTimeout=5 -o BatchMode=yes "${GITEA_SSH_URL}" 2>&1 | grep -q "successfully authenticated"; then + print_message $GREEN "✅ SSH 连接正常" + return 0 + else + print_message $RED "❌ SSH 连接失败" + return 1 + fi +} + +# 检查仓库状态 +check_repo_status() { + print_message $BLUE "📊 检查仓库状态..." + + if [ -d ".git" ]; then + print_message $GREEN "✅ Git 仓库已初始化" + + if git remote get-url origin >/dev/null 2>&1; then + local origin_url=$(git remote get-url origin) + print_message $GREEN "✅ 远程仓库: $origin_url" + else + print_message $YELLOW "⚠️ 未配置远程仓库" + fi + + local branch=$(git branch --show-current) + print_message $BLUE "📍 当前分支: $branch" + + local status=$(git status --porcelain) + if [ -z "$status" ]; then + print_message $GREEN "✅ 工作目录干净" + else + print_message $YELLOW "⚠️ 有未提交的变更" + fi + else + print_message $RED "❌ 不是 Git 仓库" + fi +} + +# 初始化仓库 +init_repo() { + print_message $BLUE "📦 初始化 Git 仓库..." + + if [ ! -d ".git" ]; then + git init + git config user.name "${GITEA_USER}" + git config user.email "${GITEA_USER}@example.com" + print_message $GREEN "✅ Git 仓库初始化完成" + fi + + # 配置远程仓库 + if ! 
git remote get-url origin >/dev/null 2>&1; then + git remote add origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git" + print_message $GREEN "✅ 远程仓库配置完成" + fi +} + +# 同步代码 +sync_code() { + print_message $BLUE "🔄 同步代码..." + + # 检查是否有未提交的变更 + if ! git diff --quiet || ! git diff --staged --quiet; then + print_message $YELLOW "⚠️ 发现未提交的变更" + git status --short + + read -p "是否提交这些变更? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + git add . + read -p "请输入提交消息: " commit_message + git commit -m "$commit_message" + print_message $GREEN "✅ 变更已提交" + else + print_message $YELLOW "⚠️ 跳过提交" + return 1 + fi + fi + + # 推送到远程仓库 + if git push origin main; then + print_message $GREEN "✅ 代码推送成功" + else + print_message $RED "❌ 代码推送失败" + return 1 + fi +} + +# 拉取最新代码 +pull_code() { + print_message $BLUE "⬇️ 拉取最新代码..." + + if git pull origin main; then + print_message $GREEN "✅ 代码拉取成功" + else + print_message $RED "❌ 代码拉取失败" + return 1 + fi +} + +# 查看提交历史 +show_history() { + print_message $BLUE "📜 提交历史:" + git log --oneline --graph --decorate -10 +} + +# 查看分支状态 +show_branches() { + print_message $BLUE "🌿 分支状态:" + git branch -a +} + +# 创建新分支 +create_branch() { + local branch_name=$1 + if [ -z "$branch_name" ]; then + read -p "请输入分支名称: " branch_name + fi + + if [ -n "$branch_name" ]; then + git checkout -b "$branch_name" + print_message $GREEN "✅ 分支 '$branch_name' 创建成功" + else + print_message $RED "❌ 分支名称不能为空" + fi +} + +# 切换分支 +switch_branch() { + local branch_name=$1 + if [ -z "$branch_name" ]; then + print_message $BLUE "可用分支:" + git branch -a + read -p "请输入要切换的分支名称: " branch_name + fi + + if [ -n "$branch_name" ]; then + git checkout "$branch_name" + print_message $GREEN "✅ 已切换到分支 '$branch_name'" + else + print_message $RED "❌ 分支名称不能为空" + fi +} + +# 显示帮助 +show_help() { + echo "Gitea 仓库管理脚本" + echo "" + echo "用法: $0 [命令]" + echo "" + echo "命令:" + echo " check 检查连接和仓库状态" + echo " init 初始化仓库" + echo " sync 同步代码到远程仓库" + echo " pull 拉取最新代码" + echo " history 查看提交历史" + echo " 
branches 查看分支状态" + echo " create-branch [name] 创建新分支" + echo " switch-branch [name] 切换分支" + echo " status 查看仓库状态" + echo " help 显示帮助信息" + echo "" + echo "示例:" + echo " $0 check # 检查状态" + echo " $0 sync # 同步代码" + echo " $0 create-branch feature-x # 创建功能分支" +} + +# 主函数 +main() { + local command=${1:-help} + + case $command in + check) + check_ssh_connection + check_repo_status + ;; + init) + init_repo + ;; + sync) + sync_code + ;; + pull) + pull_code + ;; + history) + show_history + ;; + branches) + show_branches + ;; + create-branch) + create_branch "$2" + ;; + switch-branch) + switch_branch "$2" + ;; + status) + check_repo_status + ;; + help|--help|-h) + show_help + ;; + *) + print_message $RED "❌ 未知命令: $command" + show_help + exit 1 + ;; + esac +} + +# 执行主函数 +main "$@" \ No newline at end of file diff --git a/scripts/utilities/quick-start.sh b/scripts/utilities/quick-start.sh new file mode 100755 index 0000000..c6366d7 --- /dev/null +++ b/scripts/utilities/quick-start.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# 快速启动脚本 + +set -e + +echo "🚀 欢迎使用基础设施管理平台!" +echo "" + +# 检查必要工具 +check_tool() { + if ! command -v "$1" &> /dev/null; then + echo "❌ $1 未安装,请先运行 'make setup'" + return 1 + fi +} + +echo "🔍 检查必要工具..." +check_tool "tofu" || exit 1 +check_tool "ansible" || exit 1 +check_tool "docker" || exit 1 + +echo "✅ 工具检查通过" +echo "" + +# 检查配置文件 +CONFIG_FILE="infrastructure/environments/dev/terraform.tfvars" +if [ ! -f "$CONFIG_FILE" ]; then + echo "⚠️ 配置文件不存在,正在创建..." + cp "${CONFIG_FILE}.example" "$CONFIG_FILE" + echo "📝 请编辑配置文件: $CONFIG_FILE" + echo " 填入你的云服务商凭据后再次运行此脚本" + exit 1 +fi + +echo "✅ 配置文件存在" +echo "" + +# 选择操作 +echo "请选择要执行的操作:" +echo "1) 初始化基础设施" +echo "2) 查看执行计划" +echo "3) 应用基础设施变更" +echo "4) 部署应用" +echo "5) 启动开发环境" +echo "6) 查看监控" +echo "7) 完整部署流程" +echo "" + +read -p "请输入选项 (1-7): " choice + +case $choice in + 1) + echo "🏗️ 初始化基础设施..." + make init + ;; + 2) + echo "📋 查看执行计划..." + make plan + ;; + 3) + echo "🚀 应用基础设施变更..." + make apply + ;; + 4) + echo "📦 部署应用..." 
+ make ansible-deploy + ;; + 5) + echo "🐳 启动开发环境..." + make docker-up + ;; + 6) + echo "📊 启动监控..." + make monitor + ;; + 7) + echo "🎯 执行完整部署流程..." + echo "" + echo "步骤 1/4: 初始化基础设施..." + make init + echo "" + echo "步骤 2/4: 查看执行计划..." + make plan + echo "" + read -p "是否继续应用基础设施变更? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "步骤 3/4: 应用基础设施变更..." + make apply + echo "" + echo "步骤 4/4: 部署应用..." + make ansible-deploy + echo "" + echo "🎉 完整部署流程完成!" + else + echo "ℹ️ 部署流程已取消" + fi + ;; + *) + echo "❌ 无效选项" + exit 1 + ;; +esac + +echo "" +echo "🎉 操作完成!" +echo "" +echo "📋 有用的命令:" +echo " make help - 查看所有可用命令" +echo " make plan - 查看基础设施变更计划" +echo " make apply - 应用基础设施变更" +echo " make ansible-deploy - 部署应用" +echo " make monitor - 启动监控" +echo " make clean - 清理临时文件" \ No newline at end of file diff --git a/semaphore-setup-guide.md b/semaphore-setup-guide.md deleted file mode 100644 index 546f1b4..0000000 --- a/semaphore-setup-guide.md +++ /dev/null @@ -1,86 +0,0 @@ -# Semaphore 项目配置指南 - -## 1. 访问 Semaphore Web 界面 - -- URL: http://your-server-ip:3000 -- 用户名: `admin` -- 密码: `admin123` - -## 2. 创建项目步骤 - -### 第一步:添加 Key Store (SSH 密钥或访问令牌) -1. 点击左侧菜单 "Key Store" -2. 点击 "New Key" 按钮 -3. 填写信息: - - **Name**: `gitea-access-token` - - **Type**: 选择 "Login with password" - - **Username**: `ben` - - **Password**: `8d7d70f324796be650b79415303c31f567bf459b` - -### 第二步:添加 Repository -1. 点击左侧菜单 "Repositories" -2. 点击 "New Repository" 按钮 -3. 填写信息: - - **Name**: `mgmt` - - **URL**: `https://gitea.tailnet-68f9.ts.net/ben/mgmt.git` - - **Branch**: `main` - - **Access Key**: 选择刚创建的 `gitea-access-token` - -### 第三步:创建 Inventory -1. 点击左侧菜单 "Inventory" -2. 点击 "New Inventory" 按钮 -3. 
填写信息: - - **Name**: `servers` - - **User Credentials**: 选择或创建服务器访问凭据 - - **Sudo Credentials**: 如果需要 sudo 权限,选择相应凭据 - - **Type**: 选择 "Static" - - **Inventory**: 输入服务器信息,例如: - ``` - [servers] - localhost ansible_connection=local - # 或添加远程服务器 - # server1 ansible_host=192.168.1.10 ansible_user=root - ``` - -### 第四步:创建 Environment -1. 点击左侧菜单 "Environment" -2. 点击 "New Environment" 按钮 -3. 填写信息: - - **Name**: `production` - - **JSON**: 可以留空或添加环境变量 - -### 第五步:创建 Task Template -1. 点击左侧菜单 "Task Templates" -2. 点击 "New Template" 按钮 -3. 填写信息: - - **Name**: `System Update` - - **Playbook**: `ansible/system-update.yml` - - **Inventory**: 选择刚创建的 `servers` - - **Repository**: 选择 `mgmt` - - **Environment**: 选择 `production` - -## 3. 运行任务 - -1. 在 "Task Templates" 页面找到 "System Update" 模板 -2. 点击 "Run" 按钮 -3. 确认设置后点击 "Run" 执行任务 - -## 4. 监控任务执行 - -- 在 "Tasks" 页面可以查看任务执行历史 -- 点击具体任务可以查看详细日志 -- 可以设置定时任务自动执行系统更新 - -## 项目文件说明 - -- `system-update.yml`: 主要的 Ansible playbook,执行 apt update && apt upgrade -- `inventory.ini`: 服务器清单文件模板 -- `ansible.cfg`: Ansible 配置文件 -- `README.md`: 项目说明文档 - -## Git 仓库信息 - -- **仓库地址**: https://gitea.tailnet-68f9.ts.net/ben/mgmt -- **分支**: main -- **最新提交**: 包含 Ansible 脚本和 Semaphore 配置文件 -- **Ansible 脚本路径**: `ansible/system-update.yml` \ No newline at end of file diff --git a/swarm-traefik-manager.sh b/swarm-traefik-manager.sh deleted file mode 100755 index 7d836c2..0000000 --- a/swarm-traefik-manager.sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/bash - -# Traefik + Docker Swarm 管理脚本 -# 用于部署、管理和监控 Traefik 在 Docker Swarm 中的集成 - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -NETWORK_NAME="traefik-public" - -# 颜色定义 -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# 日志函数 -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -log_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - 
-# 检查 Docker Swarm 状态 -check_swarm() { - log_info "检查 Docker Swarm 状态..." - if ! docker info | grep -q "Swarm: active"; then - log_error "Docker Swarm 未激活,请先初始化 Swarm 集群" - echo "运行: docker swarm init" - exit 1 - fi - log_success "Docker Swarm 已激活" -} - -# 创建网络 -create_network() { - log_info "创建 Traefik 公共网络..." - if docker network ls | grep -q "$NETWORK_NAME"; then - log_warning "网络 $NETWORK_NAME 已存在" - else - docker network create --driver overlay --attachable "$NETWORK_NAME" - log_success "网络 $NETWORK_NAME 创建成功" - fi -} - -# 部署 Traefik -deploy_traefik() { - log_info "部署 Traefik 服务..." - docker stack deploy -c "$SCRIPT_DIR/traefik-swarm-stack.yml" traefik - log_success "Traefik 部署完成" -} - -# 部署示例服务 -deploy_demo() { - log_info "部署示例服务..." - docker stack deploy -c "$SCRIPT_DIR/demo-services-stack.yml" demo - log_success "示例服务部署完成" -} - -# 部署监控服务 -deploy_monitoring() { - log_info "部署监控服务..." - docker stack deploy -c "$SCRIPT_DIR/monitoring-stack.yml" monitoring - log_success "监控服务部署完成" -} - -# 显示服务状态 -show_status() { - log_info "显示服务状态..." - echo "" - echo "=== Traefik Stack ===" - docker stack services traefik - echo "" - echo "=== Demo Stack ===" - docker stack services demo 2>/dev/null || echo "Demo stack not deployed" - echo "" - echo "=== Monitoring Stack ===" - docker stack services monitoring 2>/dev/null || echo "Monitoring stack not deployed" - echo "" -} - -# 显示访问地址 -show_urls() { - log_info "服务访问地址:" - echo "" - echo "🎛️ Traefik Dashboard: http://traefik.local:8080" - echo "🌐 Web App: http://app.local" - echo "🔌 API Service: http://api.local" - echo "📊 Monitor Service: http://monitor.local" - echo "📈 Prometheus: http://prometheus.local" - echo "📊 Grafana: http://grafana.local (admin/admin123)" - echo "" - echo "💡 请确保在 /etc/hosts 中添加以下条目:" - echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" -} - -# 查看日志 -show_logs() { - local service=${1:-traefik_traefik} - log_info "显示 $service 服务日志..." 
- docker service logs -f "$service" -} - -# 扩缩容服务 -scale_service() { - local stack=$1 - local service=$2 - local replicas=$3 - - if [[ -z "$stack" || -z "$service" || -z "$replicas" ]]; then - log_error "用法: $0 scale " - exit 1 - fi - - log_info "扩缩容 ${stack}_${service} 到 $replicas 个副本..." - docker service scale "${stack}_${service}=$replicas" - log_success "扩缩容完成" -} - -# 清理所有服务 -cleanup() { - log_warning "清理所有 Traefik 相关服务..." - read -p "确认删除所有服务? (y/N): " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - docker stack rm monitoring 2>/dev/null || true - docker stack rm demo 2>/dev/null || true - docker stack rm traefik 2>/dev/null || true - - log_info "等待服务清理完成..." - sleep 10 - - # 清理网络 - docker network rm "$NETWORK_NAME" 2>/dev/null || true - - log_success "清理完成" - else - log_info "取消清理操作" - fi -} - -# 更新 hosts 文件 -update_hosts() { - log_info "更新 /etc/hosts 文件..." - - # 备份原文件 - sudo cp /etc/hosts /etc/hosts.backup.$(date +%Y%m%d_%H%M%S) - - # 移除旧的条目 - sudo sed -i '/# Traefik Swarm Demo/d' /etc/hosts - sudo sed -i '/traefik.local\|app.local\|api.local\|monitor.local\|prometheus.local\|grafana.local/d' /etc/hosts - - # 添加新条目 - echo "# Traefik Swarm Demo" | sudo tee -a /etc/hosts - echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" | sudo tee -a /etc/hosts - - log_success "hosts 文件更新完成" -} - -# 显示帮助 -show_help() { - echo "Traefik + Docker Swarm 管理脚本" - echo "" - echo "用法: $0 [options]" - echo "" - echo "命令:" - echo " init 初始化环境 (创建网络)" - echo " deploy 部署 Traefik" - echo " deploy-demo 部署示例服务" - echo " deploy-monitoring 部署监控服务" - echo " deploy-all 部署所有服务" - echo " status 显示服务状态" - echo " urls 显示访问地址" - echo " logs [service] 查看服务日志" - echo " scale 扩缩容服务" - echo " update-hosts 更新 hosts 文件" - echo " cleanup 清理所有服务" - echo " help 显示帮助信息" - echo "" - echo "示例:" - echo " $0 deploy-all # 部署所有服务" - echo " $0 scale demo webapp 3 # 扩容 webapp 到 3 个副本" - echo " $0 logs traefik_traefik # 查看 Traefik 日志" -} - -# 主函数 -main() { - case 
"${1:-help}" in - "init") - check_swarm - create_network - ;; - "deploy") - check_swarm - create_network - deploy_traefik - show_urls - ;; - "deploy-demo") - deploy_demo - ;; - "deploy-monitoring") - deploy_monitoring - ;; - "deploy-all") - check_swarm - create_network - deploy_traefik - sleep 5 - deploy_demo - deploy_monitoring - show_status - show_urls - ;; - "status") - show_status - ;; - "urls") - show_urls - ;; - "logs") - show_logs "$2" - ;; - "scale") - scale_service "$2" "$3" "$4" - ;; - "update-hosts") - update_hosts - ;; - "cleanup") - cleanup - ;; - "help"|*) - show_help - ;; - esac -} - -# 执行主函数 -main "$@" \ No newline at end of file