feat: 集成 OpenTofu + Ansible + Gitea CI/CD

- 重构项目目录结构
- 添加 OpenTofu 多云支持
- 配置 Ansible 自动化部署
- 集成 Gitea Actions CI/CD 流水线
- 添加 Docker Swarm 管理
- 完善监控和安全配置
This commit is contained in:
Houzhong Xu 2025-09-20 10:48:41 +00:00
parent d755f237a0
commit 7eb4a33523
No known key found for this signature in database
GPG Key ID: B44BEB1438F1B46F
55 changed files with 3745 additions and 1921 deletions

42
.gitea/settings.yml Normal file
View File

@ -0,0 +1,42 @@
# Gitea repository settings.
# NOTE(review): indentation was reconstructed from a flattened diff view —
# verify the nesting against the committed file before relying on it.
repository:
  name: mgmt
  description: "基础设施管理项目 - OpenTofu + Ansible + Docker Swarm"
  website: ""
  default_branch: main
  # Feature toggles
  has_issues: true
  has_wiki: true
  has_projects: true
  has_actions: true
  # Visibility and merge policy
  private: false
  allow_merge_commits: true
  allow_squash_merge: true
  allow_rebase_merge: true
  delete_branch_on_merge: true

# Actions settings
actions:
  enabled: true
  allow_fork_pull_request_run: true
  default_actions_url: "https://gitea.com"

# Branch protection
branch_protection:
  main:
    enable_push: false
    enable_push_whitelist: true
    push_whitelist_usernames: ["ben"]
    # FIX: `require_signed_commits` appeared twice in the original (both
    # times false). Duplicate mapping keys are invalid YAML and most parsers
    # silently keep only the last value — the duplicate has been removed.
    require_signed_commits: false
    enable_merge_whitelist: true
    merge_whitelist_usernames: ["ben"]
    enable_status_check: true
    status_check_contexts: ["validate", "plan"]
    enable_approvals_whitelist: false
    approvals_whitelist_usernames: []
    block_on_rejected_reviews: true
    dismiss_stale_approvals: true

View File

@ -0,0 +1,136 @@
# Manually-triggered Ansible deployment pipeline: installs Ansible, builds an
# inventory (OpenTofu output if present, otherwise a static one from secrets),
# runs the selected playbook, and uploads a short report artifact.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: Ansible Deploy

on:
  workflow_dispatch:
    inputs:
      environment:
        description: '部署环境'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - staging
          - production
      provider:
        description: '云服务商'
        required: true
        default: 'oracle-cloud'
        type: choice
        options:
          - oracle-cloud
          - huawei-cloud
          - google-cloud
          - digitalocean
          - aws
      playbook:
        description: 'Playbook 类型'
        required: true
        default: 'bootstrap'
        type: choice
        options:
          - bootstrap
          - security
          - applications
          - monitoring
          - maintenance

env:
  ANSIBLE_VERSION: "8.0.0"

jobs:
  deploy:
    runs-on: ubuntu-latest
    environment: ${{ github.event.inputs.environment }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install Ansible
        run: |
          pip install ansible==${{ env.ANSIBLE_VERSION }}
          pip install ansible-core
          ansible-galaxy collection install community.general
          ansible-galaxy collection install ansible.posix

      - name: Setup SSH key
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          ssh-keyscan -H ${{ secrets.SSH_HOST }} >> ~/.ssh/known_hosts

      - name: Create dynamic inventory
        run: |
          ENV="${{ github.event.inputs.environment }}"
          PROVIDER="${{ github.event.inputs.provider }}"
          # Prefer the inventory generated from OpenTofu outputs when it exists.
          if [ -f "configuration/inventories/$ENV/$PROVIDER-inventory.json" ]; then
            echo "Using existing inventory from OpenTofu output"
            cp "configuration/inventories/$ENV/$PROVIDER-inventory.json" /tmp/inventory.json
          else
            echo "Creating static inventory"
            # WARNING(review): ansible_become_pass is written to disk in clear
            # text here; prefer --become-password-file or Ansible Vault.
            cat > /tmp/inventory.ini << EOF
          [$ENV]
          ${{ secrets.TARGET_HOST }} ansible_host=${{ secrets.TARGET_HOST }} ansible_user=${{ secrets.SSH_USER }} ansible_become=yes ansible_become_pass=${{ secrets.SUDO_PASSWORD }}
          [all:vars]
          ansible_ssh_common_args='-o StrictHostKeyChecking=no'
          EOF
          fi

      - name: Run Ansible Playbook
        run: |
          ENV="${{ github.event.inputs.environment }}"
          PLAYBOOK="${{ github.event.inputs.playbook }}"
          cd configuration
          # Use the inventory produced by the previous step.
          if [ -f /tmp/inventory.json ]; then
            INVENTORY=/tmp/inventory.json
          else
            INVENTORY=/tmp/inventory.ini
          fi
          # FIX: every allowed `playbook` input maps 1:1 onto
          # playbooks/<name>/main.yml, so the original five identical case
          # branches collapse into one command. The input is a constrained
          # `choice`, so no unexpected value can reach this line.
          # NOTE(review): `environment` is a reserved variable name in Ansible;
          # consider renaming this extra-var together with the playbooks.
          ansible-playbook -i "$INVENTORY" "playbooks/$PLAYBOOK/main.yml" -e "environment=$ENV"

      - name: Generate deployment report
        run: |
          echo "## 部署报告" > deployment-report.md
          echo "" >> deployment-report.md
          echo "**环境**: ${{ github.event.inputs.environment }}" >> deployment-report.md
          echo "**云服务商**: ${{ github.event.inputs.provider }}" >> deployment-report.md
          echo "**Playbook**: ${{ github.event.inputs.playbook }}" >> deployment-report.md
          echo "**时间**: $(date)" >> deployment-report.md
          echo "**状态**: ✅ 部署成功" >> deployment-report.md

      - name: Upload deployment report
        uses: actions/upload-artifact@v4
        with:
          name: deployment-report-${{ github.event.inputs.environment }}-${{ github.event.inputs.provider }}
          path: deployment-report.md
          retention-days: 30

View File

@ -0,0 +1,78 @@
# CI pipeline: syntax-check all playbooks on every relevant push/dispatch,
# then run the bootstrap playbook against the selected environment.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: Application Deployment

on:
  push:
    branches: [ main ]
    paths:
      - 'configuration/**'
      - 'containers/**'
      - '.gitea/workflows/deploy.yml'
  workflow_dispatch:
    inputs:
      environment:
        description: 'Target environment'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - staging
          - production

jobs:
  ansible-check:
    runs-on: ubuntu-latest
    name: Ansible Syntax Check
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install Ansible
        run: |
          pip install ansible ansible-core
          ansible-galaxy collection install community.general
          ansible-galaxy collection install ansible.posix
          ansible-galaxy collection install community.docker
      - name: Ansible syntax check
        run: |
          cd configuration
          for playbook in playbooks/*/*.yml; do
            if [ -f "$playbook" ]; then
              echo "Checking $playbook"
              ansible-playbook --syntax-check "$playbook"
            fi
          done

  deploy:
    runs-on: ubuntu-latest
    name: Deploy Applications
    needs: ansible-check
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install Ansible
        run: |
          pip install ansible ansible-core
          ansible-galaxy collection install community.general
          ansible-galaxy collection install ansible.posix
          ansible-galaxy collection install community.docker
      - name: Deploy applications
        run: |
          cd configuration
          # Push events carry no inputs, so fall back to dev.
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          ansible-playbook -i "inventories/${ENV}/inventory.ini" playbooks/bootstrap/main.yml
        env:
          # FIX: was the bare truthy scalar `False`; quote it so the value is
          # passed as the literal string Ansible expects rather than being
          # coerced by the YAML parser.
          ANSIBLE_HOST_KEY_CHECKING: "False"

View File

@ -0,0 +1,52 @@
# Builds every application image under containers/applications/ and pushes it
# to the configured registry tagged with the commit SHA, then runs a
# placeholder Swarm deployment job.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: Docker Build and Deploy

on:
  push:
    branches: [ main ]
    paths:
      - 'containers/**'
      - 'Dockerfile*'
      - '.gitea/workflows/docker.yml'

jobs:
  build:
    runs-on: ubuntu-latest
    name: Build Docker Images
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ secrets.REGISTRY_URL }}
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
      - name: Build and push images
        run: |
          # One image per application directory that ships a Dockerfile.
          for dockerfile in containers/applications/*/Dockerfile; do
            if [ -f "$dockerfile" ]; then
              app_name=$(basename "$(dirname "$dockerfile")")
              echo "Building $app_name"
              image="${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}"
              docker build -t "$image" -f "$dockerfile" .
              docker push "$image"
            fi
          done

  deploy-swarm:
    runs-on: ubuntu-latest
    name: Deploy to Docker Swarm
    needs: build
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Deploy to Swarm
        run: |
          # Deployment would SSH into a Swarm manager node; not implemented yet.
          echo "Deploy to Swarm placeholder"

View File

@ -0,0 +1,91 @@
# Infrastructure pipeline: validate + format-check all provider configs on
# every change, plan on pull requests, apply on pushes to main.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: Infrastructure CI/CD

on:
  push:
    branches: [ main, develop ]
    paths:
      - 'infrastructure/**'
      - '.gitea/workflows/infrastructure.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'infrastructure/**'

jobs:
  validate:
    runs-on: ubuntu-latest
    name: Validate Infrastructure
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: "1.10.6"
      - name: Validate OpenTofu configurations
        run: |
          for provider_dir in infrastructure/providers/*/; do
            if [ -d "$provider_dir" ]; then
              echo "Validating $provider_dir"
              # Run in a subshell so the working directory is restored for
              # the next iteration (replaces the cd / cd - pairing).
              (
                cd "$provider_dir"
                tofu init -backend=false
                tofu validate
              )
            fi
          done
      - name: Check formatting
        run: |
          tofu fmt -check -recursive infrastructure/
      - name: Security scan
        run: |
          # tfsec or checkov could be wired in here later.
          echo "Security scan placeholder"

  plan:
    runs-on: ubuntu-latest
    name: Plan Infrastructure
    needs: validate
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: "1.10.6"
      - name: Plan infrastructure changes
        run: |
          cd infrastructure/environments/dev
          tofu init
          tofu plan -var-file="terraform.tfvars" -out=tfplan
        env:
          # Cloud-provider credentials would be injected here as well.
          TF_VAR_environment: dev

  apply:
    runs-on: ubuntu-latest
    name: Apply Infrastructure
    needs: validate
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: "1.10.6"
      - name: Apply infrastructure changes
        run: |
          cd infrastructure/environments/dev
          tofu init
          tofu apply -var-file="terraform.tfvars" -auto-approve
        env:
          TF_VAR_environment: dev

View File

@ -0,0 +1,175 @@
# Applies OpenTofu for the selected environment/provider, exports the tofu
# outputs as an Ansible inventory, and chains into the Ansible Deploy workflow.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: OpenTofu Apply

on:
  push:
    branches: [main]
    paths:
      - 'infrastructure/**'
  workflow_dispatch:
    inputs:
      environment:
        description: '部署环境'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - staging
          - production
      provider:
        description: '云服务商'
        required: true
        default: 'oracle-cloud'
        type: choice
        options:
          - oracle-cloud
          - huawei-cloud
          - google-cloud
          - digitalocean
          - aws

env:
  TOFU_VERSION: "1.10.6"

jobs:
  apply:
    runs-on: ubuntu-latest
    # Push events carry no inputs — fall back to dev / oracle-cloud throughout.
    environment: ${{ github.event.inputs.environment || 'dev' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: ${{ env.TOFU_VERSION }}

      - name: Configure credentials
        run: |
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          echo "Setting up credentials for $PROVIDER"
          # FIX: the original used `export VAR=...`, which only lives for this
          # step's shell and never reaches the later tofu steps. Environment
          # variables must be persisted through $GITHUB_ENV instead.
          case "$PROVIDER" in
            "oracle-cloud")
              mkdir -p ~/.oci
              echo "${{ secrets.OCI_PRIVATE_KEY }}" > ~/.oci/oci_api_key.pem
              chmod 600 ~/.oci/oci_api_key.pem
              ;;
            "huawei-cloud")
              echo "HW_ACCESS_KEY=${{ secrets.HW_ACCESS_KEY }}" >> "$GITHUB_ENV"
              echo "HW_SECRET_KEY=${{ secrets.HW_SECRET_KEY }}" >> "$GITHUB_ENV"
              ;;
            "google-cloud")
              echo "${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}" > /tmp/gcp-key.json
              echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-key.json" >> "$GITHUB_ENV"
              ;;
            "digitalocean")
              echo "DIGITALOCEAN_TOKEN=${{ secrets.DO_TOKEN }}" >> "$GITHUB_ENV"
              ;;
            "aws")
              echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> "$GITHUB_ENV"
              echo "AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}" >> "$GITHUB_ENV"
              ;;
          esac

      - name: Create terraform.tfvars
        run: |
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          cd "infrastructure/environments/$ENV"
          # The heredoc body sits at the script's base indentation so the EOF
          # terminator is recognized; HCL ignores leading whitespace anyway.
          cat > terraform.tfvars << EOF
          environment = "$ENV"
          project_name = "mgmt"
          owner = "ben"
          # Oracle Cloud
          oci_config = {
            tenancy_ocid = "${{ secrets.OCI_TENANCY_OCID }}"
            user_ocid = "${{ secrets.OCI_USER_OCID }}"
            fingerprint = "${{ secrets.OCI_FINGERPRINT }}"
            private_key_path = "~/.oci/oci_api_key.pem"
            region = "ap-seoul-1"
          }
          # Huawei Cloud
          huawei_config = {
            access_key = "${{ secrets.HW_ACCESS_KEY }}"
            secret_key = "${{ secrets.HW_SECRET_KEY }}"
            region = "cn-north-4"
          }
          # Google Cloud
          gcp_config = {
            project_id = "${{ secrets.GCP_PROJECT_ID }}"
            region = "asia-northeast3"
            zone = "asia-northeast3-a"
            credentials = "/tmp/gcp-key.json"
          }
          # DigitalOcean
          do_config = {
            token = "${{ secrets.DO_TOKEN }}"
            region = "sgp1"
          }
          # AWS
          aws_config = {
            access_key = "${{ secrets.AWS_ACCESS_KEY_ID }}"
            secret_key = "${{ secrets.AWS_SECRET_ACCESS_KEY }}"
            region = "ap-northeast-1"
          }
          EOF

      - name: OpenTofu Init
        run: |
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          cd "infrastructure/providers/$PROVIDER"
          tofu init

      - name: OpenTofu Plan
        run: |
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          cd "infrastructure/providers/$PROVIDER"
          tofu plan \
            -var-file="../../../environments/$ENV/terraform.tfvars" \
            -out=tfplan

      - name: OpenTofu Apply
        run: |
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          cd "infrastructure/providers/$PROVIDER"
          tofu apply -auto-approve tfplan

      - name: Save State
        run: |
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          cd "infrastructure/providers/$PROVIDER"
          # TODO(review): remote state (S3/GCS/...) is not configured yet —
          # local state is lost when the runner is discarded.
          echo "State saved locally for now"

      - name: Generate Inventory
        run: |
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          PROVIDER="${{ github.event.inputs.provider || 'oracle-cloud' }}"
          cd "infrastructure/providers/$PROVIDER"
          # Feed the tofu outputs to Ansible as a dynamic inventory.
          tofu output -json > "../../../configuration/inventories/$ENV/$PROVIDER-inventory.json"

      - name: Trigger Ansible Deployment
        uses: actions/github-script@v7
        with:
          script: |
            github.rest.actions.createWorkflowDispatch({
              owner: context.repo.owner,
              repo: context.repo.repo,
              workflow_id: 'ansible-deploy.yml',
              ref: 'main',
              inputs: {
                environment: '${{ github.event.inputs.environment || "dev" }}',
                provider: '${{ github.event.inputs.provider || "oracle-cloud" }}'
              }
            });

View File

@ -0,0 +1,148 @@
# Plans OpenTofu for every environment x provider combination on pull
# requests, uploads each plan as an artifact, and comments on the PR.
# NOTE(review): indentation reconstructed from a flattened diff view.
name: OpenTofu Plan

on:
  pull_request:
    branches: [main, develop]
    paths:
      - 'infrastructure/**'
      - '.gitea/workflows/terraform-plan.yml'

env:
  TOFU_VERSION: "1.10.6"

jobs:
  plan:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        environment: [dev, staging, production]
        provider: [oracle-cloud, huawei-cloud, google-cloud, digitalocean, aws]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: ${{ env.TOFU_VERSION }}

      - name: Configure credentials
        run: |
          echo "Setting up credentials for ${{ matrix.provider }}"
          # FIX: the original used `export VAR=...`, which only lives for this
          # step's shell; later tofu steps never saw the credentials. Persist
          # them via $GITHUB_ENV instead.
          case "${{ matrix.provider }}" in
            "oracle-cloud")
              mkdir -p ~/.oci
              echo "${{ secrets.OCI_PRIVATE_KEY }}" > ~/.oci/oci_api_key.pem
              chmod 600 ~/.oci/oci_api_key.pem
              ;;
            "huawei-cloud")
              echo "HW_ACCESS_KEY=${{ secrets.HW_ACCESS_KEY }}" >> "$GITHUB_ENV"
              echo "HW_SECRET_KEY=${{ secrets.HW_SECRET_KEY }}" >> "$GITHUB_ENV"
              ;;
            "google-cloud")
              echo "${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}" > /tmp/gcp-key.json
              echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-key.json" >> "$GITHUB_ENV"
              ;;
            "digitalocean")
              echo "DIGITALOCEAN_TOKEN=${{ secrets.DO_TOKEN }}" >> "$GITHUB_ENV"
              ;;
            "aws")
              echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> "$GITHUB_ENV"
              echo "AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}" >> "$GITHUB_ENV"
              ;;
          esac

      - name: Create terraform.tfvars
        run: |
          cd infrastructure/environments/${{ matrix.environment }}
          # Heredoc body at the script's base indentation so EOF terminates;
          # HCL ignores leading whitespace.
          cat > terraform.tfvars << EOF
          environment = "${{ matrix.environment }}"
          project_name = "mgmt"
          owner = "ben"
          # Oracle Cloud
          oci_config = {
            tenancy_ocid = "${{ secrets.OCI_TENANCY_OCID }}"
            user_ocid = "${{ secrets.OCI_USER_OCID }}"
            fingerprint = "${{ secrets.OCI_FINGERPRINT }}"
            private_key_path = "~/.oci/oci_api_key.pem"
            region = "ap-seoul-1"
          }
          # Huawei Cloud
          huawei_config = {
            access_key = "${{ secrets.HW_ACCESS_KEY }}"
            secret_key = "${{ secrets.HW_SECRET_KEY }}"
            region = "cn-north-4"
          }
          # Google Cloud
          gcp_config = {
            project_id = "${{ secrets.GCP_PROJECT_ID }}"
            region = "asia-northeast3"
            zone = "asia-northeast3-a"
            credentials = "/tmp/gcp-key.json"
          }
          # DigitalOcean
          do_config = {
            token = "${{ secrets.DO_TOKEN }}"
            region = "sgp1"
          }
          # AWS
          aws_config = {
            access_key = "${{ secrets.AWS_ACCESS_KEY_ID }}"
            secret_key = "${{ secrets.AWS_SECRET_ACCESS_KEY }}"
            region = "ap-northeast-1"
          }
          EOF

      - name: OpenTofu Init
        run: |
          cd infrastructure/providers/${{ matrix.provider }}
          tofu init

      - name: OpenTofu Validate
        run: |
          cd infrastructure/providers/${{ matrix.provider }}
          tofu validate

      - name: OpenTofu Plan
        run: |
          cd infrastructure/providers/${{ matrix.provider }}
          tofu plan \
            -var-file="../../../environments/${{ matrix.environment }}/terraform.tfvars" \
            -out=tfplan-${{ matrix.environment }}-${{ matrix.provider }}

      - name: Upload Plan
        uses: actions/upload-artifact@v4
        with:
          name: tfplan-${{ matrix.environment }}-${{ matrix.provider }}
          path: infrastructure/providers/${{ matrix.provider }}/tfplan-${{ matrix.environment }}-${{ matrix.provider }}
          retention-days: 30

      - name: Comment PR
        uses: actions/github-script@v7
        if: github.event_name == 'pull_request'
        with:
          # FIX: removed the unused `fs`/`path` declarations — nothing in the
          # script read them.
          script: |
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: `## OpenTofu Plan Results
            **Environment:** ${{ matrix.environment }}
            **Provider:** ${{ matrix.provider }}
            **Status:** ✅ Plan generated successfully
            Plan artifact uploaded: \`tfplan-${{ matrix.environment }}-${{ matrix.provider }}\`
            Please review the plan before merging.`
            });

81
.gitignore vendored Normal file
View File

@ -0,0 +1,81 @@
# OpenTofu/Terraform
*.tfstate
*.tfstate.*
*.tfvars
!*.tfvars.example
.terraform/
.terraform.lock.hcl
crash.log
crash.*.log

# Ansible
*.retry
.vault_pass
host_vars/*/vault.yml
group_vars/*/vault.yml

# Docker
.env
docker-compose.override.yml

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Logs (single entry — a duplicate `*.log` under the Python section was removed)
*.log
logs/

# Temporary files
tmp/
temp/
.tmp/

# Backup files
backup-*/
*.bak

# Secrets
secrets/
*.pem
*.key
*.crt
!*.example.*

# Node modules (if any)
node_modules/

# Python
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.venv/
pip-log.txt
pip-delete-this-directory.txt
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
# NOTE(review): `.git` is never tracked by git itself; this entry only matters
# if the file doubles as a .dockerignore source. Kept for safety.
.git
.mypy_cache
.pytest_cache
.hypothesis

# Local development
.local/
local-*

88
Makefile Normal file
View File

@ -0,0 +1,88 @@
# Project management Makefile.
# NOTE(review): recipe indentation was lost in the flattened diff view and has
# been reconstructed — every recipe line below begins with a hard TAB, which
# GNU Make requires.
# FIX: .PHONY now lists every declared target (the original omitted
# ansible-check, ansible-deploy, docker-*, backup, monitor, security-scan).
.PHONY: help setup init plan apply destroy clean test lint docs \
	ansible-check ansible-deploy docker-build docker-up docker-down \
	backup monitor security-scan

# Default target: self-documenting help generated from the "## " annotations.
help: ## 显示帮助信息
	@echo "可用的命令:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

# --- Environment setup ---
setup: ## 设置开发环境
	@echo "🚀 设置开发环境..."
	@bash scripts/setup/setup-environment.sh

# --- OpenTofu operations (dev environment) ---
init: ## 初始化 OpenTofu
	@echo "🏗️ 初始化 OpenTofu..."
	@cd infrastructure/environments/dev && tofu init

plan: ## 生成执行计划
	@echo "📋 生成执行计划..."
	@cd infrastructure/environments/dev && tofu plan -var-file="terraform.tfvars"

apply: ## 应用基础设施变更
	@echo "🚀 应用基础设施变更..."
	@cd infrastructure/environments/dev && tofu apply -var-file="terraform.tfvars"

destroy: ## 销毁基础设施
	@echo "💥 销毁基础设施..."
	@cd infrastructure/environments/dev && tofu destroy -var-file="terraform.tfvars"

# --- Ansible operations ---
ansible-check: ## 检查 Ansible 配置
	@echo "🔍 检查 Ansible 配置..."
	@cd configuration && ansible-playbook --syntax-check playbooks/bootstrap/main.yml

ansible-deploy: ## 部署应用
	@echo "📦 部署应用..."
	@cd configuration && ansible-playbook -i inventories/production/inventory.ini playbooks/bootstrap/main.yml

# --- Docker operations ---
docker-build: ## 构建 Docker 镜像
	@echo "🐳 构建 Docker 镜像..."
	@docker-compose -f containers/compose/development/docker-compose.yml build

docker-up: ## 启动开发环境
	@echo "🚀 启动开发环境..."
	@docker-compose -f containers/compose/development/docker-compose.yml up -d

docker-down: ## 停止开发环境
	@echo "🛑 停止开发环境..."
	@docker-compose -f containers/compose/development/docker-compose.yml down

# --- Tests and linting ---
test: ## 运行测试
	@echo "🧪 运行测试..."
	@bash scripts/utilities/run-tests.sh

lint: ## 代码检查
	@echo "🔍 代码检查..."
	@bash scripts/utilities/lint.sh

# --- Documentation ---
docs: ## 生成文档
	@echo "📚 生成文档..."
	@bash scripts/utilities/generate-docs.sh

# --- Cleanup ---
# WARNING(review): this deletes *.tfstate* recursively and will destroy any
# local OpenTofu state — confirm remote state is configured before running.
clean: ## 清理临时文件
	@echo "🧹 清理临时文件..."
	@find . -name "*.tfstate*" -delete
	@find . -name ".terraform" -type d -exec rm -rf {} + 2>/dev/null || true
	@docker system prune -f

# --- Operations ---
backup: ## 创建备份
	@echo "💾 创建备份..."
	@bash scripts/utilities/backup.sh

monitor: ## 启动监控
	@echo "📊 启动监控..."
	@docker-compose -f containers/compose/production/monitoring.yml up -d

security-scan: ## 安全扫描
	@echo "🔒 安全扫描..."
	@bash scripts/utilities/security-scan.sh

View File

@ -1,253 +0,0 @@
# Traefik + Docker Swarm 集成
## 📋 概述
本项目实现了 Traefik 与 Docker Swarm 的完整集成,提供统一的入口点管理所有 Swarm 服务。
## 🏗️ 架构设计
```
Internet
Traefik (Load Balancer)
Docker Swarm Services
├── Web App (app.local)
├── API Service (api.local)
├── Monitor Service (monitor.local)
└── Other Services...
```
## 📁 文件结构
```
/root/mgmt/
├── traefik-swarm-stack.yml # Traefik 主服务配置
├── demo-services-stack.yml # 示例服务配置
├── monitoring-stack.yml # 监控服务配置
├── swarm-traefik-manager.sh # 管理脚本
└── README-traefik-swarm.md # 说明文档
```
## 🚀 快速开始
### 1. 初始化环境
```bash
# 确保 Docker Swarm 已激活
docker swarm init
# 初始化 Traefik 环境
./swarm-traefik-manager.sh init
```
### 2. 部署所有服务
```bash
# 一键部署所有服务
./swarm-traefik-manager.sh deploy-all
# 或分步部署
./swarm-traefik-manager.sh deploy # 仅部署 Traefik
./swarm-traefik-manager.sh deploy-demo # 部署示例服务
./swarm-traefik-manager.sh deploy-monitoring # 部署监控服务
```
### 3. 更新 hosts 文件
```bash
# 自动更新 hosts 文件
./swarm-traefik-manager.sh update-hosts
# 或手动添加到 /etc/hosts
echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" >> /etc/hosts
```
## 🌐 访问地址
| 服务 | 地址 | 说明 |
|------|------|------|
| Traefik Dashboard | http://traefik.local:8080 | 管理界面 |
| Web App | http://app.local | 示例 Web 应用 |
| API Service | http://api.local | 示例 API 服务 |
| Monitor Service | http://monitor.local | 监控服务 |
| Prometheus | http://prometheus.local | 指标收集 |
| Grafana | http://grafana.local | 可视化面板 |
## 🛠️ 管理命令
### 查看服务状态
```bash
./swarm-traefik-manager.sh status
```
### 查看服务日志
```bash
./swarm-traefik-manager.sh logs traefik_traefik
./swarm-traefik-manager.sh logs demo_webapp
```
### 扩缩容服务
```bash
# 扩容 webapp 到 3 个副本
./swarm-traefik-manager.sh scale demo webapp 3
# 扩容 API 服务到 2 个副本
./swarm-traefik-manager.sh scale demo api 2
```
### 清理环境
```bash
./swarm-traefik-manager.sh cleanup
```
## 📊 监控配置
### Prometheus 指标
- Traefik 指标: http://traefik:8080/metrics
- Node Exporter: 系统指标
- cAdvisor: 容器指标
### Grafana 配置
- 默认用户: admin
- 默认密码: admin123
- 数据源: Prometheus (http://prometheus:9090)
## 🔧 服务配置
### 为新服务添加 Traefik 路由
在 Docker Compose 文件中添加以下标签:
```yaml
services:
your-service:
image: your-image
networks:
- traefik-public
deploy:
labels:
- traefik.enable=true
- traefik.http.routers.your-service.rule=Host(`your-domain.local`)
- traefik.http.routers.your-service.entrypoints=web
- traefik.http.services.your-service.loadbalancer.server.port=80
```
### 高级路由配置
```yaml
# 路径前缀路由
- traefik.http.routers.api-path.rule=Host(`app.local`) && PathPrefix(`/api`)
# HTTPS 重定向
- traefik.http.routers.your-service.entrypoints=websecure
- traefik.http.routers.your-service.tls.certresolver=letsencrypt
# 中间件配置
- traefik.http.routers.your-service.middlewares=auth
- traefik.http.middlewares.auth.basicauth.users=user:password
```
## 🔒 安全配置
### 基本认证
```yaml
labels:
- traefik.http.middlewares.auth.basicauth.users=admin:$$2y$$10$$...
- traefik.http.routers.service.middlewares=auth
```
### HTTPS 配置
```yaml
labels:
- traefik.http.routers.service.tls.certresolver=letsencrypt
- traefik.http.routers.service.entrypoints=websecure
```
## 🐛 故障排除
### 常见问题
1. **服务无法访问**
```bash
# 检查服务状态
docker stack services traefik
# 检查网络连接
docker network ls | grep traefik-public
```
2. **路由不生效**
```bash
# 查看 Traefik 日志
./swarm-traefik-manager.sh logs traefik_traefik
# 检查服务标签
docker service inspect demo_webapp
```
3. **DNS 解析问题**
```bash
# 检查 hosts 文件
cat /etc/hosts | grep local
# 更新 hosts 文件
./swarm-traefik-manager.sh update-hosts
```
### 调试命令
```bash
# 查看所有 Swarm 服务
docker service ls
# 查看特定服务详情
docker service inspect traefik_traefik
# 查看服务任务
docker service ps traefik_traefik
# 进入容器调试
docker exec -it $(docker ps -q -f name=traefik) sh
```
## 📈 性能优化
### 负载均衡配置
```yaml
labels:
- traefik.http.services.service.loadbalancer.sticky.cookie=true
- traefik.http.services.service.loadbalancer.healthcheck.path=/health
```
### 缓存配置
```yaml
labels:
- traefik.http.middlewares.cache.headers.customrequestheaders.Cache-Control=max-age=3600
```
## 🔄 备份与恢复
### 备份配置
```bash
# 备份 Docker 配置
docker config ls
docker config inspect config_name
# 备份 Swarm 状态
docker node ls
docker service ls
```
### 恢复服务
```bash
# 重新部署服务
./swarm-traefik-manager.sh deploy-all
```
## 📚 参考资料
- [Traefik 官方文档](https://doc.traefik.io/traefik/)
- [Docker Swarm 文档](https://docs.docker.com/engine/swarm/)
- [Prometheus 配置](https://prometheus.io/docs/prometheus/latest/configuration/configuration/)
- [Grafana 文档](https://grafana.com/docs/)

463
README.md
View File

@ -1,364 +1,217 @@
# 🛠️ 服务器管理自动化项目
# 🏗️ 基础设施管理项目
这是一个基于 Ansible 的服务器管理自动化项目,用于管理多台服务器的系统更新、配置和维护。
这是一个现代化的多云基础设施管理平台,集成 OpenTofu、Ansible、Docker Swarm 和 Gitea CI/CD。
## 🎯 项目特性
- **🌩️ 多云支持**: Oracle Cloud, 华为云, Google Cloud, AWS, DigitalOcean
- **🏗️ 基础设施即代码**: 使用 OpenTofu 管理云资源
- **⚙️ 配置管理**: 使用 Ansible 自动化配置和部署
- **🐳 容器编排**: Docker Swarm 集群管理
- **🔄 CI/CD**: Gitea Actions 自动化流水线
- **📊 监控**: Prometheus + Grafana 监控体系
- **🔐 安全**: 多层安全防护和合规性
## 📁 项目结构
```
mgmt/
├── ansible/
│ ├── inventory.ini # 服务器清单
│ ├── ansible.cfg # Ansible 配置
│ ├── system-update.yml # 系统更新 playbook
│ ├── cloud-providers-update.yml # 云服务商更新 playbook
│ ├── system-cleanup.yml # 系统清理和维护
│ ├── service-health-check.yml # 服务健康检查
│ ├── security-hardening.yml # 安全加固和备份
│ ├── docker-management.yml # Docker 容器管理
│ ├── network-connectivity.yml # 网络连通性检查
│ ├── certificate-management.yml # SSL 证书管理
│ ├── ops-toolkit.yml # 运维工具包
│ ├── cron-setup.yml # 定时任务配置
│ └── run.sh # 执行脚本
├── scripts/
│ └── ops-manager.sh # 运维管理脚本
├── config.json # Semaphore 配置
├── keys-info.md # SSH 密钥信息
├── semaphore-setup-guide.md # Semaphore 设置指南
└── README.md # 项目说明
├── .gitea/workflows/ # CI/CD 工作流
├── infrastructure/ # OpenTofu 基础设施代码
│ ├── environments/ # 环境配置 (dev/staging/prod)
│ ├── modules/ # 可复用模块
│ ├── providers/ # 云服务商配置
│ └── shared/ # 共享配置
├── configuration/ # Ansible 配置管理
│ ├── inventories/ # 主机清单
│ ├── playbooks/ # 剧本
│ ├── roles/ # 角色
│ └── group_vars/ # 组变量
├── containers/ # 容器化应用
│ ├── applications/ # 应用容器
│ ├── infrastructure/ # 基础设施容器
│ └── compose/ # Docker Compose 文件
├── monitoring/ # 监控配置
├── scripts/ # 自动化脚本
├── docs/ # 文档
└── Makefile # 项目管理命令
```
## 🚀 快速开始
### 1. 环境准备
确保已安装 Ansible
```bash
# Ubuntu/Debian
sudo apt update && sudo apt install ansible
# 克隆项目
git clone <repository-url>
cd mgmt
# CentOS/RHEL
sudo yum install ansible
# 设置开发环境
make setup
```
### 2. 配置服务器清单
编辑 `ansible/inventory.ini` 文件,服务器已按功能分组:
- **lxc**: Debian/Ubuntu 容器
- **alpine**: Alpine Linux 容器
- **proxmox**: Proxmox VE 物理机
- **armbian**: ARM 设备
- **hcp**: HCP 云服务器
- **feiniu**: 飞牛服务器
- **germany**: 德国服务器
- 以及各种云服务商组
### 3. 使用运维管理脚本
### 2. 配置云服务商
```bash
# 给脚本执行权限
chmod +x scripts/ops-manager.sh
# 复制配置模板
cp infrastructure/environments/dev/terraform.tfvars.example infrastructure/environments/dev/terraform.tfvars
# 交互式模式
./scripts/ops-manager.sh
# 直接执行
./scripts/ops-manager.sh update lxc # 更新 LXC 容器
./scripts/ops-manager.sh cleanup all # 清理所有服务器
./scripts/ops-manager.sh health proxmox # 检查 Proxmox 健康状态
./scripts/ops-manager.sh docker lxc # 管理 LXC 中的 Docker
./scripts/ops-manager.sh toolkit germany # 运行德国服务器工具包
# 检查模式(不做实际更改)
./scripts/ops-manager.sh update all --check
# 编辑配置文件,填入你的云服务商凭据
vim infrastructure/environments/dev/terraform.tfvars
```
## 🛠️ 可用的运维脚本
### 3. 初始化基础设施
### 核心功能
- **system-update.yml**: 系统包更新
- **system-cleanup.yml**: 磁盘清理、日志清理、缓存清理
- **service-health-check.yml**: 服务状态监控
- **security-hardening.yml**: 安全加固和备份
### 专业工具
- **docker-management.yml**: Docker 容器和镜像管理
- **network-connectivity.yml**: 网络连通性和性能测试
- **certificate-management.yml**: SSL 证书监控和管理
- **ops-toolkit.yml**: 统一运维仪表板
### 自动化
- **cron-setup.yml**: 配置定时任务自动化
- **ops-manager.sh**: 便捷的命令行管理工具
## 🤖 自动化定时任务
设置自动化定时任务:
```bash
ansible-playbook -i ansible/inventory.ini ansible/cron-setup.yml
# 初始化 OpenTofu
make init
# 查看执行计划
make plan
# 应用基础设施变更
make apply
```
配置的定时任务:
- **每日 08:00**: 系统健康检查
- **每日 01:00**: Docker 清理 (LXC 组)
- **每周日 02:00**: 系统清理
- **每周一 04:30**: 证书检查
- **每周二 06:00**: 网络连通性检查
- **每月1日 03:00**: 安全检查
### 4. 部署应用
查看自动化状态:
```bash
automation-status
# 检查 Ansible 配置
make ansible-check
# 部署应用
make ansible-deploy
```
## 📊 使用 Semaphore Web UI
## 🛠️ 常用命令
参考 `semaphore-setup-guide.md` 文件设置 Semaphore Web 界面管理。
| 命令 | 描述 |
|------|------|
| `make help` | 显示所有可用命令 |
| `make setup` | 设置开发环境 |
| `make init` | 初始化 OpenTofu |
| `make plan` | 生成基础设施执行计划 |
| `make apply` | 应用基础设施变更 |
| `make ansible-deploy` | 部署应用 |
| `make docker-up` | 启动开发环境 |
| `make test` | 运行测试 |
| `make clean` | 清理临时文件 |
推送到 Gitea 后,Semaphore 可以:
- ✅ 直接识别 Ansible 项目结构
- ✅ 使用现有的 inventory 分组
- ✅ 运行预定义的 playbooks
- ✅ 支持按组选择性更新
- ✅ 提供 Web 界面管理和监控
## 🌩️ 支持的云服务商
## 💡 最佳实践
### Oracle Cloud Infrastructure (OCI)
- ✅ 计算实例
- ✅ 网络配置 (VCN, 子网, 安全组)
- ✅ 存储 (块存储, 对象存储)
- ✅ 负载均衡器
### 日常运维
```bash
# 每日快速检查
./scripts/ops-manager.sh toolkit all
### 华为云
- ✅ 弹性云服务器 (ECS)
- ✅ 虚拟私有云 (VPC)
- ✅ 弹性负载均衡 (ELB)
- ✅ 云硬盘 (EVS)
# 每周系统维护
./scripts/ops-manager.sh cleanup all
./scripts/ops-manager.sh health all
### Google Cloud Platform
- ✅ Compute Engine
- ✅ VPC 网络
- ✅ Cloud Load Balancing
- ✅ Persistent Disk
# 每月安全检查
./scripts/ops-manager.sh security all --check
./scripts/ops-manager.sh cert all
```
### Amazon Web Services
- ✅ EC2 实例
- ✅ VPC 网络
- ✅ Application Load Balancer
- ✅ EBS 存储
### 紧急情况
```bash
# 紧急安全更新
./scripts/ops-manager.sh update all
### DigitalOcean
- ✅ Droplets
- ✅ VPC 网络
- ✅ Load Balancers
- ✅ Block Storage
# 网络问题诊断
./scripts/ops-manager.sh network all
## 🔄 CI/CD 流程
# 服务状态检查
./scripts/ops-manager.sh health all
```
### 基础设施部署流程
1. **代码提交** → 触发 Gitea Actions
2. **OpenTofu Plan** → 生成执行计划
3. **人工审核** → 确认变更
4. **OpenTofu Apply** → 应用基础设施变更
5. **Ansible 部署** → 配置和部署应用
### 容器管理
```bash
# LXC 容器管理
./scripts/ops-manager.sh docker lxc
./scripts/ops-manager.sh cleanup lxc
### 应用部署流程
1. **应用代码更新** → 构建 Docker 镜像
2. **镜像推送** → 推送到镜像仓库
3. **Compose 更新** → 更新服务定义
4. **Swarm 部署** → 滚动更新服务
5. **健康检查** → 验证部署状态
# Alpine 容器更新
./scripts/ops-manager.sh update alpine
```
## 📊 监控和可观测性
## 🔧 高级用法
### 监控组件
- **Prometheus**: 指标收集和存储
- **Grafana**: 可视化仪表板
- **AlertManager**: 告警管理
- **Node Exporter**: 系统指标导出
### 按组管理
```bash
# 物理机维护
./scripts/ops-manager.sh cleanup proxmox
./scripts/ops-manager.sh health armbian
### 日志管理
- **ELK Stack**: Elasticsearch + Logstash + Kibana
- **Fluentd**: 日志收集和转发
- **结构化日志**: JSON 格式标准化
# 云服务商管理
./scripts/ops-manager.sh update huawei
./scripts/ops-manager.sh network google
## 🔐 安全最佳实践
# 容器管理
./scripts/ops-manager.sh docker lxc
./scripts/ops-manager.sh update alpine
```
### 基础设施安全
- **网络隔离**: VPC, 安全组, 防火墙
- **访问控制**: IAM 角色和策略
- **数据加密**: 传输和静态加密
- **密钥管理**: 云服务商密钥管理服务
### 检查模式
```bash
# 检查更新但不执行
./scripts/ops-manager.sh update all --check
### 应用安全
- **容器安全**: 镜像扫描, 最小权限
- **网络安全**: 服务网格, TLS 终止
- **秘密管理**: Docker Secrets, Ansible Vault
- **安全审计**: 日志监控和审计
# 详细输出
./scripts/ops-manager.sh health all --verbose
## 🧪 测试策略
# 仅显示命令
./scripts/ops-manager.sh cleanup all --dry-run
```
### 基础设施测试
- **语法检查**: OpenTofu validate
- **安全扫描**: Checkov, tfsec
- **合规检查**: OPA (Open Policy Agent)
## 📋 服务器组说明
### 应用测试
- **单元测试**: 应用代码测试
- **集成测试**: 服务间集成测试
- **端到端测试**: 完整流程测试
- **lxc**: Debian/Ubuntu 容器 (warden, gitea, mysql, postgresql, influxdb)
- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb)
- **proxmox**: Proxmox VE 物理机 (pve, xgp, nuc12)
- **armbian**: ARM 设备 (onecloud1)
- **hcp**: HCP 云服务器 (hcp1, hcp2)
- **feiniu**: 飞牛服务器 (snail)
- **germany**: 德国服务器 (de)
- **dev**: 开发服务器 (dev1, dev2)
- **oci_kr/oci_us**: Oracle 云服务器
- **huawei/google/aws**: 各云服务商
## 📚 文档
## 📝 注意事项
- [架构概览](docs/architecture/project-overview.md)
- [部署指南](docs/runbooks/deployment-guide.md)
- [运维手册](docs/runbooks/operations-guide.md)
- [故障排除](docs/runbooks/troubleshooting.md)
- [API 文档](docs/api/README.md)
- 确保 SSH 密钥已正确配置
- LXC 组更新需要顺序执行,避免同时更新
- Alpine 容器使用 `apk` 包管理器
- 建议先在测试环境验证
- 定期备份重要数据
- 监控自动化日志:`tail -f /var/log/daily-health-check.log`
## 🤝 贡献指南
## 🆘 故障排除
1. Fork 项目
2. 创建特性分支 (`git checkout -b feature/amazing-feature`)
3. 提交变更 (`git commit -m 'Add amazing feature'`)
4. 推送到分支 (`git push origin feature/amazing-feature`)
5. 创建 Pull Request
### 连接问题
```bash
# 测试连接
ansible all -i ansible/inventory.ini -m ping
## 📄 许可证
# 检查特定组
ansible lxc -i ansible/inventory.ini -m ping -e "ansible_ssh_pass=313131"
```
本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情。
### 权限问题
```bash
# 检查 sudo 权限
ansible all -i ansible/inventory.ini -m shell -a "whoami" --become
```
## 🆘 支持
### 日志查看
```bash
# 查看自动化日志
ls -la /var/log/*-*.log
tail -f /var/log/daily-health-check.log
```
如果你遇到问题或有疑问:
## 🎯 运维脚本使用示例
1. 查看 [文档](docs/)
2. 搜索 [Issues](../../issues)
3. 创建新的 [Issue](../../issues/new)
### 系统更新
```bash
# 更新所有服务器
./scripts/ops-manager.sh update all
## 🎉 致谢
# 更新特定组
./scripts/ops-manager.sh update lxc
./scripts/ops-manager.sh update alpine
./scripts/ops-manager.sh update proxmox
```
### 系统清理
```bash
# 清理所有服务器
./scripts/ops-manager.sh cleanup all
# 清理特定组
./scripts/ops-manager.sh cleanup lxc
```
### 健康检查
```bash
# 检查所有服务器健康状态
./scripts/ops-manager.sh health all
# 检查特定组
./scripts/ops-manager.sh health proxmox
```
### Docker 管理
```bash
# 管理 LXC 组的 Docker
./scripts/ops-manager.sh docker lxc
# 检查 Docker 状态
./scripts/ops-manager.sh docker all
```
### 网络诊断
```bash
# 检查网络连通性
./scripts/ops-manager.sh network all
# 检查特定组网络
./scripts/ops-manager.sh network germany
```
### 证书管理
```bash
# 检查所有证书
./scripts/ops-manager.sh cert all
# 检查特定组证书
./scripts/ops-manager.sh cert proxmox
```
### 安全检查
```bash
# 安全检查(检查模式)
./scripts/ops-manager.sh security all --check
# 执行安全加固
./scripts/ops-manager.sh security all
```
### 运维工具包
```bash
# 运行完整的运维工具包
./scripts/ops-manager.sh toolkit all
# 检查特定服务器
./scripts/ops-manager.sh toolkit germany
```
## 📈 监控和日志
### 自动化监控
```bash
# 查看自动化状态
automation-status
# 查看定时任务
crontab -l
# 查看最近的健康检查
tail -20 /var/log/daily-health-check.log
```
### 手动日志查看
```bash
# 查看所有自动化日志
ls -la /var/log/*-*.log
# 实时监控日志
tail -f /var/log/daily-health-check.log
# 查看清理日志
cat /var/log/weekly-cleanup.log
```
## 🔄 定期维护建议
### 每日
- 运行 `./scripts/ops-manager.sh toolkit all` 快速检查
- 查看 `automation-status` 了解自动化状态
### 每周
- 运行 `./scripts/ops-manager.sh cleanup all` 系统清理
- 运行 `./scripts/ops-manager.sh health all` 健康检查
- 检查 `/var/log/` 下的日志文件
### 每月
- 运行 `./scripts/ops-manager.sh security all --check` 安全检查
- 运行 `./scripts/ops-manager.sh cert all` 证书检查
- 运行 `./scripts/ops-manager.sh network all` 网络检查
### 按需
- 系统更新:`./scripts/ops-manager.sh update [group]`
- Docker 清理:`./scripts/ops-manager.sh docker lxc`
- 网络诊断:`./scripts/ops-manager.sh network all`
## 许可证
MIT License
感谢所有为这个项目做出贡献的开发者和社区成员!

View File

@ -1,168 +0,0 @@
# Ansible Playbooks 管理文档
## 📁 目录结构
```
ansible/
├── playbooks/ # 主要 playbooks 目录
│ ├── 01-system/ # 系统管理类
│ ├── 02-security/ # 安全管理类
│ ├── 03-services/ # 服务管理类
│ ├── 04-monitoring/ # 监控检查类
│ ├── 05-cloud/ # 云服务商专用
│ └── 99-tools/ # 工具和集成类
├── inventory.ini # 主机清单
├── ansible.cfg # Ansible 配置
├── run.sh # 原始运行脚本
└── run-playbook.sh # 新的分类运行脚本
```
## 🎯 分类说明
### 01-system (系统管理)
负责基础系统的维护和管理任务。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `system-update.yml` | 系统包更新和升级 | 所有 Linux 主机 |
| `system-cleanup.yml` | 系统清理和维护 | 所有主机 |
| `cron-setup.yml` | 定时任务配置 | 需要定时任务的主机 |
### 02-security (安全管理)
处理安全相关的配置和监控。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `security-hardening.yml` | SSH 安全加固和备份 | 所有主机 |
| `certificate-management.yml` | SSL 证书管理和监控 | Web 服务器和 SSL 服务 |
### 03-services (服务管理)
管理各种服务和容器。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `docker-management.yml` | Docker 容器管理 | Docker 主机 |
| `docker-status-check.yml` | Docker 状态检查 | Docker Swarm 节点 |
### 04-monitoring (监控检查)
系统和服务的健康检查。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `service-health-check.yml` | 服务健康状态监控 | 所有主机 |
| `network-connectivity.yml` | 网络连接性能检查 | 所有主机 |
### 05-cloud (云服务商专用)
针对特定云服务商的优化脚本。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `cloud-providers-update.yml` | 云服务商系统更新 | huawei, google, digitalocean, aws |
### 99-tools (工具和集成)
运维工具和集成脚本。
| Playbook | 功能描述 | 适用主机 |
|----------|----------|----------|
| `ops-toolkit.yml` | 统一运维管理面板 | 所有主机 |
## 🚀 使用方法
### 1. 使用新的分类运行脚本
```bash
# 查看帮助
./run-playbook.sh help
# 列出所有可用的 playbooks
./run-playbook.sh list
# 运行特定分类的 playbook
./run-playbook.sh 01-system system-update.yml all
./run-playbook.sh 03-services docker-status-check.yml hcp
./run-playbook.sh 04-monitoring network-connectivity.yml dev1
```
### 2. 直接使用 ansible-playbook
```bash
# 运行系统更新
ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml
# 检查 Docker 状态
ansible-playbook -i inventory.ini playbooks/03-services/docker-status-check.yml --limit hcp
# 网络连接检查
ansible-playbook -i inventory.ini playbooks/04-monitoring/network-connectivity.yml --limit dev1
```
## 📋 主机组说明
根据 `inventory.ini` 配置的主机组:
- **dev**: 开发环境 (dev1, dev2)
- **hcp**: HCP 节点 (hcp1, hcp2) - Docker Swarm 集群
- **oci_kr**: Oracle Cloud Korea (ch2, ch3, master)
- **oci_us**: Oracle Cloud US (ash1d, ash2e, ash3c)
- **huawei**: 华为云 (hcs)
- **google**: Google Cloud (benwork)
- **digitalocean**: DigitalOcean (syd)
- **aws**: Amazon Web Services (awsirish)
- **proxmox**: Proxmox 虚拟化 (pve, xgp, nuc12)
- **lxc**: LXC 容器 (warden, gitea, influxdb, mysql, postgresql)
- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb)
- **vm**: 虚拟机 (kali)
## 🔧 配置文件
### ansible.cfg
已更新支持新的目录结构,包含:
- 新的 playbooks 路径配置
- SSH 连接优化
- 动态 inventory 支持
### inventory.ini
包含所有主机的连接信息和分组配置。
## 📝 最佳实践
1. **按功能分类运行**: 根据需要选择合适的分类目录
2. **使用主机组**: 利用 inventory 中的主机组进行批量操作
3. **测试先行**: 在开发环境先测试,再应用到生产环境
4. **日志记录**: 重要操作建议记录执行日志
5. **定期维护**: 定期运行系统清理和更新脚本
## 🆘 故障排除
### 常见问题
1. **SSH 连接失败**
- 检查主机是否可达
- 验证 SSH 密钥或密码
- 确认用户权限
2. **Playbook 执行失败**
- 检查目标主机的系统类型
- 验证所需的软件包是否安装
- 查看详细错误日志
3. **权限问题**
- 确认 `ansible_become` 配置正确
- 验证 sudo 权限
### 调试命令
```bash
# 测试连接
ansible all -i inventory.ini -m ping
# 详细输出
ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml -vvv
# 检查语法
ansible-playbook --syntax-check playbooks/01-system/system-update.yml
```
---
*最后更新: 2025-09-20*

View File

@ -1,109 +0,0 @@
#!/bin/bash

# Categorised Ansible playbook runner.
# Usage: ./run-playbook.sh [category] [playbook] [hosts]
#        ./run-playbook.sh list            # list all available playbooks
#        ./run-playbook.sh help            # show usage

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLAYBOOKS_DIR="$SCRIPT_DIR/playbooks"
# Bug fix: resolve the inventory next to the script instead of relying on the
# caller's working directory ("inventory.ini" previously only worked when the
# script was invoked from its own directory).
INVENTORY_FILE="$SCRIPT_DIR/inventory.ini"

# ANSI colour codes for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print usage information and the list of known categories.
show_help() {
    echo -e "${BLUE}Ansible Playbooks 分类运行脚本${NC}"
    echo ""
    echo "使用方法:"
    echo "  $0 [category] [playbook] [hosts]"
    echo ""
    echo "可用分类:"
    echo -e "  ${GREEN}01-system${NC}     - 系统管理 (更新、清理、定时任务)"
    echo -e "  ${GREEN}02-security${NC}   - 安全管理 (安全加固、证书管理)"
    echo -e "  ${GREEN}03-services${NC}   - 服务管理 (Docker、容器服务)"
    echo -e "  ${GREEN}04-monitoring${NC} - 监控检查 (健康检查、网络连接)"
    echo -e "  ${GREEN}05-cloud${NC}      - 云服务商专用"
    echo -e "  ${GREEN}99-tools${NC}      - 工具和集成"
    echo ""
    echo "示例:"
    echo "  $0 list                                          # 列出所有可用的 playbooks"
    echo "  $0 01-system system-update.yml all               # 在所有主机上运行系统更新"
    echo "  $0 03-services docker-status-check.yml hcp       # 在 hcp 组上检查 Docker 状态"
    echo "  $0 04-monitoring network-connectivity.yml dev1   # 在 dev1 主机上检查网络连接"
}

# List every playbook, grouped by category directory.
# Uses shell globs instead of parsing `ls` output, which is fragile with
# unusual filenames; globs also sort lexicographically by default.
list_playbooks() {
    echo -e "${BLUE}可用的 Ansible Playbooks:${NC}"
    echo ""
    local category playbook
    for category in "$PLAYBOOKS_DIR"/*/; do
        [ -d "$category" ] || continue
        echo -e "${GREEN}📁 $(basename "$category")${NC}"
        for playbook in "$category"*.yml; do
            if [ -f "$playbook" ]; then
                echo -e "  └── ${YELLOW}$(basename "$playbook")${NC}"
            fi
        done
        echo ""
    done
}

# Run a single playbook: run_playbook <category> <playbook> <hosts>
# Exits non-zero when the playbook file does not exist.
run_playbook() {
    local category="$1"
    local playbook="$2"
    local hosts="$3"
    local playbook_path="$PLAYBOOKS_DIR/$category/$playbook"

    if [ ! -f "$playbook_path" ]; then
        echo -e "${RED}错误: Playbook 文件不存在: $playbook_path${NC}"
        exit 1
    fi

    echo -e "${GREEN}运行 Playbook:${NC} $category/$playbook"
    echo -e "${GREEN}目标主机:${NC} $hosts"
    echo ""

    # Absolute inventory path so the runner works from any cwd (see above).
    ansible-playbook -i "$INVENTORY_FILE" "$playbook_path" --limit "$hosts"
}

# Argument dispatch: help / list / <category> <playbook> <hosts>
case "${1:-}" in
    "help"|"-h"|"--help"|"")
        show_help
        ;;
    "list"|"ls")
        list_playbooks
        ;;
    *)
        if [ $# -lt 3 ]; then
            echo -e "${RED}错误: 参数不足${NC}"
            echo ""
            show_help
            exit 1
        fi
        category="$1"
        playbook="$2"
        hosts="$3"
        if [ ! -d "$PLAYBOOKS_DIR/$category" ]; then
            echo -e "${RED}错误: 分类目录不存在: $category${NC}"
            echo ""
            list_playbooks
            exit 1
        fi
        run_playbook "$category" "$playbook" "$hosts"
        ;;
esac

View File

@ -1,123 +0,0 @@
#!/bin/bash

# Ansible Playbook Runner Script
# Usage: ./run.sh -dev (or any group name)
#
# Runs the fixed system-update.yml playbook against a single inventory group
# selected by a -<group> flag (e.g. `./run.sh -dev`, `./run.sh -all`).
# Exit status is the ansible-playbook exit code on success/failure, or 1 for
# argument / file / group-lookup errors.
#
# NOTE(review): PLAYBOOK_FILE assumes system-update.yml lives next to this
# script; other docs in this repo place it under playbooks/01-system/ — verify
# which layout is current before relying on this runner.

# Set script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="$SCRIPT_DIR/inventory.ini"
PLAYBOOK_FILE="$SCRIPT_DIR/system-update.yml"

# Function to display usage
# Also prints the inventory's group names by scraping [section] headers
# (excluding [group:vars] sections) from INVENTORY_FILE.
show_usage() {
    echo "Usage: $0 -<group_name>"
    echo ""
    echo "Examples:"
    echo "  $0 -dev     # Run on dev group (dev1, dev2)"
    echo "  $0 -prod    # Run on prod group"
    echo "  $0 -all     # Run on all hosts"
    echo ""
    echo "Available groups in inventory:"
    grep '^\[' "$INVENTORY_FILE" | grep -v ':vars' | sed 's/\[//g' | sed 's/\]//g' | sort
}

# Function to check if group exists in inventory
# Returns 0 if the group is "all" (always valid in Ansible) or appears as a
# [group] section header in the inventory; 1 otherwise.
check_group_exists() {
    local group_name="$1"
    if [ "$group_name" = "all" ]; then
        return 0
    fi
    if grep -q "^\[$group_name\]" "$INVENTORY_FILE"; then
        return 0
    else
        return 1
    fi
}

# Function to run ansible playbook
# Prints a banner, runs the playbook limited to the group, and propagates
# ansible-playbook's exit code to the caller.
run_playbook() {
    local group_name="$1"
    echo "========================================="
    echo "Running Ansible Playbook on group: $group_name"
    echo "========================================="
    echo "Inventory: $INVENTORY_FILE"
    echo "Playbook: $PLAYBOOK_FILE"
    echo "Target: $group_name"
    echo "========================================="
    echo ""
    # Set environment variables for better output
    # LANG=C gives stable, untranslated tool output; host-key checking is
    # disabled so first-time connections don't block the run.
    export LANG=C
    export ANSIBLE_HOST_KEY_CHECKING=False
    # Run the playbook
    cd "$SCRIPT_DIR"
    ansible-playbook -i "$INVENTORY_FILE" "$PLAYBOOK_FILE" --limit "$group_name" -v
    # $? here is ansible-playbook's exit code (the assignment itself does not
    # reset it before it is read).
    local exit_code=$?
    echo ""
    echo "========================================="
    if [ $exit_code -eq 0 ]; then
        echo "✅ Playbook execution completed successfully!"
    else
        echo "❌ Playbook execution failed with exit code: $exit_code"
    fi
    echo "========================================="
    return $exit_code
}

# Main script logic
# Validates arguments (must match -<group>), checks both files exist and the
# group is known, then delegates to run_playbook. As the last command in
# main, run_playbook's return value becomes the script's exit status.
main() {
    # Check if argument is provided
    if [ $# -eq 0 ]; then
        echo "❌ Error: No group specified"
        echo ""
        show_usage
        exit 1
    fi
    # Parse argument
    # BASH_REMATCH[1] captures the group name after the leading dash.
    local arg="$1"
    if [[ "$arg" =~ ^-(.+)$ ]]; then
        local group_name="${BASH_REMATCH[1]}"
    else
        echo "❌ Error: Invalid argument format. Use -<group_name>"
        echo ""
        show_usage
        exit 1
    fi
    # Check if files exist
    if [ ! -f "$INVENTORY_FILE" ]; then
        echo "❌ Error: Inventory file not found: $INVENTORY_FILE"
        exit 1
    fi
    if [ ! -f "$PLAYBOOK_FILE" ]; then
        echo "❌ Error: Playbook file not found: $PLAYBOOK_FILE"
        exit 1
    fi
    # Check if group exists
    if ! check_group_exists "$group_name"; then
        echo "❌ Error: Group '$group_name' not found in inventory"
        echo ""
        show_usage
        exit 1
    fi
    # Run the playbook
    run_playbook "$group_name"
}

# Handle help argument
# Checked before main so `-h`/`--help` never falls into the -<group> parser.
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
    show_usage
    exit 0
fi

# Run main function
main "$@"

View File

@ -1,16 +0,0 @@
{
"postgres": {
"host": "postgresql:5432",
"user": "postgres",
"pass": "Ccie#15544",
"name": "semaphore",
"options": {
"sslmode": "disable"
}
},
"dialect": "postgres",
"tmp_path": "/tmp/semaphore",
"cookie_hash": "DlY3h3sXjiJV04u4F1eF6ZuLEQOw+jlXe6Qj4Fxn2m8=",
"cookie_encryption": "4BZst4BOkCobGLDQn00WuSVFH8oA4dcgTgbZf7rtkyo=",
"access_key_encryption": "SqHu6FvyjMkFfjJ/8apw5HN26XZaXNg32Yqp0p1tFs8="
}

View File

@ -0,0 +1,210 @@
---
# Read-only audit of the Docker environment on ash3c to assess Docker Swarm
# migration suitability. Collects container, network, volume and compose-file
# information, prints an analysis, and saves a plain-text summary under /tmp
# on the target host. No Docker state is modified.
- name: Simple Docker Swarm Analysis for ash3c
  hosts: ash3c
  become: true
  gather_facts: true

  tasks:
    # --- Preconditions -------------------------------------------------
    - name: Check if Docker is installed
      command: which docker
      register: docker_installed
      failed_when: false
      changed_when: false

    - name: Fail if Docker not installed
      fail:
        msg: "Docker is not installed on {{ inventory_hostname }}"
      when: docker_installed.rc != 0

    # --- Current Swarm state -------------------------------------------
    # changed_when: false on all read-only commands below so repeated runs
    # stay idempotent in Ansible's reporting.
    - name: Check Docker Swarm status
      shell: docker info | grep "Swarm:" -A 1
      register: swarm_status
      changed_when: false

    - name: Display current Swarm status
      debug:
        msg: "🔍 Current Swarm Status: {{ swarm_status.stdout_lines }}"

    # --- Container inventory -------------------------------------------
    # The {{ '{{' }} ... {{ '}}' }} escaping emits literal Go-template
    # braces for `docker --format` without Jinja interpreting them.
    - name: Get running containers
      shell: docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}"
      register: running_containers
      changed_when: false

    - name: Display running containers
      debug:
        msg: "🏃 Running Containers: {{ running_containers.stdout_lines }}"

    - name: Get all containers
      shell: docker ps -a --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}"
      register: all_containers
      changed_when: false

    - name: Display all containers
      debug:
        msg: "📦 All Containers: {{ all_containers.stdout_lines }}"

    - name: Get container names only
      shell: docker ps -a --format "{{ '{{' }}.Names{{ '}}' }}"
      register: container_names
      changed_when: false

    # Per-container deep dive: image, state, restart policy, network mode,
    # published ports, mounts, first 10 env vars and first 5 labels.
    - name: Inspect each container
      shell: |
        echo "=== Container: {{ item }} ==="
        echo "Image: $(docker inspect {{ item }} --format '{{ '{{' }}.Config.Image{{ '}}' }}')"
        echo "Status: $(docker inspect {{ item }} --format '{{ '{{' }}.State.Status{{ '}}' }}')"
        echo "Restart Policy: $(docker inspect {{ item }} --format '{{ '{{' }}.HostConfig.RestartPolicy.Name{{ '}}' }}')"
        echo "Network Mode: $(docker inspect {{ item }} --format '{{ '{{' }}.HostConfig.NetworkMode{{ '}}' }}')"
        echo "Published Ports: $(docker port {{ item }} 2>/dev/null || echo 'None')"
        echo "Volumes/Mounts:"
        docker inspect {{ item }} --format '{{ '{{' }}range .Mounts{{ '}}' }} {{ '{{' }}.Source{{ '}}' }}:{{ '{{' }}.Destination{{ '}}' }} ({{ '{{' }}.Mode{{ '}}' }}){{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' || echo " None"
        echo "Environment Variables:"
        docker inspect {{ item }} --format '{{ '{{' }}range .Config.Env{{ '}}' }} {{ '{{' }}.{{ '}}' }}{{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' | head -10
        echo "Labels:"
        docker inspect {{ item }} --format '{{ '{{' }}range $key, $value := .Config.Labels{{ '}}' }} {{ '{{' }}$key{{ '}}' }}={{ '{{' }}$value{{ '}}' }}{{ '{{' }}"\n"{{ '}}' }}{{ '{{' }}end{{ '}}' }}' | head -5
        echo "---"
      register: container_inspect
      loop: "{{ container_names.stdout_lines }}"
      when: container_names.stdout_lines | length > 0
      changed_when: false

    - name: Display container inspection results
      debug:
        msg: "{{ item.stdout }}"
      loop: "{{ container_inspect.results }}"
      when: container_inspect is defined

    # --- Compose files -------------------------------------------------
    - name: Find docker-compose files
      find:
        paths:
          - /root
          - /home
          - /opt
        patterns:
          - "docker-compose.yml"
          - "docker-compose.yaml"
          - "compose.yml"
          - "compose.yaml"
        recurse: true
        depth: 3
      register: compose_files

    - name: Display found compose files
      debug:
        msg: "📄 Found compose files: {{ item.path }}"
      loop: "{{ compose_files.files }}"
      when: compose_files.files | length > 0

    # --- Networks, volumes, resource usage -----------------------------
    - name: Get Docker networks
      shell: docker network ls
      register: docker_networks
      changed_when: false

    - name: Display Docker networks
      debug:
        msg: "🌐 Docker Networks: {{ docker_networks.stdout_lines }}"

    - name: Get Docker volumes
      shell: docker volume ls
      register: docker_volumes
      changed_when: false

    - name: Display Docker volumes
      debug:
        msg: "💾 Docker Volumes: {{ docker_volumes.stdout_lines }}"

    - name: Get container resource usage
      shell: docker stats --no-stream
      register: container_stats
      when: container_names.stdout_lines | length > 0
      changed_when: false

    - name: Display container stats
      debug:
        msg: "📊 Container Resource Usage: {{ container_stats.stdout_lines }}"
      when: container_stats is defined

    # --- Human-readable analysis ---------------------------------------
    - name: Generate Swarm suitability analysis
      debug:
        msg: |
          🔍 DOCKER SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }}
          ================================================================
          📋 SUMMARY:
          - Current Swarm Status: {{ 'Active' if 'active' in swarm_status.stdout else 'Inactive' }}
          - Total Containers: {{ container_names.stdout_lines | length }}
          - Running Containers: {{ (running_containers.stdout_lines | length) - 1 }}
          - Compose Files Found: {{ compose_files.files | length }}
          💡 GENERAL RECOMMENDATIONS:
          ✅ SUITABLE FOR SWARM (typically):
          - Web applications (nginx, apache, etc.)
          - API services
          - Databases (with proper volume management)
          - Monitoring tools (prometheus, grafana, etc.)
          - Load balancers
          ❌ NOT SUITABLE FOR SWARM:
          - Containers using Docker socket (/var/run/docker.sock)
          - Containers with --privileged flag
          - Containers requiring specific host access
          - Development/testing containers
          ⚠️ NEEDS MODIFICATION:
          - Containers using bind mounts (convert to volumes)
          - Containers without restart policies
          - Containers using host networking
          🚀 NEXT STEPS:
          1. Review each container's configuration above
          2. Identify services that can benefit from scaling
          3. Convert suitable containers to Docker services
          4. Set up overlay networks
          5. Configure secrets and configs management
          📝 MIGRATION CHECKLIST:
          □ Initialize Swarm (already done: {{ 'Yes' if 'active' in swarm_status.stdout else 'No' }})
          □ Create overlay networks
          □ Convert containers to services
          □ Set up service discovery
          □ Configure load balancing
          □ Test service scaling
          □ Set up monitoring
      when: container_names is defined

    # --- Persist the summary on the host -------------------------------
    - name: Save analysis summary
      copy:
        content: |
          Docker Swarm Analysis for {{ inventory_hostname }}
          Generated: {{ ansible_date_time.iso8601 }}
          Current Swarm Status: {{ swarm_status.stdout }}
          Total Containers: {{ container_names.stdout_lines | length }}
          Container List:
          {{ container_names.stdout_lines | join('\n') }}
          Networks:
          {{ docker_networks.stdout }}
          Volumes:
          {{ docker_volumes.stdout }}
          Compose Files Found:
          {% for file in compose_files.files %}
          - {{ file.path }}
          {% endfor %}
        dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt"

    - name: Analysis complete
      debug:
        msg: |
          🎉 Analysis complete!
          Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt
          Review the container details above to determine which services
          are suitable for Swarm migration.

View File

@ -0,0 +1,246 @@
---
# Deeper Swarm-migration analysis for ash3c: inspects every container via
# `docker inspect | jq`, classifies each for Swarm suitability, generates a
# commented migration-script template, and saves both to /tmp on the host.
# Requires jq on the target (checked below). Read-only apart from the
# /tmp report file.
- name: Docker Swarm Migration Analysis for ash3c
  hosts: ash3c
  become: true
  gather_facts: true

  tasks:
    # --- Preconditions -------------------------------------------------
    - name: Check if Docker is installed
      command: which docker
      register: docker_installed
      failed_when: false
      changed_when: false

    - name: Fail if Docker not installed
      fail:
        msg: "Docker is not installed on {{ inventory_hostname }}"
      when: docker_installed.rc != 0

    # Container inspection below pipes through jq; fail early with a clear
    # message instead of failing mid-loop.
    - name: Check if jq is installed
      command: which jq
      register: jq_installed
      failed_when: false
      changed_when: false

    - name: Fail if jq not installed
      fail:
        msg: "jq is required for container inspection on {{ inventory_hostname }}"
      when: jq_installed.rc != 0

    # --- Current Swarm state -------------------------------------------
    - name: Check Docker Swarm status
      shell: docker info --format "{{ '{{' }}.Swarm.LocalNodeState{{ '}}' }}"
      register: swarm_status
      changed_when: false

    - name: Display current Swarm status
      debug:
        msg: "🔍 Current Swarm Status: {{ swarm_status.stdout }}"

    # --- Container details ---------------------------------------------
    - name: Get all containers (running and stopped)
      shell: docker ps -a --format "{{ '{{' }}.Names{{ '}}' }}"
      register: all_containers
      changed_when: false

    - name: Get basic container information
      shell: |
        echo "=== Container: {{ item }} ==="
        docker inspect {{ item }} | jq -r '
          .[0] |
          "Image: " + .Config.Image,
          "Status: " + .State.Status,
          "RestartPolicy: " + .HostConfig.RestartPolicy.Name,
          "NetworkMode: " + .HostConfig.NetworkMode,
          "Ports: " + (.NetworkSettings.Ports | keys | join(", ")),
          "Volumes: " + ([.Mounts[]? | .Source + ":" + .Destination + ":" + .Mode] | join(" ")),
          "Memory: " + (.HostConfig.Memory | tostring),
          "CPUs: " + (.HostConfig.NanoCpus | tostring)
        '
        echo "---"
      register: container_details
      loop: "{{ all_containers.stdout_lines }}"
      when: all_containers.stdout_lines | length > 0
      changed_when: false

    - name: Display container details
      debug:
        msg: "{{ item.stdout }}"
      loop: "{{ container_details.results }}"
      when: container_details is defined

    # --- Compose files -------------------------------------------------
    - name: Find docker-compose files
      find:
        paths:
          - /root
          - /home
          - /opt
        patterns:
          - "docker-compose.yml"
          - "docker-compose.yaml"
          - "compose.yml"
          - "compose.yaml"
        recurse: true
      register: compose_files

    - name: Display found compose files
      debug:
        msg: "📄 Found compose files: {{ item.path }}"
      loop: "{{ compose_files.files }}"
      when: compose_files.files | length > 0

    # --- Networks, volumes, resource usage -----------------------------
    - name: Get Docker networks
      shell: docker network ls --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.Driver{{ '}}' }}\t{{ '{{' }}.Scope{{ '}}' }}"
      register: docker_networks
      changed_when: false

    - name: Display Docker networks
      debug:
        msg: "🌐 Docker Networks: {{ docker_networks.stdout_lines }}"

    - name: Get Docker volumes
      shell: docker volume ls --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.Driver{{ '}}' }}"
      register: docker_volumes
      changed_when: false

    - name: Display Docker volumes
      debug:
        msg: "💾 Docker Volumes: {{ docker_volumes.stdout_lines }}"

    - name: Get container resource usage
      shell: docker stats --no-stream --format "{{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}"
      register: container_stats
      when: all_containers.stdout_lines | length > 0
      changed_when: false

    - name: Display container stats
      debug:
        msg: "📊 Container Resource Usage: {{ container_stats.stdout_lines }}"
      when: container_stats is defined

    # --- Suitability classification ------------------------------------
    # Heuristics: docker.sock mounts disqualify a container; bind mounts
    # and default networking need changes; restart policies are desirable.
    - name: Analyze containers for Swarm suitability
      set_fact:
        swarm_analysis: |
          🔍 SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }}
          ================================================
          Current Swarm Status: {{ swarm_status.stdout }}
          Total Containers: {{ all_containers.stdout_lines | length }}
          📋 CONTAINER ANALYSIS:
          {% for container in container_details.results %}
          Container: {{ container.item }}
          {% set details = container.stdout.split('\n') %}
          {% for line in details %}
          {{ line }}
          {% endfor %}
          SWARM SUITABILITY ASSESSMENT:
          {% if 'restart=always' in container.stdout or 'restart=unless-stopped' in container.stdout %}
          ✅ Good restart policy for Swarm
          {% else %}
          ⚠️ Consider adding restart policy
          {% endif %}
          {% if 'NetworkMode: bridge' in container.stdout or 'NetworkMode: host' in container.stdout %}
          ⚠️ May need network configuration for Swarm
          {% else %}
          ✅ Custom network - good for Swarm
          {% endif %}
          {% if '/var/run/docker.sock' in container.stdout %}
          ❌ Uses Docker socket - NOT suitable for Swarm
          {% elif 'bind' in container.stdout %}
          ⚠️ Uses bind mounts - consider using volumes
          {% else %}
          ✅ Good volume configuration
          {% endif %}
          {% endfor %}
          💡 RECOMMENDATIONS:
          SUITABLE FOR SWARM:
          {% for container in container_details.results %}
          {% if '/var/run/docker.sock' not in container.stdout %}
          - {{ container.item }}: Ready for Swarm migration
          {% endif %}
          {% endfor %}
          NEEDS MODIFICATION:
          {% for container in container_details.results %}
          {% if '/var/run/docker.sock' in container.stdout %}
          - {{ container.item }}: Uses Docker socket - keep as standalone
          {% elif 'bind' in container.stdout %}
          - {{ container.item }}: Convert bind mounts to volumes
          {% endif %}
          {% endfor %}
          NEXT STEPS:
          1. Initialize Swarm: docker swarm init
          2. Create overlay networks for services
          3. Convert suitable containers to services
          4. Set up service discovery and load balancing
          5. Configure secrets and configs management
      when: container_details is defined

    - name: Display Swarm analysis
      debug:
        msg: "{{ swarm_analysis }}"
      when: swarm_analysis is defined

    # --- Migration script template -------------------------------------
    # Service-creation commands are intentionally commented out; this is a
    # template to review and customise, not an executable migration.
    - name: Generate migration script suggestions
      set_fact:
        migration_script: |
          #!/bin/bash
          # Docker Swarm Migration Script for {{ inventory_hostname }}
          # Generated on {{ ansible_date_time.iso8601 }}
          echo "🚀 Starting Docker Swarm migration..."
          # Initialize Swarm (if not already done)
          if [ "{{ swarm_status.stdout }}" != "active" ]; then
            echo "Initializing Docker Swarm..."
            docker swarm init
          fi
          # Create overlay networks
          echo "Creating overlay networks..."
          docker network create -d overlay --attachable app-network
          # Example service creation (modify as needed)
          {% for container in container_details.results if container_details is defined %}
          {% if '/var/run/docker.sock' not in container.stdout %}
          echo "Converting {{ container.item }} to Swarm service..."
          # docker service create --name {{ container.item }}-svc \
          #   --network app-network \
          #   --replicas 1 \
          #   [ADD_YOUR_SPECIFIC_OPTIONS] \
          #   [IMAGE_NAME]
          {% endif %}
          {% endfor %}
          echo "✅ Migration script template generated!"
          echo "Please review and customize before running."
      when: container_details is defined

    - name: Display migration script
      debug:
        msg: "{{ migration_script }}"
      when: migration_script is defined

    # --- Persist results ------------------------------------------------
    - name: Save analysis results to file
      copy:
        content: |
          {{ swarm_analysis }}
          MIGRATION SCRIPT:
          {{ migration_script }}
        dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt"
      when: swarm_analysis is defined and migration_script is defined

    - name: Analysis complete
      debug:
        msg: |
          🎉 Analysis complete!
          Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt
          Summary:
          - Total containers analyzed: {{ all_containers.stdout_lines | length }}
          - Compose files found: {{ compose_files.files | length }}
          - Current Swarm status: {{ swarm_status.stdout }}

View File

@ -0,0 +1,236 @@
---
# Lightweight Swarm readiness check for ash3c using plain `docker ps` output
# plus per-container `docker inspect | jq` details (first 10 containers).
# Prints general migration guidance and saves a summary under /tmp.
# Requires jq on the target (checked below). Read-only apart from the
# /tmp report file.
- name: Docker Swarm Check for ash3c
  hosts: ash3c
  become: true
  gather_facts: true

  tasks:
    # --- Preconditions -------------------------------------------------
    - name: Check if Docker is installed
      command: which docker
      register: docker_installed
      failed_when: false
      changed_when: false

    - name: Fail if Docker not installed
      fail:
        msg: "Docker is not installed on {{ inventory_hostname }}"
      when: docker_installed.rc != 0

    # Per-container info below pipes through jq; fail early if missing.
    - name: Check if jq is installed
      command: which jq
      register: jq_installed
      failed_when: false
      changed_when: false

    - name: Fail if jq not installed
      fail:
        msg: "jq is required for container inspection on {{ inventory_hostname }}"
      when: jq_installed.rc != 0

    # --- Current Swarm state -------------------------------------------
    - name: Check Docker Swarm status
      shell: docker info | grep "Swarm:" -A 1
      register: swarm_status
      changed_when: false

    - name: Display current Swarm status
      debug:
        msg: "🔍 Current Swarm Status: {{ swarm_status.stdout_lines }}"

    # --- Container inventory (plain docker ps output) -------------------
    - name: Get running containers
      shell: docker ps
      register: running_containers
      changed_when: false

    - name: Display running containers
      debug:
        msg: "🏃 Running Containers:\n{{ running_containers.stdout }}"

    - name: Get all containers
      shell: docker ps -a
      register: all_containers
      changed_when: false

    - name: Display all containers
      debug:
        msg: "📦 All Containers:\n{{ all_containers.stdout }}"

    # NF is the last column of `docker ps -a` (the container name);
    # NR>1 skips the header row.
    - name: Get container names
      shell: docker ps -a | awk 'NR>1 {print $NF}' | head -20
      register: container_names
      changed_when: false

    - name: Display container names
      debug:
        msg: "Container names: {{ container_names.stdout_lines }}"

    # --- Per-container details (first 10 only, to bound runtime) --------
    - name: Get basic container info
      shell: |
        echo "=== Container: {{ item }} ==="
        docker inspect {{ item }} | jq -r '.[0] | {
          "Image": .Config.Image,
          "Status": .State.Status,
          "RestartPolicy": .HostConfig.RestartPolicy.Name,
          "NetworkMode": .HostConfig.NetworkMode
        }'
        echo "Ports:"
        docker port {{ item }} 2>/dev/null || echo "No published ports"
        echo "Mounts:"
        docker inspect {{ item }} | jq -r '.[0].Mounts[]? | "  \(.Source):\(.Destination) (\(.Mode))"'
        echo "---"
      register: container_info
      loop: "{{ container_names.stdout_lines[:10] }}"
      when: container_names.stdout_lines | length > 0
      changed_when: false

    - name: Display container info
      debug:
        msg: "{{ item.stdout }}"
      loop: "{{ container_info.results }}"
      when: container_info is defined

    # --- Compose files --------------------------------------------------
    # /var/lib/docker may contain unreadable paths; ignore_errors keeps the
    # check best-effort rather than failing the play.
    - name: Find docker-compose files in common locations
      find:
        paths:
          - /root
          - /home
          - /opt
          - /var/lib/docker
        patterns:
          - "docker-compose.yml"
          - "docker-compose.yaml"
          - "compose.yml"
          - "compose.yaml"
        recurse: true
        depth: 3
      register: compose_files
      ignore_errors: true

    - name: Display found compose files
      debug:
        msg: "📄 Found compose files: {{ compose_files.files | map(attribute='path') | list }}"
      when: compose_files.files | length > 0

    # --- Networks, volumes, stats, images -------------------------------
    - name: Get Docker networks
      shell: docker network ls
      register: docker_networks
      changed_when: false

    - name: Display Docker networks
      debug:
        msg: "🌐 Docker Networks:\n{{ docker_networks.stdout }}"

    - name: Get Docker volumes
      shell: docker volume ls
      register: docker_volumes
      changed_when: false

    - name: Display Docker volumes
      debug:
        msg: "💾 Docker Volumes:\n{{ docker_volumes.stdout }}"

    - name: Get container resource usage
      shell: docker stats --no-stream
      register: container_stats
      when: container_names.stdout_lines | length > 0
      changed_when: false

    - name: Display container stats
      debug:
        msg: "📊 Container Resource Usage:\n{{ container_stats.stdout }}"
      when: container_stats is defined

    - name: Get Docker images
      shell: docker images
      register: docker_images
      changed_when: false

    - name: Display Docker images
      debug:
        msg: "🖼️ Docker Images:\n{{ docker_images.stdout }}"

    # --- Human-readable analysis ----------------------------------------
    - name: Generate Swarm suitability analysis
      debug:
        msg: |
          🔍 DOCKER SWARM MIGRATION ANALYSIS FOR {{ inventory_hostname }}
          ================================================================
          📋 SUMMARY:
          - Current Swarm Status: {{ 'Active' if 'active' in swarm_status.stdout else 'Inactive' }}
          - Total Containers: {{ container_names.stdout_lines | length }}
          - Running Containers: {{ running_containers.stdout_lines | length - 1 }}
          - Compose Files Found: {{ compose_files.files | length if compose_files.files is defined else 0 }}
          💡 SWARM MIGRATION RECOMMENDATIONS:
          ✅ TYPICALLY SUITABLE FOR SWARM:
          - Web servers (nginx, apache, caddy)
          - API services and microservices
          - Application servers
          - Load balancers (traefik, haproxy)
          - Monitoring tools (prometheus, grafana)
          - Databases (with proper volume strategy)
          ❌ NOT SUITABLE FOR SWARM:
          - Containers using Docker socket (/var/run/docker.sock)
          - Containers with --privileged flag
          - Development/testing containers
          - Containers requiring specific host hardware access
          ⚠️ NEEDS MODIFICATION FOR SWARM:
          - Containers using bind mounts → convert to volumes
          - Containers without restart policies → add restart policies
          - Containers using host networking → use overlay networks
          - Containers with hardcoded IPs → use service discovery
          🚀 MIGRATION STEPS:
          1. ✅ Swarm is already initialized
          2. Create overlay networks for service communication
          3. Convert suitable containers to Docker services
          4. Set up service discovery and load balancing
          5. Configure secrets and configs management
          6. Test service scaling and failover
          📝 NEXT ACTIONS:
          - Review each container above for Swarm suitability
          - Identify services that would benefit from scaling
          - Plan network topology for services
          - Prepare volume migration strategy
      when: container_names is defined

    # --- Persist the summary on the host --------------------------------
    - name: Save analysis summary to file
      copy:
        content: |
          Docker Swarm Analysis for {{ inventory_hostname }}
          Generated: {{ ansible_date_time.iso8601 }}
          SWARM STATUS:
          {{ swarm_status.stdout }}
          CONTAINERS ({{ container_names.stdout_lines | length }} total):
          {{ container_names.stdout_lines | join('\n') }}
          NETWORKS:
          {{ docker_networks.stdout }}
          VOLUMES:
          {{ docker_volumes.stdout }}
          IMAGES:
          {{ docker_images.stdout }}
          {% if compose_files.files is defined and compose_files.files | length > 0 %}
          COMPOSE FILES FOUND:
          {% for file in compose_files.files %}
          - {{ file.path }}
          {% endfor %}
          {% endif %}
        dest: "/tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt"

    - name: Analysis complete
      debug:
        msg: |
          🎉 ANALYSIS COMPLETE!
          📄 Results saved to: /tmp/swarm-analysis-{{ inventory_hostname }}-{{ ansible_date_time.epoch }}.txt
          🔍 Review the container details above to identify:
          - Which services are suitable for Swarm
          - Which containers need modification
          - Migration priority and strategy
          💡 TIP: Focus on stateless services first for easier migration!

View File

@ -0,0 +1,194 @@
---
# Generates (but does not execute) a phased Swarm migration plan for ash3c:
# prints the per-phase service list and writes a migration script plus a
# rollback script to /tmp on the host. No Docker state is modified by this
# play itself.
- name: Docker Swarm Migration Plan for ash3c
  hosts: ash3c
  become: true
  gather_facts: true

  vars:
    # Service migration plan, grouped by risk: high_priority services are
    # stateless/low-risk, low_priority ones are stateful and need the most
    # care (volumes, secrets, manager-node constraints).
    swarm_services:
      high_priority:
        - name: ghproxy
          image: wjqserver/ghproxy:latest
          ports: "8046:8080"
          replicas: 2
          networks: ["app-network"]
        - name: redis
          image: redis:latest
          ports: "63789:6379"
          replicas: 1
          networks: ["app-network"]
          volumes: ["redis-data:/data"]
      medium_priority:
        - name: consul
          image: bitnami/consul:latest
          ports:
            - "8310:8300"
            - "8311:8301"
            - "8312:8302"
            - "8501:8500"
            - "8601:8600/udp"
          replicas: 1
          networks: ["consul-network"]
        - name: discourse-app
          image: bitnami/discourse:3.4.1
          ports: "31080:3000"
          replicas: 1
          networks: ["app-network"]
          depends_on: ["postgres", "redis"]
        - name: discourse-sidekiq
          image: bitnami/discourse:3.4.1
          replicas: 1
          networks: ["app-network"]
          depends_on: ["postgres", "redis"]
      low_priority:
        - name: elasticsearch
          image: bitnami/elasticsearch:8.17.2
          ports: "59200:9200"
          replicas: 1
          networks: ["elastic-network"]
          volumes: ["elastic-data:/bitnami/elasticsearch/data"]
          constraints: ["node.role==manager"]
        - name: postgres
          image: postgres:17.2
          ports: "54322:5432"
          replicas: 1
          networks: ["db-network"]
          volumes: ["postgres-data:/var/lib/postgresql/data"]
          constraints: ["node.role==manager"]
          secrets: ["postgres_password"]

  tasks:
    - name: Display migration plan
      debug:
        msg: |
          🚀 DOCKER SWARM MIGRATION PLAN FOR {{ inventory_hostname }}
          =========================================================
          📋 PHASE 1 - HIGH PRIORITY (Low Risk)
          {% for service in swarm_services.high_priority %}
          ✅ {{ service.name }}:
             - Image: {{ service.image }}
             - Replicas: {{ service.replicas }}
             - Networks: {{ service.networks | join(', ') }}
             - Migration: Safe, stateless service
          {% endfor %}
          📋 PHASE 2 - MEDIUM PRIORITY (Medium Risk)
          {% for service in swarm_services.medium_priority %}
          ⚠️ {{ service.name }}:
             - Image: {{ service.image }}
             - Replicas: {{ service.replicas }}
             - Networks: {{ service.networks | join(', ') }}
             - Migration: Requires coordination
          {% endfor %}
          📋 PHASE 3 - LOW PRIORITY (High Risk)
          {% for service in swarm_services.low_priority %}
          🔴 {{ service.name }}:
             - Image: {{ service.image }}
             - Replicas: {{ service.replicas }}
             - Networks: {{ service.networks | join(', ') }}
             - Migration: Requires careful planning
          {% endfor %}

    # SECURITY NOTE(review): the generated script seeds the postgres secret
    # with a literal placeholder ("your_postgres_password"). It must be
    # replaced before running the script; do not commit a real value here.
    - name: Create migration script
      copy:
        content: |
          #!/bin/bash
          # Docker Swarm Migration Script for {{ inventory_hostname }}
          # Generated: {{ ansible_date_time.iso8601 }}
          set -e
          echo "🚀 Starting Docker Swarm Migration..."
          # Create networks
          echo "📡 Creating overlay networks..."
          docker network create -d overlay --attachable app-network || true
          docker network create -d overlay --attachable db-network || true
          docker network create -d overlay --attachable consul-network || true
          docker network create -d overlay --attachable elastic-network || true
          # Create volumes
          echo "💾 Creating volumes..."
          docker volume create redis-data || true
          docker volume create postgres-data || true
          docker volume create elastic-data || true
          # Create secrets (example)
          echo "🔐 Creating secrets..."
          echo "your_postgres_password" | docker secret create postgres_password - || true
          echo "✅ Infrastructure setup complete!"
          echo ""
          echo "🔄 PHASE 1 - Migrate high priority services:"
          echo "docker service create --name ghproxy-svc --replicas 2 --network app-network -p 8046:8080 wjqserver/ghproxy:latest"
          echo "docker service create --name redis-svc --replicas 1 --network app-network -p 63789:6379 --mount type=volume,source=redis-data,target=/data redis:latest"
          echo ""
          echo "🔄 PHASE 2 - Migrate medium priority services:"
          echo "docker service create --name consul-svc --replicas 1 --network consul-network -p 8310:8300 -p 8311:8301 -p 8312:8302 -p 8501:8500 -p 8601:8600/udp bitnami/consul:latest"
          echo "docker service create --name discourse-app-svc --replicas 1 --network app-network -p 31080:3000 bitnami/discourse:3.4.1"
          echo "docker service create --name discourse-sidekiq-svc --replicas 1 --network app-network bitnami/discourse:3.4.1"
          echo ""
          echo "🔄 PHASE 3 - Migrate low priority services (CAREFUL!):"
          echo "docker service create --name postgres-svc --replicas 1 --network db-network -p 54322:5432 --mount type=volume,source=postgres-data,target=/var/lib/postgresql/data --secret postgres_password --constraint 'node.role==manager' postgres:17.2"
          echo "docker service create --name elasticsearch-svc --replicas 1 --network elastic-network -p 59200:9200 --mount type=volume,source=elastic-data,target=/bitnami/elasticsearch/data --constraint 'node.role==manager' bitnami/elasticsearch:8.17.2"
          echo ""
          echo "📊 Monitor services:"
          echo "docker service ls"
          echo "docker service ps <service-name>"
          echo ""
          echo "⚠️ IMPORTANT NOTES:"
          echo "1. Stop original containers before creating services"
          echo "2. Backup data before migrating databases"
          echo "3. Test each phase before proceeding"
          echo "4. Monitor logs: docker service logs <service-name>"
        dest: "/tmp/swarm-migration-{{ inventory_hostname }}.sh"
        mode: '0755'

    - name: Create rollback script
      copy:
        content: |
          #!/bin/bash
          # Docker Swarm Rollback Script for {{ inventory_hostname }}
          echo "🔄 Rolling back Swarm services..."
          # Remove services
          docker service rm ghproxy-svc redis-svc consul-svc discourse-app-svc discourse-sidekiq-svc postgres-svc elasticsearch-svc 2>/dev/null || true
          # Remove networks (optional)
          # docker network rm app-network db-network consul-network elastic-network 2>/dev/null || true
          echo "✅ Rollback complete. Original containers should be restarted manually."
        dest: "/tmp/swarm-rollback-{{ inventory_hostname }}.sh"
        mode: '0755'

    - name: Migration plan complete
      debug:
        msg: |
          🎉 MIGRATION PLAN GENERATED!
          📄 Files created:
          - /tmp/swarm-migration-{{ inventory_hostname }}.sh (Migration script)
          - /tmp/swarm-rollback-{{ inventory_hostname }}.sh (Rollback script)
          🚀 RECOMMENDED APPROACH:
          1. Backup all data first
          2. Test migration in phases
          3. Start with Phase 1 (low risk services)
          4. Monitor each service before proceeding
          5. Keep rollback script ready
          💡 NEXT STEPS:
          1. Review and customize the migration script
          2. Plan maintenance window
          3. Execute phase by phase
          4. Monitor and validate each service

View File

@ -0,0 +1,175 @@
---
# Bootstrap playbook: installs baseline tooling, hardens SSH, installs Docker,
# opens the firewall, and tunes kernel / ulimit settings on new hosts.
- name: Bootstrap Infrastructure
  hosts: all
  become: yes
  gather_facts: yes
  vars:
    # Baseline package set (Debian/Ubuntu names)
    base_packages:
      - curl
      - wget
      - git
      - vim
      - htop
      - tree
      - unzip
      - jq
      - python3
      - python3-pip
      - apt-transport-https
      - ca-certificates
      - gnupg
      - lsb-release
    # Docker configuration: users granted daemon access (root-equivalent!)
    docker_users:
      - "{{ ansible_user }}"
    # System configuration
    timezone: "Asia/Shanghai"
  tasks:
    - name: Update package cache
      apt:
        update_cache: yes
        cache_valid_time: 3600  # skip refresh when the cache is under an hour old
      when: ansible_os_family == "Debian"

    - name: Install base packages
      package:
        name: "{{ base_packages }}"
        state: present

    - name: Set timezone
      timezone:
        name: "{{ timezone }}"

    # Ensure the connecting user exists with sudo + bash (skipped when
    # connecting as root).
    - name: Create system users
      user:
        name: "{{ ansible_user }}"
        groups: sudo
        shell: /bin/bash
        create_home: yes
      when: ansible_user != "root"

    # Key-only SSH: disable root login and password auth. Only applied when
    # connecting as a non-root user so the play cannot lock itself out.
    - name: Configure SSH
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: "{{ item.regexp }}"
        line: "{{ item.line }}"
        backup: yes
      loop:
        - { regexp: '^#?PermitRootLogin', line: 'PermitRootLogin no' }
        - { regexp: '^#?PasswordAuthentication', line: 'PasswordAuthentication no' }
        - { regexp: '^#?PubkeyAuthentication', line: 'PubkeyAuthentication yes' }
      notify: restart ssh
      when: ansible_user != "root"

    # Docker CE from the official Ubuntu repository.
    # NOTE(review): apt_key is deprecated on ansible-core >= 2.13 — confirm the
    # target Ansible version or migrate to deb822_repository / signed-by keyrings.
    - name: Install Docker
      block:
        - name: Add Docker GPG key
          apt_key:
            url: https://download.docker.com/linux/ubuntu/gpg
            state: present

        - name: Add Docker repository
          apt_repository:
            repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
            state: present

        - name: Install Docker
          package:
            name:
              - docker-ce
              - docker-ce-cli
              - containerd.io
              - docker-compose-plugin
            state: present

        - name: Add users to docker group
          user:
            name: "{{ item }}"
            groups: docker
            append: yes
          loop: "{{ docker_users }}"

        - name: Start and enable Docker
          systemd:
            name: docker
            state: started
            enabled: yes

    # NOTE(review): this always fetches the x86_64 binary — it will not run on
    # ARM hosts; confirm the fleet architecture before relying on it.
    - name: Install Docker Compose (standalone)
      get_url:
        url: "https://github.com/docker/compose/releases/latest/download/docker-compose-linux-x86_64"
        dest: /usr/local/bin/docker-compose
        mode: '0755'

    # Allow SSH / HTTP / HTTPS, then turn UFW on via the handler.
    - name: Configure firewall
      ufw:
        rule: "{{ item.rule }}"
        port: "{{ item.port }}"
        proto: "{{ item.proto | default('tcp') }}"
      loop:
        - { rule: 'allow', port: '22' }
        - { rule: 'allow', port: '80' }
        - { rule: 'allow', port: '443' }
      notify: enable ufw

    # Standard directory layout owned by the deploy user.
    - name: Create application directories
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"
        mode: '0755'
      loop:
        - /opt/apps
        - /opt/data
        - /opt/logs
        - /opt/backups
        - /opt/scripts

    - name: Install monitoring tools
      package:
        name:
          - htop
          - iotop
          - nethogs
          - ncdu
          - tmux
        state: present

    # Raise open-file / process limits for all users (needed by databases and
    # search engines below).
    - name: Configure system limits
      pam_limits:
        domain: '*'
        limit_type: "{{ item.type }}"
        limit_item: "{{ item.item }}"
        value: "{{ item.value }}"
      loop:
        - { type: 'soft', item: 'nofile', value: '65536' }
        - { type: 'hard', item: 'nofile', value: '65536' }
        - { type: 'soft', item: 'nproc', value: '32768' }
        - { type: 'hard', item: 'nproc', value: '32768' }

    # Kernel tuning (vm.max_map_count = 262144 is the documented minimum for
    # Elasticsearch).
    - name: Configure sysctl
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: yes
      loop:
        - { name: 'vm.max_map_count', value: '262144' }
        - { name: 'fs.file-max', value: '2097152' }
        - { name: 'net.core.somaxconn', value: '32768' }

  handlers:
    # Debian/Ubuntu unit name is "ssh"; other families may use "sshd".
    - name: restart ssh
      systemd:
        name: ssh
        state: restarted

    - name: enable ufw
      ufw:
        state: enabled

View File

@ -1,64 +0,0 @@
#!/bin/bash
# Consul cluster demo helper: status checks, KV replication test, and a
# simulated leader-failure walkthrough.
echo "🚀 Consul 集群演示脚本"

# Print leader / peer status for each cluster node.
check_cluster() {
    echo "📊 检查集群状态..."
    for node in consul1 consul2 consul3; do
        echo "检查节点: $node"
        # Query each endpoint once and reuse the result (the original hit
        # every API twice per node).
        leader=$(curl -s "http://$node:8500/v1/status/leader" 2>/dev/null)
        if [ -n "$leader" ]; then
            echo " - Leader: $leader"
        else
            echo " - 节点不可达"
        fi
        peers=$(curl -s "http://$node:8500/v1/status/peers" 2>/dev/null)
        if [ -n "$peers" ]; then
            echo " - 集群节点: $peers"
        else
            echo " - 无法获取集群信息"
        fi
        echo ""
    done
}

# Write a KV entry via one node, then read it back from the other nodes to
# demonstrate replication across the cluster.
test_config() {
    echo "🔧 测试配置读写..."
    echo "写入配置到 consul1..."
    # BUG FIX: the value must be sent as the request body (-d). Previously it
    # was passed as a bare second argument, which curl treats as another URL,
    # so nothing was ever written to Consul.
    curl -s -X PUT -d "value-from-consul1" http://consul1:8500/v1/kv/test/config >/dev/null 2>&1
    echo "从 consul2 读取配置..."
    value=$(curl -s "http://consul2:8500/v1/kv/test/config?raw" 2>/dev/null)
    echo "读取到的值: $value"
    echo "从 consul3 读取配置..."
    value=$(curl -s "http://consul3:8500/v1/kv/test/config?raw" 2>/dev/null)
    echo "读取到的值: $value"
}

# Explain (without actually doing it) how to trigger a leader re-election.
simulate_failure() {
    echo "💥 模拟 Leader 故障..."
    # Current leader, with surrounding quotes stripped from the JSON string.
    leader=$(curl -s http://consul1:8500/v1/status/leader 2>/dev/null | tr -d '"')
    echo "当前 Leader: $leader"
    echo "在实际环境中,你可以:"
    echo "docker stop consul-leader-container"
    echo "然后观察其他节点自动选举新 Leader"
}

case "$1" in
    "status")
        check_cluster
        ;;
    "test")
        test_config
        ;;
    "failure")
        simulate_failure
        ;;
    *)
        echo "用法: $0 {status|test|failure}"
        echo " status - 检查集群状态"
        echo " test - 测试配置同步"
        echo " failure - 模拟故障转移"
        ;;
esac

View File

@ -1,110 +0,0 @@
#!/bin/bash
# Deploy a Traefik + Consul demo cluster: scaffolds a sample web page and a
# Node.js API backed by Consul KV, then brings the stack up with
# docker-compose and prints access URLs.
echo "🚀 部署 Traefik + Consul 集群"
# Create the required directories
mkdir -p {certs,web-content,api,logs}
# Create the sample web page (heredoc content is written verbatim)
cat > web-content/index.html << 'EOF'
<!DOCTYPE html>
<html>
<head>
    <title>Traefik + Consul Demo</title>
</head>
<body>
    <h1>🎉 Traefik + Consul 集群运行成功!</h1>
    <p>当前时间: <span id="time"></span></p>
    <script>
        document.getElementById('time').textContent = new Date().toLocaleString();
    </script>
</body>
</html>
EOF
# Create the sample API: an Express app whose /api/config endpoint reads and
# writes the Consul KV key config/api/message
cat > api/server.js << 'EOF'
const express = require('express');
const consul = require('consul')();
const app = express();
const port = 3000;
app.use(express.json());
// 健康检查
app.get('/health', (req, res) => {
    res.json({ status: 'healthy', timestamp: new Date().toISOString() });
});
// API 路由
app.get('/api/config', async (req, res) => {
    try {
        const result = await consul.kv.get('config/api/message');
        res.json({
            message: result ? result.Value : 'Hello from API!',
            source: 'consul'
        });
    } catch (error) {
        res.json({
            message: 'Hello from API!',
            source: 'default'
        });
    }
});
app.post('/api/config', async (req, res) => {
    try {
        await consul.kv.set('config/api/message', req.body.message);
        res.json({ success: true });
    } catch (error) {
        res.status(500).json({ error: error.message });
    }
});
app.listen(port, () => {
    console.log(`API server running on port ${port}`);
});
EOF
# Create the API package.json
cat > api/package.json << 'EOF'
{
  "name": "demo-api",
  "version": "1.0.0",
  "dependencies": {
    "express": "^4.18.0",
    "consul": "^0.40.0"
  }
}
EOF
# Print the /etc/hosts entries needed for local testing (not applied
# automatically — the operator must add them by hand)
echo "📝 请添加以下内容到 /etc/hosts 文件:"
echo "127.0.0.1 traefik.local"
echo "127.0.0.1 consul.local"
echo "127.0.0.1 app.local"
echo "127.0.0.1 api.local"
# Start the stack
echo "🚀 启动 Traefik + Consul 集群..."
docker-compose -f traefik-consul-setup.yml up -d
# Give the containers a moment to come up
echo "⏳ 等待服务启动..."
sleep 10
# Show service status
echo "📊 检查服务状态..."
docker-compose -f traefik-consul-setup.yml ps
# Print access URLs and sample test commands
echo ""
echo "🎉 部署完成!访问地址:"
echo " Traefik Dashboard: http://traefik.local:8080"
echo " Consul UI: http://consul.local:8500"
echo " Web App: http://app.local"
echo " API: http://api.local/api/config"
echo ""
echo "📝 测试命令:"
echo " curl http://api.local/api/config"
echo " curl -X POST http://api.local/api/config -H 'Content-Type: application/json' -d '{\"message\":\"Hello Consul!\"}'"

View File

@ -0,0 +1,49 @@
# Environment stack wiring: composes the shared module with per-provider
# infrastructure modules.

# Shared configuration (common variables/outputs)
module "shared" {
  source = "../../shared"
}

# Oracle Cloud
# NOTE(review): unlike huawei_cloud below, this module is NOT gated on
# var.cloud_providers — confirm whether it should also be conditional.
module "oracle_cloud" {
  source = "../../providers/oracle-cloud"

  # Common deployment context
  environment        = var.environment
  project_name       = var.project_name
  owner              = var.owner
  vpc_cidr           = var.vpc_cidr
  availability_zones = var.availability_zones
  common_tags        = var.common_tags
  oci_config         = var.oci_config

  # Free-tier sizing
  instance_count = 1
  instance_size  = "VM.Standard.E2.1.Micro" # always-free eligible shape
}

# Huawei Cloud (deployed only when "huawei" is listed in var.cloud_providers)
module "huawei_cloud" {
  source = "../../providers/huawei-cloud"
  count  = contains(var.cloud_providers, "huawei") ? 1 : 0

  environment        = var.environment
  project_name       = var.project_name
  owner              = var.owner
  vpc_cidr           = "10.1.0.0/16" # distinct CIDR to avoid overlap with the Oracle VPC
  availability_zones = var.availability_zones
  common_tags        = var.common_tags
  huawei_config      = var.huawei_config
}

# Aggregated per-provider outputs
output "oracle_cloud_outputs" {
  description = "Oracle Cloud 基础设施输出"
  value       = module.oracle_cloud
}

# Null when the Huawei module is disabled (count = 0).
output "huawei_cloud_outputs" {
  description = "华为云基础设施输出"
  value       = length(module.huawei_cloud) > 0 ? module.huawei_cloud[0] : null
}

View File

@ -0,0 +1,61 @@
# Dev environment example configuration.
# Copy this file to terraform.tfvars and fill in real values.
# WARNING: never commit a populated terraform.tfvars — it contains credentials.

# Basic settings
environment  = "dev"
project_name = "mgmt"
owner        = "ben"

# Cloud providers to enable
cloud_providers = ["oracle", "huawei"]

# Network configuration
vpc_cidr           = "10.0.0.0/16"
availability_zones = ["a", "b"]

# Common resource tags
common_tags = {
  Environment = "dev"
  Project     = "mgmt"
  Owner       = "ben"
  ManagedBy   = "opentofu"
}

# Oracle Cloud credentials (API-key auth)
oci_config = {
  tenancy_ocid     = "ocid1.tenancy.oc1..your-tenancy-id"
  user_ocid        = "ocid1.user.oc1..your-user-id"
  fingerprint      = "your-key-fingerprint"
  private_key_path = "~/.oci/oci_api_key.pem"
  region           = "ap-seoul-1"
  compartment_ocid = "ocid1.compartment.oc1..your-compartment-id"
}

# Huawei Cloud credentials (AK/SK)
huawei_config = {
  access_key = "your-access-key"
  secret_key = "your-secret-key"
  region     = "cn-north-4"
  project_id = "your-project-id"
}

# Google Cloud configuration (optional)
gcp_config = {
  project_id       = "your-project-id"
  region           = "asia-northeast3"
  zone             = "asia-northeast3-a"
  credentials_file = "~/.gcp/service-account.json"
}

# AWS configuration (optional)
aws_config = {
  region     = "ap-northeast-2"
  access_key = "your-access-key"
  secret_key = "your-secret-key"
}

# DigitalOcean configuration (optional)
do_config = {
  token  = "your-do-token"
  region = "sgp1"
}

View File

@ -0,0 +1,133 @@
# Environment-level input variables.

variable "environment" {
  description = "环境名称"
  type        = string
  default     = "dev"
}

variable "project_name" {
  description = "项目名称"
  type        = string
  default     = "mgmt"
}

variable "owner" {
  description = "项目所有者"
  type        = string
  default     = "ben"
}

# Which provider modules to instantiate (see environments/*/main.tf)
variable "cloud_providers" {
  description = "要启用的云服务商列表"
  type        = list(string)
  default     = ["oracle"]
}

variable "vpc_cidr" {
  description = "VPC CIDR 块"
  type        = string
  default     = "10.0.0.0/16"
}

variable "availability_zones" {
  description = "可用区列表"
  type        = list(string)
  default     = ["a", "b"]
}

variable "common_tags" {
  description = "通用标签"
  type        = map(string)
  default = {
    Environment = "dev"
    Project     = "mgmt"
    ManagedBy   = "opentofu"
  }
}

# Oracle Cloud credentials/configuration (API-key auth)
variable "oci_config" {
  description = "Oracle Cloud 配置"
  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
    compartment_ocid = optional(string)
  })
  default = {
    tenancy_ocid     = ""
    user_ocid        = ""
    fingerprint      = ""
    private_key_path = ""
    region           = "ap-seoul-1"
    compartment_ocid = ""
  }
  # FIX: this object carries credentials and was the only credential variable
  # not marked sensitive; the shared variables.tf marks the same object
  # sensitive. Keeps it out of plan/apply output.
  sensitive = true
}

# Huawei Cloud credentials (AK/SK)
variable "huawei_config" {
  description = "华为云配置"
  type = object({
    access_key = string
    secret_key = string
    region     = string
    project_id = optional(string)
  })
  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
    project_id = ""
  }
  sensitive = true
}

# Google Cloud configuration
variable "gcp_config" {
  description = "Google Cloud 配置"
  type = object({
    project_id       = string
    region           = string
    zone             = string
    credentials_file = string
  })
  default = {
    project_id       = ""
    region           = "asia-northeast3"
    zone             = "asia-northeast3-a"
    credentials_file = ""
  }
}

# AWS credentials
variable "aws_config" {
  description = "AWS 配置"
  type = object({
    region     = string
    access_key = string
    secret_key = string
  })
  default = {
    region     = "ap-northeast-2"
    access_key = ""
    secret_key = ""
  }
  sensitive = true
}

# DigitalOcean credentials
variable "do_config" {
  description = "DigitalOcean 配置"
  type = object({
    token  = string
    region = string
  })
  default = {
    token  = ""
    region = "sgp1"
  }
  sensitive = true
}

View File

@ -0,0 +1,144 @@
# Huawei Cloud provider module: VPC, public subnets, security group and EIPs.
terraform {
  required_providers {
    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }
  }
}

# Provider authentication (AK/SK from the caller's huawei_config)
provider "huaweicloud" {
  access_key = var.huawei_config.access_key
  secret_key = var.huawei_config.secret_key
  region     = var.huawei_config.region
}

# Availability zones in the configured region
data "huaweicloud_availability_zones" "zones" {}

# Latest Ubuntu 22.04 server image
data "huaweicloud_images_image" "ubuntu" {
  name        = "Ubuntu 22.04 server 64bit"
  most_recent = true
}

# VPC
resource "huaweicloud_vpc" "main" {
  name = "${var.project_name}-${var.environment}-vpc"
  cidr = var.vpc_cidr
  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-vpc"
  })
}

# Public subnets — one /24 per availability zone, carved from the VPC CIDR
resource "huaweicloud_vpc_subnet" "public" {
  count      = length(var.availability_zones)
  name       = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
  cidr       = cidrsubnet(var.vpc_cidr, 8, count.index)
  gateway_ip = cidrhost(cidrsubnet(var.vpc_cidr, 8, count.index), 1) # first usable address
  vpc_id     = huaweicloud_vpc.main.id
  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
    Type = "public"
  })
}

# Security group
resource "huaweicloud_networking_secgroup" "main" {
  name        = "${var.project_name}-${var.environment}-sg"
  description = "Security group for ${var.project_name} ${var.environment}"
  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-sg"
  })
}

# Ingress — SSH open to the world.
# NOTE(review): 0.0.0.0/0 on port 22 — consider restricting to admin CIDRs.
resource "huaweicloud_networking_secgroup_rule" "ssh" {
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 22
  port_range_max    = 22
  remote_ip_prefix  = "0.0.0.0/0"
  security_group_id = huaweicloud_networking_secgroup.main.id
}

# Ingress — HTTP
resource "huaweicloud_networking_secgroup_rule" "http" {
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 80
  port_range_max    = 80
  remote_ip_prefix  = "0.0.0.0/0"
  security_group_id = huaweicloud_networking_secgroup.main.id
}

# Ingress — HTTPS
resource "huaweicloud_networking_secgroup_rule" "https" {
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  port_range_min    = 443
  port_range_max    = 443
  remote_ip_prefix  = "0.0.0.0/0"
  security_group_id = huaweicloud_networking_secgroup.main.id
}

# Elastic IPs — two in production, one elsewhere; bandwidth scales likewise
resource "huaweicloud_vpc_eip" "main" {
  count = var.environment == "production" ? 2 : 1
  publicip {
    type = "5_bgp"
  }
  bandwidth {
    name        = "${var.project_name}-${var.environment}-bandwidth-${count.index}"
    size        = var.environment == "production" ? 10 : 5 # Mbit/s
    share_type  = "PER"
    charge_mode = "traffic" # pay per traffic, not per bandwidth
  }
  tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-eip-${count.index}"
  })
}

# Outputs
output "vpc_id" {
  description = "VPC ID"
  value       = huaweicloud_vpc.main.id
}

output "subnet_ids" {
  description = "子网 ID 列表"
  value       = huaweicloud_vpc_subnet.public[*].id
}

output "security_group_id" {
  description = "安全组 ID"
  value       = huaweicloud_networking_secgroup.main.id
}

output "availability_zones" {
  description = "可用区列表"
  value       = data.huaweicloud_availability_zones.zones.names
}

output "ubuntu_image_id" {
  description = "Ubuntu 镜像 ID"
  value       = data.huaweicloud_images_image.ubuntu.id
}

output "eip_addresses" {
  description = "弹性IP地址列表"
  value       = huaweicloud_vpc_eip.main[*].address
}

View File

@ -0,0 +1,160 @@
# Oracle Cloud Infrastructure provider module: VCN, internet gateway, routing,
# security list and public subnets.
terraform {
  required_providers {
    oci = {
      source  = "oracle/oci"
      version = "~> 5.0"
    }
  }
}

# OCI provider authentication (API-key auth from the caller's oci_config)
provider "oci" {
  tenancy_ocid     = var.oci_config.tenancy_ocid
  user_ocid        = var.oci_config.user_ocid
  fingerprint      = var.oci_config.fingerprint
  private_key_path = var.oci_config.private_key_path
  region           = var.oci_config.region
}

# Availability domains of the tenancy's home region
data "oci_identity_availability_domains" "ads" {
  compartment_id = var.oci_config.tenancy_ocid
}

# Latest Canonical Ubuntu 22.04 image compatible with the free-tier shape
data "oci_core_images" "ubuntu_images" {
  compartment_id           = var.oci_config.tenancy_ocid
  operating_system         = "Canonical Ubuntu"
  operating_system_version = "22.04"
  shape                    = "VM.Standard.E2.1.Micro"
  sort_by                  = "TIMECREATED"
  sort_order               = "DESC"
}

# VCN (Virtual Cloud Network)
# NOTE(review): every resource below is created in the ROOT (tenancy)
# compartment — the caller's config appears to define a compartment_ocid;
# confirm whether a dedicated compartment should be used instead.
resource "oci_core_vcn" "main" {
  compartment_id = var.oci_config.tenancy_ocid
  cidr_blocks    = [var.vpc_cidr]
  display_name   = "${var.project_name}-${var.environment}-vcn"
  dns_label      = "${var.project_name}${var.environment}"
  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-vcn"
  })
}

# Internet gateway
resource "oci_core_internet_gateway" "main" {
  compartment_id = var.oci_config.tenancy_ocid
  vcn_id         = oci_core_vcn.main.id
  display_name   = "${var.project_name}-${var.environment}-igw"
  enabled        = true
  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-igw"
  })
}

# Route table — default route to the internet gateway
resource "oci_core_route_table" "main" {
  compartment_id = var.oci_config.tenancy_ocid
  vcn_id         = oci_core_vcn.main.id
  display_name   = "${var.project_name}-${var.environment}-rt"
  route_rules {
    destination       = "0.0.0.0/0"
    destination_type  = "CIDR_BLOCK"
    network_entity_id = oci_core_internet_gateway.main.id
  }
  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-rt"
  })
}

# Security list: all egress allowed; SSH/HTTP/HTTPS ingress open to the world.
# NOTE(review): 0.0.0.0/0 on port 22 — consider restricting to admin CIDRs.
resource "oci_core_security_list" "main" {
  compartment_id = var.oci_config.tenancy_ocid
  vcn_id         = oci_core_vcn.main.id
  display_name   = "${var.project_name}-${var.environment}-sl"
  # Egress — allow everything
  egress_security_rules {
    destination = "0.0.0.0/0"
    protocol    = "all"
  }
  # Ingress - SSH
  ingress_security_rules {
    protocol = "6" # TCP
    source   = "0.0.0.0/0"
    tcp_options {
      min = 22
      max = 22
    }
  }
  # Ingress - HTTP
  ingress_security_rules {
    protocol = "6" # TCP
    source   = "0.0.0.0/0"
    tcp_options {
      min = 80
      max = 80
    }
  }
  # Ingress - HTTPS
  ingress_security_rules {
    protocol = "6" # TCP
    source   = "0.0.0.0/0"
    tcp_options {
      min = 443
      max = 443
    }
  }
  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-sl"
  })
}

# Public subnets — one /24 per availability zone, carved from the VCN CIDR
resource "oci_core_subnet" "public" {
  count             = length(var.availability_zones)
  compartment_id    = var.oci_config.tenancy_ocid
  vcn_id            = oci_core_vcn.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 8, count.index)
  display_name      = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
  dns_label         = "public${var.availability_zones[count.index]}"
  route_table_id    = oci_core_route_table.main.id
  security_list_ids = [oci_core_security_list.main.id]
  freeform_tags = merge(var.common_tags, {
    Name = "${var.project_name}-${var.environment}-public-${var.availability_zones[count.index]}"
    Type = "public"
  })
}

# Outputs
output "vcn_id" {
  description = "VCN ID"
  value       = oci_core_vcn.main.id
}

output "subnet_ids" {
  description = "子网 ID 列表"
  value       = oci_core_subnet.public[*].id
}

output "availability_domains" {
  description = "可用域列表"
  value       = data.oci_identity_availability_domains.ads.availability_domains[*].name
}

# Newest matching image (the data source is sorted TIMECREATED DESC)
output "ubuntu_image_id" {
  description = "Ubuntu 镜像 ID"
  value       = data.oci_core_images.ubuntu_images.images[0].id
}

View File

@ -0,0 +1,39 @@
# Shared outputs consumed by the environment stacks.

# Deployment context
output "environment" {
  description = "当前部署环境"
  value       = var.environment
}

output "project_name" {
  description = "项目名称"
  value       = var.project_name
}

# Network configuration
output "vpc_cidr" {
  description = "VPC CIDR 块"
  value       = var.vpc_cidr
}

# Common resource tags with the environment appended.
# FIX: removed Timestamp = timestamp() from the merge — timestamp() changes on
# every plan/apply, so every resource tagged with these values showed a
# perpetual diff and was rewritten on each run.
output "common_tags" {
  description = "通用资源标签"
  value = merge(var.common_tags, {
    Environment = var.environment
  })
}

# Enabled cloud providers
output "enabled_providers" {
  description = "启用的云服务商列表"
  value       = var.cloud_providers
}

# Instance sizing for the current environment
output "instance_types" {
  description = "当前环境的实例类型配置"
  value       = var.instance_types[var.environment]
}

View File

@ -0,0 +1,169 @@
# Shared input variables used by every environment stack.

# Deployment context
variable "environment" {
  description = "部署环境 (dev, staging, production)"
  type        = string
  validation {
    condition     = contains(["dev", "staging", "production"], var.environment)
    error_message = "环境必须是 dev, staging, 或 production 之一。"
  }
}

variable "project_name" {
  description = "项目名称"
  type        = string
  default     = "mgmt"
}

variable "owner" {
  description = "资源所有者"
  type        = string
  default     = "ben"
}

# Network configuration
variable "vpc_cidr" {
  description = "VPC CIDR 块"
  type        = string
  default     = "10.0.0.0/16"
}

variable "availability_zones" {
  description = "可用区列表"
  type        = list(string)
  default     = ["a", "b", "c"]
}

# Per-environment instance sizing (keyed by environment name)
variable "instance_types" {
  description = "不同环境的实例类型"
  type = map(object({
    web   = string
    app   = string
    db    = string
    cache = string
  }))
  default = {
    dev = {
      web   = "t3.micro"
      app   = "t3.small"
      db    = "t3.micro"
      cache = "t3.micro"
    }
    staging = {
      web   = "t3.small"
      app   = "t3.medium"
      db    = "t3.small"
      cache = "t3.small"
    }
    production = {
      web   = "t3.medium"
      app   = "t3.large"
      db    = "t3.medium"
      cache = "t3.medium"
    }
  }
}

# Common tags applied to all resources
variable "common_tags" {
  description = "通用标签"
  type        = map(string)
  default = {
    Project   = "mgmt"
    ManagedBy = "opentofu"
    Owner     = "ben"
  }
}

# Cloud providers enabled by default
variable "cloud_providers" {
  description = "启用的云服务商"
  type        = list(string)
  default     = ["oracle", "huawei", "google", "digitalocean", "aws"]
}

# Oracle Cloud credentials (API-key auth)
variable "oci_config" {
  description = "Oracle Cloud 配置"
  type = object({
    tenancy_ocid     = string
    user_ocid        = string
    fingerprint      = string
    private_key_path = string
    region           = string
  })
  default = {
    tenancy_ocid     = ""
    user_ocid        = ""
    fingerprint      = ""
    private_key_path = "~/.oci/oci_api_key.pem"
    region           = "ap-seoul-1"
  }
  sensitive = true
}

# Huawei Cloud credentials (AK/SK)
variable "huawei_config" {
  description = "华为云配置"
  type = object({
    access_key = string
    secret_key = string
    region     = string
  })
  default = {
    access_key = ""
    secret_key = ""
    region     = "cn-north-4"
  }
  sensitive = true
}

# Google Cloud credentials
variable "gcp_config" {
  description = "Google Cloud 配置"
  type = object({
    project_id  = string
    region      = string
    zone        = string
    credentials = string
  })
  default = {
    project_id  = ""
    region      = "asia-northeast3"
    zone        = "asia-northeast3-a"
    credentials = ""
  }
  sensitive = true
}

# DigitalOcean credentials
variable "do_config" {
  description = "DigitalOcean 配置"
  type = object({
    token  = string
    region = string
  })
  default = {
    token  = ""
    region = "sgp1"
  }
  sensitive = true
}

# AWS credentials
# NOTE(review): default region here is ap-northeast-1 while the dev example
# tfvars uses ap-northeast-2 — confirm which is intended.
variable "aws_config" {
  description = "AWS 配置"
  type = object({
    access_key = string
    secret_key = string
    region     = string
  })
  default = {
    access_key = ""
    secret_key = ""
    region     = "ap-northeast-1"
  }
  sensitive = true
}

View File

@ -0,0 +1,57 @@
# OpenTofu version and provider pins shared by all stacks.
terraform {
  required_version = ">= 1.6"
  required_providers {
    # Oracle Cloud Infrastructure
    oci = {
      source  = "oracle/oci"
      version = "~> 5.0"
    }
    # Huawei Cloud
    huaweicloud = {
      source  = "huaweicloud/huaweicloud"
      version = "~> 1.60"
    }
    # Google Cloud Platform
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
    # DigitalOcean
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }
    # Amazon Web Services
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # Utility providers
    random = {
      source  = "hashicorp/random"
      version = "~> 3.1"
    }
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
    local = {
      source  = "hashicorp/local"
      version = "~> 2.1"
    }
  }
  # Local state backend — for team use, switch to a remote backend with
  # locking (S3, GCS, etc.) so state is shared and protected.
  backend "local" {
    path = "terraform.tfstate"
  }
}

View File

@ -1,45 +0,0 @@
# 密钥信息
## SSH 公钥 (Ed25519)
```
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMSUUfma8FKEFvH8Nq65XM2PZ9kitfgv1q727cKV9y5Z houzhongxu@seekkey.tech
```
## GPG 公钥 (Ed25519/Curve25519)
```
-----BEGIN PGP PUBLIC KEY BLOCK-----
mDMEaMt8oxYJKwYBBAHaRw8BAQdA12gQlBUibUxlktq4lg2WE4aQYF+lcBPMhcZY
Dgi4rEC0JUhvdSBaaG9uZ3h1IDxob3V6aG9uZ3h1QHNlZWtrZXkudGVjaD6IjwQT
FggAOBYhBDKnNH301qf6d+MeO7RL6xQ48bRvBQJoy3yjAhsjBQsJCAcCBhUKCQgL
AgQWAgMBAh4BAheAAAoJELRL6xQ48bRv+vIBAO8T89J4o+jT+gyXjnSlcYLLTX8J
2h4Pjn+WSD6JSMd6APjGuQOplQEGsK6FYPLQWQJnBEYaP1uZXOqnHo3tybEPuDgE
aMt8oxIKKwYBBAGXVQEFAQEHQG0IjWKNJ+KhGFz7Jav8kgzv2Y/o0w/LAN+wwI/a
rGpPAwEIB4h4BBgWCAAgFiEEMqc0ffTWp/p34x47tEvrFDjxtG8FAmjLfKMCGwwA
CgkQtEvrFDjxtG9s3QD+JyeAHkoDIkVsc5wrRztZYc+HGNzGV6X0GWAqWSOW/Q8B
AOhtF5Xgf4j0pKkuqKbGrpiPtBuK5T7Q+QtOz3jOSDwG
=7qIa
-----END PGP PUBLIC KEY BLOCK-----
```
## 密钥信息
- **用户名**: Houzhong Xu
- **邮箱**: houzhongxu@seekkey.tech
- **GPG Key ID**: 32A7347DF4D6A7FA77E31E3BB44BEB1438F1B46F
- **SSH 指纹**: SHA256:/IbwMngZcfNrlMqeFbyeAH8KTrhc43+E1Z22IEqIKss
## 在 Gitea 中添加密钥
### SSH 密钥
1. 登录 Gitea: https://gitea.tailnet-68f9.ts.net/
2. Settings → SSH / GPG Keys → Add Key
3. 粘贴上面的 SSH 公钥
### GPG 密钥
1. 在同一页面点击 "Add GPG Key"
2. 粘贴上面的 GPG 公钥
## Git 配置
- 已配置 Git 使用 GPG 签名提交
- 用户名: Houzhong Xu
- 邮箱: houzhongxu@seekkey.tech

View File

@ -1,131 +0,0 @@
# Swarm monitoring stack: Prometheus + Grafana behind Traefik, plus per-node
# node-exporter and cAdvisor (deployed global).
version: '3.8'
services:
  # Prometheus metrics collection
  prometheus:
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'  # allows config reload via HTTP POST
    networks:
      - traefik-public
      - monitoring
    configs:
      - source: prometheus-config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - prometheus-data:/prometheus
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        - traefik.http.routers.prometheus.rule=Host(`prometheus.local`)
        - traefik.http.routers.prometheus.entrypoints=web
        - traefik.http.services.prometheus.loadbalancer.server.port=9090
      restart_policy:
        condition: on-failure

  # Grafana dashboards
  # NOTE(review): admin password is hard-coded here — move it to a Docker
  # secret or environment file before production use.
  grafana:
    image: grafana/grafana:latest
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin123
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - traefik-public
      - monitoring
    volumes:
      - grafana-data:/var/lib/grafana
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        - traefik.http.routers.grafana.rule=Host(`grafana.local`)
        - traefik.http.routers.grafana.entrypoints=web
        - traefik.http.services.grafana.loadbalancer.server.port=3000
      restart_policy:
        condition: on-failure

  # Node Exporter (host/system metrics, one instance per node)
  node-exporter:
    image: prom/node-exporter:latest
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring
    deploy:
      mode: global  # one task on every Swarm node
      restart_policy:
        condition: on-failure

  # cAdvisor (per-container metrics, one instance per node)
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - monitoring
    deploy:
      mode: global
      restart_policy:
        condition: on-failure

networks:
  # Shared ingress network created by the Traefik stack
  traefik-public:
    external: true
  monitoring:
    driver: overlay

volumes:
  prometheus-data:
  grafana-data:

configs:
  # Prometheus scrape configuration; node-exporter and cAdvisor targets are
  # discovered via Swarm's tasks.<service> DNS records.
  prometheus-config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s
      scrape_configs:
        # Traefik 指标
        - job_name: 'traefik'
          static_configs:
            - targets: ['traefik:8080']
          metrics_path: /metrics
        # Prometheus 自身
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']
        # Node Exporter
        - job_name: 'node-exporter'
          dns_sd_configs:
            - names:
                - 'tasks.node-exporter'
              type: 'A'
              port: 9100
        # cAdvisor
        - job_name: 'cadvisor'
          dns_sd_configs:
            - names:
                - 'tasks.cadvisor'
              type: 'A'
              port: 8080

View File

@ -1,260 +0,0 @@
#!/bin/bash
# Operations Manager - convenience wrapper around the Ansible playbooks.
# Usage: ./ops-manager.sh [action] [target] [options]
set -e

# ANSI color definitions for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Paths (relative to this script's location)
ANSIBLE_DIR="$(dirname "$0")/../ansible"
INVENTORY="$ANSIBLE_DIR/inventory.ini"

# Available operations: action name -> playbook file
declare -A OPERATIONS=(
    ["update"]="system-update.yml"
    ["cleanup"]="system-cleanup.yml"
    ["health"]="service-health-check.yml"
    ["security"]="security-hardening.yml"
    ["docker"]="docker-management.yml"
    ["network"]="network-connectivity.yml"
    ["cert"]="certificate-management.yml"
    ["toolkit"]="ops-toolkit.yml"
    ["cloud"]="cloud-providers-update.yml"
)

# Available target groups: CLI name -> inventory host pattern
declare -A TARGETS=(
    ["all"]="all"
    ["lxc"]="lxc"
    ["alpine"]="alpine"
    ["proxmox"]="proxmox"
    ["armbian"]="armbian"
    ["hcp"]="hcp"
    ["feiniu"]="feiniu"
    ["dev"]="dev"
    ["oci-kr"]="oci_kr"
    ["oci-us"]="oci_us"
    ["huawei"]="huawei"
    ["google"]="google"
    ["aws"]="aws"
    ["germany"]="germany"
)

# Print usage, listing every operation and target.
show_help() {
    echo -e "${CYAN}🛠️ Operations Manager - 运维脚本管理工具${NC}"
    echo ""
    echo -e "${YELLOW}使用方法:${NC}"
    echo " $0 [操作] [目标] [选项]"
    echo ""
    echo -e "${YELLOW}可用操作:${NC}"
    for op in "${!OPERATIONS[@]}"; do
        echo -e " ${GREEN}$op${NC} - ${OPERATIONS[$op]}"
    done
    echo ""
    echo -e "${YELLOW}可用目标:${NC}"
    for target in "${!TARGETS[@]}"; do
        echo -e " ${BLUE}$target${NC} - ${TARGETS[$target]}"
    done
    echo ""
    echo -e "${YELLOW}示例:${NC}"
    echo -e " $0 ${GREEN}update${NC} ${BLUE}lxc${NC} # 更新 LXC 容器"
    echo -e " $0 ${GREEN}cleanup${NC} ${BLUE}all${NC} # 清理所有服务器"
    echo -e " $0 ${GREEN}health${NC} ${BLUE}proxmox${NC} # 检查 Proxmox 健康状态"
    echo -e " $0 ${GREEN}docker${NC} ${BLUE}lxc${NC} # 管理 LXC 中的 Docker"
    echo -e " $0 ${GREEN}toolkit${NC} ${BLUE}germany${NC} # 运行德国服务器工具包"
    echo ""
    echo -e "${YELLOW}选项:${NC}"
    echo -e " ${PURPLE}--dry-run${NC} 仅显示将要执行的命令"
    echo -e " ${PURPLE}--verbose${NC} 显示详细输出"
    echo -e " ${PURPLE}--check${NC} 检查模式(不做实际更改)"
    echo -e " ${PURPLE}--help${NC} 显示此帮助信息"
}

# Check prerequisites (ansible binary, inventory file) and print the host
# groups discovered in the inventory with their host counts.
show_status() {
    echo -e "${CYAN}📊 系统状态概览${NC}"
    echo ""
    # Verify Ansible is installed
    if command -v ansible >/dev/null 2>&1; then
        echo -e "${GREEN}✅ Ansible 已安装${NC}"
    else
        echo -e "${RED}❌ Ansible 未安装${NC}"
        exit 1
    fi
    # Verify the inventory file exists
    if [ -f "$INVENTORY" ]; then
        echo -e "${GREEN}✅ Inventory 文件存在${NC}"
        echo -e " 📁 路径: $INVENTORY"
    else
        echo -e "${RED}❌ Inventory 文件不存在${NC}"
        exit 1
    fi
    # List host groups and per-group host counts
    echo ""
    echo -e "${YELLOW}📋 可用主机组:${NC}"
    ansible-inventory -i "$INVENTORY" --list | jq -r 'keys[]' | grep -v "_meta" | sort | while read group; do
        count=$(ansible-inventory -i "$INVENTORY" --list | jq -r ".[\"$group\"].hosts // [] | length")
        echo -e " ${BLUE}$group${NC}: $count 台主机"
    done
}

# Resolve the operation/target to a playbook and host pattern, build the
# ansible-playbook command, confirm with the operator, and run it.
run_ansible() {
    local operation=$1
    local target=$2
    local options=$3
    local playbook="${OPERATIONS[$operation]}"
    local host_pattern="${TARGETS[$target]}"
    if [ -z "$playbook" ]; then
        echo -e "${RED}❌ 未知操作: $operation${NC}"
        show_help
        exit 1
    fi
    if [ -z "$host_pattern" ]; then
        echo -e "${RED}❌ 未知目标: $target${NC}"
        show_help
        exit 1
    fi
    local ansible_cmd="ansible-playbook -i $INVENTORY $ANSIBLE_DIR/$playbook --limit $host_pattern"
    # Append passthrough options
    if [[ "$options" == *"--check"* ]]; then
        ansible_cmd="$ansible_cmd --check"
    fi
    if [[ "$options" == *"--verbose"* ]]; then
        ansible_cmd="$ansible_cmd -v"
    fi
    echo -e "${CYAN}🚀 执行操作${NC}"
    echo -e "操作: ${GREEN}$operation${NC} ($playbook)"
    echo -e "目标: ${BLUE}$target${NC} ($host_pattern)"
    echo -e "命令: ${PURPLE}$ansible_cmd${NC}"
    echo ""
    if [[ "$options" == *"--dry-run"* ]]; then
        echo -e "${YELLOW}🔍 DRY RUN 模式 - 仅显示命令,不执行${NC}"
        return 0
    fi
    # Interactive confirmation before any change is made
    read -p "确认执行? (y/N): " -n 1 -r
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${YELLOW}⏹️ 操作已取消${NC}"
        exit 0
    fi
    echo -e "${GREEN}▶️ 开始执行...${NC}"
    # NOTE(review): eval on a built-up command string — safe while INVENTORY
    # and playbook names contain no shell metacharacters; confirm paths.
    eval $ansible_cmd
}

# Menu-driven selection of operation, target, and execution options; ends by
# delegating to run_ansible.
interactive_mode() {
    echo -e "${CYAN}🎯 交互式运维管理${NC}"
    echo ""
    # Choose an operation
    echo -e "${YELLOW}选择操作:${NC}"
    local ops=($(printf '%s\n' "${!OPERATIONS[@]}" | sort))
    for i in "${!ops[@]}"; do
        echo -e " $((i+1)). ${GREEN}${ops[i]}${NC} - ${OPERATIONS[${ops[i]}]}"
    done
    read -p "请选择操作 (1-${#ops[@]}): " op_choice
    if [[ ! "$op_choice" =~ ^[0-9]+$ ]] || [ "$op_choice" -lt 1 ] || [ "$op_choice" -gt "${#ops[@]}" ]; then
        echo -e "${RED}❌ 无效选择${NC}"
        exit 1
    fi
    local selected_op="${ops[$((op_choice-1))]}"
    # Choose a target
    echo ""
    echo -e "${YELLOW}选择目标:${NC}"
    local targets=($(printf '%s\n' "${!TARGETS[@]}" | sort))
    for i in "${!targets[@]}"; do
        echo -e " $((i+1)). ${BLUE}${targets[i]}${NC} - ${TARGETS[${targets[i]}]}"
    done
    read -p "请选择目标 (1-${#targets[@]}): " target_choice
    if [[ ! "$target_choice" =~ ^[0-9]+$ ]] || [ "$target_choice" -lt 1 ] || [ "$target_choice" -gt "${#targets[@]}" ]; then
        echo -e "${RED}❌ 无效选择${NC}"
        exit 1
    fi
    local selected_target="${targets[$((target_choice-1))]}"
    # Choose execution options
    echo ""
    echo -e "${YELLOW}选择执行选项:${NC}"
    echo -e " 1. ${GREEN}正常执行${NC}"
    echo -e " 2. ${PURPLE}检查模式${NC} (--check)"
    echo -e " 3. ${PURPLE}详细输出${NC} (--verbose)"
    echo -e " 4. ${PURPLE}仅显示命令${NC} (--dry-run)"
    read -p "请选择选项 (1-4): " option_choice
    local options=""
    case $option_choice in
        2) options="--check" ;;
        3) options="--verbose" ;;
        4) options="--dry-run" ;;
    esac
    run_ansible "$selected_op" "$selected_target" "$options"
}

# Entry point: no args -> interactive menu; otherwise dispatch on the first
# argument (help/status/interactive or action+target).
main() {
    # With no arguments, fall back to the interactive menu
    if [ $# -eq 0 ]; then
        interactive_mode
        exit 0
    fi
    case "$1" in
        --help|-h|help)
            show_help
            ;;
        --status|-s|status)
            show_status
            ;;
        --interactive|-i|interactive)
            interactive_mode
            ;;
        *)
            if [ $# -lt 2 ]; then
                echo -e "${RED}❌ 参数不足${NC}"
                show_help
                exit 1
            fi
            local operation=$1
            local target=$2
            local options="${@:3}"
            run_ansible "$operation" "$target" "$options"
            ;;
    esac
}

# Run the entry point with all CLI arguments
main "$@"

View File

@ -0,0 +1,467 @@
#!/bin/bash
# Gitea integration setup script.
# Initializes the local Git repository, writes .gitignore and Gitea Actions
# workflow files, then pushes the project to Gitea. Aborts on the first
# failing command (set -e).
set -e
echo "🔗 设置 Gitea 集成..."
# Configuration variables
GITEA_HOST="gitea"
GITEA_USER="ben"
GITEA_SSH_URL="git@${GITEA_HOST}"
REPO_NAME="mgmt"
GITEA_HTTP_URL="http://${GITEA_HOST}:3000"
# Check SSH connectivity: Gitea greets an authenticated key with a message
# containing "successfully authenticated" on a plain SSH connection.
echo "🔍 检查 Gitea SSH 连接..."
if ssh -o ConnectTimeout=5 -o BatchMode=yes "${GITEA_SSH_URL}" 2>&1 | grep -q "successfully authenticated"; then
echo "✅ SSH 连接正常"
else
echo "❌ SSH 连接失败,请检查:"
echo " 1. Gitea 服务是否运行"
echo " 2. SSH 密钥是否已添加到 Gitea"
echo " 3. 网络连接是否正常"
exit 1
fi
# Initialize the repository when this directory is not one yet.
if [ ! -d ".git" ]; then
echo "📦 初始化 Git 仓库..."
git init
git config user.name "${GITEA_USER}"
git config user.email "${GITEA_USER}@example.com"
else
echo "✅ Git 仓库已存在"
fi
# Ensure "origin" points at the Gitea instance; rewrite it when it currently
# targets a different host, add it when missing.
if git remote get-url origin >/dev/null 2>&1; then
CURRENT_ORIGIN=$(git remote get-url origin)
echo " 当前远程仓库: $CURRENT_ORIGIN"
if [[ "$CURRENT_ORIGIN" != *"${GITEA_HOST}"* ]]; then
echo "🔄 更新远程仓库地址..."
git remote set-url origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git"
fi
else
echo " 添加远程仓库..."
git remote add origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git"
fi
# Write the project .gitignore (overwritten unconditionally).
# NOTE(review): the bare ".git" entry near the bottom is unusual for a
# .gitignore — confirm it is intentional.
echo "📝 创建 .gitignore..."
cat > .gitignore << 'EOF'
# OpenTofu/Terraform
*.tfstate
*.tfstate.*
*.tfvars
!*.tfvars.example
.terraform/
.terraform.lock.hcl
crash.log
crash.*.log
# Ansible
*.retry
.vault_pass
host_vars/*/vault.yml
group_vars/*/vault.yml
# Docker
.env
docker-compose.override.yml
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
# Logs
*.log
logs/
# Temporary files
tmp/
temp/
.tmp/
# Backup files
backup-*/
*.bak
# Secrets
secrets/
*.pem
*.key
*.crt
!*.example.*
# Node modules (if any)
node_modules/
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.venv/
pip-log.txt
pip-delete-this-directory.txt
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
# Local development
.local/
local-*
EOF
# Create the Gitea Actions workflow files.
# Fix: ensure the target directory exists first — without it,
# "cat > .gitea/workflows/..." fails and set -e aborts the whole script
# on a fresh checkout.
echo "🔄 创建 Gitea Actions 工作流..."
mkdir -p .gitea/workflows
# Infrastructure CI/CD: validate on pushes/PRs touching infrastructure/,
# plan on pull requests, apply on pushes to main.
cat > .gitea/workflows/infrastructure.yml << 'EOF'
name: Infrastructure CI/CD
on:
  push:
    branches: [ main, develop ]
    paths:
      - 'infrastructure/**'
      - '.gitea/workflows/infrastructure.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'infrastructure/**'
jobs:
  validate:
    runs-on: ubuntu-latest
    name: Validate Infrastructure
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: 1.10.6
      - name: Validate OpenTofu configurations
        run: |
          for dir in infrastructure/providers/*/; do
            if [ -d "$dir" ]; then
              echo "Validating $dir"
              cd "$dir"
              tofu init -backend=false
              tofu validate
              cd - > /dev/null
            fi
          done
      - name: Check formatting
        run: |
          tofu fmt -check -recursive infrastructure/
      - name: Security scan
        run: |
          # 这里可以添加 tfsec 或 checkov 扫描
          echo "Security scan placeholder"
  plan:
    runs-on: ubuntu-latest
    name: Plan Infrastructure
    needs: validate
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: 1.10.6
      - name: Plan infrastructure changes
        run: |
          cd infrastructure/environments/dev
          tofu init
          tofu plan -var-file="terraform.tfvars" -out=tfplan
        env:
          # 这里需要配置云服务商的环境变量
          TF_VAR_environment: dev
  apply:
    runs-on: ubuntu-latest
    name: Apply Infrastructure
    needs: validate
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: 1.10.6
      - name: Apply infrastructure changes
        run: |
          cd infrastructure/environments/dev
          tofu init
          tofu apply -var-file="terraform.tfvars" -auto-approve
        env:
          TF_VAR_environment: dev
EOF
# Application deployment workflow: run an Ansible syntax check, then deploy
# using the environment-specific inventory (defaults to "dev" when triggered
# by push rather than workflow_dispatch).
cat > .gitea/workflows/deploy.yml << 'EOF'
name: Application Deployment
on:
  push:
    branches: [ main ]
    paths:
      - 'configuration/**'
      - 'containers/**'
      - '.gitea/workflows/deploy.yml'
  workflow_dispatch:
    inputs:
      environment:
        description: 'Target environment'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - staging
          - production
jobs:
  ansible-check:
    runs-on: ubuntu-latest
    name: Ansible Syntax Check
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install Ansible
        run: |
          pip install ansible ansible-core
          ansible-galaxy collection install community.general
          ansible-galaxy collection install ansible.posix
          ansible-galaxy collection install community.docker
      - name: Ansible syntax check
        run: |
          cd configuration
          for playbook in playbooks/*/*.yml; do
            if [ -f "$playbook" ]; then
              echo "Checking $playbook"
              ansible-playbook --syntax-check "$playbook"
            fi
          done
  deploy:
    runs-on: ubuntu-latest
    name: Deploy Applications
    needs: ansible-check
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install Ansible
        run: |
          pip install ansible ansible-core
          ansible-galaxy collection install community.general
          ansible-galaxy collection install ansible.posix
          ansible-galaxy collection install community.docker
      - name: Deploy applications
        run: |
          cd configuration
          ENV="${{ github.event.inputs.environment || 'dev' }}"
          ansible-playbook -i "inventories/${ENV}/inventory.ini" playbooks/bootstrap/main.yml
        env:
          ANSIBLE_HOST_KEY_CHECKING: False
EOF
# Docker build workflow: build and push an image for each application
# Dockerfile, then (placeholder step) deploy to the Swarm. Registry
# credentials come from repository secrets.
cat > .gitea/workflows/docker.yml << 'EOF'
name: Docker Build and Deploy
on:
  push:
    branches: [ main ]
    paths:
      - 'containers/**'
      - 'Dockerfile*'
      - '.gitea/workflows/docker.yml'
jobs:
  build:
    runs-on: ubuntu-latest
    name: Build Docker Images
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ secrets.REGISTRY_URL }}
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
      - name: Build and push images
        run: |
          # 构建应用镜像
          for dockerfile in containers/applications/*/Dockerfile; do
            if [ -f "$dockerfile" ]; then
              app_name=$(basename $(dirname "$dockerfile"))
              echo "Building $app_name"
              docker build -t "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}" -f "$dockerfile" .
              docker push "${{ secrets.REGISTRY_URL }}/$app_name:${{ github.sha }}"
            fi
          done
  deploy-swarm:
    runs-on: ubuntu-latest
    name: Deploy to Docker Swarm
    needs: build
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Deploy to Swarm
        run: |
          # 这里可以通过 SSH 连接到 Swarm 管理节点进行部署
          echo "Deploy to Swarm placeholder"
EOF
# Create project configuration files.
echo "⚙️ 创建项目配置文件..."
# Gitea repository settings.
# Fix: branch_protection.main declared "require_signed_commits" twice — a
# duplicate YAML key (invalid per the spec; most parsers silently keep the
# last value). The duplicate is removed; the setting is declared once.
cat > .gitea/settings.yml << 'EOF'
# Gitea 仓库设置
repository:
  name: mgmt
  description: "基础设施管理项目 - OpenTofu + Ansible + Docker Swarm"
  website: ""
  default_branch: main

  # 功能开关
  has_issues: true
  has_wiki: true
  has_projects: true
  has_actions: true

  # 权限设置
  private: false
  allow_merge_commits: true
  allow_squash_merge: true
  allow_rebase_merge: true
  delete_branch_on_merge: true

# Actions 设置
actions:
  enabled: true
  allow_fork_pull_request_run: true
  default_actions_url: "https://gitea.com"

# 分支保护
branch_protection:
  main:
    enable_push: false
    enable_push_whitelist: true
    push_whitelist_usernames: ["ben"]
    require_signed_commits: false
    enable_merge_whitelist: true
    merge_whitelist_usernames: ["ben"]
    enable_status_check: true
    status_check_contexts: ["validate", "plan"]
    enable_approvals_whitelist: false
    approvals_whitelist_usernames: []
    block_on_rejected_reviews: true
    dismiss_stale_approvals: true
EOF
# Stage everything and commit only when something actually changed.
echo "📦 添加文件到 Git..."
git add .
# "git diff --staged --quiet" exits 0 when the index matches HEAD.
if git diff --staged --quiet; then
echo " 没有新的变更需要提交"
else
echo "💾 提交变更..."
git commit -m "feat: 集成 OpenTofu + Ansible + Gitea CI/CD
- 重构项目目录结构
- 添加 OpenTofu 多云支持
- 配置 Ansible 自动化部署
- 集成 Gitea Actions CI/CD 流水线
- 添加 Docker Swarm 管理
- 完善监控和安全配置"
fi
# Push; a failure usually means the repository has not been created on
# Gitea yet, so print manual instructions instead of aborting.
echo "🚀 推送到 Gitea..."
if git push -u origin main; then
echo "✅ 成功推送到 Gitea"
else
echo "⚠️ 推送失败,可能需要先在 Gitea 创建仓库"
echo " 请访问: ${GITEA_HTTP_URL}/repo/create"
echo " 创建名为 '${REPO_NAME}' 的仓库"
fi
echo ""
echo "🎉 Gitea 集成设置完成!"
echo ""
echo "📋 下一步操作:"
echo "1. 访问 Gitea: ${GITEA_HTTP_URL}/${GITEA_USER}/${REPO_NAME}"
echo "2. 配置 Actions Secrets (如果需要):"
echo " - REGISTRY_URL: 容器镜像仓库地址"
echo " - REGISTRY_USERNAME: 仓库用户名"
echo " - REGISTRY_PASSWORD: 仓库密码"
echo "3. 配置云服务商凭据 (通过 Secrets 或环境变量)"
echo "4. 测试 CI/CD 流水线"
echo ""
echo "🔗 有用的命令:"
echo " git status - 查看仓库状态"
echo " git log --oneline - 查看提交历史"
echo " git push - 推送变更"
echo " make help - 查看项目命令"

View File

@ -0,0 +1,242 @@
#!/bin/bash
# Gitea repository management script: connectivity check, repo init, code
# sync/pull, and history/branch helpers. Aborts on first error (set -e).
set -e
# Configuration
GITEA_HOST="gitea"
GITEA_USER="ben"
GITEA_HTTP_URL="http://${GITEA_HOST}:3000"
GITEA_SSH_URL="git@${GITEA_HOST}"
REPO_NAME="mgmt"
# ANSI color codes for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Print a message wrapped in the given ANSI color code ($1), resetting
# the terminal color afterwards.
print_message() {
    echo -e "${1}${2}${NC}"
}
# Probe Gitea over SSH; succeeds only when key authentication is accepted
# (Gitea's greeting contains "successfully authenticated").
check_ssh_connection() {
    print_message $BLUE "🔍 检查 Gitea SSH 连接..."
    local greeting
    greeting=$(ssh -o ConnectTimeout=5 -o BatchMode=yes "${GITEA_SSH_URL}" 2>&1) || true
    if grep -q "successfully authenticated" <<< "$greeting"; then
        print_message $GREEN "✅ SSH 连接正常"
        return 0
    fi
    print_message $RED "❌ SSH 连接失败"
    return 1
}
# Report repository health: init state, configured remote, current branch,
# and whether the working tree is clean.
check_repo_status() {
    print_message $BLUE "📊 检查仓库状态..."
    if [ ! -d ".git" ]; then
        print_message $RED "❌ 不是 Git 仓库"
        return
    fi
    print_message $GREEN "✅ Git 仓库已初始化"
    local origin_url
    if origin_url=$(git remote get-url origin 2>/dev/null); then
        print_message $GREEN "✅ 远程仓库: $origin_url"
    else
        print_message $YELLOW "⚠️ 未配置远程仓库"
    fi
    print_message $BLUE "📍 当前分支: $(git branch --show-current)"
    # "git status --porcelain" prints nothing when the tree is clean.
    if [ -z "$(git status --porcelain)" ]; then
        print_message $GREEN "✅ 工作目录干净"
    else
        print_message $YELLOW "⚠️ 有未提交的变更"
    fi
}
# Create the local repository (when needed) and wire up the Gitea remote.
init_repo() {
    print_message $BLUE "📦 初始化 Git 仓库..."
    if [ ! -d ".git" ]; then
        git init
        git config user.name "${GITEA_USER}"
        git config user.email "${GITEA_USER}@example.com"
        print_message $GREEN "✅ Git 仓库初始化完成"
    fi
    # Add the origin remote only when it is not configured yet.
    git remote get-url origin >/dev/null 2>&1 || {
        git remote add origin "${GITEA_SSH_URL}:${GITEA_USER}/${REPO_NAME}.git"
        print_message $GREEN "✅ 远程仓库配置完成"
    }
}
# Commit pending changes (after interactive confirmation) and push main to
# origin. Returns 1 when the user declines to commit or the push fails.
sync_code() {
print_message $BLUE "🔄 同步代码..."
# Detect unstaged or staged-but-uncommitted changes.
if ! git diff --quiet || ! git diff --staged --quiet; then
print_message $YELLOW "⚠️ 发现未提交的变更"
git status --short
read -p "是否提交这些变更? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
git add .
read -p "请输入提交消息: " commit_message
git commit -m "$commit_message"
print_message $GREEN "✅ 变更已提交"
else
print_message $YELLOW "⚠️ 跳过提交"
return 1
fi
fi
# Push to the remote repository.
if git push origin main; then
print_message $GREEN "✅ 代码推送成功"
else
print_message $RED "❌ 代码推送失败"
return 1
fi
}
# Pull the latest main branch from origin; returns 1 on failure.
pull_code() {
    print_message $BLUE "⬇️ 拉取最新代码..."
    if ! git pull origin main; then
        print_message $RED "❌ 代码拉取失败"
        return 1
    fi
    print_message $GREEN "✅ 代码拉取成功"
}
# Show the ten most recent commits as a decorated one-line graph.
show_history() {
    print_message $BLUE "📜 提交历史:"
    git log --graph --decorate --oneline -10
}
# List all local and remote-tracking branches.
show_branches() {
    print_message $BLUE "🌿 分支状态:"
    git branch --all
}
# Create and switch to a new branch; prompts for a name when none is given.
create_branch() {
    local new_branch="${1:-}"
    if [ -z "$new_branch" ]; then
        read -p "请输入分支名称: " new_branch
    fi
    if [ -z "$new_branch" ]; then
        print_message $RED "❌ 分支名称不能为空"
        return
    fi
    git checkout -b "$new_branch"
    print_message $GREEN "✅ 分支 '$new_branch' 创建成功"
}
# Switch to an existing branch; lists branches and prompts when no name given.
switch_branch() {
    local branch="${1:-}"
    if [ -z "$branch" ]; then
        print_message $BLUE "可用分支:"
        git branch -a
        read -p "请输入要切换的分支名称: " branch
    fi
    if [ -z "$branch" ]; then
        print_message $RED "❌ 分支名称不能为空"
        return
    fi
    git checkout "$branch"
    print_message $GREEN "✅ 已切换到分支 '$branch'"
}
# Print usage information ($0 expands inside the unquoted heredoc).
show_help() {
    cat << EOF
Gitea 仓库管理脚本

用法: $0 [命令]

命令:
 check 检查连接和仓库状态
 init 初始化仓库
 sync 同步代码到远程仓库
 pull 拉取最新代码
 history 查看提交历史
 branches 查看分支状态
 create-branch [name] 创建新分支
 switch-branch [name] 切换分支
 status 查看仓库状态
 help 显示帮助信息

示例:
 $0 check # 检查状态
 $0 sync # 同步代码
 $0 create-branch feature-x # 创建功能分支
EOF
}
# Dispatch the requested sub-command (defaults to "help" when absent).
main() {
    case "${1:-help}" in
        check)
            check_ssh_connection
            check_repo_status
            ;;
        init)          init_repo ;;
        sync)          sync_code ;;
        pull)          pull_code ;;
        history)       show_history ;;
        branches)      show_branches ;;
        create-branch) create_branch "$2" ;;
        switch-branch) switch_branch "$2" ;;
        status)        check_repo_status ;;
        help|--help|-h) show_help ;;
        *)
            print_message $RED "❌ 未知命令: $1"
            show_help
            exit 1
            ;;
    esac
}
# Run the dispatcher with all CLI arguments.
main "$@"

114
scripts/utilities/quick-start.sh Executable file
View File

@ -0,0 +1,114 @@
#!/bin/bash
# Quick-start script: verifies required tooling and configuration, then
# offers a menu of common make targets. Aborts on first error (set -e).
set -e
echo "🚀 欢迎使用基础设施管理平台!"
echo ""
# Fail with a hint when a required CLI tool is missing from PATH.
check_tool() {
if ! command -v "$1" &> /dev/null; then
echo "$1 未安装,请先运行 'make setup'"
return 1
fi
}
echo "🔍 检查必要工具..."
check_tool "tofu" || exit 1
check_tool "ansible" || exit 1
check_tool "docker" || exit 1
echo "✅ 工具检查通过"
echo ""
# First run: seed terraform.tfvars from the example template and ask the
# user to fill in cloud credentials before continuing.
CONFIG_FILE="infrastructure/environments/dev/terraform.tfvars"
if [ ! -f "$CONFIG_FILE" ]; then
echo "⚠️ 配置文件不存在,正在创建..."
cp "${CONFIG_FILE}.example" "$CONFIG_FILE"
echo "📝 请编辑配置文件: $CONFIG_FILE"
echo " 填入你的云服务商凭据后再次运行此脚本"
exit 1
fi
echo "✅ 配置文件存在"
echo ""
# Interactive menu: each entry maps to one (or, for option 7, a sequence
# of) make targets.
echo "请选择要执行的操作:"
echo "1) 初始化基础设施"
echo "2) 查看执行计划"
echo "3) 应用基础设施变更"
echo "4) 部署应用"
echo "5) 启动开发环境"
echo "6) 查看监控"
echo "7) 完整部署流程"
echo ""
read -p "请输入选项 (1-7): " choice
case $choice in
1)
echo "🏗️ 初始化基础设施..."
make init
;;
2)
echo "📋 查看执行计划..."
make plan
;;
3)
echo "🚀 应用基础设施变更..."
make apply
;;
4)
echo "📦 部署应用..."
make ansible-deploy
;;
5)
echo "🐳 启动开发环境..."
make docker-up
;;
6)
echo "📊 启动监控..."
make monitor
;;
7)
# Full pipeline: init -> plan -> (confirm) -> apply -> deploy.
echo "🎯 执行完整部署流程..."
echo ""
echo "步骤 1/4: 初始化基础设施..."
make init
echo ""
echo "步骤 2/4: 查看执行计划..."
make plan
echo ""
# Apply is destructive, so require explicit confirmation after plan.
read -p "是否继续应用基础设施变更? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
echo "步骤 3/4: 应用基础设施变更..."
make apply
echo ""
echo "步骤 4/4: 部署应用..."
make ansible-deploy
echo ""
echo "🎉 完整部署流程完成!"
else
echo " 部署流程已取消"
fi
;;
*)
echo "❌ 无效选项"
exit 1
;;
esac
echo ""
echo "🎉 操作完成!"
echo ""
echo "📋 有用的命令:"
echo " make help - 查看所有可用命令"
echo " make plan - 查看基础设施变更计划"
echo " make apply - 应用基础设施变更"
echo " make ansible-deploy - 部署应用"
echo " make monitor - 启动监控"
echo " make clean - 清理临时文件"
View File

@ -1,86 +0,0 @@
# Semaphore 项目配置指南
## 1. 访问 Semaphore Web 界面
- URL: http://your-server-ip:3000
- 用户名: `admin`
- 密码: `admin123`
## 2. 创建项目步骤
### 第一步:添加 Key Store (SSH 密钥或访问令牌)
1. 点击左侧菜单 "Key Store"
2. 点击 "New Key" 按钮
3. 填写信息:
- **Name**: `gitea-access-token`
- **Type**: 选择 "Login with password"
- **Username**: `ben`
- **Password**: `<你的 Gitea 访问令牌>`(注意:请勿将真实访问令牌写入文档;此处原先提交过一个真实 token,应立即在 Gitea 中吊销并重新生成)
### 第二步:添加 Repository
1. 点击左侧菜单 "Repositories"
2. 点击 "New Repository" 按钮
3. 填写信息:
- **Name**: `mgmt`
- **URL**: `https://gitea.tailnet-68f9.ts.net/ben/mgmt.git`
- **Branch**: `main`
- **Access Key**: 选择刚创建的 `gitea-access-token`
### 第三步:创建 Inventory
1. 点击左侧菜单 "Inventory"
2. 点击 "New Inventory" 按钮
3. 填写信息:
- **Name**: `servers`
- **User Credentials**: 选择或创建服务器访问凭据
- **Sudo Credentials**: 如果需要 sudo 权限,选择相应凭据
- **Type**: 选择 "Static"
- **Inventory**: 输入服务器信息,例如:
```
[servers]
localhost ansible_connection=local
# 或添加远程服务器
# server1 ansible_host=192.168.1.10 ansible_user=root
```
### 第四步:创建 Environment
1. 点击左侧菜单 "Environment"
2. 点击 "New Environment" 按钮
3. 填写信息:
- **Name**: `production`
- **JSON**: 可以留空或添加环境变量
### 第五步:创建 Task Template
1. 点击左侧菜单 "Task Templates"
2. 点击 "New Template" 按钮
3. 填写信息:
- **Name**: `System Update`
- **Playbook**: `ansible/system-update.yml`
- **Inventory**: 选择刚创建的 `servers`
- **Repository**: 选择 `mgmt`
- **Environment**: 选择 `production`
## 3. 运行任务
1. 在 "Task Templates" 页面找到 "System Update" 模板
2. 点击 "Run" 按钮
3. 确认设置后点击 "Run" 执行任务
## 4. 监控任务执行
- 在 "Tasks" 页面可以查看任务执行历史
- 点击具体任务可以查看详细日志
- 可以设置定时任务自动执行系统更新
## 项目文件说明
- `system-update.yml`: 主要的 Ansible playbook执行 apt update && apt upgrade
- `inventory.ini`: 服务器清单文件模板
- `ansible.cfg`: Ansible 配置文件
- `README.md`: 项目说明文档
## Git 仓库信息
- **仓库地址**: https://gitea.tailnet-68f9.ts.net/ben/mgmt
- **分支**: main
- **最新提交**: 包含 Ansible 脚本和 Semaphore 配置文件
- **Ansible 脚本路径**: `ansible/system-update.yml`

View File

@ -1,251 +0,0 @@
#!/bin/bash
# Traefik + Docker Swarm management script.
# Deploys, manages and monitors the Traefik integration in a Docker Swarm
# cluster. Aborts on first error (set -e).
set -e
# Directory containing this script; stack compose files are resolved
# relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Shared overlay network joined by Traefik and every exposed service.
NETWORK_NAME="traefik-public"
# ANSI color codes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Leveled, colorized logging helpers. printf '%b' interprets the escape
# sequences stored in the color variables, matching the original echo -e.
log_info()    { printf '%b[INFO]%b %b\n' "$BLUE" "$NC" "$1"; }
log_success() { printf '%b[SUCCESS]%b %b\n' "$GREEN" "$NC" "$1"; }
log_warning() { printf '%b[WARNING]%b %b\n' "$YELLOW" "$NC" "$1"; }
log_error()   { printf '%b[ERROR]%b %b\n' "$RED" "$NC" "$1"; }
# Verify that this node is part of an active Docker Swarm; exit otherwise.
check_swarm() {
    log_info "检查 Docker Swarm 状态..."
    if docker info | grep -q "Swarm: active"; then
        log_success "Docker Swarm 已激活"
        return
    fi
    log_error "Docker Swarm 未激活,请先初始化 Swarm 集群"
    echo "运行: docker swarm init"
    exit 1
}
# Ensure the shared attachable overlay network for Traefik exists.
# Fix: match the network name exactly — the previous plain "grep -q" over
# the full "docker network ls" table was a substring match and could be
# fooled by networks such as "traefik-public-test" (or the name appearing
# in another column).
create_network() {
    log_info "创建 Traefik 公共网络..."
    if docker network ls --format '{{.Name}}' | grep -qx "$NETWORK_NAME"; then
        log_warning "网络 $NETWORK_NAME 已存在"
    else
        docker network create --driver overlay --attachable "$NETWORK_NAME"
        log_success "网络 $NETWORK_NAME 创建成功"
    fi
}
# Shared helper: deploy a compose file from this script's directory as a
# named Swarm stack.
_deploy_stack_file() {
    docker stack deploy -c "$SCRIPT_DIR/$1" "$2"
}
# Deploy the Traefik reverse-proxy stack.
deploy_traefik() {
    log_info "部署 Traefik 服务..."
    _deploy_stack_file "traefik-swarm-stack.yml" traefik
    log_success "Traefik 部署完成"
}
# Deploy the demo application stack.
deploy_demo() {
    log_info "部署示例服务..."
    _deploy_stack_file "demo-services-stack.yml" demo
    log_success "示例服务部署完成"
}
# Deploy the monitoring stack.
deploy_monitoring() {
    log_info "部署监控服务..."
    _deploy_stack_file "monitoring-stack.yml" monitoring
    log_success "监控服务部署完成"
}
# Print the services of each stack; demo and monitoring are optional and
# report a placeholder line when not deployed.
show_status() {
    log_info "显示服务状态..."
    echo ""
    echo "=== Traefik Stack ==="
    docker stack services traefik
    echo ""
    echo "=== Demo Stack ==="
    if ! docker stack services demo 2>/dev/null; then
        echo "Demo stack not deployed"
    fi
    echo ""
    echo "=== Monitoring Stack ==="
    if ! docker stack services monitoring 2>/dev/null; then
        echo "Monitoring stack not deployed"
    fi
    echo ""
}
# Print the well-known local service URLs plus the /etc/hosts entries
# they require (quoted heredoc: no expansions).
show_urls() {
    log_info "服务访问地址:"
    cat << 'EOF'

🎛️ Traefik Dashboard: http://traefik.local:8080
🌐 Web App: http://app.local
🔌 API Service: http://api.local
📊 Monitor Service: http://monitor.local
📈 Prometheus: http://prometheus.local
📊 Grafana: http://grafana.local (admin/admin123)

💡 请确保在 /etc/hosts 中添加以下条目:
127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local
EOF
}
# Follow the logs of a Swarm service (defaults to traefik_traefik).
show_logs() {
    local svc="${1:-traefik_traefik}"
    log_info "显示 $svc 服务日志..."
    docker service logs -f "$svc"
}
# Scale <stack>_<service> to the requested replica count.
scale_service() {
    local stack_name="$1" svc_name="$2" replica_count="$3"
    # All three arguments are mandatory.
    if [[ -z "$stack_name" || -z "$svc_name" || -z "$replica_count" ]]; then
        log_error "用法: $0 scale <stack> <service> <replicas>"
        exit 1
    fi
    log_info "扩缩容 ${stack_name}_${svc_name}$replica_count 个副本..."
    docker service scale "${stack_name}_${svc_name}=$replica_count"
    log_success "扩缩容完成"
}
# Tear down all stacks after interactive confirmation, wait for Swarm to
# release resources, then remove the shared network. Removal errors are
# ignored (|| true) so partial deployments can still be cleaned up.
cleanup() {
log_warning "清理所有 Traefik 相关服务..."
read -p "确认删除所有服务? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
docker stack rm monitoring 2>/dev/null || true
docker stack rm demo 2>/dev/null || true
docker stack rm traefik 2>/dev/null || true
log_info "等待服务清理完成..."
# Stack removal is asynchronous; give Swarm time to detach containers
# before deleting the network, or the removal fails.
sleep 10
docker network rm "$NETWORK_NAME" 2>/dev/null || true
log_success "清理完成"
else
log_info "取消清理操作"
fi
}
# Rewrite the demo host entries in /etc/hosts (requires sudo).
# Takes a timestamped backup first, strips previously added entries with
# sed, then appends a fresh marker comment plus host line.
update_hosts() {
log_info "更新 /etc/hosts 文件..."
# Back up the original file
sudo cp /etc/hosts /etc/hosts.backup.$(date +%Y%m%d_%H%M%S)
# Remove old entries (marker comment and any line mentioning the demo hosts)
sudo sed -i '/# Traefik Swarm Demo/d' /etc/hosts
sudo sed -i '/traefik.local\|app.local\|api.local\|monitor.local\|prometheus.local\|grafana.local/d' /etc/hosts
# Append the fresh entries
echo "# Traefik Swarm Demo" | sudo tee -a /etc/hosts
echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" | sudo tee -a /etc/hosts
log_success "hosts 文件更新完成"
}
# Print usage information ($0 expands inside the unquoted heredoc).
show_help() {
    cat << EOF
Traefik + Docker Swarm 管理脚本

用法: $0 <command> [options]

命令:
 init 初始化环境 (创建网络)
 deploy 部署 Traefik
 deploy-demo 部署示例服务
 deploy-monitoring 部署监控服务
 deploy-all 部署所有服务
 status 显示服务状态
 urls 显示访问地址
 logs [service] 查看服务日志
 scale <stack> <service> <replicas> 扩缩容服务
 update-hosts 更新 hosts 文件
 cleanup 清理所有服务
 help 显示帮助信息

示例:
 $0 deploy-all # 部署所有服务
 $0 scale demo webapp 3 # 扩容 webapp 到 3 个副本
 $0 logs traefik_traefik # 查看 Traefik 日志
EOF
}
# Dispatch sub-commands; unknown or missing commands fall through to help.
main() {
    local cmd="${1:-help}"
    case "$cmd" in
        "init")
            check_swarm
            create_network
            ;;
        "deploy")
            check_swarm
            create_network
            deploy_traefik
            show_urls
            ;;
        "deploy-demo")
            deploy_demo
            ;;
        "deploy-monitoring")
            deploy_monitoring
            ;;
        "deploy-all")
            check_swarm
            create_network
            deploy_traefik
            # Give Traefik a moment to come up before dependent stacks.
            sleep 5
            deploy_demo
            deploy_monitoring
            show_status
            show_urls
            ;;
        "status")       show_status ;;
        "urls")         show_urls ;;
        "logs")         show_logs "$2" ;;
        "scale")        scale_service "$2" "$3" "$4" ;;
        "update-hosts") update_hosts ;;
        "cleanup")      cleanup ;;
        "help"|*)       show_help ;;
    esac
}
# Run the dispatcher with all CLI arguments.
main "$@"