diff --git a/README-traefik-swarm.md b/README-traefik-swarm.md new file mode 100644 index 0000000..d24f294 --- /dev/null +++ b/README-traefik-swarm.md @@ -0,0 +1,253 @@ +# Traefik + Docker Swarm 集成 + +## 📋 概述 + +本项目实现了 Traefik 与 Docker Swarm 的完整集成,提供统一的入口点管理所有 Swarm 服务。 + +## 🏗️ 架构设计 + +``` +Internet + ↓ +Traefik (Load Balancer) + ↓ +Docker Swarm Services + ├── Web App (app.local) + ├── API Service (api.local) + ├── Monitor Service (monitor.local) + └── Other Services... +``` + +## 📁 文件结构 + +``` +/root/mgmt/ +├── traefik-swarm-stack.yml # Traefik 主服务配置 +├── demo-services-stack.yml # 示例服务配置 +├── monitoring-stack.yml # 监控服务配置 +├── swarm-traefik-manager.sh # 管理脚本 +└── README-traefik-swarm.md # 说明文档 +``` + +## 🚀 快速开始 + +### 1. 初始化环境 + +```bash +# 确保 Docker Swarm 已激活 +docker swarm init + +# 初始化 Traefik 环境 +./swarm-traefik-manager.sh init +``` + +### 2. 部署所有服务 + +```bash +# 一键部署所有服务 +./swarm-traefik-manager.sh deploy-all + +# 或分步部署 +./swarm-traefik-manager.sh deploy # 仅部署 Traefik +./swarm-traefik-manager.sh deploy-demo # 部署示例服务 +./swarm-traefik-manager.sh deploy-monitoring # 部署监控服务 +``` + +### 3. 
更新 hosts 文件 + +```bash +# 自动更新 hosts 文件 +./swarm-traefik-manager.sh update-hosts + +# 或手动添加到 /etc/hosts +echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" >> /etc/hosts +``` + +## 🌐 访问地址 + +| 服务 | 地址 | 说明 | +|------|------|------| +| Traefik Dashboard | http://traefik.local:8080 | 管理界面 | +| Web App | http://app.local | 示例 Web 应用 | +| API Service | http://api.local | 示例 API 服务 | +| Monitor Service | http://monitor.local | 监控服务 | +| Prometheus | http://prometheus.local | 指标收集 | +| Grafana | http://grafana.local | 可视化面板 | + +## 🛠️ 管理命令 + +### 查看服务状态 +```bash +./swarm-traefik-manager.sh status +``` + +### 查看服务日志 +```bash +./swarm-traefik-manager.sh logs traefik_traefik +./swarm-traefik-manager.sh logs demo_webapp +``` + +### 扩缩容服务 +```bash +# 扩容 webapp 到 3 个副本 +./swarm-traefik-manager.sh scale demo webapp 3 + +# 扩容 API 服务到 2 个副本 +./swarm-traefik-manager.sh scale demo api 2 +``` + +### 清理环境 +```bash +./swarm-traefik-manager.sh cleanup +``` + +## 📊 监控配置 + +### Prometheus 指标 +- Traefik 指标: http://traefik:8080/metrics +- Node Exporter: 系统指标 +- cAdvisor: 容器指标 + +### Grafana 配置 +- 默认用户: admin +- 默认密码: admin123 +- 数据源: Prometheus (http://prometheus:9090) + +## 🔧 服务配置 + +### 为新服务添加 Traefik 路由 + +在 Docker Compose 文件中添加以下标签: + +```yaml +services: + your-service: + image: your-image + networks: + - traefik-public + deploy: + labels: + - traefik.enable=true + - traefik.http.routers.your-service.rule=Host(`your-domain.local`) + - traefik.http.routers.your-service.entrypoints=web + - traefik.http.services.your-service.loadbalancer.server.port=80 +``` + +### 高级路由配置 + +```yaml +# 路径前缀路由 +- traefik.http.routers.api-path.rule=Host(`app.local`) && PathPrefix(`/api`) + +# HTTPS 重定向 +- traefik.http.routers.your-service.entrypoints=websecure +- traefik.http.routers.your-service.tls.certresolver=letsencrypt + +# 中间件配置 +- traefik.http.routers.your-service.middlewares=auth +- traefik.http.middlewares.auth.basicauth.users=user:password +``` + +## 🔒 
安全配置 + +### 基本认证 +```yaml +labels: + - traefik.http.middlewares.auth.basicauth.users=admin:$$2y$$10$$... + - traefik.http.routers.service.middlewares=auth +``` + +### HTTPS 配置 +```yaml +labels: + - traefik.http.routers.service.tls.certresolver=letsencrypt + - traefik.http.routers.service.entrypoints=websecure +``` + +## 🐛 故障排除 + +### 常见问题 + +1. **服务无法访问** + ```bash + # 检查服务状态 + docker stack services traefik + + # 检查网络连接 + docker network ls | grep traefik-public + ``` + +2. **路由不生效** + ```bash + # 查看 Traefik 日志 + ./swarm-traefik-manager.sh logs traefik_traefik + + # 检查服务标签 + docker service inspect demo_webapp + ``` + +3. **DNS 解析问题** + ```bash + # 检查 hosts 文件 + cat /etc/hosts | grep local + + # 更新 hosts 文件 + ./swarm-traefik-manager.sh update-hosts + ``` + +### 调试命令 + +```bash +# 查看所有 Swarm 服务 +docker service ls + +# 查看特定服务详情 +docker service inspect traefik_traefik + +# 查看服务任务 +docker service ps traefik_traefik + +# 进入容器调试 +docker exec -it $(docker ps -q -f name=traefik) sh +``` + +## 📈 性能优化 + +### 负载均衡配置 +```yaml +labels: + - traefik.http.services.service.loadbalancer.sticky.cookie=true + - traefik.http.services.service.loadbalancer.healthcheck.path=/health +``` + +### 缓存配置 +```yaml +labels: + - traefik.http.middlewares.cache.headers.customrequestheaders.Cache-Control=max-age=3600 +``` + +## 🔄 备份与恢复 + +### 备份配置 +```bash +# 备份 Docker 配置 +docker config ls +docker config inspect config_name + +# 备份 Swarm 状态 +docker node ls +docker service ls +``` + +### 恢复服务 +```bash +# 重新部署服务 +./swarm-traefik-manager.sh deploy-all +``` + +## 📚 参考资料 + +- [Traefik 官方文档](https://doc.traefik.io/traefik/) +- [Docker Swarm 文档](https://docs.docker.com/engine/swarm/) +- [Prometheus 配置](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) +- [Grafana 文档](https://grafana.com/docs/) \ No newline at end of file diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000..e09d6ff --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,168 @@ +# Ansible 
Playbooks 管理文档 + +## 📁 目录结构 + +``` +ansible/ +├── playbooks/ # 主要 playbooks 目录 +│ ├── 01-system/ # 系统管理类 +│ ├── 02-security/ # 安全管理类 +│ ├── 03-services/ # 服务管理类 +│ ├── 04-monitoring/ # 监控检查类 +│ ├── 05-cloud/ # 云服务商专用 +│ └── 99-tools/ # 工具和集成类 +├── inventory.ini # 主机清单 +├── ansible.cfg # Ansible 配置 +├── run.sh # 原始运行脚本 +└── run-playbook.sh # 新的分类运行脚本 +``` + +## 🎯 分类说明 + +### 01-system (系统管理) +负责基础系统的维护和管理任务。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `system-update.yml` | 系统包更新和升级 | 所有 Linux 主机 | +| `system-cleanup.yml` | 系统清理和维护 | 所有主机 | +| `cron-setup.yml` | 定时任务配置 | 需要定时任务的主机 | + +### 02-security (安全管理) +处理安全相关的配置和监控。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `security-hardening.yml` | SSH 安全加固和备份 | 所有主机 | +| `certificate-management.yml` | SSL 证书管理和监控 | Web 服务器和 SSL 服务 | + +### 03-services (服务管理) +管理各种服务和容器。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `docker-management.yml` | Docker 容器管理 | Docker 主机 | +| `docker-status-check.yml` | Docker 状态检查 | Docker Swarm 节点 | + +### 04-monitoring (监控检查) +系统和服务的健康检查。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `service-health-check.yml` | 服务健康状态监控 | 所有主机 | +| `network-connectivity.yml` | 网络连接性能检查 | 所有主机 | + +### 05-cloud (云服务商专用) +针对特定云服务商的优化脚本。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `cloud-providers-update.yml` | 云服务商系统更新 | huawei, google, digitalocean, aws | + +### 99-tools (工具和集成) +运维工具和集成脚本。 + +| Playbook | 功能描述 | 适用主机 | +|----------|----------|----------| +| `ops-toolkit.yml` | 统一运维管理面板 | 所有主机 | + +## 🚀 使用方法 + +### 1. 使用新的分类运行脚本 + +```bash +# 查看帮助 +./run-playbook.sh help + +# 列出所有可用的 playbooks +./run-playbook.sh list + +# 运行特定分类的 playbook +./run-playbook.sh 01-system system-update.yml all +./run-playbook.sh 03-services docker-status-check.yml hcp +./run-playbook.sh 04-monitoring network-connectivity.yml dev1 +``` + +### 2. 
直接使用 ansible-playbook + +```bash +# 运行系统更新 +ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml + +# 检查 Docker 状态 +ansible-playbook -i inventory.ini playbooks/03-services/docker-status-check.yml --limit hcp + +# 网络连接检查 +ansible-playbook -i inventory.ini playbooks/04-monitoring/network-connectivity.yml --limit dev1 +``` + +## 📋 主机组说明 + +根据 `inventory.ini` 配置的主机组: + +- **dev**: 开发环境 (dev1, dev2) +- **hcp**: HCP 节点 (hcp1, hcp2) - Docker Swarm 集群 +- **oci_kr**: Oracle Cloud Korea (ch2, ch3, master) +- **oci_us**: Oracle Cloud US (ash1d, ash2e, ash3c) +- **huawei**: 华为云 (hcs) +- **google**: Google Cloud (benwork) +- **digitalocean**: DigitalOcean (syd) +- **aws**: Amazon Web Services (awsirish) +- **proxmox**: Proxmox 虚拟化 (pve, xgp, nuc12) +- **lxc**: LXC 容器 (warden, gitea, influxdb, mysql, postgresql) +- **alpine**: Alpine Linux 容器 (redis, authentik, calibreweb) +- **vm**: 虚拟机 (kali) + +## 🔧 配置文件 + +### ansible.cfg +已更新支持新的目录结构,包含: +- 新的 playbooks 路径配置 +- SSH 连接优化 +- 动态 inventory 支持 + +### inventory.ini +包含所有主机的连接信息和分组配置。 + +## 📝 最佳实践 + +1. **按功能分类运行**: 根据需要选择合适的分类目录 +2. **使用主机组**: 利用 inventory 中的主机组进行批量操作 +3. **测试先行**: 在开发环境先测试,再应用到生产环境 +4. **日志记录**: 重要操作建议记录执行日志 +5. **定期维护**: 定期运行系统清理和更新脚本 + +## 🆘 故障排除 + +### 常见问题 + +1. **SSH 连接失败** + - 检查主机是否可达 + - 验证 SSH 密钥或密码 + - 确认用户权限 + +2. **Playbook 执行失败** + - 检查目标主机的系统类型 + - 验证所需的软件包是否安装 + - 查看详细错误日志 + +3. 
**权限问题** + - 确认 `ansible_become` 配置正确 + - 验证 sudo 权限 + +### 调试命令 + +```bash +# 测试连接 +ansible all -i inventory.ini -m ping + +# 详细输出 +ansible-playbook -i inventory.ini playbooks/01-system/system-update.yml -vvv + +# 检查语法 +ansible-playbook --syntax-check playbooks/01-system/system-update.yml +``` + +--- + +*最后更新: $(date '+%Y-%m-%d %H:%M:%S')* \ No newline at end of file diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 479c711..9e235b5 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -4,7 +4,14 @@ host_key_checking = False timeout = 30 gathering = smart fact_caching = memory +# 支持新的 playbooks 目录结构 +roles_path = playbooks/ +collections_path = playbooks/ [ssh_connection] ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no -pipelining = True \ No newline at end of file +pipelining = True + +[inventory] +# 启用插件以支持动态 inventory +enable_plugins = host_list, script, auto, yaml, ini, toml \ No newline at end of file diff --git a/ansible/cron-setup.yml b/ansible/playbooks/01-system/cron-setup.yml similarity index 100% rename from ansible/cron-setup.yml rename to ansible/playbooks/01-system/cron-setup.yml diff --git a/ansible/system-cleanup.yml b/ansible/playbooks/01-system/system-cleanup.yml similarity index 100% rename from ansible/system-cleanup.yml rename to ansible/playbooks/01-system/system-cleanup.yml diff --git a/ansible/system-update.yml b/ansible/playbooks/01-system/system-update.yml similarity index 100% rename from ansible/system-update.yml rename to ansible/playbooks/01-system/system-update.yml diff --git a/ansible/certificate-management.yml b/ansible/playbooks/02-security/certificate-management.yml similarity index 100% rename from ansible/certificate-management.yml rename to ansible/playbooks/02-security/certificate-management.yml diff --git a/ansible/security-hardening.yml b/ansible/playbooks/02-security/security-hardening.yml similarity index 100% rename from ansible/security-hardening.yml rename to 
ansible/playbooks/02-security/security-hardening.yml diff --git a/ansible/docker-management.yml b/ansible/playbooks/03-services/docker-management.yml similarity index 100% rename from ansible/docker-management.yml rename to ansible/playbooks/03-services/docker-management.yml diff --git a/ansible/playbooks/03-services/docker-status-check.yml b/ansible/playbooks/03-services/docker-status-check.yml new file mode 100644 index 0000000..d794f8e --- /dev/null +++ b/ansible/playbooks/03-services/docker-status-check.yml @@ -0,0 +1,97 @@ +--- +- name: Docker Status Check for HCP Nodes + hosts: hcp + gather_facts: yes + become: yes + + tasks: + - name: Check if Docker is installed + command: docker --version + register: docker_version + ignore_errors: yes + + - name: Display Docker version + debug: + msg: "Docker version: {{ docker_version.stdout }}" + when: docker_version.rc == 0 + + - name: Check Docker service status + systemd: + name: docker + register: docker_service_status + + - name: Display Docker service status + debug: + msg: "Docker service is {{ docker_service_status.status.ActiveState }}" + + - name: Check Docker daemon info + command: docker info --format "{{ '{{' }}.ServerVersion{{ '}}' }}" + register: docker_info + ignore_errors: yes + + - name: Display Docker daemon info + debug: + msg: "Docker daemon version: {{ docker_info.stdout }}" + when: docker_info.rc == 0 + + - name: Check Docker Swarm status + command: docker info --format "{{ '{{' }}.Swarm.LocalNodeState{{ '}}' }}" + register: swarm_status + ignore_errors: yes + + - name: Display Swarm status + debug: + msg: "Swarm status: {{ swarm_status.stdout }}" + when: swarm_status.rc == 0 + + - name: Get Docker Swarm node info (if in swarm) + command: docker node ls + register: swarm_nodes + ignore_errors: yes + when: swarm_status.stdout == "active" + + - name: Display Swarm nodes + debug: + msg: "{{ swarm_nodes.stdout_lines }}" + when: swarm_nodes.rc is defined and swarm_nodes.rc == 0 # a skipped task still registers a result, but without rc + + - name: List running
containers + command: docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" + register: running_containers + ignore_errors: yes + + - name: Display running containers + debug: + msg: "{{ running_containers.stdout_lines }}" + when: running_containers.rc == 0 + + - name: Check Docker network list + command: docker network ls + register: docker_networks + ignore_errors: yes + + - name: Display Docker networks + debug: + msg: "{{ docker_networks.stdout_lines }}" + when: docker_networks.rc == 0 + + - name: Get Docker system info + command: docker system df + register: docker_system_info + ignore_errors: yes + + - name: Display Docker system usage + debug: + msg: "{{ docker_system_info.stdout_lines }}" + when: docker_system_info.rc == 0 + + - name: Check if node is Swarm manager + command: docker info --format "{{ '{{' }}.Swarm.ControlAvailable{{ '}}' }}" + register: is_manager # ControlAvailable is true on every manager (not only the leader) and can be queried on workers + ignore_errors: yes + when: swarm_status.stdout == "active" + + - name: Display manager status + debug: + msg: "Is Swarm manager: {{ is_manager.stdout }}" + when: is_manager.rc is defined and is_manager.rc == 0 # a skipped task still registers a result, but without rc \ No newline at end of file diff --git a/ansible/network-connectivity.yml b/ansible/playbooks/04-monitoring/network-connectivity.yml similarity index 100% rename from ansible/network-connectivity.yml rename to ansible/playbooks/04-monitoring/network-connectivity.yml diff --git a/ansible/service-health-check.yml b/ansible/playbooks/04-monitoring/service-health-check.yml similarity index 100% rename from ansible/service-health-check.yml rename to ansible/playbooks/04-monitoring/service-health-check.yml diff --git a/ansible/cloud-providers-update.yml b/ansible/playbooks/05-cloud/cloud-providers-update.yml similarity index 100% rename from ansible/cloud-providers-update.yml rename to ansible/playbooks/05-cloud/cloud-providers-update.yml diff --git a/ansible/ops-toolkit.yml b/ansible/playbooks/99-tools/ops-toolkit.yml similarity
index 100% rename from ansible/ops-toolkit.yml rename to ansible/playbooks/99-tools/ops-toolkit.yml diff --git a/ansible/run-playbook.sh b/ansible/run-playbook.sh new file mode 100755 index 0000000..70ea39f --- /dev/null +++ b/ansible/run-playbook.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +# Ansible Playbooks 分类运行脚本 +# 使用方法: ./run-playbook.sh [category] [playbook] [hosts] + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLAYBOOKS_DIR="$SCRIPT_DIR/playbooks" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# 显示使用帮助 +show_help() { + echo -e "${BLUE}Ansible Playbooks 分类运行脚本${NC}" + echo "" + echo "使用方法:" + echo " $0 [category] [playbook] [hosts]" + echo "" + echo "可用分类:" + echo -e " ${GREEN}01-system${NC} - 系统管理 (更新、清理、定时任务)" + echo -e " ${GREEN}02-security${NC} - 安全管理 (安全加固、证书管理)" + echo -e " ${GREEN}03-services${NC} - 服务管理 (Docker、容器服务)" + echo -e " ${GREEN}04-monitoring${NC} - 监控检查 (健康检查、网络连接)" + echo -e " ${GREEN}05-cloud${NC} - 云服务商专用" + echo -e " ${GREEN}99-tools${NC} - 工具和集成" + echo "" + echo "示例:" + echo " $0 list # 列出所有可用的 playbooks" + echo " $0 01-system system-update.yml all # 在所有主机上运行系统更新" + echo " $0 03-services docker-status-check.yml hcp # 在 hcp 组上检查 Docker 状态" + echo " $0 04-monitoring network-connectivity.yml dev1 # 在 dev1 主机上检查网络连接" +} + +# 列出所有可用的 playbooks +list_playbooks() { + echo -e "${BLUE}可用的 Ansible Playbooks:${NC}" + echo "" + + for category in $(ls -1 "$PLAYBOOKS_DIR" | sort); do + if [ -d "$PLAYBOOKS_DIR/$category" ]; then + echo -e "${GREEN}📁 $category${NC}" + for playbook in $(ls -1 "$PLAYBOOKS_DIR/$category"/*.yml 2>/dev/null | sort); do + if [ -f "$playbook" ]; then + basename_playbook=$(basename "$playbook") + echo -e " └── ${YELLOW}$basename_playbook${NC}" + fi + done + echo "" + fi + done +} + +# 运行指定的 playbook +run_playbook() { + local category="$1" + local playbook="$2" + local hosts="$3" + + local 
playbook_path="$PLAYBOOKS_DIR/$category/$playbook" + + if [ ! -f "$playbook_path" ]; then + echo -e "${RED}错误: Playbook 文件不存在: $playbook_path${NC}" + exit 1 + fi + + echo -e "${GREEN}运行 Playbook:${NC} $category/$playbook" + echo -e "${GREEN}目标主机:${NC} $hosts" + echo "" + + # Run ansible-playbook; the inventory is resolved next to this script so the call works from any cwd + ansible-playbook -i "$SCRIPT_DIR/inventory.ini" "$playbook_path" --limit "$hosts" +} + +# Main dispatch: help / list / <category> <playbook> <hosts> +case "${1:-}" in + "help"|"-h"|"--help"|"") + show_help + ;; + "list"|"ls") + list_playbooks + ;; + *) + if [ $# -lt 3 ]; then + echo -e "${RED}错误: 参数不足${NC}" + echo "" + show_help + exit 1 + fi + + category="$1" + playbook="$2" + hosts="$3" + + if [ ! -d "$PLAYBOOKS_DIR/$category" ]; then + echo -e "${RED}错误: 分类目录不存在: $category${NC}" + echo "" + list_playbooks + exit 1 + fi + + run_playbook "$category" "$playbook" "$hosts" + ;; +esac \ No newline at end of file diff --git a/consul-demo.sh b/consul-demo.sh new file mode 100755 index 0000000..75f43ea --- /dev/null +++ b/consul-demo.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +echo "🚀 Consul 集群演示脚本" + +# Check Consul cluster status: probe each node silently, then print the labelled leader/peers +check_cluster() { + echo "📊 检查集群状态..." + + for node in consul1 consul2 consul3; do + echo "检查节点: $node" + curl -s http://$node:8500/v1/status/leader >/dev/null 2>&1 && echo " - Leader: $(curl -s http://$node:8500/v1/status/leader 2>/dev/null)" || echo " - 节点不可达" + curl -s http://$node:8500/v1/status/peers >/dev/null 2>&1 && echo " - 集群节点: $(curl -s http://$node:8500/v1/status/peers 2>/dev/null)" || echo " - 无法获取集群信息" + echo "" + done +} + +# Test KV replication: write through one node, read back through the others +test_config() { + echo "🔧 测试配置读写..." + + # Write via consul1; Consul stores the request body as the value, so it must be sent with --data + echo "写入配置到 consul1..." + curl -s -X PUT --data "value-from-consul1" http://consul1:8500/v1/kv/test/config 2>/dev/null + + echo "从 consul2 读取配置..." + value=$(curl -s "http://consul2:8500/v1/kv/test/config?raw" 2>/dev/null) + echo "读取到的值: $value" + + echo "从 consul3 读取配置..." + value=$(curl -s "http://consul3:8500/v1/kv/test/config?raw" 2>/dev/null) + echo "读取到的值: $value" +} + +# Simulate failover +simulate_failure() { + echo "💥 模拟 Leader 故障..."
+ + # 获取当前 Leader + leader=$(curl -s http://consul1:8500/v1/status/leader 2>/dev/null | tr -d '"') + echo "当前 Leader: $leader" + + # 这里只是演示,实际环境中你可以停止 Leader 节点 + echo "在实际环境中,你可以:" + echo "docker stop consul-leader-container" + echo "然后观察其他节点自动选举新 Leader" +} + +case "$1" in + "status") + check_cluster + ;; + "test") + test_config + ;; + "failure") + simulate_failure + ;; + *) + echo "用法: $0 {status|test|failure}" + echo " status - 检查集群状态" + echo " test - 测试配置同步" + echo " failure - 模拟故障转移" + ;; +esac \ No newline at end of file diff --git a/demo-services-stack.yml b/demo-services-stack.yml new file mode 100644 index 0000000..d4af571 --- /dev/null +++ b/demo-services-stack.yml @@ -0,0 +1,166 @@ +version: '3.8' + +services: + # Web 应用示例 + webapp: + image: nginx:alpine + networks: + - traefik-public + configs: + - source: webapp-html + target: /usr/share/nginx/html/index.html + deploy: + replicas: 2 + labels: + - traefik.enable=true + - traefik.http.routers.webapp.rule=Host(`app.local`) + - traefik.http.routers.webapp.entrypoints=web + - traefik.http.services.webapp.loadbalancer.server.port=80 + update_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + + # API 服务示例 + api: + image: httpd:alpine + networks: + - traefik-public + configs: + - source: api-html + target: /usr/local/apache2/htdocs/index.html + deploy: + replicas: 2 + labels: + - traefik.enable=true + - traefik.http.routers.api.rule=Host(`api.local`) + - traefik.http.routers.api.entrypoints=web + - traefik.http.services.api.loadbalancer.server.port=80 + # 添加路径前缀 + - traefik.http.routers.api-path.rule=Host(`app.local`) && PathPrefix(`/api`) + - traefik.http.routers.api-path.entrypoints=web + - traefik.http.routers.api-path.service=api + update_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + + # 监控服务示例 + monitor: + image: nginx:alpine + networks: + - traefik-public + configs: + - source: monitor-html + target: /usr/share/nginx/html/index.html + 
deploy: + replicas: 1 + labels: + - traefik.enable=true + - traefik.http.routers.monitor.rule=Host(`monitor.local`) + - traefik.http.routers.monitor.entrypoints=web + - traefik.http.services.monitor.loadbalancer.server.port=80 + # 添加基本认证 (可选) + - traefik.http.routers.monitor.middlewares=auth + - traefik.http.middlewares.auth.basicauth.users=admin:$$2y$$10$$DLKjKQKQKQKQKQKQKQKQKe + restart_policy: + condition: on-failure + +networks: + traefik-public: + external: true + +configs: + webapp-html: + content: | + + +
+服务: webapp
+访问地址: http://app.local
+负载均衡: Traefik + Docker Swarm
+时间:
+这是通过 Traefik 路由的 Web 应用示例。
+服务: api
+访问地址: http://api.local
+路径路由: http://app.local/api
+负载均衡: Traefik + Docker Swarm
+时间:
+这是通过 Traefik 路由的 API 服务示例。
+服务: monitor
+访问地址: http://monitor.local
+认证: 基本认证保护
+负载均衡: Traefik + Docker Swarm
+时间:
+这是通过 Traefik 路由的监控服务示例。
+当前时间:
+ + + +EOF + +# 创建示例 API +cat > api/server.js << 'EOF' +const express = require('express'); +const consul = require('consul')(); +const app = express(); +const port = 3000; + +app.use(express.json()); + +// 健康检查 +app.get('/health', (req, res) => { + res.json({ status: 'healthy', timestamp: new Date().toISOString() }); +}); + +// API 路由 +app.get('/api/config', async (req, res) => { + try { + const result = await consul.kv.get('config/api/message'); + res.json({ + message: result ? result.Value : 'Hello from API!', + source: 'consul' + }); + } catch (error) { + res.json({ + message: 'Hello from API!', + source: 'default' + }); + } +}); + +app.post('/api/config', async (req, res) => { + try { + await consul.kv.set('config/api/message', req.body.message); + res.json({ success: true }); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + +app.listen(port, () => { + console.log(`API server running on port ${port}`); +}); +EOF + +# 创建 API package.json +cat > api/package.json << 'EOF' +{ + "name": "demo-api", + "version": "1.0.0", + "dependencies": { + "express": "^4.18.0", + "consul": "^0.40.0" + } +} +EOF + +# 设置 hosts 文件(用于本地测试) +echo "📝 请添加以下内容到 /etc/hosts 文件:" +echo "127.0.0.1 traefik.local" +echo "127.0.0.1 consul.local" +echo "127.0.0.1 app.local" +echo "127.0.0.1 api.local" + +# 启动服务 +echo "🚀 启动 Traefik + Consul 集群..." +docker-compose -f traefik-consul-setup.yml up -d + +# 等待服务启动 +echo "⏳ 等待服务启动..." +sleep 10 + +# 检查服务状态 +echo "📊 检查服务状态..." 
+docker-compose -f traefik-consul-setup.yml ps + +# 显示访问地址 +echo "" +echo "🎉 部署完成!访问地址:" +echo " Traefik Dashboard: http://traefik.local:8080" +echo " Consul UI: http://consul.local:8500" +echo " Web App: http://app.local" +echo " API: http://api.local/api/config" +echo "" +echo "📝 测试命令:" +echo " curl http://api.local/api/config" +echo " curl -X POST http://api.local/api/config -H 'Content-Type: application/json' -d '{\"message\":\"Hello Consul!\"}'" \ No newline at end of file diff --git a/monitoring-stack.yml b/monitoring-stack.yml new file mode 100644 index 0000000..b673d7a --- /dev/null +++ b/monitoring-stack.yml @@ -0,0 +1,131 @@ +version: '3.8' + +services: + # Prometheus 监控 + prometheus: + image: prom/prometheus:latest + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + networks: + - traefik-public + - monitoring + configs: + - source: prometheus-config + target: /etc/prometheus/prometheus.yml + volumes: + - prometheus-data:/prometheus + deploy: + replicas: 1 + labels: + - traefik.enable=true + - traefik.http.routers.prometheus.rule=Host(`prometheus.local`) + - traefik.http.routers.prometheus.entrypoints=web + - traefik.http.services.prometheus.loadbalancer.server.port=9090 + restart_policy: + condition: on-failure + + # Grafana 可视化 + grafana: + image: grafana/grafana:latest + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin123 + - GF_USERS_ALLOW_SIGN_UP=false + networks: + - traefik-public + - monitoring + volumes: + - grafana-data:/var/lib/grafana + deploy: + replicas: 1 + labels: + - traefik.enable=true + - traefik.http.routers.grafana.rule=Host(`grafana.local`) + - traefik.http.routers.grafana.entrypoints=web + - traefik.http.services.grafana.loadbalancer.server.port=3000 + restart_policy: + condition: on-failure + + # 
Node Exporter (系统指标) + node-exporter: + image: prom/node-exporter:latest + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + networks: + - monitoring + deploy: + mode: global + restart_policy: + condition: on-failure + + # cAdvisor (容器指标) + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + - /dev/disk/:/dev/disk:ro + networks: + - monitoring + deploy: + mode: global + restart_policy: + condition: on-failure + +networks: + traefik-public: + external: true + monitoring: + driver: overlay + +volumes: + prometheus-data: + grafana-data: + +configs: + prometheus-config: + content: | + global: + scrape_interval: 15s + evaluation_interval: 15s + + scrape_configs: + # Traefik 指标 + - job_name: 'traefik' + static_configs: + - targets: ['traefik:8080'] + metrics_path: /metrics + + # Prometheus 自身 + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # Node Exporter + - job_name: 'node-exporter' + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 + + # cAdvisor + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 \ No newline at end of file diff --git a/swarm-traefik-manager.sh b/swarm-traefik-manager.sh new file mode 100755 index 0000000..7d836c2 --- /dev/null +++ b/swarm-traefik-manager.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +# Traefik + Docker Swarm 管理脚本 +# 用于部署、管理和监控 Traefik 在 Docker Swarm 中的集成 + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +NETWORK_NAME="traefik-public" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# 日志函数 +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" 
+} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 检查 Docker Swarm 状态 +check_swarm() { + log_info "检查 Docker Swarm 状态..." + if ! docker info | grep -q "Swarm: active"; then + log_error "Docker Swarm 未激活,请先初始化 Swarm 集群" + echo "运行: docker swarm init" + exit 1 + fi + log_success "Docker Swarm 已激活" +} + +# 创建网络 +create_network() { + log_info "创建 Traefik 公共网络..." + if docker network ls | grep -q "$NETWORK_NAME"; then + log_warning "网络 $NETWORK_NAME 已存在" + else + docker network create --driver overlay --attachable "$NETWORK_NAME" + log_success "网络 $NETWORK_NAME 创建成功" + fi +} + +# 部署 Traefik +deploy_traefik() { + log_info "部署 Traefik 服务..." + docker stack deploy -c "$SCRIPT_DIR/traefik-swarm-stack.yml" traefik + log_success "Traefik 部署完成" +} + +# 部署示例服务 +deploy_demo() { + log_info "部署示例服务..." + docker stack deploy -c "$SCRIPT_DIR/demo-services-stack.yml" demo + log_success "示例服务部署完成" +} + +# 部署监控服务 +deploy_monitoring() { + log_info "部署监控服务..." + docker stack deploy -c "$SCRIPT_DIR/monitoring-stack.yml" monitoring + log_success "监控服务部署完成" +} + +# 显示服务状态 +show_status() { + log_info "显示服务状态..." 
+ echo "" + echo "=== Traefik Stack ===" + docker stack services traefik + echo "" + echo "=== Demo Stack ===" + docker stack services demo 2>/dev/null || echo "Demo stack not deployed" + echo "" + echo "=== Monitoring Stack ===" + docker stack services monitoring 2>/dev/null || echo "Monitoring stack not deployed" + echo "" +} + +# 显示访问地址 +show_urls() { + log_info "服务访问地址:" + echo "" + echo "🎛️ Traefik Dashboard: http://traefik.local:8080" + echo "🌐 Web App: http://app.local" + echo "🔌 API Service: http://api.local" + echo "📊 Monitor Service: http://monitor.local" + echo "📈 Prometheus: http://prometheus.local" + echo "📊 Grafana: http://grafana.local (admin/admin123)" + echo "" + echo "💡 请确保在 /etc/hosts 中添加以下条目:" + echo "127.0.0.1 traefik.local app.local api.local monitor.local prometheus.local grafana.local" +} + +# 查看日志 +show_logs() { + local service=${1:-traefik_traefik} + log_info "显示 $service 服务日志..." + docker service logs -f "$service" +} + +# 扩缩容服务 +scale_service() { + local stack=$1 + local service=$2 + local replicas=$3 + + if [[ -z "$stack" || -z "$service" || -z "$replicas" ]]; then + log_error "用法: $0 scale