---
# ============================================================
# Monitoring stack - Docker Compose definition
#
# Usage:
#   docker compose -f deploy/production/monitoring/docker-compose.monitoring.yml up -d
#
# Components:
#   - Prometheus          (metrics collection and alert evaluation)
#   - Grafana             (dashboards)
#   - Node Exporter       (host metrics)
#   - cAdvisor            (container metrics)
#   - AlertManager        (alert routing)
#   - PostgreSQL Exporter (database metrics)
#   - Redis Exporter      (cache metrics)
#
# Prerequisites:
#   - The external Docker network "wm-network" must already exist.
#   - prometheus.yml, alert_rules.yml, alertmanager.yml and
#     grafana/provisioning/datasources.yml must exist next to this file.
#
# All published ports are bound to 127.0.0.1 only.
#
# NOTE(review): several variables below fall back to hard-coded default
# credentials (admin / water123). Set GRAFANA_ADMIN_PASSWORD,
# POSTGRES_PASSWORD and REDIS_PASSWORD explicitly in production.
# ============================================================

services:
  # ==================== Prometheus ====================
  prometheus:
    image: prom/prometheus:v2.51.0
    container_name: wm-prometheus
    restart: always
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Retention is capped by both age and size.
      - '--storage.tsdb.retention.time=30d'
      - '--storage.tsdb.retention.size=10GB'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Enables the HTTP reload/quit endpoints.
      - '--web.enable-lifecycle'
      # NOTE(review): enables the TSDB admin endpoints. The host port is
      # loopback-only, but containers on the attached networks can still
      # reach port 9090 - confirm this is intended.
      - '--web.enable-admin-api'
    ports:
      - "127.0.0.1:9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
      - prometheus_data:/prometheus
    networks:
      - wm-network
      - monitoring
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 2G
        reservations:
          cpus: '0.25'
          memory: 512M
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "3"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--spider"
        - "-q"
        - "http://localhost:9090/-/healthy"
      interval: 30s
      timeout: 10s
      retries: 3

  # ==================== Grafana ====================
  grafana:
    image: grafana/grafana:10.4.0
    container_name: wm-grafana
    restart: always
    ports:
      - "127.0.0.1:3000:3000"
    environment:
      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
      # NOTE(review): falls back to the password "admin" when
      # GRAFANA_ADMIN_PASSWORD is unset - always set it in production.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}"
      GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3000}"
      GF_INSTALL_PLUGINS: "grafana-clock-panel,grafana-simple-json-datasource"
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_AUTH_ANONYMOUS_ENABLED: "false"
      GF_SECURITY_COOKIE_SECURE: "true"
      GF_SECURITY_STRICT_TRANSPORT_SECURITY: "true"
    volumes:
      - ./grafana/provisioning/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M
        reservations:
          cpus: '0.1'
          memory: 128M
    logging:
      driver: json-file
      options:
        max-size: "30m"
        max-file: "3"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--spider"
        - "-q"
        - "http://localhost:3000/api/health"
      interval: 30s
      timeout: 10s
      retries: 3

  # ==================== Node Exporter ====================
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: wm-node-exporter
    restart: always
    command:
      # The host root filesystem is mounted read-only at /host (see volumes).
      - '--path.rootfs=/host'
      # "$$" is Compose's escape for a literal "$" in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - "127.0.0.1:9100:9100"
    volumes:
      - /:/host:ro,rslave
    pid: host
    networks:
      - monitoring
    deploy:
      resources:
        limits:
          cpus: '0.2'
          memory: 128M
        reservations:
          cpus: '0.05'
          memory: 32M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

  # ==================== cAdvisor ====================
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    container_name: wm-cadvisor
    restart: always
    ports:
      # Container port 8080 is published on host port 8880.
      - "127.0.0.1:8880:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    networks:
      - monitoring
    deploy:
      resources:
        limits:
          cpus: '0.3'
          memory: 256M
        reservations:
          cpus: '0.1'
          memory: 64M
    logging:
      driver: json-file
      options:
        max-size: "20m"
        max-file: "3"

  # ==================== AlertManager ====================
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: wm-alertmanager
    restart: always
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    ports:
      - "127.0.0.1:9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    networks:
      - monitoring
    deploy:
      resources:
        limits:
          cpus: '0.2'
          memory: 256M
        reservations:
          cpus: '0.05'
          memory: 64M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

  # ==================== PostgreSQL Exporter ====================
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:v0.15.0
    container_name: wm-postgres-exporter
    restart: always
    environment:
      # The DSN is a URL: a password containing URL-reserved characters
      # (e.g. "@", "/", ":") must be percent-encoded.
      # NOTE(review): defaults to the password "water123" when
      # POSTGRES_PASSWORD is unset - always set it in production.
      DATA_SOURCE_NAME: "postgresql://${POSTGRES_USER:-water}:${POSTGRES_PASSWORD:-water123}@postgres:5432/${POSTGRES_DB:-water_management}?sslmode=disable"
    ports:
      - "127.0.0.1:9187:9187"
    # No depends_on: "postgres" is not defined in this file (presumably it
    # runs in the project that owns wm-network - verify). Referencing an
    # undefined service makes Compose reject the project when this file is
    # used on its own, as in the usage command above.
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: '0.2'
          memory: 128M
        reservations:
          cpus: '0.05'
          memory: 32M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

  # ==================== Redis Exporter ====================
  redis-exporter:
    image: oliver006/redis_exporter:v1.58.0
    container_name: wm-redis-exporter
    restart: always
    environment:
      REDIS_ADDR: "redis://redis:6379"
      # NOTE(review): defaults to the password "water123" when
      # REDIS_PASSWORD is unset - always set it in production.
      REDIS_PASSWORD: "${REDIS_PASSWORD:-water123}"
    ports:
      - "127.0.0.1:9121:9121"
    # No depends_on: "redis" is not defined in this file (presumably it
    # runs in the project that owns wm-network - verify). Referencing an
    # undefined service makes Compose reject the project when this file is
    # used on its own, as in the usage command above.
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: '0.1'
          memory: 64M
        reservations:
          cpus: '0.02'
          memory: 16M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

# ==================== Volumes ====================
volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

# ==================== Networks ====================
networks:
  # Bridge network created by this project for the monitoring components.
  monitoring:
    driver: bridge
    name: wm-monitoring
  # Pre-existing network, not created by this project.
  wm-network:
    external: true
    name: wm-network