# ============================================================
# Monitoring stack - Docker Compose orchestration
#
# Usage:
#   docker compose -f deploy/production/monitoring/docker-compose.monitoring.yml up -d
#
# Components:
#   - Prometheus          (metrics collection and alerting)
#   - Grafana             (visualization dashboards)
#   - Node Exporter       (host metrics)
#   - cAdvisor            (container metrics)
#   - AlertManager        (alert management)
#   - PostgreSQL Exporter (PostgreSQL metrics)
#   - Redis Exporter      (Redis metrics)
# ============================================================

services:
# ==================== Prometheus ====================
  # Metrics collection and alert-rule evaluation.
  # The web UI/API is published on the host loopback interface only.
  prometheus:
    container_name: wm-prometheus
    image: prom/prometheus:v2.51.0
    restart: always
    networks:
      - wm-network
      - monitoring
    ports:
      - '127.0.0.1:9090:9090'
    volumes:
      # Scrape configuration and alert rules are mounted read-only.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
      # Named volume holding the TSDB.
      - prometheus_data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Retention is bounded both by age and by on-disk size.
      - "--storage.tsdb.retention.time=30d"
      - "--storage.tsdb.retention.size=10GB"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      # NOTE(review): the lifecycle and admin HTTP APIs are enabled and this
      # container is also attached to wm-network - confirm both are required.
      - "--web.enable-lifecycle"
      - "--web.enable-admin-api"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--spider"
        - "-q"
        - "http://localhost:9090/-/healthy"
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        reservations:
          cpus: "0.25"
          memory: 512M
        limits:
          cpus: "1.0"
          memory: 2G
    logging:
      driver: json-file
      options:
        max-size: '50m'
        max-file: '3'

# ==================== Grafana ====================
  # Dashboards. The UI is published on the host loopback interface only.
  grafana:
    image: grafana/grafana:10.4.0
    container_name: wm-grafana
    restart: always
    ports:
      - "127.0.0.1:3000:3000"
    environment:
      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
      # The admin password has no fallback: Compose aborts with this message
      # when GRAFANA_ADMIN_PASSWORD is unset or empty, so the stack can never
      # come up with the well-known "admin" password.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"
      GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3000}"
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
      # Quoted so the container receives the literal strings "true"/"false".
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_AUTH_ANONYMOUS_ENABLED: "false"
      # NOTE(review): secure cookies are presumably only usable when Grafana
      # is reached over HTTPS, while the default root URL above is plain
      # http - confirm GRAFANA_ROOT_URL is set to the HTTPS address.
      GF_SECURITY_COOKIE_SECURE: "true"
      GF_SECURITY_STRICT_TRANSPORT_SECURITY: "true"
    volumes:
      # Provisioned datasources are mounted read-only.
      - ./grafana/provisioning/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      # Named volume for Grafana's persistent state.
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 512M
        reservations:
          cpus: "0.1"
          memory: 128M
    logging:
      driver: json-file
      options:
        max-size: "30m"
        max-file: "3"
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3

# ==================== Node Exporter ====================
  # Host-level metrics. The host root filesystem is bind-mounted read-only at
  # /host and the container shares the host PID namespace.
  node-exporter:
    container_name: wm-node-exporter
    image: prom/node-exporter:v1.7.0
    restart: always
    pid: host
    networks:
      - monitoring
    ports:
      - '127.0.0.1:9100:9100'
    volumes:
      - /:/host:ro,rslave
    command:
      - "--path.rootfs=/host"
      # "$$" is Compose's escape for a literal "$" in the regular expression.
      - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
    deploy:
      resources:
        reservations:
          cpus: "0.05"
          memory: 32M
        limits:
          cpus: "0.2"
          memory: 128M
    logging:
      driver: json-file
      options:
        max-size: '10m'
        max-file: '3'

# ==================== cAdvisor ====================
  # Per-container metrics. Runs privileged with read-only views of the host
  # filesystem, /sys and the Docker data directory.
  cadvisor:
    container_name: wm-cadvisor
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    restart: always
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    networks:
      - monitoring
    ports:
      # Container port 8080 is published on host loopback port 8880.
      - '127.0.0.1:8880:8080'
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    deploy:
      resources:
        reservations:
          cpus: "0.1"
          memory: 64M
        limits:
          cpus: "0.3"
          memory: 256M
    logging:
      driver: json-file
      options:
        max-size: '20m'
        max-file: '3'

# ==================== AlertManager ====================
  # Alert management. The UI/API is published on the host loopback
  # interface only.
  alertmanager:
    container_name: wm-alertmanager
    image: prom/alertmanager:v0.27.0
    restart: always
    networks:
      - monitoring
    ports:
      - '127.0.0.1:9093:9093'
    volumes:
      # Configuration is mounted read-only; state lives in a named volume.
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
      - "--storage.path=/alertmanager"
    deploy:
      resources:
        reservations:
          cpus: "0.05"
          memory: 64M
        limits:
          cpus: "0.2"
          memory: 256M
    logging:
      driver: json-file
      options:
        max-size: '10m'
        max-file: '3'

# ==================== PostgreSQL Exporter ====================
  # PostgreSQL metrics exporter; its port 9187 is published on the host
  # loopback interface only.
  #
  # There is deliberately no `depends_on: postgres`: this file defines no
  # `postgres` service (the database is reached by hostname over the
  # external wm-network), and Compose rejects a project whose depends_on
  # names an undefined service.
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:v0.15.0
    container_name: wm-postgres-exporter
    restart: always
    environment:
      # Host/database part of the DSN, without credentials. User and password
      # are passed separately so that special characters in the password
      # (e.g. "@", "/", ":") cannot corrupt the connection URL.
      DATA_SOURCE_URI: "postgres:5432/${POSTGRES_DB:-water_management}?sslmode=disable"
      DATA_SOURCE_USER: "${POSTGRES_USER:-water}"
      # NOTE(review): falls back to a well-known default password - make sure
      # POSTGRES_PASSWORD is set in production.
      DATA_SOURCE_PASS: "${POSTGRES_PASSWORD:-water123}"
    ports:
      - "127.0.0.1:9187:9187"
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: "0.2"
          memory: 128M
        reservations:
          cpus: "0.05"
          memory: 32M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

# ==================== Redis Exporter ====================
  # Redis metrics exporter; its port 9121 is published on the host loopback
  # interface only.
  #
  # There is deliberately no `depends_on: redis`: this file defines no
  # `redis` service (Redis is reached by hostname over the external
  # wm-network), and Compose rejects a project whose depends_on names an
  # undefined service.
  redis-exporter:
    image: oliver006/redis_exporter:v1.58.0
    container_name: wm-redis-exporter
    restart: always
    environment:
      REDIS_ADDR: "redis://redis:6379"
      # NOTE(review): falls back to a well-known default password - make sure
      # REDIS_PASSWORD is set in production.
      REDIS_PASSWORD: "${REDIS_PASSWORD:-water123}"
    ports:
      - "127.0.0.1:9121:9121"
    networks:
      - monitoring
      - wm-network
    deploy:
      resources:
        limits:
          cpus: "0.1"
          memory: 64M
        reservations:
          cpus: "0.02"
          memory: 16M
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

# ==================== Volumes ====================
# Named volumes with default settings, one per stateful service.
volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

# ==================== Networks ====================
networks:
  # Bridge network created by this file for the monitoring components.
  monitoring:
    name: wm-monitoring
    driver: bridge
  # Declared external: it must already exist before this stack is started.
  wm-network:
    name: wm-network
    external: true