---
# Prometheus alerting rules for the water-management-system group.
# Each rule fires once its `expr` has returned results continuously for
# the duration given in `for`; `labels.severity` is attached to the alert
# and the `annotations` are rendered from the firing series' labels.
groups:
  - name: water-management-system
    rules:
      # Service availability: a scrape target whose `up` series is 0.
      - alert: ApplicationDown
        expr: 'up == 0'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "服务 {{ $labels.instance }} 已停止响应"
          description: "服务 {{ $labels.instance }} 在过去1分钟内没有响应"

      # High CPU usage: 100 minus the per-instance average idle rate
      # (as a percentage) over a 5-minute window exceeds 80.
      - alert: HighCPUUsage
        expr: >-
          100 - (
            avg by(instance) (
              rate(node_cpu_seconds_total{mode="idle"}[5m])
            ) * 100
          ) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "高CPU使用率: {{ $labels.instance }}"
          description: "CPU使用率超过80%,持续5分钟"

      # High memory usage: share of memory that is not "available"
      # (1 - MemAvailable / MemTotal, as a percentage) exceeds 85.
      - alert: HighMemoryUsage
        expr: >-
          (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))
          * 100 > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "高内存使用率: {{ $labels.instance }}"
          description: "内存使用率超过85%,持续5分钟"

      # Database connections: more than 80 backends on a single series of
      # pg_stat_database_numbackends (the summary reports its `datname`).
      - alert: HighDatabaseConnections
        expr: 'pg_stat_database_numbackends > 80'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "数据库连接数过高: {{ $labels.datname }}"
          description: "当前数据库连接数超过80个"

      # Redis memory usage: used / max (as a percentage) exceeds 80.
      # The `> 0` filter drops series whose max is 0 before dividing;
      # otherwise used / 0 evaluates to +Inf and the alert fires forever.
      # NOTE(review): a max of 0 presumably means "no maxmemory limit
      # configured" in the exporter - confirm against the exporter in use.
      - alert: RedisMemoryUsage
        expr: >-
          redis_memory_used_bytes / (redis_memory_max_bytes > 0) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis内存使用率过高: {{ $labels.instance }}"
          description: "Redis内存使用率超过80%"