Alertmanager
配置告警规则
# prometheus.yml
# Tells Prometheus which rule files to load; the file listed here is the
# alerting rule file shown next.
rule_files:
  - "node_exporter.yml"

# node_exporter.yml
# Alert on two conditions: the Node-Exporter target going down, and memory
# usage exceeding 1%.
# The threshold is deliberately tiny so that the alert is guaranteed to fire.
groups:
  - name: node_exporter
    rules:
      # The node_exporter target has reported up == 0 for 1 minute.
      - alert: HostDown
        expr: up{job="node_exporter"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: Host down {{ $labels.instance }}
      # Used-memory percentage (100 minus the available share) has stayed
      # above the 1% demo threshold for 1 minute.
      - alert: MemUtil
        expr: 100 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 1
        for: 1m
        labels:
          severity: warn
        annotations:
          summary: Mem usage larger than 1%, instance:{{ $labels.instance }}
部署 Alertmanager
Last updated