Alerts


/opt/bitnami/prometheus/conf/alerts.yaml > CPU-load
HighNodeCPU (0 active)
# HighNodeCPU: warns when the 15-minute load average, expressed as a
# percentage of the instance's CPU core count, stays above 85 for 5 minutes.
alert: HighNodeCPU
# node_load15 is a gauge, so it must not be wrapped in rate(): rate() is only
# defined for counters and interprets every decrease as a counter reset.
# Dividing by the number of cores normalises the load average; the core count
# is taken as the number of idle-mode CPU series per target.
# NOTE(review): assumes node_cpu_seconds_total carries node_exporter's usual
# "cpu" and "mode" labels, so that aggregating them away leaves the same label
# set as node_load15 -- confirm against the scraped metrics.
expr: >-
  (node_load15
  / count without (cpu, mode) (node_cpu_seconds_total{mode="idle"}))
  * 100 > 85
for: 5m
labels:
  severity: warning
annotations:
  description: |-
    CPU load is high
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
  summary: High CPU load (instance {{ $labels.instance }})
/opt/bitnami/prometheus/conf/alerts.yaml > Cpu-usage
HostHighCpuLoad (0 active)
# HostHighCpuLoad: warns when an instance's non-idle CPU percentage, averaged
# across its idle-mode series over a 2-minute rate window, exceeds 95 for
# 3 minutes.
alert: HostHighCpuLoad
# 100 minus the average idle percentage gives the busy percentage.
expr: >-
  100 - (avg by(instance)
  (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 95
for: 3m
labels:
  severity: warning
annotations:
  summary: 'Host high CPU load (instance {{ $labels.instance }})'
  # Literal block scalar: the indented VALUE/LABELS lines keep their leading
  # spaces and line breaks in the rendered description.
  description: |-
    CPU load is > 95%
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
/opt/bitnami/prometheus/conf/alerts.yaml > Disk-usage
Low data disk space (0 active)
# Low data disk space: critical alert when used space on any filesystem whose
# mountpoint is not /boot rounds up to more than 95% of its size.
alert: 'Low data disk space'
# Used percentage = (size - free) / size * 100, rounded up with ceil().
expr: >-
  ceil(((node_filesystem_size_bytes{mountpoint!="/boot"}
  - node_filesystem_free_bytes{mountpoint!="/boot"})
  / node_filesystem_size_bytes{mountpoint!="/boot"}
  * 100)) > 95
# Require the condition to hold before firing so that a single evaluation
# above the threshold does not raise a critical alert. 5m matches the hold
# period used by the "Host Out Of DiskSpace" rule.
for: 5m
labels:
  severity: critical
annotations:
  description: 'Partition : {{$labels.mountpoint}}'
  host: '{{$labels.instance}}'
  summary: Disk usage is `{{humanize $value}}%`
  title: Disk Usage
/opt/bitnami/prometheus/conf/alerts.yaml > Disk-usage-check
Host Out Of DiskSpace (0 active)
# Host Out Of DiskSpace: warns when a filesystem has had less than 10% of its
# size available for 5 minutes.
alert: 'Host Out Of DiskSpace'
# The "and on(...)" clause keeps only series whose matching
# node_filesystem_readonly sample equals 0.
expr: >-
  (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
  and on(instance, device, mountpoint) node_filesystem_readonly == 0
for: 5m
labels:
  severity: warning
annotations:
  title: Low Disk space
  summary: 'Host out of disk space (instance {{ $labels.instance }})'
  # Literal block scalar: line breaks and the extra indentation of the last
  # two lines are part of the rendered description.
  description: |-
    Disk is getting full (< 10% left)
      Available Space is only= {{ $value }} %
      LABELS = {{ $labels }}
/opt/bitnami/prometheus/conf/alerts.yaml > Memory-usage
HostOutOfMemory (0 active)
# HostOutOfMemory: warns when available memory has been below 8% of total
# memory for 10 minutes.
alert: HostOutOfMemory
# Available memory as a percentage of total memory.
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 8
for: 10m
labels:
  severity: warning
annotations:
  summary: 'Host out of memory (instance {{ $labels.instance }})'
  # Literal block scalar: line breaks and the extra indentation of the
  # VALUE/LABELS lines are part of the rendered description.
  description: |-
    Node memory is filling up (< 8% left)
      VALUE = {{ $value }}
      LABELS = {{ $labels }}