CPU Load

Check the CPU load. If it is greater than 70%, set the alert level to Error.

  • Prometheus collects the data from Node Exporter
  • Every metric has an instance label, which identifies the server it came from
-- @interval 30s
-- @name cpu70

-- CPU usage threshold, expressed as a fraction (0.7 = 70%).
-- The alert for a series is switched on when its latest value exceeds it.
local cpuLimit = 0.7

local prom1 = require('datasource.prometheus.prom1')
local alert = require('alert')


-- Share of CPU time each instance spends in any non-idle mode, as a fraction.
-- rate() only accepts a range vector, so it has to be applied to the raw
-- counter (with a [5m] window) BEFORE sum() folds the series by instance;
-- wrapping sum() in rate() is rejected by the PromQL parser.
-- NOTE(review): the 5m window is a choice made here - tune it to the scrape
-- interval of the Node Exporter job.
local query = 'sum by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[5m]))'
    .. ' / sum by (instance) (rate(node_cpu_seconds_total[5m]))'

local res, err = prom1.range(query)
if err ~= nil then
    -- Make the failure visible instead of ending the run silently;
    -- the alerts keep their previous state.
    print("cpu70: prometheus query failed: " .. tostring(err))
    return
end

-- One alert per returned series, named after the series' instance label.
for _, item in pairs(res or {}) do
    local values = item.values
    -- The most recent sample is the last element; nil when the series is empty.
    local last = values and values[#values]
    local instance = item.metrics and item.metrics.instance
    local v = last and tonumber(last.value)

    -- Skip series that carry no usable sample or no instance label rather
    -- than aborting the whole run on the first malformed item.
    if v ~= nil and instance ~= nil then
        local alertName = "cpu70-" .. instance

        if v > cpuLimit then
            alert.on(alertName, "High CPU loading on instance " .. instance)
        else
            alert.off(alertName, "Success CPU loading on instance " .. instance)
        end
    end
end