# scripts/monitoring.d/common-tests
# Last commit: Sébastien MARQUE, "fix failed units" (7abd70a, 2 years ago)
# 2 contributors
# 106 lines | 3.657 kB
# vim: ft=sh

# Check the state of one systemd unit and print a message when it does not
# look healthy. Prints nothing when the unit is in the expected state.
#
# Arguments: $1 - unit name, handed as-is to `systemctl show`
# Outputs:   "service $1 inexistant" when `systemctl show` reports no Type
#            property; "état du service anormal" when any checked property
#            is not the expected one
# Returns:   non-zero when $1 is empty, 0 otherwise
function _systemctl () {
    test -n "$1" || return

    # Properties are parsed into locals rather than eval'ing the output of
    # systemctl: eval created globals, so a property missing for one unit
    # silently kept the value read for the previously checked unit.
    local key value
    local exec_status= result= active_state= sub_state= unit_type=
    while IFS='=' read -r key value; do
        case "$key" in
            ExecMainStatus) exec_status=$value ;;
            Result)         result=$value ;;
            ActiveState)    active_state=$value ;;
            SubState)       sub_state=$value ;;
            Type)           unit_type=$value ;;
        esac
    done < <(systemctl show "$1")

    if [[ -z $unit_type ]]; then
        echo "service $1 inexistant"
        return 0
    fi

    # simple/forking/notify units must be "running"; a oneshot unit must have
    # "exited". Any other Type has no accepted sub-state and is reported.
    # NOTE(review): types such as dbus or exec are therefore always flagged;
    # confirm this is intended.
    local expected_sub=
    case "$unit_type" in
        simple|forking|notify) expected_sub='running' ;;
        oneshot)               expected_sub='exited' ;;
    esac

    # The numeric test also fails (quietly) when ExecMainStatus is missing.
    if [ "$exec_status" -eq 0 ] 2>/dev/null \
        && [[ $result == 'success' && $active_state == 'active' ]] \
        && [[ -n $expected_sub && $sub_state == "$expected_sub" ]]; then
        return 0
    fi
    echo "état du service anormal"
}

# Report the overall systemd state when it is not "running" or when at least
# one unit has failed, followed by the failed units grouped by unit type.
#
# Outputs: a header "<state> mode (<n> units failed):", then for each unit
#          type its name on one line and one "├─ <unit>" / "└─ <unit>" line
#          per failed unit of that type (types and units in the order
#          systemctl lists them). Nothing when the system is healthy or when
#          the manager state could not be read.
# Returns: 0
function sys_state () {
    # Parse the two manager properties into locals (no eval, no globals).
    local key value system_state= failed_count=
    while IFS='=' read -r key value; do
        case "$key" in
            SystemState)  system_state=$value ;;
            NFailedUnits) failed_count=$value ;;
        esac
    done < <(systemctl show)

    [[ -n $system_state ]] || return 0
    [[ $failed_count =~ ^[0-9]+$ ]] || failed_count=0
    if [[ $system_state == 'running' ]] && (( 10#$failed_count == 0 )); then
        return 0
    fi
    echo "$system_state mode ($failed_count units failed):"

    local first second rest unit utype i nl=$'\n'
    local -a order=() group=()
    local -A members=()
    while read -r first second rest; do
        # Lines may start with a status bullet; the unit name is then the
        # second column. A bullet contains no alphanumeric character.
        if [[ $first == *[[:alnum:]]* ]]; then
            unit=$first
        else
            unit=$second
        fi
        # Skip anything that does not end in ".<type>".
        [[ $unit == *.?* ]] || continue
        # The type is what follows the LAST dot: unit names may themselves
        # contain dots (e.g. org.cups.cupsd.service).
        utype=${unit##*.}
        [[ -n ${members[$utype]+x} ]] || order+=("$utype")
        members[$utype]+="${unit%.*}$nl"
    done < <(systemctl --failed --no-legend)

    for utype in "${order[@]}"; do
        printf '%s\n' "$utype"
        mapfile -t group <<< "${members[$utype]%"$nl"}"
        for i in "${!group[@]}"; do
            if (( i == ${#group[@]} - 1 )); then
                printf '└─ %s\n' "${group[i]}"
            else
                printf '├─ %s\n' "${group[i]}"
            fi
        done
    done
}

# Report a pending kernel change and the services that need a restart, based
# on the "NEEDRESTART-<KEY>: <value>" lines printed by needrestart.
#
# Runs only when `_turn 24` succeeds (helper defined outside this block).
#
# Outputs: "kernel:" followed by "<current> -> <expected>" when KSTA is
#          greater than 1 and the two kernel versions differ once a trailing
#          "-v7+" / "-v8+" suffix is ignored; "services:" followed by one
#          "- <name>" line per NEEDRESTART-SVC entry, in input order.
# Returns: the status of `_turn` when it fails, otherwise the pipeline status
function restart_needed () {
    _turn 24 || return
    needrestart -m a -r l -bkl | awk -F': ' '
        $1 == "NEEDRESTART-KSTA" { ksta = $2 + 0 }
        $1 == "NEEDRESTART-KCUR" { kcur = $2 }
        $1 == "NEEDRESTART-KEXP" { kexp = $2 }
        $1 == "NEEDRESTART-SVC" {
            name = $2
            # Strip only a literal, trailing ".service": an unanchored
            # ".service" pattern also matches inside names like "myservice".
            sub(/\.service$/, "", name)
            if (!(name in seen)) {
                seen[name] = 1
                services[++count] = name
            }
        }
        END {
            cur_base = kcur
            exp_base = kexp
            sub(/-v[78]\+$/, "", cur_base)
            sub(/-v[78]\+$/, "", exp_base)
            if (ksta > 1 && cur_base != exp_base) {
                print "kernel:"
                print kcur " -> " kexp
            }
            if (count > 0) {
                print "services:"
                for (i = 1; i <= count; i++)
                    print "- " services[i]
            }
        }'
}

# Self-test hook: when the marker file /dev/shm/monitoring.autotest exists,
# print "it works" and remove the marker.
#
# Returns: 1 when the marker is absent; otherwise the status of the echo or,
#          if that succeeded, of the rm.
function autotest () {
    local marker=/dev/shm/monitoring.autotest
    if [[ ! -e $marker ]]; then
        return 1
    fi
    echo "it works" || return
    rm -f "$marker"
}

# Print every swap area from /proc/swaps whose usage exceeds 10 %, as
# "<name> (<percent>%)". Entries whose name contains "zram" are ignored.
#
# Runs only when `_turn 24` succeeds (helper defined outside this block);
# otherwise returns that helper's status without printing anything.
function swap () {
    _turn 24 || return
    awk '
        # Data lines start with a path; column 3 is the size, column 4 the
        # amount in use.
        /^\// && $1 !~ /zram/ {
            used_pct = $4 / $3 * 100
            if (used_pct > 10)
                printf "%s (%.1f%%)\n", $1, used_pct
        }
    ' /proc/swaps
}

# Warn about certificates that expire in less than 10 days or have already
# expired.
#
# Runs only when `_turn 24` succeeds (helper defined outside this block).
#
# Globals:   now (read) - current time in epoch seconds; `date +%s` is used
#            when it is unset or empty
# Arguments: $1 - optional directory holding one "<domain>" sub-directory per
#                 certificate (default: /etc/letsencrypt/live)
# Outputs:   "<domain> expire dans <n> jours" (n = whole days left) or
#            "<domain> a expiré"
function certificates () {
    _turn 24 || return
    local live_dir=${1:-/etc/letsencrypt/live}
    local current=${now:-$(date +%s)}
    local domain not_after enddate remaining
    for domain in "$live_dir"/*.*; do
        # An unmatched glob is left as the literal pattern: skip it.
        [[ -d $domain ]] || continue
        not_after=$(openssl x509 -enddate -noout -in "$domain/fullchain.pem") || continue
        # openssl prints "notAfter=<date>"; an empty date must not reach
        # `date -d`, which would read it as today at midnight.
        [[ -n ${not_after#*=} ]] || continue
        enddate=$(date +%s -d "${not_after#*=}") || continue
        # Plain integer arithmetic on the difference: formatting it as a
        # day-of-year wraps to ~365 for expired certificates (hiding them),
        # is off by one and depends on the local timezone.
        remaining=$(( enddate - current ))
        if (( remaining <= 0 )); then
            echo "${domain##*/} a expiré"
        elif (( remaining / 86400 < 10 )); then
            echo "${domain##*/} expire dans $(( remaining / 86400 )) jours"
        fi
    done
}

# Warn when the 1, 5 or 15 minute load average reaches the number of
# "processor" entries found in /proc/cpuinfo.
#
# Only the integer part of each load value is compared.
#
# Outputs: one "charge moyenne à <N>min = <load>" line per exceeded window
# Returns: non-zero when /proc/loadavg cannot be read, 0 otherwise
function load_avg () {
    local procs i
    local -a load=()
    local -a window=(1 5 15)
    procs=$(grep -c '^processor' /proc/cpuinfo)
    # The first three fields of /proc/loadavg are the 1/5/15 min averages.
    read -r -a load < /proc/loadavg || return
    # `i` is declared local above: as a global it would overwrite a loop
    # counter of the same name in the caller.
    for (( i = 0; i < ${#window[@]}; i++ )); do
        if [[ ${load[i]%.*} -ge $procs ]]; then
            echo "charge moyenne à ${window[i]}min = ${load[i]}"
        fi
    done
    return 0
}

# Warn when the last line of the unattended-upgrades log is more than 24
# hours old.
#
# The first two space-separated fields of that line are parsed as its date
# and time.
#
# Globals:   now (read) - current time in epoch seconds; `date +%s` is used
#            when it is unset or empty
# Arguments: $1 - optional log file
#                 (default: /var/log/unattended-upgrades/unattended-upgrades.log)
# Outputs:   "pas de mise à jour depuis le <date>"
# Returns:   non-zero when the log is unreadable, empty, or its last line
#            carries no parsable timestamp; 0 otherwise
function upgrades () {
    local log=${1:-/var/log/unattended-upgrades/unattended-upgrades.log}
    local current=${now:-$(date +%s)}
    local lastline day time rest lastentry
    test -r "$log" || return
    lastline=$(tail -n 1 -- "$log")
    read -r day time rest <<< "$lastline"
    # Without this guard an empty last line would reach `date -d ""`, which
    # resolves to today at midnight instead of failing.
    test -n "$day" || return
    lastentry=$(date +%s -d "$day $time" 2>/dev/null)
    test -n "$lastentry" || return
    if (( lastentry + 86400 < current )); then
        echo "pas de mise à jour depuis le $day"
    fi
    return 0
}

# Print "NTP pas synchro" unless exactly one line of `chronyc sources`
# starts with "^*".
#
# Returns: 1 when exactly one such line exists, otherwise the status of the
#          echo.
function chrony () {
    local line selected=0
    while IFS= read -r line || [[ -n $line ]]; do
        if [[ $line == '^*'* ]]; then
            selected=$(( selected + 1 ))
        fi
    done < <(chronyc sources)
    if (( selected == 1 )); then
        return 1
    fi
    echo "NTP pas synchro"
}