scripts / monitoring /
bf44b6d 7 years ago
1 contributor
119 lines | 3.527kb
#!/bin/bash
#
# Host supervision: load check definitions, run them and report failures.
#
# Usage: <script> [config ...]
#   With no arguments the configs "common-tests" and "$HOSTNAME" are loaded.
#
# State is kept in three files under /dev/shm, named after this script (and
# after $USER when it is set):
#   *.status    "<check>=<epoch>" lines: when a failing check was last recorded
#   *.disabled  checks ("name") and configs ("@name") that must be skipped
#   *.turns     one line per _turn call, used as a per-check call counter

# Quoted so that a script path containing whitespace still yields one name.
basename=$(basename -- "$0")
monitoring_status="/dev/shm/${basename}${USER:+.$USER}.status"
monitoring_disabled="/dev/shm/${basename}${USER:+.$USER}.disabled"
monitoring_turns="/dev/shm/${basename}${USER:+.$USER}.turns"
# Make sure the state files exist so later grep/sed calls do not fail.
touch -- "$monitoring_status" "$monitoring_disabled" "$monitoring_turns"
# Reference time (epoch seconds) shared by the whole run.
now=$(date +%s)

# Record one error in the global "err" array.
#
# Arguments: $1 - name of the check (or helper) reporting the problem
#            $2 - message; "%0a" is appended to it as a line terminator
# Returns:   1 without recording anything unless exactly two arguments
#            are given, 0 otherwise
function _err () {
    if [[ $# -ne 2 ]]; then
        return 1
    fi
    # "err" holds flat pairs: name, message, name, message, ...
    err+=("$1" "$2%0a")
}

# Source the named config files and keep track of what was loaded.
#
# Every name is looked up in three directories, in this order:
#   $HOME/.config/$basename, ${0}.d, /usr/local/share/$basename
# and is sourced from each directory that holds a readable file of that
# name. The directory is passed to the sourced file as its $1.
# A name listed as "@name" in $monitoring_disabled is skipped.
#
# Globals:   basename, monitoring_disabled (read)
#            loaded, disabled (arrays, appended to)
#            err (appended to through _err when something was not loaded)
# Arguments: config names
function _load () {
    local conf=
    local l=
    local d=
    local l_ok=
    local count_loaded=
    local msg=
    local directories=(
        "$HOME/.config/$basename"
        "${0}.d"
        "/usr/local/share/${basename}"
    )
    for conf in "$@"; do
        count_loaded=0
        # -x -F: literal whole-line match, so a dotted host name such as
        # "web.example.com" is not interpreted as a regular expression.
        if grep -qxF -- "@$conf" "$monitoring_disabled"; then
            disabled+=("$conf")
        else
            # NOTE(review): "dir" is left non-local on purpose; it always was
            # global and sourced configs may rely on that - confirm before
            # localising it.
            for dir in "${directories[@]}"; do
                # Only a config that was sourced successfully counts as loaded.
                if [[ -r "$dir/$conf" ]] && source "$dir/$conf" "$dir"; then
                    count_loaded=$(( count_loaded + 1 ))
                fi
            done
            if (( count_loaded > 0 )); then
                loaded+=("$conf")
            fi
        fi
    done
    if (( ${#loaded[@]} != $# )); then
        # Report each requested name that is not in "loaded", flagging the
        # ones that were skipped because they are disabled.
        for conf in "$@"; do
            l_ok=0
            for l in "${loaded[@]}"; do
                if [[ $conf == "$l" ]]; then
                    l_ok=1
                    break
                fi
            done
            if (( l_ok == 0 )); then
                for d in "${disabled[@]}"; do
                    if [[ $conf == "$d" ]]; then
                        msg+="$conf (disabled)%0a"
                        l_ok=1
                        break
                    fi
                done
            fi
            if (( l_ok == 0 )); then
                msg+="$conf%0a"
            fi
        done
        # ${msg/%%0a} drops the trailing "%0a"; _err appends its own.
        _err "_load" "config not loaded:%0a${msg/%%0a}"
    fi
}

# Call counter for the calling function: succeeds on every Nth call.
#
# Each call appends the caller's name to $monitoring_turns. Once the caller
# has N lines in that file they are all removed and the call succeeds;
# before that it fails.
#
# Globals:   monitoring_turns (read and rewritten)
# Arguments: $1 - N, a strictly positive decimal integer
# Returns:   0 on every Nth call (when the counter could be reset),
#            non-zero otherwise, including for an invalid N
function _turn () {
    local caller=${FUNCNAME[1]}
    local count=
    # Anchored match plus a zero check: "abc1" or "0" would otherwise end in
    # a division by zero below. 10# keeps "08" from being read as octal.
    if ! [[ $1 =~ ^[0-9]+$ ]] || (( 10#$1 == 0 )); then
        return 1
    fi
    echo "$caller" >> "$monitoring_turns"
    # Count whole-line literal matches only, mirroring the exact-line delete
    # below; a substring count would mix up "chk" and "chk_extra".
    count=$(grep -cxF -- "$caller" "$monitoring_turns")
    (( count % 10#$1 == 0 )) && sed -i "/^${caller}$/d" "$monitoring_turns"
}

# Run every check and collect the failures.
#
# A check is any defined shell function whose name starts with a letter and
# consists of letters, digits and underscores; functions starting with "_"
# are skipped. A check reports a problem by printing something on stdout;
# no output means the check passed. Checks run inside a pipeline, i.e. in a
# subshell, so variables they set are not visible here.
#
# Globals:   monitoring_disabled, monitoring_turns (read)
#            monitoring_status (entries of passing checks are removed)
#            err (array, appended to: check name, then its output)
# Outputs:   one line on stdout per disabled or failing check
function _do_tests () {
    local t=
    local msg=
    # Word splitting of the name list is safe: the sed filter only lets
    # through names made of [[:alnum:]_].
    for t in $(declare -f | sed -rn '/^[[:alpha:]]/s/^([[:alnum:]_]+)\s*\(\)\s*$/\1/p'); do
        msg=
        if grep -qxF -- "$t" "$monitoring_disabled"; then
            echo "supervision by $t disabled"
        else
            # Terminate every output line with "%0a", the line separator
            # used in the error messages.
            msg=$( "$t" | sed 's/$/%0a/g' )
            if [[ -n $msg ]]; then
                echo "problem: ${t}"
                err+=("$t" "$msg")
            elif ! grep --quiet --word-regexp -- "$t" "$monitoring_turns"; then
                # The check passed and has no pending _turn counter: drop its
                # timestamp from the status file.
                sed -ri "/^$t=.+$/d" "$monitoring_status"
            fi
        fi
    done
}

# Report the collected errors, notifying at most once per 24 hours per check.
#
# "err" holds flat (name, message) pairs. For each pair the status file is
# searched for a "<name>=<epoch>" line:
#   - no entry: one is appended with the current time and a notification
#     is due;
#   - entry older than 24 hours (or not a number): it is refreshed and a
#     notification is due;
#   - recent entry: nothing is recorded.
# When $SHLVL is 1 the whole report is handed to the "smsapi" program located
# next to this script, but only if a notification is due. Otherwise the
# report is printed on stdout with "%0a" turned into line breaks.
#
# Globals:   err, loaded, now, HOSTNAME, SHLVL (read)
#            monitoring_status (read and updated)
function _do_manage_errors () {
    local msg=
    local t=
    local errors=
    local notification=0
    local i=
    local key=
    local value=
    local last=

    if (( ${#err[@]} == 0 )); then
        return 0
    fi

    errors="Supervision $HOSTNAME (${loaded[*]}):%0a"
    for (( i = 0; i < ${#err[@]}; i += 2 )); do
        t=${err[i]}
        msg=${err[i + 1]}
        errors+="* [${t}] ${msg}"

        # Read the timestamp as data instead of sourcing the status file:
        # the file sits in a shared directory under a predictable name, and
        # an indirect ${!t} lookup would confuse a check called e.g. "msg"
        # or "now" with the shell variable of the same name.
        last=
        while IFS='=' read -r key value || [[ -n $key ]]; do
            if [[ $key == "$t" ]]; then
                last=$value
            fi
        done < "$monitoring_status"

        if [[ -n $last ]]; then
            # A corrupt (non-numeric) entry is treated as stale so that it
            # gets repaired instead of silencing the check forever.
            if ! [[ $last =~ ^[0-9]+$ ]] || (( 10#$last < now - 86400 )); then
                sed -ri "s/^(${t}=).+$/\1${now}/" "$monitoring_status"
                notification=$(( notification + 1 ))
            fi
        else
            echo "${t}=${now}" >> "$monitoring_status"
            notification=$(( notification + 1 ))
        fi
    done

    # Messages carry "%0a" as their line separator; real newlines are dropped.
    errors=${errors//$'\n'/}
    if test "$SHLVL" -eq 1; then
        if (( notification > 0 )); then
            "$(dirname -- "$0")/smsapi" "$errors"
        fi
    else
        # Quoted output: "[name]" must not be glob-expanded against the
        # current directory and spacing inside messages must be kept.
        errors=${errors//\%0a/\\n}
        printf '%b\n' "$errors"
    fi
}

# Entry point: load the configs named on the command line (by default
# "common-tests" and one named after $HOSTNAME), run every check, then
# report the collected errors.
# The expansion is left unquoted on purpose so that the default value splits
# into two separate config names.
_load ${*:-common-tests $HOSTNAME}
_do_tests
_do_manage_errors