Showing 4 changed files with 219 additions and 0 deletions
+103
supervision
... ...
@@ -0,0 +1,103 @@
1
#!/bin/bash
#
# Supervision runner: loads test definitions, runs every test function and
# reports the collected problems.
#
# Globals defined here and used by the functions below:
#   basename              name this script was invoked as
#   supervision_status    file of "name=epoch" lines recording when a problem
#                         was last notified
#   supervision_disabled  file listing disabled tests (one name per line) and
#                         disabled configs (lines of the form "@config")
#   now                   current time in seconds since the epoch

# Quote "$0" so that an installation path containing spaces still works.
basename=$(basename -- "$0")
supervision_status="/dev/shm/${basename}${USER:+.$USER}.status"
supervision_disabled="/dev/shm/${basename}${USER:+.$USER}.disabled"
# Make sure both files exist so that later grep/source/sed calls do not fail.
touch -- "$supervision_status" "$supervision_disabled"
now=$(date +%s)
8
+
9
# Record one problem in the global `err` array.
# Arguments: $1 - name of the reporting test, $2 - message.
# The array holds flat (name, message) pairs; the message gets a trailing
# "%0a" marker. Any call without exactly two arguments is rejected with a
# non-zero status and records nothing.
_err() {
    if [ "$#" -ne 2 ]; then
        return 1
    fi
    err+=("$1" "$2%0a")
}
14
+
15
# Source the requested configuration files.
#
# Globals:
#   supervision_disabled (read)  a line "@<config>" disables that config
#   basename, HOME, $0   (read)  used to build the search directories
#   loaded, disabled     (written) arrays of config names, appended to
#   err                  (written, through _err) when something was not loaded
# Arguments: the config names to load.
#
# Each config is looked up in every search directory and sourced from each
# one where it is readable. A config that is disabled, or that could not be
# sourced from any directory, is reported with a single "_load" error entry.
_load() {
    local conf dir count_loaded
    local msg=

    for conf in "$@"; do
        # -x -F: compare the whole line literally, so that a "." in a config
        # name (typically a host name) is not treated as a regex wildcard.
        if grep -qxF -- "@$conf" "$supervision_disabled"; then
            disabled+=("$conf")
            msg+="$conf (disabled)%0a"
            continue
        fi

        count_loaded=0
        for dir in "$HOME/.config/$basename" "${0}.d" "/usr/local/share/${basename}"; do
            if [[ -r "$dir/$conf" ]] && source "$dir/$conf"; then
                count_loaded=$((count_loaded + 1))
            fi
        done

        if ((count_loaded > 0)); then
            loaded+=("$conf")
        else
            msg+="$conf%0a"
        fi
    done

    if [[ -n $msg ]]; then
        # Drop the last separator: _err appends its own trailing "%0a".
        msg=${msg%'%0a'}
        _err "_load" "config not loaded:%0a${msg}"
    fi
}
49
+
50
+
51
# Run every test function currently defined in the shell.
#
# A test is any function whose name starts with a letter and contains only
# letters, digits and underscores (helpers starting with "_" are skipped).
# A test reports a problem by writing to stdout; no output means success.
#
# Globals:
#   supervision_disabled (read)  a line equal to the test name disables it
#   supervision_status   (read/written) the "name=..." line of a test that
#                        succeeds is removed
#   err                  (written) gets (name, message) pairs for failures
# Outputs: one progress line per disabled or failing test.
_do_tests() {
    local t msg line
    local -r name_re='^[[:alpha:]][[:alnum:]_]*$'
    local -a names=()

    # Collect the names first so the tests do not run with the list on stdin.
    # `declare -F` prints "declare -f NAME" lines (names only, no bodies).
    while IFS= read -r line; do
        names+=("${line##* }")
    done < <(declare -F)

    for t in "${names[@]}"; do
        [[ $t =~ $name_re ]] || continue

        if grep -qxF -- "$t" "$supervision_disabled"; then
            echo "supervision by $t disabled"
            continue
        fi

        # Every output line of the test gets a "%0a" marker appended.
        msg=$("$t" | sed 's/$/%0a/g')
        if [[ -n $msg ]]; then
            echo "problem: ${t}"
            err+=("$t" "$msg")
        elif grep -qE "^${t}=.+\$" "$supervision_status"; then
            # The test recovered: forget its last notification time. The
            # status file is checked directly because it has not been sourced
            # at this point, so no shell variable named after the test exists.
            sed -ri "/^${t}=.+\$/d" "$supervision_status"
        fi
    done
}
70
+
71
# Report the problems collected in `err` and update the notification times.
#
# Globals:
#   supervision_status (read/written) sourced to get one "name=epoch"
#                      variable per already-notified problem, then updated
#   err      (read) flat array of (name, message) pairs
#   loaded   (read) names of the loaded configs, shown in the header
#   now      (read) current epoch
#   HOSTNAME, SHLVL, $0 (read)
#
# A problem counts as "to notify" when it has no recorded time yet or when
# its recorded time is more than 86400 seconds old. When SHLVL is 1 the report
# is handed to the `smsapi` program located next to this script, and only if
# at least one problem is to be notified; otherwise the report is printed.
_do_manage_errors() {
    source "$supervision_status"

    if [ "${#err[@]}" -eq 0 ]; then
        return 0
    fi

    local errors text t msg i
    local notification=0
    local -r marker='%0a' newline_escape='\n'

    errors="Supervision $HOSTNAME (${loaded[*]}):%0a"
    for ((i = 0; i < ${#err[@]}; i += 2)); do
        t=${err[i]}
        msg=${err[i + 1]}
        errors+="* [${t}] ${msg}"
        if [ -n "${!t}" ]; then
            if [ "${!t}" -lt "$((now - 86400))" ]; then
                sed -ri "s/^${t}=.+\$/${t}=${now}/" "$supervision_status"
                notification=$((notification + 1))
            fi
        else
            echo "${t}=$now" >> "$supervision_status"
            notification=$((notification + 1))
        fi
    done

    if [ "$SHLVL" -eq 1 ]; then
        if [ "$notification" -gt 0 ]; then
            "$(dirname -- "$0")/smsapi" "$errors"
        fi
    else
        # Messages carry both real newlines and "%0a" markers; keep only the
        # markers and turn them into line breaks. The text is quoted so that
        # "*" and "[name]" are never expanded as file name patterns.
        text=${errors//$'\n'/}
        text=${text//"$marker"/$newline_escape}
        echo -e "$text"
    fi
}
100
+
101
# Load the configs named on the command line, or by default the common tests
# plus the config named after this host, then run the tests and report.
# "$@" keeps each argument as a single word instead of re-splitting it.
if [ "$#" -gt 0 ]; then
    _load "$@"
else
    _load common-tests "$HOSTNAME"
fi
_do_tests
_do_manage_errors
+38
supervision.d/common-tests
... ...
@@ -0,0 +1,38 @@
1
+# vim: ft=sh
2
+
3
# Check the state of one systemd unit as reported by `systemctl show`.
#
# Arguments: $1 - unit name (returns non-zero immediately when empty).
# Outputs:   nothing when the unit is healthy, one line otherwise.
#
# A unit is healthy when ExecMainStatus is 0, Result is "success",
# ActiveState is "active" and either
#   - Type is simple/forking/notify with SubState "running", or
#   - Type is oneshot with SubState "exited".
# A unit for which no Type property is reported is considered nonexistent.
_systemctl() {
    [ -n "$1" ] || return

    # The properties are parsed into locals rather than eval'ed into globals:
    # nothing from the command output gets executed, and values left over
    # from a previous call cannot hide a missing unit.
    local key value
    local unit_type= exec_status= result= active_state= sub_state=
    while IFS='=' read -r key value; do
        case $key in
            Type) unit_type=$value ;;
            ExecMainStatus) exec_status=$value ;;
            Result) result=$value ;;
            ActiveState) active_state=$value ;;
            SubState) sub_state=$value ;;
        esac
    done < <(systemctl show "$1")

    if [[ -z $unit_type ]]; then
        echo "service $1 inexistant"
        return 0
    fi

    if [[ $exec_status == 0 && $result == success && $active_state == active ]] \
        && [[ ($unit_type =~ ^(simple|forking|notify)$ && $sub_state == running) \
            || ($unit_type == oneshot && $sub_state == exited) ]]; then
        return 0
    fi
    echo "état du service anormal"
}
16
+
17
# Test: overall systemd state.
#
# Reads SystemState and NFailedUnits from `systemctl show`. When the state is
# not "running" or some units have failed, prints ONE line made of a summary
# followed by the failed units grouped by unit type, drawn as a small tree.
# Line breaks inside that line are encoded as "%0a" markers.
# Prints nothing when the system is healthy.
sys_state() {
    local key value msg
    local system_state= n_failed=

    while IFS='=' read -r key value; do
        case $key in
            SystemState) system_state=$value ;;
            NFailedUnits) n_failed=$value ;;
        esac
    done < <(systemctl show)
    # If the manager state cannot be read, report it instead of staying quiet.
    system_state=${system_state:-unknown}
    n_failed=${n_failed:-0}

    if [[ $system_state == running && $n_failed == 0 ]]; then
        return 0
    fi

    msg="$system_state mode ($n_failed units failed):%0a"
    # --plain drops the leading status bullet so that $1 is the unit name.
    # The unit type is the part after the LAST dot; names may contain dots.
    msg+=$(systemctl --failed --no-legend --plain | awk '
        {
            n = split($1, parts, ".")
            if (n < 2) {
                type = ""
                name = $1
            } else {
                type = parts[n]
                name = substr($1, 1, length($1) - length(type) - 1)
            }
            if (!(type in count))
                order[++ntypes] = type
            names[type, ++count[type]] = name
        }
        END {
            for (i = 1; i <= ntypes; i++) {
                type = order[i]
                printf("%s%s", (i > 1 ? "%0a" : ""), type)
                for (j = 1; j <= count[type]; j++)
                    printf("%%0a%s %s", (j == count[type] ? "└─" : "├─"), names[type, j])
            }
        }')
    # The message must reach stdout: that is how a test reports a problem.
    echo "$msg"
}
+15
supervision.d/foo
... ...
@@ -0,0 +1,15 @@
1
+# utilisé à fin de tests: supervision foo
2
+
3
# Dummy test that always reports a three-line problem.
foo() {
    printf '%s\n' "foo1" "foo2" "foo3"
}
8
+
9
# Dummy test that always reports a one-line problem.
bar() {
    printf '%s\n' bar
}
12
+
13
# Dummy test that always reports a one-line problem.
baz() {
    printf '%s\n' baz
}
+63
supervision.d/raspberrypi
... ...
@@ -0,0 +1,63 @@
1
+# vim: ft=sh
2
+
3
# Test: TCP reachability of the web server, on the loopback address (port 80)
# and through its public host name (ports 80 and 443). One line is printed
# per probe that fails.
nginx() {
    if ! nc -zw2 127.0.0.1 80; then
        echo "injoignable sur port 80 local"
    fi
    if ! nc -zw2 sebmarque.hd.free.fr 80; then
        echo "injoignable sur port 80 distant"
    fi
    if ! nc -zw2 sebmarque.hd.free.fr 443; then
        echo "injoignable sur port 443 distant"
    fi
}
8
+
9
# Test: something must accept TCP connections on local port 8888.
searx() {
    if ! nc -zw2 127.0.0.1 8888; then
        echo "injoignable sur port 8888"
    fi
}
12
+
13
# Test: something must accept TCP connections on local port 10020.
gitprep() {
    if ! nc -zw2 127.0.0.1 10020; then
        echo "injoignable sur port 10020"
    fi
}
16
+
17
# Test: something must accept TCP connections on local port 8080.
cherrymusic() {
    if ! nc -zw2 127.0.0.1 8080; then
        echo "injoignable sur port 8080"
    fi
}
20
+
21
# Test: temperature reading against a 60-unit threshold.
#
# Arguments: $1 - optional file holding the reading, expressed in thousandths
#                 of the unit shown in the message
#                 (default: /sys/class/thermal/thermal_zone0/temp).
# Outputs:   nothing at or below the threshold; otherwise one line
#            "<level>: température > <bound>°" where the level rises with
#            every 10 units above the threshold.
core_temp() {
    local temp_file=${1:-/sys/class/thermal/thermal_zone0/temp}
    local min=60
    local level=("warning" "severe" "danger!")
    local reading step

    [[ -r $temp_file ]] || return 0
    reading=$(<"$temp_file")
    step=$((reading - min * 1000))
    if ((step > 0)); then
        step=$((step / 10000))
        # Clamp to the last defined level; an index past the end of the
        # array would produce a message with an empty level.
        if ((step >= ${#level[@]})); then
            step=$((${#level[@]} - 1))
        fi
        echo "${level[step]}: température > $((min + step * 10))°"
    fi
}
31
+
32
# Test: core voltage as reported by `vcgencmd measure_volts core`.
#
# Prints the reported value whenever it differs from "1.2000V"; prints
# nothing when it matches. An empty line is printed when no value could be
# read, which the runner still counts as a problem.
core_alim() {
    local output volt
    # Presumably the tool prints "volt=<value>" (the output used to be eval'ed
    # to define $volt). Strip the prefix instead of executing the output.
    output=$(/opt/vc/bin/vcgencmd measure_volts core)
    volt=${output#volt=}
    if [[ $volt != "1.2000V" ]]; then
        echo "$volt"
    fi
}
36
+
37
# Test: load averages against a threshold.
#
# Arguments: $1 - optional threshold (default: 4)
#            $2 - optional file to read (default: /proc/loadavg); its first
#                 three fields are labelled as the 1, 5 and 15 minute values.
# Outputs:   one line per value whose integer part reaches the threshold.
load_avg() {
    local procs=${1:-4}
    local loadavg_file=${2:-/proc/loadavg}
    local -a load=()
    local -a avg=(1 5 15)
    local i

    # read -a splits on whitespace without glob expansion; keep going when
    # the file merely lacks a final newline.
    read -r -a load < "$loadavg_file" || ((${#load[@]} > 0)) || return 0

    for ((i = 0; i < ${#avg[@]}; i++)); do
        if [[ ${load[i]%.*} -ge $procs ]]; then
            echo "charge moyenne à ${avg[i]}min = ${load[i]}"
        fi
    done
}
45
+
46
# Test: available memory.
#
# Arguments: $1 - optional file to read (default: /proc/meminfo).
# Outputs:   nothing when enough memory is available; otherwise one line
#            "<level>: available <value>kB".
#
# The level index is (MemAvailable - margin) / 100000, i.e. with the 30000
# margin: below 130000 -> danger, below 230000 -> severe, below 330000 ->
# warning, anything above -> no alert.
mem_pressure() {
    local meminfo_file=${1:-/proc/meminfo}
    local level=(danger severe warning)
    local margin=30000
    local key value rest idx
    local available=

    while read -r key value rest; do
        if [[ $key == 'MemAvailable:' ]]; then
            available=$value
            break
        fi
    done < "$meminfo_file"
    # No MemAvailable line: nothing can be measured, so nothing is reported.
    [[ -n $available ]] || return 0

    # Integer division truncates toward zero, so idx is 0 even when the
    # value is below the margin.
    idx=$(((available - margin) / 100000))
    # Compare against the number of levels: an index past the last level
    # means there is enough memory.
    if ((idx < ${#level[@]})); then
        echo "${level[idx]}: available ${available}kB"
    fi
}
56
+
57
# Test: link with the host at 192.168.0.53.
# Counts the lines printed by `ss` for established connections from the local
# nfs port to that host and reports when there are fewer than 3, then probes
# TCP port 6600 on the host.
mpd_tablette() {
    local tablette=192.168.0.53
    local port=6600
    local nfs
    nfs=$(ss -no state established 'sport = :nfs' dst "$tablette" | grep -c $)
    if [ "$nfs" -lt 3 ]; then
        echo "tablette non connectée à la zique"
    fi
    nc -zw2 "$tablette" "$port" || echo "MPD injoignable"
}