mirror of https://github.com/aredn/aredn.git
aredn: harden cron maintenance scripts
ensure maintenance scripts execute one at a time and never in duplication
This commit is contained in:
parent
c5e9342e29
commit
3938f33afe
|
@ -1,5 +1,5 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
<<'LICENSE'
|
true <<'LICENSE'
|
||||||
Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
|
Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
|
||||||
Copyright (C) 2018 Joe Ayers AE6XE
|
Copyright (C) 2018 Joe Ayers AE6XE
|
||||||
See Contributors file for additional contributors
|
See Contributors file for additional contributors
|
||||||
|
@ -36,36 +36,43 @@ LICENSE
|
||||||
# Look for hung 'iw' zombie processes prone to hang
|
# Look for hung 'iw' zombie processes prone to hang
|
||||||
# when available memory is low.
|
# when available memory is low.
|
||||||
|
|
||||||
# wait for rssi_monitor and snrlog to run
|
zombiepid="/tmp/clean_zombie.pid"
|
||||||
sleep 10
|
|
||||||
|
|
||||||
for pid in `ps 2>/dev/null | egrep "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ `
|
[ -e "$zombiepid" ] && oldpid="$(cat "$zombiepid")" && [ -n "$oldpid" ] && [ -d "/proc/$oldpid" ] && exit  # an empty pid file must not match /proc/ itself
|
||||||
|
|
||||||
|
echo "$$" > $zombiepid
|
||||||
|
|
||||||
|
# wait for rssi_monitor and snrlog to run
|
||||||
|
sleep 20;
|
||||||
|
|
||||||
|
for pid in $(ps | grep -E "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ )
|
||||||
do
|
do
|
||||||
|
|
||||||
# found an "iw" zombie
|
# found an "iw" zombie
|
||||||
sleep 10 # give time in case process is naturally closing and needs more time
|
sleep 10 # in case process is naturally closing and needs more time
|
||||||
if [ -d /proc/$pid ] ; then
|
if [ -d "/proc/$pid" ] ; then
|
||||||
date >> /tmp/zombie.log
|
date >> /tmp/zombie.log
|
||||||
ps | egrep "^\s*${pid}" | grep -v grep | tail -1 >> /tmp/zombie.log
|
ps | grep -E "^\s*${pid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||||
ppid=`cat /proc/$pid/status | grep -i ppid | cut -f2`
|
ppid="$(grep -i ppid < /proc/$pid/status | cut -f2)"
|
||||||
if [ -d /proc/$ppid ] ; then
|
if [ -d "/proc/$ppid" ] ; then
|
||||||
ps | egrep "\s*${ppid}" | grep -v grep | tail -1 >> /tmp/zombie.log
|
ps | grep -E "^\s*${ppid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||||
if ( ! `grep crond /proc/$ppid/status 2>&1 > /dev/null` ) then
|
grep crond "/proc/$ppid/status" > /dev/null 2>&1  # discard stdout AND stderr; only the exit status is tested below
|
||||||
if [ $ppid -gt 1 ] ; then
|
if [ $? -ne 0 -a "$ppid" -gt 1 ]; then
|
||||||
|
|
||||||
# kill the zombie's parent process to free up resources
|
# kill the zombie's parent process to free up resources
|
||||||
kill -9 $ppid 2>&1 >> /tmp/zombie.log
|
kill -9 "$ppid" >> /tmp/zombie.log 2>&1  # redirect stdout first so kill's error messages also land in the log
|
||||||
echo "Killed $ppid" >> /tmp/zombie.log
|
echo "Killed $ppid" >> /tmp/zombie.log
|
||||||
if [ `wc -l /tmp/zombie.log | cut -f1 -d\ ` -gt 100 ] ; then
|
if [ "$(wc -l /tmp/zombie.log | cut -f1 -d\ )" -gt 300 ] ; then
|
||||||
|
|
||||||
# keep file size in check
|
# keep file size in check
|
||||||
cp /tmp/zombie.log /tmp/zombie.tmp
|
cp /tmp/zombie.log /tmp/zombie.tmp
|
||||||
tail -80 /tmp/zombie.tmp > /tmp/zombie.log
|
tail -275 /tmp/zombie.tmp > /tmp/zombie.log
|
||||||
rm -f /tmp/zombie.tmp
|
rm -f /tmp/zombie.tmp
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
echo "" >> /tmp/zombie.log
|
echo "" >> /tmp/zombie.log
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
rm $zombiepid
|
||||||
|
|
|
@ -41,9 +41,40 @@
|
||||||
|
|
||||||
$now=`cat /proc/uptime | cut -f1 -d" "`;
|
$now=`cat /proc/uptime | cut -f1 -d" "`;
|
||||||
chomp $now;
|
chomp $now;
|
||||||
|
|
||||||
exit 0 unless $now > 119;
|
exit 0 unless $now > 119;
|
||||||
|
|
||||||
|
sleep 3; # wait for snrlog to see that we are not running
|
||||||
|
|
||||||
|
$rssipid="/tmp/rssi_monitor.pid";
|
||||||
|
if ( -f "$rssipid" )
|
||||||
|
{
|
||||||
|
chomp (${rssipidvalue}=`cat $rssipid`);
|
||||||
|
exit 0 if ( ${rssipidvalue} > 0 and -d "/proc/${rssipidvalue}" );
|
||||||
|
}
|
||||||
|
|
||||||
|
open(my $mypid, '>', $rssipid) or die("Could not open $rssipid. $!");
|
||||||
|
print $mypid $$;
|
||||||
|
close $mypid;
|
||||||
|
|
||||||
|
$snrlogpid="/tmp/snrlog.pid";
|
||||||
|
if ( -f "$snrlogpid" )
|
||||||
|
{
|
||||||
|
chomp (${snrlogpidvalue}=`cat $snrlogpid`);
|
||||||
|
$waitcount=0;
|
||||||
|
while ( ${snrlogpidvalue} > 0 and -d "/proc/${snrlogpidvalue}" and $waitcount < 4)
|
||||||
|
{
|
||||||
|
sleep 5;
|
||||||
|
$waitcount+=1;
|
||||||
|
}
|
||||||
|
if ( $waitcount == 4 ) # skip this turn only if snrlog was still running after all 4 waits
|
||||||
|
{
|
||||||
|
unlink $rssipid;
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep 7;
|
||||||
|
|
||||||
chomp ($iface=`uci -q get 'network.wifi.ifname'`); # wireless interface
|
chomp ($iface=`uci -q get 'network.wifi.ifname'`); # wireless interface
|
||||||
foreach(`iwinfo $iface info`)
|
foreach(`iwinfo $iface info`)
|
||||||
{
|
{
|
||||||
|
@ -337,6 +368,7 @@ for (keys %rssiHist)
|
||||||
}
|
}
|
||||||
|
|
||||||
close $dfh;
|
close $dfh;
|
||||||
|
unlink $rssipid;
|
||||||
|
|
||||||
# when logfile gets 1k over $MAXSIZE, then chop down
|
# when logfile gets 1k over $MAXSIZE, then chop down
|
||||||
$MAXSIZE = 2**14;
|
$MAXSIZE = 2**14;
|
||||||
|
|
|
@ -49,7 +49,6 @@ string.print=print_r
|
||||||
|
|
||||||
|
|
||||||
-- delay just after rssi_monitor has a chance to run noise floor calibration
|
-- delay just after rssi_monitor has a chance to run noise floor calibration
|
||||||
sleep(5)
|
|
||||||
local MAXLINES=2880 -- 2 days worth
|
local MAXLINES=2880 -- 2 days worth
|
||||||
local AGETIME=43200
|
local AGETIME=43200
|
||||||
local INACTIVETIMEOUT=10000
|
local INACTIVETIMEOUT=10000
|
||||||
|
@ -66,6 +65,8 @@ local stations={}
|
||||||
local wifiiface=""
|
local wifiiface=""
|
||||||
local bandwidth=""
|
local bandwidth=""
|
||||||
local nulledout={}
|
local nulledout={}
|
||||||
|
local pidfile="/tmp/snrlog.pid"
|
||||||
|
local rssifile="/tmp/rssi_monitor.pid"
|
||||||
|
|
||||||
-- Neighbor Class
|
-- Neighbor Class
|
||||||
Neighbor={}
|
Neighbor={}
|
||||||
|
@ -255,6 +256,35 @@ end
|
||||||
-- Neighbor Class END
|
-- Neighbor Class END
|
||||||
|
|
||||||
-- MAIN() -------------------------------------------------------------------------------------
|
-- MAIN() -------------------------------------------------------------------------------------
|
||||||
|
-- check to make sure a prior instance is not still running
|
||||||
|
local f = io.open(pidfile,"r")
|
||||||
|
if (f) then
|
||||||
|
local oldpid = f:read("*number")
|
||||||
|
f:close()
|
||||||
|
if (oldpid ~= nil and dir_exists("/proc/" .. oldpid)) then -- a prior snrlog instance is still alive
|
||||||
|
return
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
--- create pid file to communicate I'm running
|
||||||
|
f, err=assert(io.open(pidfile, "w"),"Cannot open file (pidfile) to write!")
|
||||||
|
if (f) then
|
||||||
|
local mypid = posix.unistd.getpid()
|
||||||
|
f:write(mypid)
|
||||||
|
f:close()
|
||||||
|
end
|
||||||
|
|
||||||
|
--- Do not run if prior period rssi_monitor is still running
|
||||||
|
local f = io.open(rssifile,"r")
|
||||||
|
if (f) then
|
||||||
|
local oldpid = f:read("*number")
|
||||||
|
f:close()
|
||||||
|
if (oldpid ~= nil and dir_exists("/proc/" .. oldpid)) then -- rssi_monitor from the prior period is still alive
|
||||||
|
os.remove(pidfile)
|
||||||
|
return
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
-- get wifi interface name
|
-- get wifi interface name
|
||||||
wifiiface=get_ifname("wifi")
|
wifiiface=get_ifname("wifi")
|
||||||
|
|
||||||
|
@ -388,4 +418,6 @@ for k,v in pairs(snrdatcache) do
|
||||||
end
|
end
|
||||||
f:close()
|
f:close()
|
||||||
|
|
||||||
|
os.remove(pidfile)
|
||||||
|
|
||||||
-- END MAIN
|
-- END MAIN
|
||||||
|
|
Loading…
Reference in New Issue