aredn: harden cron maintenance scripts

Ensure the cron maintenance scripts execute one at
a time and that no script runs as a duplicate instance.
This commit is contained in:
Joe AE6XE 2018-09-12 23:11:25 -07:00
parent c5e9342e29
commit 3938f33afe
3 changed files with 94 additions and 23 deletions

View File

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/sh
<<'LICENSE' true <<'LICENSE'
Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
Copyright (C) 2018 Joe Ayers AE6XE Copyright (C) 2018 Joe Ayers AE6XE
See Contributors file for additional contributors See Contributors file for additional contributors
@ -36,36 +36,43 @@ LICENSE
# Look for hung 'iw' zombie processes prone to hang # Look for hung 'iw' zombie processes prone to hang
# when available memory is low. # when available memory is low.
# wait for rssi_monitor and snrlog to run zombiepid="/tmp/clean_zombie.pid"
sleep 10
for pid in `ps 2>/dev/null | egrep "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ ` [ -e $zombiepid ] && [ -d "/proc/$(cat $zombiepid)" ] && exit
echo "$$" > $zombiepid
# wait for rssi_monitor and snrlog to run
sleep 20;
for pid in $(ps | grep -E "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ )
do do
# found an "iw" zombie # found an "iw" zombie
sleep 10 # give time in case process is naturally closing and needs more time sleep 10 # in case process is naturally closing and needs more time
if [ -d /proc/$pid ] ; then if [ -d "/proc/$pid" ] ; then
date >> /tmp/zombie.log date >> /tmp/zombie.log
ps | egrep "^\s*${pid}" | grep -v grep | tail -1 >> /tmp/zombie.log ps | grep -E "^\s*${pid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
ppid=`cat /proc/$pid/status | grep -i ppid | cut -f2` ppid="$(grep -i ppid < /proc/$pid/status | cut -f2)"
if [ -d /proc/$ppid ] ; then if [ -d "/proc/$ppid" ] ; then
ps | egrep "\s*${ppid}" | grep -v grep | tail -1 >> /tmp/zombie.log ps | grep -E "^\s*${ppid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
if ( ! `grep crond /proc/$ppid/status 2>&1 > /dev/null` ) then grep crond /proc/$ppid/status 2>&1 > /dev/null
if [ $ppid -gt 1 ] ; then if [ $? -ne 0 -a "$ppid" -gt 1 ]; then
# kill the zombie's parent process to free up resources # kill the zombie's parent process to free up resources
kill -9 $ppid 2>&1 >> /tmp/zombie.log kill -9 "$ppid" 2>&1 >> /tmp/zombie.log
echo "Killed $ppid" >> /tmp/zombie.log echo "Killed $ppid" >> /tmp/zombie.log
if [ `wc -l /tmp/zombie.log | cut -f1 -d\ ` -gt 100 ] ; then if [ "$(wc -l /tmp/zombie.log | cut -f1 -d\ )" -gt 300 ] ; then
# keep file size in check # keep file size in check
cp /tmp/zombie.log /tmp/zombie.tmp cp /tmp/zombie.log /tmp/zombie.tmp
tail -80 /tmp/zombie.tmp > /tmp/zombie.log tail -275 /tmp/zombie.tmp > /tmp/zombie.log
rm -f /tmp/zombie.tmp rm -f /tmp/zombie.tmp
fi
fi fi
fi fi
fi fi
echo "" >> /tmp/zombie.log echo "" >> /tmp/zombie.log
fi fi
done done
rm $zombiepid

View File

@ -41,9 +41,40 @@
$now=`cat /proc/uptime | cut -f1 -d" "`; $now=`cat /proc/uptime | cut -f1 -d" "`;
chomp $now; chomp $now;
exit 0 unless $now > 119; exit 0 unless $now > 119;
sleep 3; # wait for snrlog to see that we are not running

# Single-instance guard: if the PID file names a process that still has a
# /proc entry, an earlier run is still active, so exit quietly.
$rssipid="/tmp/rssi_monitor.pid";
if ( -f "$rssipid" )
{
  chomp (${rssipidvalue}=`cat $rssipid`);
  exit 0 if ( ${rssipidvalue} > 0 and -d "/proc/${rssipidvalue}" );
}

# Record our own PID in the PID file.
open(my $mypid, '>', $rssipid) or die("Could not open $rssipid. $!");
print $mypid $$;
close $mypid;

# If the snrlog PID file names a live process, wait for it to finish:
# at most 4 polls, 5 seconds apart.
$snrlogpid="/tmp/snrlog.pid";
if ( -f "$snrlogpid" )
{
  chomp (${snrlogpidvalue}=`cat $snrlogpid`);
  $waitcount=0;
  while ( ${snrlogpidvalue} > 0 and -d "/proc/${snrlogpidvalue}" and $waitcount < 4)
  {
    sleep 5;
    $waitcount+=1;
  }
  # Skip this turn if snrlog is still running. The liveness test is repeated
  # here instead of inspecting $waitcount: the previous "$waitcount = 4" was
  # an assignment (always true), so the script exited whenever the snrlog
  # PID file existed, even when snrlog was not running at all.
  if ( ${snrlogpidvalue} > 0 and -d "/proc/${snrlogpidvalue}" )
  {
    unlink $rssipid; # release our own PID file before giving up
    exit 0;
  }
}
sleep 7;
chomp ($iface=`uci -q get 'network.wifi.ifname'`); # wireless interface chomp ($iface=`uci -q get 'network.wifi.ifname'`); # wireless interface
foreach(`iwinfo $iface info`) foreach(`iwinfo $iface info`)
{ {
@ -337,6 +368,7 @@ for (keys %rssiHist)
} }
close $dfh; close $dfh;
unlink $rssipid;
# when logfile gets 1k over $MAXSIZE, then chop down # when logfile gets 1k over $MAXSIZE, then chop down
$MAXSIZE = 2**14; $MAXSIZE = 2**14;

View File

@ -49,7 +49,6 @@ string.print=print_r
-- delay just after rssi_monitor has a chance to run noise floor calibration -- delay just after rssi_monitor has a chance to run noise floor calibration
sleep(5)
local MAXLINES=2880 -- 2 days worth local MAXLINES=2880 -- 2 days worth
local AGETIME=43200 local AGETIME=43200
local INACTIVETIMEOUT=10000 local INACTIVETIMEOUT=10000
@ -66,6 +65,8 @@ local stations={}
local wifiiface="" local wifiiface=""
local bandwidth="" local bandwidth=""
local nulledout={} local nulledout={}
local pidfile="/tmp/snrlog.pid"
local rssifile="/tmp/rssi_monitor.pid"
-- Neighbor Class -- Neighbor Class
Neighbor={} Neighbor={}
@ -255,6 +256,35 @@ end
-- Neighbor Class END -- Neighbor Class END
-- MAIN() ------------------------------------------------------------------------------------- -- MAIN() -------------------------------------------------------------------------------------
-- Report whether the PID stored in `path` names a process that still has a
-- /proc entry. Returns false when the file cannot be opened or does not
-- start with a number.
-- NOTE(review): dir_exists() is not defined in this section; it is assumed to
-- come from the script's existing helpers, exactly as the original code did.
local function pid_is_running(path)
  local fh = io.open(path, "r")
  if not fh then
    return false
  end
  local pid = fh:read("*number")
  fh:close()
  -- `nil` here was previously misspelled `nill`, which only worked because an
  -- undefined global happens to evaluate to nil.
  return pid ~= nil and dir_exists("/proc/" .. pid)
end

-- check to make sure a prior instance is not still running
if pid_is_running(pidfile) then
  return
end

--- create pid file to communicate I'm running
-- assert() raises if the file cannot be opened, so no nil check is needed.
local f = assert(io.open(pidfile, "w"), "Cannot open file (pidfile) to write!")
f:write(posix.unistd.getpid())
f:close()

--- Do not run if prior period rssi_monitor is still running
if pid_is_running(rssifile) then
  os.remove(pidfile) -- release our own pid file before giving up
  return
end
-- get wifi interface name -- get wifi interface name
wifiiface=get_ifname("wifi") wifiiface=get_ifname("wifi")
@ -388,4 +418,6 @@ for k,v in pairs(snrdatcache) do
end end
f:close() f:close()
os.remove(pidfile)
-- END MAIN -- END MAIN