mirror of https://github.com/aredn/aredn.git
aredn: harden cron maintenance scripts
ensure maintenance scripts execute one at a time and never in duplication
This commit is contained in:
parent
c5e9342e29
commit
3938f33afe
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
|
||||
<<'LICENSE'
|
||||
true <<'LICENSE'
|
||||
Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
|
||||
Copyright (C) 2018 Joe Ayers AE6XE
|
||||
See Contributors file for additional contributors
|
||||
|
@ -36,36 +36,43 @@ LICENSE
|
|||
# Look for hung 'iw' zombie processes prone to hang
|
||||
# when available memory is low.
|
||||
|
||||
# wait for rssi_monitor and snrlog to run
|
||||
sleep 10
|
||||
zombiepid="/tmp/clean_zombie.pid"
|
||||
|
||||
for pid in `ps 2>/dev/null | egrep "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ `
|
||||
# Exit if a previous run of this script is still alive.
# An empty or unreadable pid file must not count as "running":
# with an empty pid the path collapses to "/proc/", which always exists,
# and the script would then exit on every run.
if [ -e "$zombiepid" ]; then
  oldpid="$(cat "$zombiepid" 2>/dev/null)"
  [ -n "$oldpid" ] && [ -d "/proc/$oldpid" ] && exit
fi
|
||||
|
||||
echo "$$" > $zombiepid
|
||||
|
||||
# wait for rssi_monitor and snrlog to run
|
||||
sleep 20;
|
||||
|
||||
for pid in $(ps | grep -E "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ )
|
||||
do
|
||||
|
||||
# found an "iw" zombie
|
||||
sleep 10 # give time in case process is naturally closing and needs more time
|
||||
if [ -d /proc/$pid ] ; then
|
||||
sleep 10 # in case process is naturally closing and needs more time
|
||||
if [ -d "/proc/$pid" ] ; then
|
||||
date >> /tmp/zombie.log
|
||||
ps | egrep "^\s*${pid}" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||
ppid=`cat /proc/$pid/status | grep -i ppid | cut -f2`
|
||||
if [ -d /proc/$ppid ] ; then
|
||||
ps | egrep "\s*${ppid}" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||
if ( ! `grep crond /proc/$ppid/status 2>&1 > /dev/null` ) then
|
||||
if [ $ppid -gt 1 ] ; then
|
||||
ps | grep -E "^\s*${pid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||
ppid="$(grep -i ppid < /proc/$pid/status | cut -f2)"
|
||||
if [ -d "/proc/$ppid" ] ; then
|
||||
ps | grep -E "^\s*${ppid}\s+" | grep -v grep | tail -1 >> /tmp/zombie.log
|
||||
# Is the zombie's parent crond? Only the exit status is used (read via $? on
# the next statement), so discard both output streams. The file redirection
# must come before '2>&1', otherwise stderr stays on the original stdout.
grep crond "/proc/$ppid/status" > /dev/null 2>&1
|
||||
if [ $? -ne 0 -a "$ppid" -gt 1 ]; then
|
||||
|
||||
# kill the zombie's parent process to free up resources
|
||||
kill -9 $ppid 2>&1 >> /tmp/zombie.log
|
||||
# Kill the zombie's parent and record any error from kill in the log.
# stdout is redirected first so that '2>&1' sends stderr to the log as well.
kill -9 "$ppid" >> /tmp/zombie.log 2>&1
|
||||
echo "Killed $ppid" >> /tmp/zombie.log
|
||||
if [ `wc -l /tmp/zombie.log | cut -f1 -d\ ` -gt 100 ] ; then
|
||||
if [ "$(wc -l /tmp/zombie.log | cut -f1 -d\ )" -gt 300 ] ; then
|
||||
|
||||
# keep file size in check
|
||||
cp /tmp/zombie.log /tmp/zombie.tmp
|
||||
tail -80 /tmp/zombie.tmp > /tmp/zombie.log
|
||||
tail -275 /tmp/zombie.tmp > /tmp/zombie.log
|
||||
rm -f /tmp/zombie.tmp
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "" >> /tmp/zombie.log
|
||||
fi
|
||||
done
|
||||
|
||||
rm $zombiepid
|
||||
|
|
|
@ -41,9 +41,40 @@
|
|||
|
||||
$now=`cat /proc/uptime | cut -f1 -d" "`;
|
||||
chomp $now;
|
||||
|
||||
exit 0 unless $now > 119;
|
||||
|
||||
sleep 3; # wait for snrlog to see that we are not running
|
||||
|
||||
# Single-instance guard: the pid of the running rssi_monitor is kept in this file.
$rssipid="/tmp/rssi_monitor.pid";
if ( -f "$rssipid" )
{
  ${rssipidvalue}=`cat $rssipid`;
  chomp(${rssipidvalue});
  if ( ${rssipidvalue} > 0 and -d "/proc/${rssipidvalue}" )
  {
    # the recorded process still exists, so leave this period to it
    exit 0;
  }
}

# record our own pid so later runs (and snrlog) can see that we are active
open(my $mypid, '>', $rssipid) || die("Could not open $rssipid. $!");
print $mypid $$;
close $mypid;
|
||||
|
||||
# Give a running snrlog up to 20 seconds (4 checks, 5 seconds apart) to finish
# before this script continues.
$snrlogpid="/tmp/snrlog.pid";
if ( -f "$snrlogpid" )
{
  chomp (${snrlogpidvalue}=`cat $snrlogpid`);
  $waitcount=0;
  while ( ${snrlogpidvalue} > 0 and -d "/proc/${snrlogpidvalue}" and $waitcount < 4)
  {
    sleep 5;
    $waitcount+=1;
  }
  # Skip this turn only if snrlog is still running after the full wait.
  # The comparison must be '==': the previous '=' assigned 4 to $waitcount,
  # which is always true, so the script exited whenever the pid file existed.
  # The /proc re-check covers snrlog finishing during the last sleep.
  if ( $waitcount == 4 and -d "/proc/${snrlogpidvalue}" )
  {
    unlink $rssipid;   # release our own lock before giving up
    exit 0;
  }
}
|
||||
|
||||
sleep 7;
|
||||
|
||||
chomp ($iface=`uci -q get 'network.wifi.ifname'`); # wireless interface
|
||||
foreach(`iwinfo $iface info`)
|
||||
{
|
||||
|
@ -337,6 +368,7 @@ for (keys %rssiHist)
|
|||
}
|
||||
|
||||
close $dfh;
|
||||
unlink $rssipid;
|
||||
|
||||
# when logfile gets 1k over $MAXSIZE, then chop down
|
||||
$MAXSIZE = 2**14;
|
||||
|
|
|
@ -49,7 +49,6 @@ string.print=print_r
|
|||
|
||||
|
||||
-- delay just after rssi_monitor has a chance to run noise floor calibration
|
||||
sleep(5)
|
||||
local MAXLINES=2880 -- 2 days worth
|
||||
local AGETIME=43200
|
||||
local INACTIVETIMEOUT=10000
|
||||
|
@ -66,6 +65,8 @@ local stations={}
|
|||
local wifiiface=""
|
||||
local bandwidth=""
|
||||
local nulledout={}
|
||||
local pidfile="/tmp/snrlog.pid"
|
||||
local rssifile="/tmp/rssi_monitor.pid"
|
||||
|
||||
-- Neighbor Class
|
||||
Neighbor={}
|
||||
|
@ -255,6 +256,35 @@ end
|
|||
-- Neighbor Class END
|
||||
|
||||
-- MAIN() -------------------------------------------------------------------------------------
|
||||
-- Check that a prior instance is not still running: read the pid stored in
-- pidfile and stop if a /proc entry for it still exists.
local f = io.open(pidfile,"r")
if (f) then
  -- read() yields nil when the file is empty or does not start with a number
  local oldpid = f:read("*number")
  f:close()
  -- compare against nil (was the misspelled, undefined global 'nill')
  if (oldpid ~= nil and dir_exists("/proc/" .. oldpid)) then
    return
  end
end
|
||||
|
||||
--- Create the pid file to communicate that this instance is running.
-- assert() raises when io.open fails, so f is always a valid handle afterwards
-- and no further check is needed. Only f is assigned: assert returns its
-- message argument as a second value even on success, which previously
-- leaked into an accidental global 'err'.
f = assert(io.open(pidfile, "w"), "Cannot open file (pidfile) to write!")
f:write(posix.unistd.getpid())
f:close()
|
||||
|
||||
--- Do not run if the prior period's rssi_monitor is still running.
-- rssifile holds the rssi_monitor pid; if that process still has a /proc
-- entry, remove our own pid file and stop.
local f = io.open(rssifile,"r")
if (f) then
  -- read() yields nil when the file is empty or does not start with a number
  local oldpid = f:read("*number")
  f:close()
  -- compare against nil (was the misspelled, undefined global 'nill')
  if (oldpid ~= nil and dir_exists("/proc/" .. oldpid)) then
    os.remove(pidfile)
    return
  end
end
|
||||
|
||||
-- get wifi interface name
|
||||
wifiiface=get_ifname("wifi")
|
||||
|
||||
|
@ -388,4 +418,6 @@ for k,v in pairs(snrdatcache) do
|
|||
end
|
||||
f:close()
|
||||
|
||||
os.remove(pidfile)
|
||||
|
||||
-- END MAIN
|
||||
|
|
Loading…
Reference in New Issue