bugfix: aredn slugbug mitigation

Under low memory conditions, typically on devices with 32 MB of
RAM, the device would become unresponsive within hours to days.
The symptoms only occurred when there were no RF links. iw processes
would hang in Zombie state.  Updated to use iwinfo where
possible and to avoid using both iw and iwinfo.  A crontab
script is implemented to detect Zombie processes and free
up resources in the now less likely event that the symptoms
still occur.
This commit is contained in:
Joe AE6XE 2018-08-21 23:14:26 -07:00
parent af0e26dd84
commit 5abeb8f7ac
4 changed files with 162 additions and 92 deletions

View File

@ -1,4 +1,5 @@
# Fields: minute hour day-of-month month day-of-week command
# Runs every 5 minutes.
*/5 * * * * /usr/local/bin/fccid
# The remaining jobs run once every minute.
* * * * * /usr/local/bin/rssi_monitor
* * * * * /usr/local/bin/snrlog
* * * * * /usr/local/bin/clean_zombie.sh

View File

@ -0,0 +1,71 @@
#!/bin/sh
<<'LICENSE'
Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
Copyright (C) 2018 Joe Ayers AE6XE
See Contributors file for additional contributors
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Additional Terms:
Additional use restrictions exist on the AREDN(TM) trademark and logo.
See AREDNLicense.txt for more info.
Attributions to the AREDN Project must be retained in the source code.
If importing this code into a new or existing project attribution
to the AREDN project must be added to the source code.
You must not misrepresent the origin of the material contained within.
Modified versions must be modified to attribute to the original source
and be marked in reasonable ways as differentiate it from the original
version.
LICENSE
# Look for hung 'iw' zombie processes prone to hang
# when available memory is low.
#
# For every root-owned "[iw]" process that ps reports in state Z and that
# is still present after a grace period, the zombie and its parent are
# recorded in $ZLOG.  The parent is then killed so the zombie can be
# reaped, unless the parent is crond or has a pid of 1 or lower.

ZLOG=/tmp/zombie.log
ZTMP=/tmp/zombie.tmp

# wait for rssi_monitor and snrlog to run
sleep 10

for pid in `ps 2>/dev/null | egrep "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]" | sed -e "s/^\s*//" | cut -f1 -d' '`
do
  # found an "iw" zombie
  sleep 10 # give time in case process is naturally closing and needs more time
  [ -d "/proc/$pid" ] || continue

  date >> "$ZLOG"
  # Anchor on both sides of the pid so that e.g. pid 12 does not match 123.
  ps | egrep "^\s*${pid}\s" | tail -1 >> "$ZLOG"

  # The zombie may be reaped at any moment; tolerate a vanished status file.
  ppid=`grep -i '^ppid' "/proc/$pid/status" 2>/dev/null | cut -f2`

  # An empty ppid would turn /proc/$ppid into /proc/, which always exists.
  if [ -n "$ppid" ] && [ -d "/proc/$ppid" ] ; then
    ps | egrep "^\s*${ppid}\s" | tail -1 >> "$ZLOG"

    # Never kill crond itself or pid 1.
    if [ "$ppid" -gt 1 ] && ! grep -q crond "/proc/$ppid/status" 2>/dev/null ; then
      # kill the zombie's parent process to free up resources;
      # stdout is redirected first so that kill's stderr also lands in the log
      if kill -9 "$ppid" >> "$ZLOG" 2>&1 ; then
        echo "Killed $ppid" >> "$ZLOG"
      fi
    fi
  fi
  echo "" >> "$ZLOG"

  # keep file size in check; done for every logged zombie, not only after a
  # kill, so the log cannot grow without bound
  if [ "`wc -l < "$ZLOG"`" -gt 100 ] ; then
    tail -n 80 "$ZLOG" > "$ZTMP" && mv -f "$ZTMP" "$ZLOG"
  fi
done

View File

@ -57,6 +57,10 @@ sub getRSSI
delete $rssi{$_};
}
chomp ($stationCount = `ls -1 /sys/kernel/debug/ieee80211/phy0/netdev:${iface}/stations | wc -l`);
if ($stationCount >= 1)
{
open(FILE, "/usr/sbin/iw $iface station dump 2>&1 |") or die "/usr/sbin/iw failed $!";
$neighborCount = 0;
@ -88,6 +92,7 @@ sub getRSSI
}
}
}
}
sub getChannelScan
{
@ -95,14 +100,13 @@ sub getChannelScan
$chnum += 1;
if ($chnum == 8 or $chnum == 12 or $chnum == 100 or $chnum == 185) { $chnum -= 2; }
if ($chnum == 0) { $chnum = 1; }
$freq = `iw list | grep "\\\[$chnum\\\]" | head -1`;
$freq =~ /([\d]+)[ \t]+MHz[ \t]+/;
$freq = `iwinfo $iface freqlist | grep "Channel $chnum" | head -1 | sed -e "s/\\.//"`;
$freq =~ /([\d]+)[ \t]+GHz/;
$freq = $1;
}
$antnum=`iw list | grep "Configured Antennas: TX" | cut -f6 -d" "`;
chomp $antnum;
if ($antnum eq "0x1")
$antnum=`cat /sys/kernel/debug/ieee80211/phy0/ath9k/tx_chainmask`;
if ($antnum == "1")
{
$antnum=0;
}
@ -241,21 +245,16 @@ for (keys %rssi)
}
}
if ($amac or not $neighborCount)
if ($amac)
{
getChannelScan();
if ($amac)
{
$datestring = localtime();
if ($antnum) {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}, $rssi{$amac}{'Vrssi'} ]\n";}
else {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}]\n";}
}
system("/usr/sbin/iw $iface scan freq $freq passive > /dev/null");
if ($amac)
{
sleep 5;
$beforeH = $rssi{$amac}{"Hrssi"};
@ -311,7 +310,6 @@ if ($amac or not $neighborCount)
print $lfh "$datestring: $amac Possible valid data point, adding to statistics.\n";
}
}
}
close $lfh;

View File

@ -607,29 +607,29 @@ sub get_wifi_signal
chomp $wifiintf;
my ($SignalLevel) = "N/A";
my ($NoiseFloor) = "N/A";
foreach(`iw dev $wifiintf station dump`)
foreach(`iwinfo $wifiintf assoclient`)
{
next unless /.+signal:\s+([-]?[\d]+)/;
next unless /.+[A-F0-9]{2}:[A-F0-9]{2}:[A-F0-9]{2}\s+([-]?[\d]+)/;
if ( $SignalLevel <= "$1" || $SignalLevel == "N/A" )
{
$SignalLevel=$1;
}
}
foreach(`iw dev $wifiintf survey dump|grep -A 1 \"\\[in use\\]\"`)
{
next unless /([\d\-]+) dBm/;
$NoiseFloor=$1;
}
if ( $NoiseFloor == "N/A" )
{
open( my $NoiseFH , "<" , "/sys/kernel/debug/ieee80211/phy0/ath9k/dump_nfcal") or return ("N/A","N/A");
while (<$NoiseFH>) {
next unless /Channel Noise Floor : ([-]?[0-9]+)/;
$NoiseFloor=$1;
}
close($NoiseFH);
if ( $NoiseFloor == "N/A" )
{
foreach(`iwinfo $wifiintf info | grep Signal`)
{
next unless /([\d\-]+) dBm/;
$NoiseFloor=$1;
}
}
if ( $SignalLevel == "N/A" || $NoiseFloor == "N/A" )