From 5abeb8f7acb74da1977723d8125b6a0a0442df5c Mon Sep 17 00:00:00 2001
From: Joe AE6XE
Date: Tue, 21 Aug 2018 23:14:26 -0700
Subject: [PATCH] bugfix: aredn slugbug mitigation

with low memory conditions, typically on 32Mb RAM, the device
would become unresponsive in hours to days. The symptoms only
occurred when no RF links. iw processes would hang in Zombie
state. Updates to use iwinfo where possible and avoid using
both iw and iwinfo. crontab script is implemented to detect
Zombie processes and free up resources in the reduced chance
the symptoms are still occurring.
---
 files/etc/crontabs/root             |   1 +
 files/usr/local/bin/clean_zombie.sh |  71 ++++++++++++
 files/usr/local/bin/rssi_monitor    | 164 ++++++++++++++--------------
 files/www/cgi-bin/perlfunc.pm       |  18 +--
 4 files changed, 162 insertions(+), 92 deletions(-)
 create mode 100755 files/usr/local/bin/clean_zombie.sh

diff --git a/files/etc/crontabs/root b/files/etc/crontabs/root
index 756221ae..0293c59c 100644
--- a/files/etc/crontabs/root
+++ b/files/etc/crontabs/root
@@ -1,4 +1,5 @@
 */5 * * * * /usr/local/bin/fccid
 * * * * * /usr/local/bin/rssi_monitor
 * * * * * /usr/local/bin/snrlog
+* * * * * /usr/local/bin/clean_zombie.sh
 
diff --git a/files/usr/local/bin/clean_zombie.sh b/files/usr/local/bin/clean_zombie.sh
new file mode 100755
index 00000000..f30935a7
--- /dev/null
+++ b/files/usr/local/bin/clean_zombie.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+<<'LICENSE'
+  Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
+  Copyright (C) 2018 Joe Ayers AE6XE
+  See Contributors file for additional contributors
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation version 3 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+  Additional Terms:
+
+  Additional use restrictions exist on the AREDN(TM) trademark and logo.
+    See AREDNLicense.txt for more info.
+
+  Attributions to the AREDN Project must be retained in the source code.
+  If importing this code into a new or existing project attribution
+  to the AREDN project must be added to the source code.
+
+  You must not misrepresent the origin of the material contained within.
+
+  Modified versions must be modified to attribute to the original source
+  and be marked in reasonable ways as differentiate it from the original
+  version.
+
+LICENSE
+
+# Look for hung 'iw' zombie processes prone to hang
+# when available memory is low.
+
+# wait for rssi_monitor and snrlog to run
+sleep 10
+
+for pid in `ps 2>/dev/null | egrep "^\s*\d+\s+root\s+\d+\s+Z\s+\[iw\]"| sed -e "s/^\s*//"| cut -f1 -d\ `
+do
+
+  # found an "iw" zombie
+  sleep 10 # give time in case process is naturally closing and needs more time
+  if [ -d /proc/$pid ] ; then
+    date >> /tmp/zombie.log
+    ps | egrep "^\s*${pid}\s" | grep -v grep | tail -1 >> /tmp/zombie.log
+    ppid=`cat /proc/$pid/status | grep -i ppid | cut -f2`
+    if [ -d /proc/$ppid ] ; then
+      ps | egrep "^\s*${ppid}\s" | grep -v grep | tail -1 >> /tmp/zombie.log
+      if ! grep -q crond /proc/$ppid/status 2>/dev/null ; then   # skip when the parent is crond
+        if [ $ppid -gt 1 ] ; then
+
+          # kill the zombie's parent process to free up resources
+          kill -9 $ppid >> /tmp/zombie.log 2>&1   # stdout first, so stderr also lands in the log
+          echo "Killed $ppid" >> /tmp/zombie.log
+          if [ `wc -l /tmp/zombie.log | cut -f1 -d\ ` -gt 100 ] ; then
+
+            # keep file size in check
+            cp /tmp/zombie.log /tmp/zombie.tmp
+            tail -80 /tmp/zombie.tmp > /tmp/zombie.log
+            rm -f /tmp/zombie.tmp
+          fi
+        fi
+      fi
+    fi
+    echo "" >> /tmp/zombie.log
+  fi
+done
diff --git a/files/usr/local/bin/rssi_monitor b/files/usr/local/bin/rssi_monitor
index 2e21a166..c790c673 100755
--- a/files/usr/local/bin/rssi_monitor
+++ b/files/usr/local/bin/rssi_monitor
@@ -57,34 +57,39 @@ sub getRSSI
     delete $rssi{$_};
   }
 
-  open(FILE, "/usr/sbin/iw $iface station dump 2>&1 |") or die "/usr/sbin/iw failed $!";
+  chomp ($stationCount = `ls -1 /sys/kernel/debug/ieee80211/phy0/netdev:${iface}/stations | wc -l`);
 
-  $neighborCount = 0;
-  while($line = <FILE>)
+  if ($stationCount >= 1)
   {
-    if($line =~ /Station (\S+) \(on $iface\)/) { $mac = $1;}
-    if($antnum and $line =~ /signal:[ \t]+[-\d]+[ \t]*\[([-\d]+),[ \t]*([-\d]+)/)
-    {
-      $H = $1;
-      $V = $2;
-    }
+    open(FILE, "/usr/sbin/iw $iface station dump 2>&1 |") or die "/usr/sbin/iw failed $!";
 
-    if ((not $antnum) and $line =~ /signal:[ \t]+[-\d]+[ \t]*\[([-\d]+)\]/)
+    $neighborCount = 0;
+    while($line = <FILE>)
     {
-      $H = $1;
-    }
-    if ($H)
-    {
-      if ($H < -95) { $rssi{$mac}{"Hrssi"}=-96 ; }
-      else { $rssi{$mac}{"Hrssi"}=$H ; }
-      undef $H;
-      $neighborCount += 1;
-    }
-    if ($V)
-    {
-      if ($V < -95) { $rssi{$mac}{"Vrssi"}=-96 ; }
-      else { $rssi{$mac}{"Vrssi"}=$V ; }
-      undef $V;
+      if($line =~ /Station (\S+) \(on $iface\)/) { $mac = $1;}
+      if($antnum and $line =~ /signal:[ \t]+[-\d]+[ \t]*\[([-\d]+),[ \t]*([-\d]+)/)
+      {
+        $H = $1;
+        $V = $2;
+      }
+
+      if ((not $antnum) and $line =~ /signal:[ \t]+[-\d]+[ \t]*\[([-\d]+)\]/)
+      {
+        $H = $1;
+      }
+      if ($H)
+      {
+        if ($H < -95) { $rssi{$mac}{"Hrssi"}=-96 ; }
+        else { $rssi{$mac}{"Hrssi"}=$H ; }
+        undef $H;
+        $neighborCount += 1;
+      }
+      if ($V)
+      {
+        if ($V < -95) { $rssi{$mac}{"Vrssi"}=-96 ; }
+        else { $rssi{$mac}{"Vrssi"}=$V ; }
+        undef $V;
+      }
     }
   }
 }
@@ -95,14 +100,13 @@ sub getChannelScan
   $chnum += 1;
   if ($chnum == 8 or $chnum == 12 or $chnum == 100 or $chnum == 185) { $chnum -= 2; }
   if ($chnum == 0) { $chnum = 1; }
-  $freq = `iw list | grep "\\\[$chnum\\\]" | head -1`;
-  $freq =~ /([\d]+)[ \t]+MHz[ \t]+/;
+  $freq = `iwinfo $iface freqlist | grep "Channel $chnum)" | head -1 | sed -e "s/\\.//"`; # ")" keeps channel 1 from matching 10, 11, ...
+  $freq =~ /([\d]+)[ \t]+GHz/;
   $freq = $1;
 }
 
-$antnum=`iw list | grep "Configured Antennas: TX" | cut -f6 -d" "`;
-chomp $antnum;
-if ($antnum eq "0x1")
+$antnum=`cat /sys/kernel/debug/ieee80211/phy0/ath9k/tx_chainmask`;
+if ($antnum == "1")
 {
   $antnum=0;
 }
@@ -241,75 +245,69 @@ for (keys %rssi)
   }
 }
 
-if ($amac or not $neighborCount)
+if ($amac)
 {
   getChannelScan();
 
-  if ($amac)
-  {
-    $datestring = localtime();
-    if ($antnum) {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}, $rssi{$amac}{'Vrssi'} ]\n";}
-    else {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}]\n";}
-  }
+  $datestring = localtime();
+  if ($antnum) {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}, $rssi{$amac}{'Vrssi'} ]\n";}
+  else {print $lfh "$datestring: before $amac [ $rssi{$amac}{'Hrssi'}]\n";}
 
   system("/usr/sbin/iw $iface scan freq $freq passive > /dev/null");
 
-  if ($amac)
+  sleep 5;
+
+  $beforeH = $rssi{$amac}{"Hrssi"};
+  if ($antnum) { $beforeV = $rssi{$amac}{"Vrssi"}; }
+
+  getRSSI() ;
+  $datestring = localtime();
+  if ($antnum) {print $lfh "$datestring: after $amac [ $rssi{$amac}{'Hrssi'}, $rssi{$amac}{'Vrssi'} ]\n";}
+  else {print $lfh "$datestring: after $amac [ $rssi{$amac}{'Hrssi'}]\n";}
+
+  $falpos = 0;
+  if ($antnum)
   {
-    sleep 5;
+    if (abs ( $beforeH - $rssi{$amac}{"Hrssi"} ) <= 2 and
+        abs ( $beforeV - $rssi{$amac}{"Vrssi"} ) <= 2 ) { $falpos = 1; }
+  }
+  elsif (abs ( $beforeH - $rssi{$amac}{"Hrssi"} ) <= 2 ) { $falpos = 1; }
 
-    $beforeH = $rssi{$amac}{"Hrssi"};
-    if ($antnum) { $beforeV = $rssi{$amac}{"Vrssi"}; }
+  if ( $falpos )
+  {
+    # if a false-positive (within 2dB change after a reset), then add data point to statistics
+    $aveH = (($rssiHist{$amac}{"aveH"}*$rssiHist{$amac}{"num"})+ $beforeH )
+            / ($rssiHist{$amac}{"num"} + 1 );
+    $sdH = sqrt((($rssiHist{$amac}{"num"}-1)*($rssiHist{$amac}{"sdH"}**2) +
+            (($beforeH-$aveH)*($beforeH-$rssiHist{$amac}{"aveH"})))
+            /$rssiHist{$amac}{"num"});
+    chomp $aveH;
+    chomp $sdH;
+    $rssiHist{$amac}{"aveH"} = $aveH;
+    $rssiHist{$amac}{"sdH"} = $sdH;
 
-    getRSSI() ;
-    $datestring = localtime();
-    if ($antnum) {print $lfh "$datestring: after $amac [ $rssi{$amac}{'Hrssi'}, $rssi{$amac}{'Vrssi'} ]\n";}
-    else {print $lfh "$datestring: after $amac [ $rssi{$amac}{'Hrssi'}]\n";}
-
-    $falpos = 0;
     if ($antnum)
     {
-      if (abs ( $beforeH - $rssi{$amac}{"Hrssi"} ) <= 2 and
-          abs ( $beforeV - $rssi{$amac}{"Vrssi"} ) <= 2 ) { $falpos = 1; }
+      $aveV = (($rssiHist{$amac}{"aveV"}*$rssiHist{$amac}{"num"})+ $beforeV ) /
+              ($rssiHist{$amac}{"num"} + 1 );
+      $sdV = sqrt((($rssiHist{$amac}{"num"}-1)*($rssiHist{$amac}{"sdV"}**2) +
+              (($beforeV-$aveV)*($beforeV-$rssiHist{$amac}{"aveV"}))) /
+              $rssiHist{$amac}{"num"});
+      chomp $aveV;
+      chomp $sdV;
+      $rssiHist{$amac}{"aveV"} = $aveV;
+      $rssiHist{$amac}{"sdV"} = $sdV;
     }
-    elsif (abs ( $beforeH - $rssi{$amac}{"Hrssi"} ) <= 2 ) { $falpos = 1; }
 
-    if ( $falpos )
+    if ($rssiHist{$amac}{"num"} < 60 )
     {
-      # if a false-positive (within 2dB change after a reset), then add data point to statistics
-      $aveH = (($rssiHist{$amac}{"aveH"}*$rssiHist{$amac}{"num"})+ $beforeH )
-              / ($rssiHist{$amac}{"num"} + 1 );
-      $sdH = sqrt((($rssiHist{$amac}{"num"}-1)*($rssiHist{$amac}{"sdH"}**2) +
-              (($beforeH-$aveH)*($beforeH-$rssiHist{$amac}{"aveH"})))
-              /$rssiHist{$amac}{"num"});
-      chomp $aveH;
-      chomp $sdH;
-      $rssiHist{$amac}{"aveH"} = $aveH;
-      $rssiHist{$amac}{"sdH"} = $sdH;
-
-      if ($antnum)
-      {
-        $aveV = (($rssiHist{$amac}{"aveV"}*$rssiHist{$amac}{"num"})+ $beforeV ) /
-                ($rssiHist{$amac}{"num"} + 1 );
-        $sdV = sqrt((($rssiHist{$amac}{"num"}-1)*($rssiHist{$amac}{"sdV"}**2) +
-                (($beforeV-$aveV)*($beforeV-$rssiHist{$amac}{"aveV"}))) /
-                $rssiHist{$amac}{"num"});
-        chomp $aveV;
-        chomp $sdV;
-        $rssiHist{$amac}{"aveV"} = $aveV;
-        $rssiHist{$amac}{"sdV"} = $sdV;
-      }
-
-      if ($rssiHist{$amac}{"num"} < 60 )
-      {
-        # keep statistics to 60 sample (minute) moving window
-        $rssiHist{$amac}{"num"} += 1;
-      }
-      $rssiHist{$amac}{"last"} = $now + 5 ;
-
-      $datestring = localtime();
-      print $lfh "$datestring: $amac Possible valid data point, adding to statistics.\n";
+      # keep statistics to 60 sample (minute) moving window
+      $rssiHist{$amac}{"num"} += 1;
     }
+    $rssiHist{$amac}{"last"} = $now + 5 ;
+
+    $datestring = localtime();
+    print $lfh "$datestring: $amac Possible valid data point, adding to statistics.\n";
   }
 }
 
diff --git a/files/www/cgi-bin/perlfunc.pm b/files/www/cgi-bin/perlfunc.pm
index b4cd836b..658f7170 100644
--- a/files/www/cgi-bin/perlfunc.pm
+++ b/files/www/cgi-bin/perlfunc.pm
@@ -607,29 +607,29 @@ sub get_wifi_signal
     chomp $wifiintf;
     my ($SignalLevel) = "N/A";
     my ($NoiseFloor) = "N/A";
-    foreach(`iw dev $wifiintf station dump`)
+    foreach(`iwinfo $wifiintf assoclient`)
     {
-        next unless /.+signal:\s+([-]?[\d]+)/;
+        next unless /.+[A-F0-9]{2}:[A-F0-9]{2}:[A-F0-9]{2}\s+([-]?[\d]+)/;
         if ( $SignalLevel <= "$1" || $SignalLevel == "N/A" )
         {
             $SignalLevel=$1;
         }
     }
 
-    foreach(`iw dev $wifiintf survey dump|grep -A 1 \"\\[in use\\]\"`)
-    {
-        next unless /([\d\-]+) dBm/;
+    open( my $NoiseFH , "<" , "/sys/kernel/debug/ieee80211/phy0/ath9k/dump_nfcal") or return ("N/A","N/A");
+    while (<$NoiseFH>) {
+        next unless /Channel Noise Floor : ([-]?[0-9]+)/;
         $NoiseFloor=$1;
     }
+    close($NoiseFH);
 
     if ( $NoiseFloor == "N/A" )
     {
-        open( my $NoiseFH , "<" , "/sys/kernel/debug/ieee80211/phy0/ath9k/dump_nfcal") or return ("N/A","N/A");
-        while (<$NoiseFH>) {
-            next unless /Channel Noise Floor : ([-]?[0-9]+)/;
+        foreach(`iwinfo $wifiintf info | grep Signal`)
+        {
+            next unless /Noise:\s+([\d\-]+) dBm/; # the first "dBm" on this line is Signal, not Noise
             $NoiseFloor=$1;
         }
-        close($NoiseFH);
     }
 
     if ( $SignalLevel == "N/A" || $NoiseFloor == "N/A" )