From 8c4d9edd99db899092ff063c2c84b7232d0b70fe Mon Sep 17 00:00:00 2001 From: Tim Wilkinson Date: Tue, 20 Jun 2023 01:27:23 -0700 Subject: [PATCH] Merge all the station monitoring and mitigation into a single service. (#874) This is an attempt to unify all the station monitoring and make it work better as one. We're trying to square a circle here somewhat, taking steps to kick nodes when problems are detected, but not kick them too quickly or often in case we're misidentifying issues. We've seen these issues manifest themselves as nodes disrupting VoIP services as well as resets causing nodes to get into unrecoverable states when there were no real problems in the first place. This will probably need to evolve before the next release, but would be good to get some mileage on the new code. --- .../usr/local/bin/mgr/rssi_monitor_ath10k.lua | 114 ------- .../usr/local/bin/mgr/rssi_monitor_ath9k.lua | 269 ---------------- files/usr/local/bin/mgr/station_monitor.lua | 154 ---------- files/usr/local/bin/mgr/wireless_monitor.lua | 287 ++++++++++++++++++ files/www/cgi-bin/supporttool | 2 + 5 files changed, 289 insertions(+), 537 deletions(-) delete mode 100644 files/usr/local/bin/mgr/rssi_monitor_ath10k.lua delete mode 100644 files/usr/local/bin/mgr/rssi_monitor_ath9k.lua delete mode 100755 files/usr/local/bin/mgr/station_monitor.lua create mode 100755 files/usr/local/bin/mgr/wireless_monitor.lua diff --git a/files/usr/local/bin/mgr/rssi_monitor_ath10k.lua b/files/usr/local/bin/mgr/rssi_monitor_ath10k.lua deleted file mode 100644 index 7d667154..00000000 --- a/files/usr/local/bin/mgr/rssi_monitor_ath10k.lua +++ /dev/null @@ -1,114 +0,0 @@ ---[[ - - Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks - Copyright (C) 2022 Tim Wilkinson - See Contributors file for additional contributors - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software 
Foundation version 3 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - - Additional Terms: - - Additional use restrictions exist on the AREDN(TM) trademark and logo. - See AREDNLicense.txt for more info. - - Attributions to the AREDN Project must be retained in the source code. - If importing this code into a new or existing project attribution - to the AREDN project must be added to the source code. - - You must not misrepresent the origin of the material contained within. - - Modified versions must be modified to attribute to the original source - and be marked in reasonable ways as differentiate it from the original - version - ---]] - -local periodic_scan_tick = 5 - -local wifiiface -local phy - -function rssi_monitor_10k() - if not string.match(get_ifname("wifi"), "^wlan") then - exit_app() - else - wait_for_ticks(math.max(1, 120 - nixio.sysinfo().uptime)) - - wifiiface = get_ifname("wifi") - - -- ath10k only - phy = iwinfo.nl80211.phyname(wifiiface) - if not phy or not nixio.fs.stat("/sys/kernel/debug/ieee80211/" .. phy .. "/ath10k") then - exit_app() - return - end - - while true - do - run_monitor_10k() - wait_for_ticks(60) -- 1 minute - end - end -end - -local logfile = "/tmp/rssi_ath10k.log" - -if not file_exists(logfile) then - io.open(logfile, "w+"):close() -end - -local station_zero = 0 -local log = aredn.log.open(logfile, 16000) - -local function reset_network() - local coverage - local f = io.popen("iw " .. phy .. " info") - if f then - for line in f:lines() - do - coverage = tonumber(line:match("Coverage class: (%d+)")) - if coverage then - os.execute("iw " .. phy .. 
" set coverage 0 > /dev/null 2>&1") - break - end - end - f:close() - end - write_all("/sys/kernel/debug/ieee80211/" .. phy .. "/ath10k/simulate_fw_crash", "hw-restart") - if coverage then - os.execute("iw " .. phy .. " set coverage " .. coverage .. " > /dev/null 2>&1") - end -end - -function run_monitor_10k() - - local station_count = 0 - local stations = iwinfo.nl80211.assoclist(wifiiface) - for mac, station in pairs(stations) - do - station_count = station_count + 1 - end - - if station_count ~= 0 then - station_zero = periodic_scan_tick - 1 - else - station_zero = station_zero + 1 - if math.mod(station_zero, periodic_scan_tick) == 0 then - reset_network() - log:write("No stations detected") - log:flush() - end - end -end - -return rssi_monitor_10k diff --git a/files/usr/local/bin/mgr/rssi_monitor_ath9k.lua b/files/usr/local/bin/mgr/rssi_monitor_ath9k.lua deleted file mode 100644 index 3ee3e217..00000000 --- a/files/usr/local/bin/mgr/rssi_monitor_ath9k.lua +++ /dev/null @@ -1,269 +0,0 @@ ---[[ - - Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks - Copyright (C) 2021 Tim Wilkinson - Original Perl Copyright (C) 2015 Joe Ayers ae6xe@arrl.net - See Contributors file for additional contributors - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation version 3 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - - Additional Terms: - - Additional use restrictions exist on the AREDN(TM) trademark and logo. - See AREDNLicense.txt for more info. 
- - Attributions to the AREDN Project must be retained in the source code. - If importing this code into a new or existing project attribution - to the AREDN project must be added to the source code. - - You must not misrepresent the origin of the material contained within. - - Modified versions must be modified to attribute to the original source - and be marked in reasonable ways as differentiate it from the original - version - ---]] - -local wifiiface -local phy -local multiple_ant = false - -function rssi_monitor_9k() - if not string.match(get_ifname("wifi"), "^wlan") then - exit_app() - else - wait_for_ticks(math.max(1, 120 - nixio.sysinfo().uptime)) - - wifiiface = get_ifname("wifi") - phy = iwinfo.nl80211.phyname(wifiiface) - - -- Supports ath9k - if not phy or not nixio.fs.stat("/sys/kernel/debug/ieee80211/" .. phy .. "/ath9k") then - exit_app() - return - end - - if read_all("/sys/kernel/debug/ieee80211/" .. phy .. "/ath9k/tx_chainmask"):chomp() ~= "1" then - multiple_ant = true - end - - while true - do - run_monitor_9k() - wait_for_ticks(60) -- 1 minute - end - end -end - -local datfile = "/tmp/rssi.dat" -local logfile = "/tmp/rssi.log" - -if not file_exists(datfile) then - io.open(datfile, "w+"):close() -end -if not file_exists(logfile) then - io.open(logfile, "w+"):close() -end - -local station_zero = 0 -local periodic_scan_tick = 5 -local log = aredn.log.open(logfile, 16000) - -local function reset_network() - write_all("/sys/kernel/debug/ieee80211/" .. phy .. 
"/ath9k/reset", "1") -end - -function run_monitor_9k() - - local now = nixio.sysinfo().uptime - - -- load history - local rssi_hist = {} - for line in io.lines(datfile) do - local mac, ave_h, sd_h, ave_v, sd_v, num, last = string.match(line, "([0-9a-fA-F:]*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)") - rssi_hist[mac] = { - ave_h = ave_h, - sd_h = sd_h, - ave_v = ave_v, - sd_v = sd_v, - num = tonumber(num), - last = last - } - end - - local ofdm_level = 0 - for i, line in ipairs(read_all("/sys/kernel/debug/ieee80211/" .. phy .. "/ath9k/ani"):splitNewLine()) - do - ofdm_level = tonumber(string.match(line, "OFDM LEVEL: (.*)")) - if ofdm_level then - break - end - end - local amac = nil - local station_count = 0 - local rssi = get_rssi(wifiiface) - for mac, info in pairs(rssi) - do - station_count = station_count + 1 - local rssih = rssi_hist[mac] - if rssih and now - rssih.last < 3600 then - local hit = 0 - local sdh3 = math.floor(rssih.sd_h * 3 + 0.5) - if math.abs(rssih.ave_h - info.Hrssi) > sdh3 then - hit = hit + 1 - end - local sdv3 = math.floor(rssih.sd_v * 3 + 0.5) - if math.abs(rssih.ave_v - info.Vrssi) > sdv3 and multiple_ant then - hit = hit + 1 - end - if rssih.num > 9 and ofdm_level <= 3 and hit > 0 then - -- overly attenuated chain suspected - local msg = string.format("Attenuated Suspect %s [%d] %f %f", mac, info.Hrssi, rssih.ave_h, rssih.sd_h) - if multiple_ant then - msg = msg .. 
string.format(" [%d] %f %f", info.Vrssi, rssih.ave_v, rssih.sd_v) - end - if not amac or rssi[amac].Hrssi < info.Hrssi then - amac = mac - end - log:write(msg) - else - -- update statistics - local ave_h = (rssih.ave_h * rssih.num + info.Hrssi) / (rssih.num + 1) - local sd_h = math.sqrt(((rssih.num - 1) * rssih.sd_h * rssih.sd_h + (info.Hrssi - ave_h) * (info.Hrssi - rssih.ave_h)) / rssih.num) - rssih.ave_h = ave_h - rssih.sd_h = sd_h - local ave_v = (rssih.ave_v * rssih.num + info.Vrssi) / (rssih.num + 1) - local sd_v = math.sqrt(((rssih.num - 1) * rssih.sd_v * rssih.sd_v + (info.Vrssi - ave_v) * (info.Vrssi - rssih.ave_v)) / rssih.num) - rssih.ave_v = ave_v - rssih.sd_v = sd_v - rssih.last = now - if rssih.num < 60 then - rssih.num = rssih.num + 1 - end - end - else - rssi_hist[mac] = { - ave_h = info.Hrssi, - sd_h = 0, - ave_v = info.Vrssi, - sd_v = 0, - num = 1, - last = now - } - end - end - - if amac then - reset_network() - wait_for_ticks(5) - -- update time - now = nixio.sysinfo().uptime - - local beforeh = rssi[amac].Hrssi - local beforev = rssi[amac].Vrssi - local arssi = get_rssi(wifiiface) - - if arssi[amac] then - if multiple_ant then - log:write(string.format("before %s [%d] [%d]", amac, beforeh, beforev)) - log:write(string.format("after %s [%d] [%d]", amac, arssi[amac].Hrssi, arssi[amac].Vrssi)) - else - log:write(string.format("before %s [%d]", amac, beforeh)) - log:write(string.format("after %s [%d]", amac, arssi[amac].Hrssi)) - end - if math.abs(beforeh - arssi[amac].Hrssi) <= 2 and math.abs(beforev - arssi[amac].Vrssi) <= 2 then - -- false positive if within 2dB after reset - log:write(string.format("%s Possible valid data point, adding to statistics", amac)) - local rssih = rssi_hist[amac] - local ave_h = (rssih.ave_h * rssih.num + beforeh) / (rssih.num + 1) - local sd_h = math.sqrt(((rssih.num - 1) * rssih.sd_h * rssih.sd_h + (beforeh - ave_h) * (beforeh - rssih.ave_h)) / rssih.num) - rssih.ave_h = ave_h - rssih.sd_h = sd_h - local ave_v = 
(rssih.ave_v * rssih.num + beforeh) / (rssih.num + 1) - local sd_v = math.sqrt(((rssih.num - 1) * rssih.sd_v * rssih.sd_v + (beforeh - ave_v) * (beforeh - rssih.ave_v)) / rssih.num) - rssih.ave_v = ave_v - rssih.sd_v = sd_v - rssih.last = now - if rssih.num < 60 then - rssih.num = rssih.num + 1 - end - end - end - else - if station_count ~= 0 then - station_zero = periodic_scan_tick - 1 - else - station_zero = station_zero + 1 - if math.mod(station_zero, periodic_scan_tick) == 0 then - reset_network() - wait_for_ticks(5) - log:write("No stations detected") - end - end - end - - local f = io.open(datfile, "w") - if f then - for mac, hist in pairs(rssi_hist) - do - f:write(string.format("%s|%f|%f|%f|%f|%d|%s\n", mac, hist.ave_h, hist.sd_h, hist.ave_v, hist.sd_v, hist.num, hist.last)) - end - f:close() - end - - log:flush() -end - -function get_rssi(wifiiface) - if not multiple_ant then - -- easy way - local rssi = {} - local stations = iwinfo.nl80211.assoclist(wifiiface) - for mac, station in pairs(stations) - do - if station.signal ~= 0 then - if station.signal < -95 then - rssi[mac] = { Hrssi = -96, Vrssi = -96 } - else - rssi[mac] = { Hrssi = station.signal, Vrssi = station.signal } - end - end - end - return rssi - else - -- hard way - local rssi = {} - local f = io.popen("/usr/sbin/iw " .. wifiiface .. " station dump 2>&1") - if f then - local mac - for line in f:lines() - do - local m = line:match("Station (%S+) %(on " .. 
wifiiface) - if m then - mac = m - end - local h, v = line:match("signal:.*%[(.+),%s(.+)%]") - if mac and v then - h = tonumber(h) - v = tonumber(v) - rssi[mac] = { Hrssi = h < -95 and -95 or h, Vrssi = v < -95 and -95 or v } - mac = nil - end - end - f:close() - end - return rssi - end -end - -return rssi_monitor_9k diff --git a/files/usr/local/bin/mgr/station_monitor.lua b/files/usr/local/bin/mgr/station_monitor.lua deleted file mode 100755 index 6dc95cc9..00000000 --- a/files/usr/local/bin/mgr/station_monitor.lua +++ /dev/null @@ -1,154 +0,0 @@ ---[[ - - Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks - Copyright (C) 2023 Tim Wilkinson - See Contributors file for additional contributors - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation version 3 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - - Additional Terms: - - Additional use restrictions exist on the AREDN(TM) trademark and logo. - See AREDNLicense.txt for more info. - - Attributions to the AREDN Project must be retained in the source code. - If importing this code into a new or existing project attribution - to the AREDN project must be added to the source code. - - You must not misrepresent the origin of the material contained within. 
- - Modified versions must be modified to attribute to the original source - and be marked in reasonable ways as differentiate it from the original - version - ---]] - -local unresponsive_max = 5 -local unresponsive_report = 3 -local last = {} -local wifiiface -local frequency -local ssid - -local IW = "/usr/sbin/iw" -local ARPING = "/usr/sbin/arping" - -local logfile = "/tmp/station_monitor.log" -if not file_exists(logfile) then - io.open(logfile, "w+"):close() -end -local log = aredn.log.open(logfile, 8000) - -function rejoin_network() - os.execute(IW .. " " .. wifiiface .. " ibss leave") - os.execute(IW .. " " .. wifiiface .. " ibss join " .. ssid .. " " .. frequency .. " fixed-freq") - log:write("Rejoining network") - log:flush() -end - -function station_monitor() - if not string.match(get_ifname("wifi"), "^wlan") then - exit_app() - else - wait_for_ticks(math.max(1, 120 - nixio.sysinfo().uptime)) - - wifiiface = get_ifname("wifi") - frequency = iwinfo.nl80211.frequency(wifiiface) - ssid = iwinfo.nl80211.ssid(wifiiface) - - -- If frequency or ssid is missing (some kind of bad configuration) just exit this - if not (frequency and ssid) then - exit_app() - return - end - - -- Mikrotik AC hardware has some startup issues which we try to resolve - -- by leaving and rejoining the network - local boardid = aredn.hardware.get_board_id():lower() - if boardid:match("mikrotik") and boardid:match("ac") then - rejoin_network() - end - - -- Only monitor if we have LQM information - if uci.cursor():get("aredn", "@lqm[0]", "enable") ~= "1" then - exit_app() - return - end - - while true - do - run_station_monitor() - wait_for_ticks(60) -- 1 minute - end - end -end - -function run_station_monitor() - - -- Use the LQM state to ignore nodes we dont care about - local trackers = nil - local f = io.open("/tmp/lqm.info") - if f then - local lqm = luci.jsonc.parse(f:read("*a")) - f:close() - trackers = lqm.trackers - end - local now = nixio.sysinfo().uptime - - -- Check each station 
to make sure we can broadcast and unicast to them - local total = 0 - local old = last - last = {} - arptable( - function (entry) - if entry.Device == wifiiface then - local ip = entry["IP address"] - local mac = entry["HW address"] or "" - -- Only consider nodes which have valid ip and macs, routable and not pending - local tracker = { pending = 0, routable = true } - if trackers then - tracker = trackers[mac:upper()] or { pending = now, routable = false } - end - if entry["Flags"] ~= "0x0" and ip and mac ~= "" and tracker.routable and tracker.pending < now then - -- Two arp pings - the first is broadcast, the second unicast - for line in io.popen(ARPING .. " -c 2 -I " .. wifiiface .. " " .. ip):lines() - do - -- If we see exactly one response then we neeed to force the station to reassociate - -- This indicates that broadcasts work, but unicasts dont - if line:match("Received 1 response") then - local val = (old[ip] or 0) + 1 - last[ip] = val - if val > unresponsive_report then - log:write("Possible unresponsive node: " .. ip .. " [" .. mac .. "]") - log:flush() - end - if val > total then - total = val - end - break - end - end - end - end - end - ) - - -- If we find unresponsive nodes too often then we leave and rejoin the network - -- to reset everything - if total >= unresponsive_max then - last = {} - rejoin_network() - end -end - -return station_monitor diff --git a/files/usr/local/bin/mgr/wireless_monitor.lua b/files/usr/local/bin/mgr/wireless_monitor.lua new file mode 100755 index 00000000..8c1b64cf --- /dev/null +++ b/files/usr/local/bin/mgr/wireless_monitor.lua @@ -0,0 +1,287 @@ +--[[ + + Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks + Copyright (C) 2023 Tim Wilkinson + See Contributors file for additional contributors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation version 3 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Additional Terms: + + Additional use restrictions exist on the AREDN(TM) trademark and logo. + See AREDNLicense.txt for more info. + + Attributions to the AREDN Project must be retained in the source code. + If importing this code into a new or existing project attribution + to the AREDN project must be added to the source code. + + You must not misrepresent the origin of the material contained within. + + Modified versions must be modified to attribute to the original source + and be marked in reasonable ways as differentiate it from the original + version + +--]] + +local ip = require("luci.ip") + +local IW = "/usr/sbin/iw" +local ARPING = "/usr/sbin/arping" + +local M = {} + +local wifi +local phy +local chipset +local frequency +local ssid + +local action_limits = { + unresponsive_report = 3, + unresponsive_trigger1 = 5, + unresponsive_trigger2 = 10, + zero_trigger1 = 10 * 60, -- 10 minutes + zero_trigger2 = 30 * 60 -- 30 minutes +} +-- Start action state assuming the node is active and no actions are pending +local action_state = { + done_scan1 = true, + done_scan2 = true, + done_rejoin1 = true, + done_rejoin2 = true +} +local unresponsive = { + max = 0, + ignore = 15, + stations = {} +} +local station_count = { + first_zero = 0, + first_nonzero = 0, + last_zero = 0, + last_nonzero = 0, + history = {}, + history_limit = 120 -- 2 hours +} + +-- Detect Mikrotik AC which requires special handling +local mikrotik_ac = false +local boardid = aredn.hardware.get_board_id():lower() +if boardid:match("mikrotik") and boardid:match("ac") then + mikrotik_ac = true +end + +local logfile = "/tmp/wireless_monitor.log" +if 
not file_exists(logfile) then + io.open(logfile, "w+"):close() +end +local log = aredn.log.open(logfile, 8000) + +-- Various forms of network resets. mode is "rejoin" (ibss leave/join, Mikrotik AC only), "scan-quick" (scan the interface's current frequency), "scan-all" (full scan then passive scan) or "reset" (chipset reset via debugfs); any other mode is only logged -- + +function M.reset_network(mode) + log:write("reset_network: " .. mode) + if mode == "rejoin" then + -- Only observed on Mikrotik AC devices + if mikrotik_ac then + os.execute(IW .. " " .. wifi .. " ibss leave > /dev/null 2>&1") + os.execute(IW .. " " .. wifi .. " ibss join " .. ssid .. " " .. frequency .. " fixed-freq > /dev/null 2>&1") + else + log:write("-- ignoring (mikrotik ac only)") + end + elseif mode == "scan-quick" then + os.execute(IW .. " " .. wifi .. " scan freq " .. frequency .. " > /dev/null 2>&1") + elseif mode == "scan-all" then + os.execute(IW .. " " .. wifi .. " scan > /dev/null 2>&1") + os.execute(IW .. " " .. wifi .. " scan passive > /dev/null 2>&1") + elseif mode == "reset" then + if chipset == "ath9k" then + write_all("/sys/kernel/debug/ieee80211/" .. phy .. "/ath9k/reset", "1") + else + write_all("/sys/kernel/debug/ieee80211/" .. phy .. "/ath10k/simulate_fw_crash", "hw-restart") + end + else + log:write("-- unknown") + end +end + +-- Monitor stations and detect if they become unresponsive -- + +function M.monitor_unresponsive_stations() + + local old = unresponsive.stations + unresponsive.stations = {} + unresponsive.max = 0 + + local now = nixio.sysinfo().uptime + arptable( + function (entry) + if entry.Device == wifi then + local ipaddr = entry["IP address"] + local mac = entry["HW address"] or "" + -- Only consider nodes which have valid ip and mac and routable + if ipaddr then + unresponsive.stations[ipaddr] = -1 + local rt = ip.route(ipaddr) + if entry["Flags"] ~= "0x0" and mac ~= "" and rt and tostring(rt.gw) == ipaddr then + unresponsive.stations[ipaddr] = 0 + -- The first ping is broadcast, the rest unicast + for line in io.popen(ARPING .. " -w 5 -I " .. wifi .. " " .. ipaddr):lines() + do + -- If we see exactly one response then broadcast works and unicast doesnt. 
+ -- We need to force the station to reassociate + if line:match("^Received 1 response") then + local val = (old[ipaddr] or 0) + 1 + unresponsive.stations[ipaddr] = val + if val < unresponsive.ignore then + if val > action_limits.unresponsive_report then + log:write("Possible unresponsive node: " .. ipaddr .. " [" .. mac .. "]") + end + if val > unresponsive.max then + unresponsive.max = val + end + end + break + end + end + end + end + end + end + ) +end + +-- Monitor number of connected stations: record the current count in a bounded history and track when the zero and non-zero periods started and were last seen -- + +function M.monitor_station_count() + local count = 0 + for mac, station in pairs(iwinfo.nl80211.assoclist(wifi)) + do + count = count + 1 + end + table.insert(station_count.history, 1, count) + while #station_count.history > station_count.history_limit + do + station_count.history[#station_count.history] = nil + end + local now = nixio.sysinfo().uptime + if count == 0 then + station_count.last_zero = now + if station_count.first_zero <= station_count.first_nonzero then + station_count.first_zero = now + end + else + station_count.last_nonzero = now + if station_count.first_nonzero <= station_count.first_zero then + station_count.first_nonzero = now + end + end +end + +-- Take action depending on the monitor state + +function M.run_actions() + -- No action if we have stations and they're responsive; emptying action_state re-arms every mitigation + if station_count.last_nonzero > station_count.last_zero and unresponsive.max < action_limits.unresponsive_trigger1 then + action_state = {} + return + end + + -- Otherwise ... 
+ + -- If the station count has fallen to zero after previously being non-zero, run each scan once when the zero period exceeds its trigger + if station_count.first_zero > station_count.first_nonzero then + if not action_state.done_scan1 and station_count.last_zero - station_count.first_zero > action_limits.zero_trigger1 then + M.reset_network("scan-quick") + action_state.done_scan1 = true + return + elseif not action_state.done_scan2 and station_count.last_zero - station_count.first_zero > action_limits.zero_trigger2 then + M.reset_network("scan-all") + action_state.done_scan2 = true + return + end + end + + -- We are failing to ping stations we are associated with; rejoin once per trigger level + if unresponsive.max >= action_limits.unresponsive_trigger1 and not action_state.done_rejoin1 then + M.reset_network("rejoin") + action_state.done_rejoin1 = true + return + elseif unresponsive.max >= action_limits.unresponsive_trigger2 and not action_state.done_rejoin2 then + M.reset_network("rejoin") + action_state.done_rejoin2 = true + return + end +end + +function M.run_monitors() + M.monitor_unresponsive_stations() + M.monitor_station_count() +end + +function M.save() + local f = io.open("/tmp/wireless_monitor.info", "w") + if f then + f:write(luci.jsonc.stringify({ + now = nixio.sysinfo().uptime, + unresponsive = unresponsive, + station_count = station_count, + action_state = action_state + }, true)) + f:close() + end +end + +function M.start_monitor() + if not string.match(get_ifname("wifi"), "^wlan") then + exit_app() + return + end + + wait_for_ticks(math.max(1, 120 - nixio.sysinfo().uptime)) + + -- Extract all the necessary wifi parameters; exit if any of them is missing + wifi = get_ifname("wifi") + phy = iwinfo.nl80211.phyname(wifi) + frequency = iwinfo.nl80211.frequency(wifi) + ssid = iwinfo.nl80211.ssid(wifi) + if not (phy and frequency and ssid) then + exit_app() + return + end + + -- Select chipset by probing the driver directory under debugfs + if nixio.fs.stat("/sys/kernel/debug/ieee80211/" .. phy .. "/ath9k") then + chipset = "ath9k" + elseif nixio.fs.stat("/sys/kernel/debug/ieee80211/" .. phy .. 
"/ath10k") then + chipset = "ath10k" + else + exit_app() + return + end + + log:write("Monitoring wireless chipset: " .. chipset) + + M.reset_network("rejoin") + + while true + do + M.run_monitors() + M.run_actions() + M.save() + log:flush() + wait_for_ticks(60) -- 1 minute + end +end + +return M.start_monitor diff --git a/files/www/cgi-bin/supporttool b/files/www/cgi-bin/supporttool index 7455eace..2e2b7a04 100755 --- a/files/www/cgi-bin/supporttool +++ b/files/www/cgi-bin/supporttool @@ -65,6 +65,8 @@ local files = { "/tmp/manager.log.0", "/tmp/AutoDistReset.log", "/tmp/lqm.info", + "/tmp/wireless_monitor.info", + "/tmp/wireless_monitor.log", "/tmp/sysinfo/board_name", "/tmp/sysinfo/boardid", "/tmp/sysinfo/hardware_mfg",