diff --git a/files/usr/local/bin/manager.lua b/files/usr/local/bin/manager.lua
index 07b044fb..0bd419f3 100755
--- a/files/usr/local/bin/manager.lua
+++ b/files/usr/local/bin/manager.lua
@@ -43,6 +43,7 @@ require("iwinfo")
 require("aredn.hardware")
 require("aredn.log")
 require("luci.jsonc")
+require("ubus")
 
 -- aggressive gc on low memory devices
 if nixio.sysinfo().totalram < 32 * 1024 * 1024 then
diff --git a/files/usr/local/bin/mgr/hw_watchdog.lua b/files/usr/local/bin/mgr/hw_watchdog.lua
new file mode 100755
index 00000000..df5bbceb
--- /dev/null
+++ b/files/usr/local/bin/mgr/hw_watchdog.lua
@@ -0,0 +1,228 @@
+--[[
+
+  Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
+  Copyright (C) 2023 Tim Wilkinson
+  See Contributors file for additional contributors
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation version 3 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see .
+
+  Additional Terms:
+
+  Additional use restrictions exist on the AREDN(TM) trademark and logo.
+  See AREDNLicense.txt for more info.
+
+  Attributions to the AREDN Project must be retained in the source code.
+  If importing this code into a new or existing project attribution
+  to the AREDN project must be added to the source code.
+
+  You must not misrepresent the origin of the material contained within.
+
+  Modified versions must be modified to attribute to the original source
+  and be marked in reasonable ways as differentiate it from the original
+  version
+
+--]]
+
+local PING = "/bin/ping"
+local PIDOF = "/bin/pidof"
+local REBOOT = "/sbin/reboot"
+
+local W = {}
+
+-- Configuration limits and defaults: key = { minimum, default, maximum }.
+-- startup_delay, ping_timeout and tick are in seconds; daily is an hour of
+-- the day (0-23), with -1 meaning the daily reboot is disabled.
+local config_limits = {
+    startup_delay = { 600, 600, 3600 },
+    ping_count = { 1, 3, 10 },
+    ping_timeout = { 1, 5, 10 },
+    tick = { 60, 60, 600 },
+    failures = { 2, 3, 25 },
+    daily = { -1, -1, 23 }
+}
+
+-- Set of daemons to monitor
+local default_daemons = "olsrd dnsmasq telnetd dropbear uhttpd"
+do
+    -- Also watch vtund when a tunnel server or client is configured
+    local c = uci.cursor()
+    if c:get("vtun", "server_0", "host") or c:get("vtun", "client_0", "name") then
+        default_daemons = default_daemons .. " vtund"
+    end
+end
+
+-- Read the watchdog settings from the "aredn" uci config.
+-- Returns nil when the watchdog is not enabled, otherwise a table holding
+-- ping_addresses and daemons (arrays of strings) plus one whole number for
+-- each key of config_limits, clamped to that key's minimum and maximum.
+function W.get_config()
+    local c = uci.cursor()
+
+    if c:get("aredn", "@watchdog[0]", "enable") ~= "1" then
+        return nil
+    end
+
+    -- Only dotted-quad IPv4 addresses are accepted; anything else is ignored
+    local ping_addresses = {}
+    local addresses = c:get("aredn", "@watchdog[0]", "ping_addresses") or ""
+    for address in addresses:gmatch("(%S+)") do
+        if address:match("^%d+%.%d+%.%d+%.%d+$") then
+            mainlog:write("pinging " .. address)
+            ping_addresses[#ping_addresses + 1] = address
+        end
+    end
+
+    -- Daemon names end up on a shell command line, so only plain names are
+    -- accepted (no shell metacharacters, no leading "-")
+    local daemons = {}
+    local mydaemons = c:get("aredn", "@watchdog[0]", "daemons") or default_daemons
+    for daemon in mydaemons:gmatch("(%S+)") do
+        if daemon:match("^[%w_][%w%._%-]*$") then
+            mainlog:write("monitor " .. daemon)
+            daemons[#daemons + 1] = daemon
+        else
+            mainlog:write("ignoring invalid daemon name " .. daemon)
+        end
+    end
+
+    local config = {
+        ping_addresses = ping_addresses,
+        daemons = daemons
+    }
+    for k, v in pairs(config_limits)
+    do
+        -- "or nil" forces exactly one value into tonumber, even if c:get
+        -- were to return no value at all
+        local val = tonumber(c:get("aredn", "@watchdog[0]", k) or nil)
+        if not val or val ~= val then
+            config[k] = v[2]
+        else
+            -- Whole numbers only: the values are used as counts, hours and
+            -- command line arguments
+            config[k] = math.max(v[1], math.min(v[3], math.floor(val)))
+        end
+    end
+    return config
+end
+
+-- Returns true when pidof succeeds for every monitored daemon.
+local function daemons_running(config)
+    for _, daemon in ipairs(config.daemons)
+    do
+        if os.execute(PIDOF .. " " .. daemon .. " > /dev/null ") ~= 0 then
+            mainlog:write("pidof " .. daemon .. " failed")
+            return false
+        end
+    end
+    return true
+end
+
+-- Returns true when at least one ping address answers, or when no ping
+-- addresses are configured.
+local function can_ping(config)
+    if #config.ping_addresses == 0 then
+        return true
+    end
+    for _, address in ipairs(config.ping_addresses)
+    do
+        if os.execute(PING .. " -c " .. config.ping_count .. " -A -q -W " .. config.ping_timeout .. " " .. address .. " > /dev/null 2>&1") == 0 then
+            return true
+        end
+        mainlog:write("ping " .. address .. " failed")
+    end
+    return false
+end
+
+-- Entry point of the hardware watchdog task (this is the value the module
+-- returns). Takes /dev/watchdog over from the "system" ubus service, then
+-- once per tick checks the monitored daemons and ping addresses and only
+-- writes to the watchdog device when they are healthy. Optionally reboots
+-- the node once a day at the configured hour. Never returns once running.
+function W.start()
+    local config = W.get_config()
+    if not config then
+        exit_app()
+        return
+    end
+
+    -- Dont start monitoring too soon. Let the system settle down.
+    wait_for_ticks(math.max(1, config.startup_delay - nixio.sysinfo().uptime))
+
+    local ub = ubus.connect()
+    if not ub then
+        mainlog:write("Watchdog failed to start: Cannot connect to ubus")
+        exit_app()
+        return
+    end
+    -- Stop the system service feeding the watchdog before we open the device
+    ub:call("system", "watchdog", { magicclose = true })
+    ub:call("system", "watchdog", { stop = true })
+
+    local wd = io.open("/dev/watchdog", "w")
+    if not wd then
+        mainlog:write("Watchdog failed to start: Cannot open /dev/watchdog\n")
+        ub:call("system", "watchdog", { stop = false })
+        exit_app()
+        return
+    end
+    -- Unbuffered, so every keepalive written below reaches the device right
+    -- away instead of sitting in the stdio buffer
+    wd:setvbuf("no")
+
+    -- Make sure we have enough tick time for any pings
+    local total_ping_time = 30 + (config.ping_timeout + config.ping_count) * #config.ping_addresses
+    if total_ping_time > config.tick then
+        config.tick = math.ceil(total_ping_time / 60) * 60
+        mainlog:write("adjusted tick to " .. config.tick)
+    end
+
+    -- The reboot timeout seem to be 3-5x the timeout value
+    -- We make sure it's at least 5 minutes
+    ub:call("system", "watchdog", { timeout = math.ceil(math.max(300, config.tick * config.failures) / 3) })
+
+    -- The daily reboot is armed during the last 3 ticks of the previous hour.
+    -- config.tick is in seconds while time.min is in minutes, hence the / 60.
+    local arm_minute = 60 - math.ceil(config.tick * 3 / 60)
+    local daily_reboot_armed = false
+
+    while true
+    do
+        local now = os.time()
+
+        -- Reboot a device daily at a given time if configured. To avoid rebooting over and
+        -- over we must have just seen the previous hour
+        if config.daily ~= -1 then
+            local time = os.date("*t")
+            if time.min >= arm_minute and (time.hour + 1) % 24 == config.daily then
+                daily_reboot_armed = true
+            elseif daily_reboot_armed and time.hour == config.daily then
+                mainlog:write("reboot")
+                os.execute(REBOOT .. " >/dev/null 2>&1")
+                daily_reboot_armed = false
+            else
+                daily_reboot_armed = false
+            end
+        end
+
+        -- Only feed the watchdog when every check passed (pings run only if the daemons are ok)
+        if daemons_running(config) and can_ping(config) then
+            wd:write("V")
+        else
+            mainlog:write("failed")
+        end
+
+        wait_for_ticks(math.max(1, config.tick - (os.time() - now)))
+    end
+end
+
+return W.start
diff --git a/files/usr/local/bin/mgr/watchdog.lua b/files/usr/local/bin/mgr/olsrd_watchdog.lua
similarity index 98%
rename from files/usr/local/bin/mgr/watchdog.lua
rename to files/usr/local/bin/mgr/olsrd_watchdog.lua
index 42c0f9a9..88948f06 100644
--- a/files/usr/local/bin/mgr/watchdog.lua
+++ b/files/usr/local/bin/mgr/olsrd_watchdog.lua
@@ -62,7 +62,7 @@ function olsrd_restart()
     end
 end
 
-function watchdog()
+function olsrd_watchdog()
     while true
     do
         wait_for_ticks(223)
@@ -86,4 +86,4 @@ function watchdog()
     end
 end
 
-return watchdog
+return olsrd_watchdog
diff --git a/files/usr/local/bin/node-setup b/files/usr/local/bin/node-setup
index 898eeaf8..e7521301 100755
--- a/files/usr/local/bin/node-setup
+++ b/files/usr/local/bin/node-setup
@@ -1063,6 +1063,9 @@ local config_special = {
     lqm_enable = c:get("aredn", "@lqm[0]", "enable"),
     tunnel_weight = c:get("aredn", "@tunnel[0]", "weight"),
     supernode_enable = c:get("aredn", "@supernode[0]", "enable"),
+    watchdog_enable = c:get("aredn", "@watchdog[0]", "enable"),
+    watchdog_pings = c:get("aredn", "@watchdog[0]", "ping_addresses"),
+    watchdog_daily = c:get("aredn", "@watchdog[0]", "daily"),
     wifi_mode_0 = c:get("wireless", "@wifi-iface[0]", "mode"),
     wifi_mode_1 = c:get("wireless", "@wifi-iface[1]", "mode")
 }
@@ -1097,6 +1100,15 @@ do
         if
oc:get("aredn", "@supernode[0]", "enable") ~= config_special.supernode_enable then changes.reboot = true end + if oc:get("aredn", "@watchdog[0]", "enable") ~= config_special.watchdog_enable then + changes.reboot = true + end + if oc:get("aredn", "@watchdog[0]", "ping_addresses") ~= config_special.watchdog_pings then + changes.manager = true + end + if oc:get("aredn", "@watchdog[0]", "daily") ~= config_special.watchdog_daily then + changes.manager = true + end elseif file == "network" then changes.network = true elseif file == "dhcp" then diff --git a/files/www/cgi-bin/advancedconfig b/files/www/cgi-bin/advancedconfig index e6de4ccc..58f37350 100755 --- a/files/www/cgi-bin/advancedconfig +++ b/files/www/cgi-bin/advancedconfig @@ -247,6 +247,27 @@ local settings = { desc = "WAN-Only Tunnel prevents tunnel traffic from being routed over the Mesh network itself

aredn.@tunnel[0].wanonly", default = "1" }, + { + category = "Watchdog", + key = "aredn.@watchdog[0].enable", + type = "boolean", + desc = "The Watchdog will reboot the node if it stops operating correctly

aredn.@watchdog[0].enable", + default = "0" + }, + { + category = "Watchdog", + key = "aredn.@watchdog[0].ping_addresses", + type = "string", + desc = "Watchdog IP addresses is a whitespace separated list of IP addresses, one of which should always be pingable

aredn.@watchdog[0].ping_addresses", + default = "" + }, + { + category = "Watchdog", + key = "aredn.@watchdog[0].daily", + type = "string", + desc = "Daily Watchdog hour is the hour every day (0-23) to automatically reboot the node

aredn.@watchdog[0].daily", + default = "" + }, { category = "Memory Settings", key = "aredn.@meshstatus[0].lowmem",