diff --git a/files/usr/local/bin/manager.lua b/files/usr/local/bin/manager.lua
index 07b044fb..0bd419f3 100755
--- a/files/usr/local/bin/manager.lua
+++ b/files/usr/local/bin/manager.lua
@@ -43,6 +43,7 @@ require("iwinfo")
require("aredn.hardware")
require("aredn.log")
require("luci.jsonc")
+require("ubus")
-- aggressive gc on low memory devices
if nixio.sysinfo().totalram < 32 * 1024 * 1024 then
diff --git a/files/usr/local/bin/mgr/hw_watchdog.lua b/files/usr/local/bin/mgr/hw_watchdog.lua
new file mode 100755
index 00000000..df5bbceb
--- /dev/null
+++ b/files/usr/local/bin/mgr/hw_watchdog.lua
@@ -0,0 +1,197 @@
+--[[
+
+ Part of AREDN -- Used for creating Amateur Radio Emergency Data Networks
+ Copyright (C) 2023 Tim Wilkinson
+ See Contributors file for additional contributors
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation version 3 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ Additional Terms:
+
+ Additional use restrictions exist on the AREDN(TM) trademark and logo.
+ See AREDNLicense.txt for more info.
+
+ Attributions to the AREDN Project must be retained in the source code.
+ If importing this code into a new or existing project attribution
+ to the AREDN project must be added to the source code.
+
+ You must not misrepresent the origin of the material contained within.
+
+ Modified versions must be modified to attribute to the original source
+ and be marked in reasonable ways as differentiate it from the original
+ version
+
+--]]
+
+local PING = "/bin/ping" -- run against each configured ping address
+local PIDOF = "/bin/pidof" -- run to check that a monitored daemon has a process
+local REBOOT = "/sbin/reboot" -- run for the scheduled daily reboot
+
+local W = {}
+
+-- Configuration limits and defaults; each entry is { minimum, default, maximum } for the uci option of the same name
+local config_limits = {
+ startup_delay = { 600, 600, 3600 }, -- compared against system uptime before monitoring begins (presumably seconds)
+ ping_count = { 1, 3, 10 }, -- passed to ping as -c
+ ping_timeout = { 1, 5, 10 }, -- passed to ping as -W
+ tick = { 60, 60, 600 }, -- interval between check rounds, handed to wait_for_ticks
+ failures = { 2, 3, 25 }, -- multiplied by tick when sizing the watchdog timeout
+ daily = { -1, -1, 23 } -- hour of day for the daily reboot; -1 disables it
+}
+
+-- Daemons watched when the config names none; vtund joins the list only if
+-- vtun has a server_0 host or a client_0 name set
+local default_daemons = "olsrd dnsmasq telnetd dropbear uhttpd"
+local vtun_configured = uci.cursor():get("vtun", "server_0", "host") or uci.cursor():get("vtun", "client_0", "name")
+if vtun_configured then default_daemons = default_daemons .. " vtund" end
+
+function W.get_config()
+    -- Reads the aredn "@watchdog[0]" uci section. Returns nil unless its "enable" option
+    -- is "1"; otherwise returns a table with ping_addresses, daemons and one numeric
+    -- field per config_limits key.
+    local c = uci.cursor()
+    if c:get("aredn", "@watchdog[0]", "enable") ~= "1" then
+        return nil
+    end
+
+    -- Keep only whitespace-separated tokens that look like dotted-quad IPv4 addresses
+    local ping_addresses = {}
+    local addresses = c:get("aredn", "@watchdog[0]", "ping_addresses") or ""
+    for address in addresses:gmatch("(%S+)") do
+        if address:match("^%d+%.%d+%.%d+%.%d+$") then
+            mainlog:write("pinging " .. address)
+            ping_addresses[#ping_addresses + 1] = address
+        end
+    end
+    -- Fall back to default_daemons when no daemon list is configured
+    local daemons = {}
+    local mydaemons = c:get("aredn", "@watchdog[0]", "daemons") or default_daemons
+    for daemon in mydaemons:gmatch("(%S+)") do
+        mainlog:write("monitor " .. daemon)
+        daemons[#daemons + 1] = daemon
+    end
+    local config = { ping_addresses = ping_addresses, daemons = daemons }
+    -- Each limit is { minimum, default, maximum }. Values are rounded down to whole numbers
+    -- before clamping: a fractional count or timeout would be handed to ping verbatim,
+    -- and a fractional daily hour could never equal the clock hour.
+    for k, v in pairs(config_limits) do
+        local val = tonumber(c:get("aredn", "@watchdog[0]", k) or nil)
+        if not val or val ~= val then
+            -- Missing, non-numeric or NaN: use the default
+            config[k] = v[2]
+        else
+            config[k] = math.max(v[1], math.min(v[3], math.floor(val)))
+        end
+    end
+    return config
+end
+
+-- Returns true when pidof finds a process for every daemon in the list.
+local function daemons_running(daemons)
+    for _, daemon in ipairs(daemons) do
+        if os.execute(PIDOF .. " " .. daemon .. " > /dev/null ") ~= 0 then
+            mainlog:write("pidof " .. daemon .. " failed")
+            return false
+        end
+    end
+    return true
+end
+
+-- Returns true when no ping addresses are configured or when any one of them answers.
+local function any_address_reachable(config)
+    if #config.ping_addresses == 0 then
+        return true
+    end
+    for _, address in ipairs(config.ping_addresses) do
+        if os.execute(PING .. " -c " .. config.ping_count .. " -A -q -W " .. config.ping_timeout .. " " .. address .. " > /dev/null 2>&1") == 0 then
+            return true
+        end
+        mainlog:write("ping " .. address .. " failed")
+    end
+    return false
+end
+
+-- Watchdog task: feeds /dev/watchdog once per tick while the daemon and ping checks pass, and runs the optional daily reboot.
+function W.start()
+    local config = W.get_config()
+    if not config then
+        exit_app()
+        return
+    end
+
+    -- Don't start monitoring too soon. Let the system settle down.
+    wait_for_ticks(math.max(1, config.startup_delay - nixio.sysinfo().uptime))
+
+    local ub = ubus.connect()
+    if not ub then
+        mainlog:write("Watchdog failed to start: Cannot connect to ubus\n")
+        exit_app()
+        return
+    end
+    ub:call("system", "watchdog", { magicclose = true })
+    ub:call("system", "watchdog", { stop = true })
+
+    local wd = io.open("/dev/watchdog", "w")
+    if not wd then
+        mainlog:write("Watchdog failed to start: Cannot open /dev/watchdog\n")
+        ub:call("system", "watchdog", { stop = false })
+        exit_app()
+        return
+    end
+
+    -- Make sure we have enough tick time for any pings
+    local total_ping_time = 30 + (config.ping_timeout + config.ping_count) * #config.ping_addresses
+    if total_ping_time > config.tick then
+        config.tick = math.ceil(total_ping_time / 60) * 60
+        mainlog:write("adjusted tick to " .. config.tick)
+    end
+
+    -- The reboot timeout seems to be 3-5x the timeout value, so we make sure it's at least 5 minutes
+    ub:call("system", "watchdog", { timeout = math.ceil(math.max(300, config.tick * config.failures) / 3) })
+
+    -- config.tick is in seconds while the clock test below is in minutes, so convert
+    local arm_minutes = config.tick / 60 * 3
+    local daily_reboot_armed = false
+
+    while true do
+        local now = os.time()
+
+        -- Reboot a device daily at a given time if configured. To avoid rebooting over and
+        -- over we must have just seen the previous hour
+        if config.daily ~= -1 then
+            local time = os.date("*t")
+            if time.min >= (60 - arm_minutes) and (time.hour + 1) % 24 == config.daily then
+                daily_reboot_armed = true
+            elseif daily_reboot_armed and time.hour == config.daily then
+                mainlog:write("reboot")
+                os.execute(REBOOT .. " >/dev/null 2>&1")
+                daily_reboot_armed = false
+            else
+                daily_reboot_armed = false
+            end
+        end
+
+        if daemons_running(config.daemons) and any_address_reachable(config) then
+            -- io files are buffered: flush so the keepalive reaches the device on this tick
+            -- NOTE(review): "V" is the Linux watchdog magic-close character - confirm that is intended here
+            wd:write("V")
+            wd:flush()
+        else
+            mainlog:write("failed")
+        end
+
+        wait_for_ticks(math.max(1, config.tick - (os.time() - now)))
+    end
+end
+
+return W.start
diff --git a/files/usr/local/bin/mgr/watchdog.lua b/files/usr/local/bin/mgr/olsrd_watchdog.lua
similarity index 98%
rename from files/usr/local/bin/mgr/watchdog.lua
rename to files/usr/local/bin/mgr/olsrd_watchdog.lua
index 42c0f9a9..88948f06 100644
--- a/files/usr/local/bin/mgr/watchdog.lua
+++ b/files/usr/local/bin/mgr/olsrd_watchdog.lua
@@ -62,7 +62,7 @@ function olsrd_restart()
end
end
-function watchdog()
+function olsrd_watchdog()
while true
do
wait_for_ticks(223)
@@ -86,4 +86,4 @@ function watchdog()
end
end
-return watchdog
+return olsrd_watchdog
diff --git a/files/usr/local/bin/node-setup b/files/usr/local/bin/node-setup
index 898eeaf8..e7521301 100755
--- a/files/usr/local/bin/node-setup
+++ b/files/usr/local/bin/node-setup
@@ -1063,6 +1063,9 @@ local config_special = {
lqm_enable = c:get("aredn", "@lqm[0]", "enable"),
tunnel_weight = c:get("aredn", "@tunnel[0]", "weight"),
supernode_enable = c:get("aredn", "@supernode[0]", "enable"),
+ watchdog_enable = c:get("aredn", "@watchdog[0]", "enable"),
+ watchdog_pings = c:get("aredn", "@watchdog[0]", "ping_addresses"),
+ watchdog_daily = c:get("aredn", "@watchdog[0]", "daily"),
wifi_mode_0 = c:get("wireless", "@wifi-iface[0]", "mode"),
wifi_mode_1 = c:get("wireless", "@wifi-iface[1]", "mode")
}
@@ -1097,6 +1100,15 @@ do
if oc:get("aredn", "@supernode[0]", "enable") ~= config_special.supernode_enable then
changes.reboot = true
end
+ if oc:get("aredn", "@watchdog[0]", "enable") ~= config_special.watchdog_enable then
+ changes.reboot = true
+ end
+ if oc:get("aredn", "@watchdog[0]", "ping_addresses") ~= config_special.watchdog_pings then
+ changes.manager = true
+ end
+ if oc:get("aredn", "@watchdog[0]", "daily") ~= config_special.watchdog_daily then
+ changes.manager = true
+ end
elseif file == "network" then
changes.network = true
elseif file == "dhcp" then
diff --git a/files/www/cgi-bin/advancedconfig b/files/www/cgi-bin/advancedconfig
index e6de4ccc..58f37350 100755
--- a/files/www/cgi-bin/advancedconfig
+++ b/files/www/cgi-bin/advancedconfig
@@ -247,6 +247,27 @@ local settings = {
desc = "WAN-Only Tunnel prevents tunnel traffic from being routed over the Mesh network itself
aredn.@tunnel[0].wanonly",
default = "1"
},
+ {
+ category = "Watchdog",
+ key = "aredn.@watchdog[0].enable",
+ type = "boolean",
+ desc = "The Watchdog will reboot the node if it stops operating correctly
aredn.@watchdog[0].enable",
+ default = "0"
+ },
+ {
+ category = "Watchdog",
+ key = "aredn.@watchdog[0].ping_addresses",
+ type = "string",
+ desc = "Watchdog IP addresses is a whitespace separated list of IP addresses, one of which should always be pingable
aredn.@watchdog[0].ping_addresses",
+ default = ""
+ },
+ {
+ category = "Watchdog",
+ key = "aredn.@watchdog[0].daily",
+ type = "string",
+ desc = "Daily Watchdog hour is the hour every day (0-23) to automatically reboot the node
aredn.@watchdog[0].daily",
+ default = ""
+ },
{
category = "Memory Settings",
key = "aredn.@meshstatus[0].lowmem",