mirror of https://github.com/aredn/aredn.git
Olsrd crash fixes (#1234)
* Simplify the OLSR watchdog * Don't pull the routing table into LQM for supernodes. LQM tracks routes on nodes to help keep leaf nodes connected even when circumstances would probably prevent this. However, on supernodes the routing table is massive, and pulling it into LQM will frequently crash OLSRD. As we don't need this for supernodes, just don't do it for them.
This commit is contained in:
parent
192e6deaec
commit
0328f0ec7e
|
@ -247,6 +247,7 @@ end
|
||||||
|
|
||||||
local myhostname = canonical_hostname(aredn.info.get_nvram("node") or "localnode")
|
local myhostname = canonical_hostname(aredn.info.get_nvram("node") or "localnode")
|
||||||
local myip = uci.cursor():get("network", "wifi", "ipaddr")
|
local myip = uci.cursor():get("network", "wifi", "ipaddr")
|
||||||
|
local is_supernode = uci.cursor():get("aredn", "@supernode[0]", "enable") == "1"
|
||||||
|
|
||||||
local wgsupport = nixio.fs.stat("/usr/bin/wg")
|
local wgsupport = nixio.fs.stat("/usr/bin/wg")
|
||||||
|
|
||||||
|
@ -803,17 +804,21 @@ function lqm()
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Pull in the routing table to see how many node routes are associated with each tracker.
|
-- Pull in the routing table to see how many node routes are associated with each tracker.
|
||||||
|
-- We don't do this if this is a supernode because the routes table is massive and can cause
|
||||||
|
-- olsrd to crash.
|
||||||
--
|
--
|
||||||
total_node_route_count = 0
|
total_node_route_count = 0
|
||||||
for _, route in ipairs(aredn.olsr.getOLSRRoutes())
|
if not is_supernode then
|
||||||
do
|
for _, route in ipairs(aredn.olsr.getOLSRRoutes())
|
||||||
-- Count routes to nodes. There are two routes to most nodes, the node's primary address
|
do
|
||||||
-- and the node's dtdlink address.
|
-- Count routes to nodes. There are two routes to most nodes, the node's primary address
|
||||||
if route.genmask == 32 and route.destination:match("^10%.") then
|
-- and the node's dtdlink address.
|
||||||
local track = ip2tracker[route.gateway];
|
if route.genmask == 32 and route.destination:match("^10%.") then
|
||||||
if track then
|
local track = ip2tracker[route.gateway];
|
||||||
track.node_route_count = track.node_route_count + 1
|
if track then
|
||||||
total_node_route_count = total_node_route_count + 1
|
track.node_route_count = track.node_route_count + 1
|
||||||
|
total_node_route_count = total_node_route_count + 1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -36,53 +36,22 @@
|
||||||
--]]
|
--]]
|
||||||
|
|
||||||
local watchdogfile = "/tmp/olsrd.watchdog"
|
local watchdogfile = "/tmp/olsrd.watchdog"
|
||||||
local pidfile = "/var/run/olsrd.pid"
|
local sleeptime = 3 * 60 -- 3 minutes
|
||||||
local logfile = "/tmp/olsrd.log"
|
local timeout = 10 * 60 -- 10 minutes
|
||||||
|
|
||||||
function olsrd_restart()
|
|
||||||
-- print "olsrd_restart"
|
|
||||||
|
|
||||||
os.execute("/etc/init.d/olsrd restart")
|
|
||||||
|
|
||||||
if nixio.fs.stat(logfile) then
|
|
||||||
local lines = read_all(logfile):splitNewLine()
|
|
||||||
lines[#lines + 1] = secondsToClock(nixio.sysinfo().uptime) .. " " .. os.date()
|
|
||||||
local start = 1
|
|
||||||
if #lines > 300 then
|
|
||||||
start = #lines - 275
|
|
||||||
end
|
|
||||||
local f = io.open(logfile, "w")
|
|
||||||
if f then
|
|
||||||
for i = start, #lines
|
|
||||||
do
|
|
||||||
f:write(lines[i] .. "\n")
|
|
||||||
end
|
|
||||||
f:close()
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
function olsrd_watchdog()
|
function olsrd_watchdog()
|
||||||
while true
|
while true
|
||||||
do
|
do
|
||||||
wait_for_ticks(223)
|
wait_for_ticks(sleeptime)
|
||||||
|
if nixio.fs.stat(watchdogfile) then
|
||||||
local pid = read_all(pidfile)
|
local watchtime = tonumber(read_all(watchdogfile))
|
||||||
if pid and nixio.fs.stat("/proc/" .. pid) then
|
-- If watchtime hasn't been updated recently then we restart OLSRD
|
||||||
if nixio.fs.stat(watchdogfile) then
|
if watchtime + timeout < os.time() then
|
||||||
|
nixio.syslog("err", "olsrd watchdog timeout - restarting")
|
||||||
os.remove(watchdogfile)
|
os.remove(watchdogfile)
|
||||||
else
|
os.execute("/etc/init.d/olsrd restart")
|
||||||
olsrd_restart()
|
|
||||||
end
|
|
||||||
else
|
|
||||||
local pids = capture("pidof olsrd"):splitWhiteSpace()
|
|
||||||
if #pids == 1 then
|
|
||||||
write_all(pidfile, pids[1]);
|
|
||||||
elseif #pids == 0 then
|
|
||||||
olsrd_restart()
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue