LQM fixes 4 (#370)

* Tidy LQM status
Remove TX Estimate which was duplicating information on the mesh page and
confusing folk.
Sort by name to stop the display jumping around.

* Split out ping and tx qualities and use average of both.

* Improve keeping re-discovered nodes in pending

* Remove .local.mesh from hostnames (the suffix is sometimes present)

* Identify why poor quality traffic is blocked
This commit is contained in:
Tim Wilkinson 2022-05-24 08:35:36 -07:00 committed by GitHub
parent 2fb911948a
commit 53632d322d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 68 additions and 42 deletions

View File

@ -43,7 +43,7 @@ local lastseen_timeout = 60 * 60 -- age out nodes we've not seen for 1 hour
local snr_run_avg = 0.8 -- snr running average
local quality_min_packets = 100 -- minimum number of tx packets before we can safely calculate the link quality
local quality_injection_max = 10 -- number of packets to inject into poor links to update quality
local quality_run_avg = 0.8 -- quality running average
local tx_quality_run_avg = 0.8 -- tx quality running average
local ping_timeout = 1.0 -- timeout before ping gives a quality penalty
local myhostname = (info.get_nvram("node") or "localnode"):lower()
@ -232,6 +232,8 @@ function lqm()
local snr = station.signal - station.noise
if not tracker[mac] then
tracker[mac] = {
firstseen = now,
lastseen = now,
pending = now + pending_timeout,
refresh = 0,
mac = mac,
@ -255,7 +257,10 @@ function lqm()
links = {},
tx_rate = 0,
last_tx = nil,
last_tx_total = nil
last_tx_total = nil,
tx_quality = 100,
ping_quality = 100,
quality = 100
}
end
local track = tracker[mac]
@ -294,7 +299,7 @@ function lqm()
track.last_tx = tx
track.last_tx_total = tx_total
track.last_quality = tx_quality
track.tx_quality = math.min(100, math.max(0, math.ceil(quality_run_avg * track.tx_quality + (1 - quality_run_avg) * tx_quality)))
track.tx_quality = math.min(100, math.max(0, math.ceil(tx_quality_run_avg * track.tx_quality + (1 - tx_quality_run_avg) * tx_quality)))
end
track.tx_rate = station.tx_rate
@ -330,7 +335,7 @@ function lqm()
for _, rtrack in pairs(info.lqm.info.trackers)
do
if rtrack.hostname then
local hostname = rtrack.hostname:lower():gsub("^dtdlink%.","")
local hostname = rtrack.hostname:lower():gsub("^dtdlink%.",""):gsub("%.local%.mesh$", "")
track.links[hostname] = {
type = "RF",
snr = rtrack.snr
@ -355,7 +360,7 @@ function lqm()
for ip, link in pairs(info.link_info)
do
if link.hostname then
local hostname = link.hostname:lower():gsub("^dtdlink%.","")
local hostname = link.hostname:lower():gsub("^dtdlink%.",""):gsub("%.local%.mesh$", "")
if link.linkType == "DTD" then
track.links[hostname] = { type = link.linkType }
elseif link.linkType == "RF" and link.signal and link.noise then
@ -402,15 +407,24 @@ function lqm()
end
-- Ping addresses and penalize quality for excessively slow links
if should_ping(track) then
if config.ping_penalty <= 0 then
track.ping_quality = 100
elseif should_ping(track) then
-- Make an arp request to the target ip to see if we get a timely reply. By using ARP we avoid any
-- potential routing issues and avoid any firewall blocks on the other end.
-- Take a penalty if we fail
-- As the request is broadcast, we avoid any potential distance/scope timing issues as we don't wait for the
-- packet to be acked. The reply will be unicast to us, and our ack to that is unimportant to the latency test.
local success = 100
if os.execute("/usr/sbin/arping -f -w " .. ping_timeout .. " -I " .. wlan .. " " .. track.ip .. " >/dev/null") ~= 0 then
track.tx_quality = math.min(100, math.max(0, math.ceil(track.tx_quality - config.ping_penalty)))
success = 0
end
local ping_loss_run_avg = 1 - config.ping_penalty / 100
track.ping_quality = math.ceil(ping_loss_run_avg * track.ping_quality + (1 - ping_loss_run_avg) * success)
end
-- Calculate overall link quality
track.quality = math.ceil((track.tx_quality + track.ping_quality) / 2)
-- Inject traffic into links with poor quality
-- We do this so we can keep measuring the current link quality; otherwise, once it becomes
-- bad, it won't be used and we can never tell if it becomes good again. Beware injecting too
@ -438,7 +452,7 @@ function lqm()
local changes = {
snr = -1,
distance = nil,
tx_quality = nil
quality = nil
}
-- Scan through the list of nodes we're tracking and select the node with the best SNR then
-- adjust our settings so that this node is valid
@ -451,7 +465,7 @@ function lqm()
if snr > changes.snr then
changes.snr = snr
changes.distance = track.distance
changes.tx_quality = track.tx_quality
changes.quality = track.quality
end
end
local cursorb = uci.cursor("/etc/config.mesh")
@ -464,9 +478,9 @@ function lqm()
cursor:set("aredn", "@lqm[0]", "max_distance", changes.distance)
cursorb:set("aredn", "@lqm[0]", "max_distance", changes.distance)
end
if changes.tx_quality and changes.tx_quality < config.min_quality then
cursor:set("aredn", "@lqm[0]", "min_quality", math.max(0, math.floor(changes.tx_quality - 20)))
cursorb:set("aredn", "@lqm[0]", "min_quality", math.max(0, math.floor(changes.tx_quality - 20)))
if changes.quality and changes.quality < config.min_quality then
cursor:set("aredn", "@lqm[0]", "min_quality", math.max(0, math.floor(changes.quality - 20)))
cursorb:set("aredn", "@lqm[0]", "min_quality", math.max(0, math.floor(changes.quality - 20)))
end
end
cursor:set("aredn", "@lqm[0]", "first_run", "0")
@ -491,7 +505,7 @@ function lqm()
-- When signal is good enough to unblock a link but the quality is low, artificially bump
-- it up to give the link chance to recover
if track.blocks.quality then
track.tx_quality = config.min_quality + config.margin_quality
track.quality = config.min_quality + config.margin_quality
end
end
end
@ -514,10 +528,10 @@ function lqm()
end
-- Block if quality is poor
if track.tx_quality then
if not track.blocks.quality and track.tx_quality < config.min_quality then
if track.quality then
if not track.blocks.quality and track.quality < config.min_quality then
track.blocks.quality = true
elseif track.blocks.quality and track.tx_quality >= config.min_quality + config.margin_quality then
elseif track.blocks.quality and track.quality >= config.min_quality + config.margin_quality then
track.blocks.quality = false
end
end
@ -593,8 +607,8 @@ function lqm()
end
end
-- Remove any trackers which are too old or if they disconnect while still pending
if ((now > track.lastseen + lastseen_timeout) or (not is_connected(track) and is_pending(track))) then
-- Remove any trackers which are too old or if they disconnect when first seen
if ((now > track.lastseen + lastseen_timeout) or (not is_connected(track) and track.firstseen + pending_timeout > now)) then
track.blocked = true;
track.blocks = {}
update_block(track)

View File

@ -76,17 +76,17 @@ html.print([[
#links > div {
padding: 2px 0;
}
.m, .b {
.m {
display: inline-block;
width: 190px;
width: 220px;
}
.s {
display: inline-block;
width: 80px;
width: 90px;
}
.p {
display: inline-block;
width: 130px;
width: 110px;
}
</style>
<center>
@ -95,7 +95,7 @@ if node_desc ~= "" then
html.print([[<table id='node_description_display'><tr><td>]] .. node_desc .. [[</td></tr></table>]])
end
html.print([[<hr>
<table width=750>
<table>
<tr><td>
<center>
<button type=button onClick='window.location.reload()' title='Refresh this page'>Refresh</button>
@ -105,7 +105,7 @@ html.print([[<hr>
</td></tr>
<tr><td>
<div class="lt">
<span class="m">RF Neighbor</span><span class="s">SNR</span><span class="p">Distance</span><span class="s">Quality</span><span class="p">TX Estimate</span><span class="p">Status</span>
<span class="m">RF Neighbor</span><span class="s">SNR</span><span class="p">Distance</span><span class="s">Quality</span><span class="p">Status</span>
</div>
<div id="links"></div>
</td></tr>
@ -140,7 +140,12 @@ html.print([[<hr>
return "blocked - dup";
}
if (track.blocks.quality) {
return "blocked - quality";
if (track.tx_quality < track.ping_quality) {
return "blocked - retries";
}
else {
return "blocked - latency";
}
}
return "blocked";
}
@ -169,25 +174,32 @@ html.print([[<hr>
}
const update = data => {
let links = "";
for (let mac in data.info.trackers) {
const track = data.info.trackers[mac];
let txspeed = "-";
let txquality = "-";
const trackers = Object.values(data.info.trackers);
trackers.sort((a, b) => name(a).localeCompare(name(b)));
trackers.forEach(track => {
let quality = "-";
let distance = "-";
let status = get_status(track, data);
if (status !== "disconnected") {
if (status === "pending" || !track.blocked) {
txspeed = (track.tx_rate * wifi_scale).toFixed(2) + " Mbps";
}
if (typeof track.tx_quality === "number" && (status === "pending" || !track.blocked || (track.blocks.quality && !(track.blocks.dtd || track.blocks.signal || track.blocks.distance || track.blocks.user || track.blocks.dup)))) {
txquality = track.tx_quality + "%";
}
if (typeof track.distance === "number") {
distance = convertd(track.distance);
}
switch (status) {
case "disconnected":
break;
case "pending":
case "active":
case "idle":
case "blocked - retries":
case "blocked - latency":
if (typeof track.quality === "number") {
quality = track.quality + "%";
}
// Fall through
default:
if (typeof track.distance === "number") {
distance = convertd(track.distance);
}
break;
}
links += `<div><span class="m">${name(track)}</span><span class="s">${track.snr}${"rev_snr" in track ? "/" + track.rev_snr : ""}</span><span class="p">${distance}</span><span class="s">${txquality}</span><span class="p">${txspeed}</span><span class="p">${status}</span></div>`;
}
links += `<div><span class="m">${name(track)}</span><span class="s">${track.snr}${"rev_snr" in track ? "/" + track.rev_snr : ""}</span><span class="p">${distance}</span><span class="s">${quality}</span><span class="p">${status}</span></div>`;
});
if (links.length) {
document.getElementById("links").innerHTML = links;
}