From 4da340384ae0697d3083523d95253a3506b0aecf Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 10 Feb 2019 12:19:05 -0500 Subject: [PATCH] Update publicSuffixLibrary to latest (WASM-able) version See https://github.com/gorhill/publicsuffixlist.js --- src/background.html | 2 +- src/js/storage.js | 4 + src/js/uritools.js | 4 +- src/lib/publicsuffixlist.js | 328 --------- src/lib/publicsuffixlist/publicsuffixlist.js | 627 ++++++++++++++++++ src/lib/publicsuffixlist/wasm/README.md | 29 + .../wasm/publicsuffixlist.wasm | Bin 0 -> 400 bytes .../wasm/publicsuffixlist.wat | 317 +++++++++ 8 files changed, 980 insertions(+), 331 deletions(-) delete mode 100644 src/lib/publicsuffixlist.js create mode 100644 src/lib/publicsuffixlist/publicsuffixlist.js create mode 100644 src/lib/publicsuffixlist/wasm/README.md create mode 100644 src/lib/publicsuffixlist/wasm/publicsuffixlist.wasm create mode 100644 src/lib/publicsuffixlist/wasm/publicsuffixlist.wat diff --git a/src/background.html b/src/background.html index 29b9a2970..104b94131 100644 --- a/src/background.html +++ b/src/background.html @@ -7,7 +7,7 @@ - + diff --git a/src/js/storage.js b/src/js/storage.js index 52b3eef6f..bd281decc 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -1009,6 +1009,10 @@ /******************************************************************************/ µBlock.loadPublicSuffixList = function() { + if ( this.hiddenSettings.disableWebAssembly === false ) { + publicSuffixList.enableWASM(); + } + return new Promise(resolve => { // start of executor this.assets.get('compiled/' + this.pslAssetKey, details => { diff --git a/src/js/uritools.js b/src/js/uritools.js index 6d5a12742..0bb59c8d0 100644 --- a/src/js/uritools.js +++ b/src/js/uritools.js @@ -307,7 +307,7 @@ URI.domain = function() { // It is expected that there is higher-scoped `publicSuffixList` lingering // somewhere. Cache it. See . 
-var psl = publicSuffixList; +const psl = publicSuffixList; /******************************************************************************/ @@ -391,7 +391,7 @@ const domainCachePrune = function() { } }; -window.addEventListener('publicSuffixList', function() { +window.addEventListener('publicSuffixListChanged', function() { domainCache.clear(); }); diff --git a/src/lib/publicsuffixlist.js b/src/lib/publicsuffixlist.js deleted file mode 100644 index cc7ae2dfc..000000000 --- a/src/lib/publicsuffixlist.js +++ /dev/null @@ -1,328 +0,0 @@ -/******************************************************************************* - - publicsuffixlist.js - an efficient javascript implementation to deal with - Mozilla Foundation's Public Suffix List - Copyright (C) 2013-2018 Raymond Hill - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see {http://www.gnu.org/licenses/}. - -*/ - -/*! Home: https://github.com/gorhill/publicsuffixlist.js */ - -/* - This code is mostly dumb: I consider this to be lower-level code, thus - in order to ensure efficiency, the caller is responsible for sanitizing - the inputs. -*/ - -/******************************************************************************/ - -// A single instance of PublicSuffixList is enough. 
- -;(function(root) { - -'use strict'; - -/******************************************************************************/ - -let exceptions = new Map(); -let rules = new Map(); - -// This value dictate how the search will be performed: -// < this.cutoffLength = indexOf() -// >= this.cutoffLength = binary search -const cutoffLength = 256; -const mustPunycode = /[^a-z0-9.-]/; - -/******************************************************************************/ - -// In the context of this code, a domain is defined as: -// "{label}.{public suffix}". -// A single standalone label is a public suffix as per -// http://publicsuffix.org/list/: -// "If no rules match, the prevailing rule is '*' " -// This means 'localhost' is not deemed a domain by this -// code, since according to the definition above, it would be -// evaluated as a public suffix. The caller is therefore responsible to -// decide how to further interpret such public suffix. -// -// `hostname` must be a valid ascii-based hostname. - -function getDomain(hostname) { - // A hostname starting with a dot is not a valid hostname. - if ( !hostname || hostname.charAt(0) === '.' ) { - return ''; - } - hostname = hostname.toLowerCase(); - var suffix = getPublicSuffix(hostname); - if ( suffix === hostname ) { - return ''; - } - var pos = hostname.lastIndexOf('.', hostname.lastIndexOf('.', hostname.length - suffix.length) - 1); - if ( pos <= 0 ) { - return hostname; - } - return hostname.slice(pos + 1); -} - -/******************************************************************************/ - -// Return longest public suffix. -// -// `hostname` must be a valid ascii-based string which respect hostname naming. - -function getPublicSuffix(hostname) { - if ( !hostname ) { - return ''; - } - // Since we slice down the hostname with each pass, the first match - // is the longest, so no need to find all the matching rules. 
- var pos; - while ( true ) { - pos = hostname.indexOf('.'); - if ( pos < 0 ) { - return hostname; - } - if ( search(exceptions, hostname) ) { - return hostname.slice(pos + 1); - } - if ( search(rules, hostname) ) { - return hostname; - } - if ( search(rules, '*' + hostname.slice(pos)) ) { - return hostname; - } - hostname = hostname.slice(pos + 1); - } - // unreachable -} - -/******************************************************************************/ - -// Look up a specific hostname. - -function search(store, hostname) { - // Extract TLD - var pos = hostname.lastIndexOf('.'); - var tld, remainder; - if ( pos < 0 ) { - tld = hostname; - remainder = hostname; - } else { - tld = hostname.slice(pos + 1); - remainder = hostname.slice(0, pos); - } - var substore = store.get(tld); - if ( substore === undefined ) { return false; } - // If substore is a string, use indexOf() - if ( typeof substore === 'string' ) { - return substore.indexOf(' ' + remainder + ' ') >= 0; - } - // It is an array: use binary search. - var l = remainder.length; - if ( l >= substore.length ) { return false; } - var haystack = substore[l]; - if ( haystack.length === 0 ) { return false; } - var left = 0; - var right = Math.floor(haystack.length / l + 0.5); - var i, needle; - while ( left < right ) { - i = left + right >> 1; - needle = haystack.substr( l * i, l ); - if ( remainder < needle ) { - right = i; - } else if ( remainder > needle ) { - left = i + 1; - } else { - return true; - } - } - return false; -} - -/******************************************************************************/ - -// Parse and set a UTF-8 text-based suffix list. Format is same as found at: -// http://publicsuffix.org/list/ -// -// `toAscii` is a converter from unicode to punycode. Required since the -// Public Suffix List contains unicode characters. -// Suggestion: use it's quite good. 
- -function parse(text, toAscii) { - exceptions = new Map(); - rules = new Map(); - - // http://publicsuffix.org/list/: - // "... all rules must be canonicalized in the normal way - // for hostnames - lower-case, Punycode ..." - text = text.toLowerCase(); - - var lineBeg = 0, lineEnd; - var textEnd = text.length; - var line, store, pos, tld; - - while ( lineBeg < textEnd ) { - lineEnd = text.indexOf('\n', lineBeg); - if ( lineEnd < 0 ) { - lineEnd = text.indexOf('\r', lineBeg); - if ( lineEnd < 0 ) { - lineEnd = textEnd; - } - } - line = text.slice(lineBeg, lineEnd).trim(); - lineBeg = lineEnd + 1; - - if ( line.length === 0 ) { - continue; - } - - // Ignore comments - pos = line.indexOf('//'); - if ( pos >= 0 ) { - line = line.slice(0, pos); - } - - // Ignore surrounding whitespaces - line = line.trim(); - if ( !line ) { - continue; - } - - if ( mustPunycode.test(line) ) { - line = toAscii(line); - } - - // Is this an exception rule? - if ( line.charAt(0) === '!' ) { - store = exceptions; - line = line.slice(1); - } else { - store = rules; - } - - // Extract TLD - pos = line.lastIndexOf('.'); - if ( pos < 0 ) { - tld = line; - } else { - tld = line.slice(pos + 1); - line = line.slice(0, pos); - } - - // Store suffix using tld as key - var substore = store.get(tld); - if ( substore === undefined ) { - store.set(tld, (substore = [])); - } - if ( line ) { - substore.push(line); - } - } - crystallize(exceptions); - crystallize(rules); - - window.dispatchEvent(new CustomEvent('publicSuffixList')); -} - -/******************************************************************************/ - -// Cristallize the storage of suffixes using optimal internal representation -// for future look up. 
- -function crystallize(store) { - for ( var entry of store ) { - var tld = entry[0]; - var suffixes = entry[1]; - // No suffix - if ( suffixes.length === 0 ) { - store.set(tld, ''); - continue; - } - // Concatenated list of suffixes less than cutoff length: - // Store as string, lookup using indexOf() - var s = suffixes.join(' '); - if ( s.length < cutoffLength ) { - store.set(tld, ' ' + s + ' '); - continue; - } - // Concatenated list of suffixes greater or equal to cutoff length - // Store as array keyed on suffix length, lookup using binary search. - // I borrowed the idea to key on string length here: - // http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072 - var i = suffixes.length, l; - var aa = []; - while ( i-- ) { - var suffix = suffixes[i]; - var j = aa.length; - l = suffix.length; - while ( j <= l ) { - aa[j] = []; j += 1; - } - aa[l].push(suffix); - } - l = aa.length; - while ( l-- ) { - aa[l] = aa[l].sort().join(''); - } - store.set(tld, aa); - } - return store; -} - -/******************************************************************************/ - -const selfieMagic = 1; - -function toSelfie() { - return { - magic: selfieMagic, - rules: Array.from(rules), - exceptions: Array.from(exceptions) - }; -} - -function fromSelfie(selfie) { - if ( typeof selfie !== 'object' || selfie.magic !== selfieMagic ) { - return false; - } - rules = new Map(selfie.rules); - exceptions = new Map(selfie.exceptions); - window.dispatchEvent(new CustomEvent('publicSuffixList')); - return true; -} - -/******************************************************************************/ - -// Public API - -root = root || window; - -root.publicSuffixList = { - version: '1.0', - parse: parse, - getDomain: getDomain, - getPublicSuffix: getPublicSuffix, - toSelfie: toSelfie, - fromSelfie: fromSelfie, - get empty() { - return rules.size === 0; - } -}; - -/******************************************************************************/ - -})(this); - diff --git 
a/src/lib/publicsuffixlist/publicsuffixlist.js b/src/lib/publicsuffixlist/publicsuffixlist.js new file mode 100644 index 000000000..34473cd6e --- /dev/null +++ b/src/lib/publicsuffixlist/publicsuffixlist.js @@ -0,0 +1,627 @@ +/******************************************************************************* + + publicsuffixlist.js - an efficient javascript implementation to deal with + Mozilla Foundation's Public Suffix List + + Copyright (C) 2013-present Raymond Hill + + License: pick the one which suits you: + GPL v3 see + APL v2 see + +*/ + +/*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */ + +/* jshint browser:true, esversion:6, laxbreak:true, undef:true, unused:true */ +/* globals WebAssembly, console, exports:true, module */ + +/******************************************************************************* + + Reference: + https://publicsuffix.org/list/ + + Excerpt: + + > Algorithm + > + > 1. Match domain against all rules and take note of the matching ones. + > 2. If no rules match, the prevailing rule is "*". + > 3. If more than one rule matches, the prevailing rule is the one which + is an exception rule. + > 4. If there is no matching exception rule, the prevailing rule is the + one with the most labels. + > 5. If the prevailing rule is a exception rule, modify it by removing + the leftmost label. + > 6. The public suffix is the set of labels from the domain which match + the labels of the prevailing rule, using the matching algorithm above. + > 7. The registered or registrable domain is the public suffix plus one + additional label. 
+ +*/ + +/******************************************************************************/ + +(function(context) { +// >>>>>>>> start of anonymous namespace + +'use strict'; + +/******************************************************************************* + + Tree encoding in array buffer: + + Node: + + u16: length of array of children + + u8: flags => bit 0: is_publicsuffix, bit 1: is_exception + + u8: length of char data + + u32: char data or offset to char data + + u32: offset to array of children + = 12 bytes + + More bits in flags could be used; for example: + - to distinguish private suffixes + +*/ + + // i32 / i8 +const HOSTNAME_SLOT = 0; // jshint ignore:line +const LABEL_INDICES_SLOT = 256; // -- / 256 +const RULES_PTR_SLOT = 100; // 100 / 400 +const CHARDATA_PTR_SLOT = 101; // 101 / 404 +const EMPTY_STRING = ''; +const SELFIE_MAGIC = 2; + +let wasmMemory; +let pslBuffer32; +let pslBuffer8; +let pslByteLength = 0; +let hostnameArg = EMPTY_STRING; + +/******************************************************************************/ + +const fireChangedEvent = function() { + if ( + window instanceof Object && + window.dispatchEvent instanceof Function && + window.CustomEvent instanceof Function + ) { + window.dispatchEvent(new CustomEvent('publicSuffixListChanged')); + } +}; + +/******************************************************************************/ + +const allocateBuffers = function(byteLength) { + pslByteLength = byteLength + 3 & ~3; + if ( + pslBuffer32 !== undefined && + pslBuffer32.byteLength >= pslByteLength + ) { + return; + } + if ( wasmMemory !== undefined ) { + const newPageCount = pslByteLength + 0xFFFF >>> 16; + const curPageCount = wasmMemory.buffer.byteLength >>> 16; + const delta = newPageCount - curPageCount; + if ( delta > 0 ) { + wasmMemory.grow(delta); + pslBuffer32 = new Uint32Array(wasmMemory.buffer); + pslBuffer8 = new Uint8Array(wasmMemory.buffer); + } + } else { + pslBuffer8 = new Uint8Array(pslByteLength); + pslBuffer32 = new 
Uint32Array(pslBuffer8.buffer); + } + hostnameArg = ''; + pslBuffer8[LABEL_INDICES_SLOT] = 0; +}; + +/******************************************************************************/ + +// Parse and set a UTF-8 text-based suffix list. Format is same as found at: +// http://publicsuffix.org/list/ +// +// `toAscii` is a converter from unicode to punycode. Required since the +// Public Suffix List contains unicode characters. +// Suggestion: use + +const parse = function(text, toAscii) { + // Use short property names for better minifying results + const rootRule = { + l: EMPTY_STRING, // l => label + f: 0, // f => flags + c: undefined // c => children + }; + + // Tree building + { + const compareLabels = function(a, b) { + let n = a.length; + let d = n - b.length; + if ( d !== 0 ) { return d; } + for ( let i = 0; i < n; i++ ) { + d = a.charCodeAt(i) - b.charCodeAt(i); + if ( d !== 0 ) { return d; } + } + return 0; + }; + + const addToTree = function(rule, exception) { + let node = rootRule; + let end = rule.length; + while ( end > 0 ) { + const beg = rule.lastIndexOf('.', end - 1); + const label = rule.slice(beg + 1, end); + end = beg; + + if ( Array.isArray(node.c) === false ) { + const child = { l: label, f: 0, c: undefined }; + node.c = [ child ]; + node = child; + continue; + } + + let left = 0; + let right = node.c.length; + while ( left < right ) { + const i = left + right >>> 1; + const d = compareLabels(label, node.c[i].l); + if ( d < 0 ) { + right = i; + if ( right === left ) { + const child = { + l: label, + f: 0, + c: undefined + }; + node.c.splice(left, 0, child); + node = child; + break; + } + continue; + } + if ( d > 0 ) { + left = i + 1; + if ( left === right ) { + const child = { + l: label, + f: 0, + c: undefined + }; + node.c.splice(right, 0, child); + node = child; + break; + } + continue; + } + /* d === 0 */ + node = node.c[i]; + break; + } + } + node.f |= 0b01; + if ( exception ) { + node.f |= 0b10; + } + }; + + // 2. 
If no rules match, the prevailing rule is "*". + addToTree('*', false); + + const mustPunycode = /[^a-z0-9.-]/; + const textEnd = text.length; + let lineBeg = 0; + + while ( lineBeg < textEnd ) { + let lineEnd = text.indexOf('\n', lineBeg); + if ( lineEnd === -1 ) { + lineEnd = text.indexOf('\r', lineBeg); + if ( lineEnd === -1 ) { + lineEnd = textEnd; + } + } + let line = text.slice(lineBeg, lineEnd).trim(); + lineBeg = lineEnd + 1; + + // Ignore comments + const pos = line.indexOf('//'); + if ( pos !== -1 ) { + line = line.slice(0, pos); + } + + // Ignore surrounding whitespaces + line = line.trim(); + if ( line.length === 0 ) { continue; } + + const exception = line.charCodeAt(0) === 0x21 /* '!' */; + if ( exception ) { + line = line.slice(1); + } + + if ( mustPunycode.test(line) ) { + line = toAscii(line.toLowerCase()); + } + + addToTree(line, exception); + } + } + + { + const labelToOffsetMap = new Map(); + const treeData = []; + const charData = []; + + const allocate = function(n) { + const ibuf = treeData.length; + for ( let i = 0; i < n; i++ ) { + treeData.push(0); + } + return ibuf; + }; + + const storeNode = function(ibuf, node) { + const nChars = node.l.length; + const nChildren = node.c !== undefined + ? 
node.c.length + : 0; + treeData[ibuf+0] = nChildren << 16 | node.f << 8 | nChars; + // char data + if ( nChars <= 4 ) { + let v = 0; + if ( nChars > 0 ) { + v |= node.l.charCodeAt(0); + if ( nChars > 1 ) { + v |= node.l.charCodeAt(1) << 8; + if ( nChars > 2 ) { + v |= node.l.charCodeAt(2) << 16; + if ( nChars > 3 ) { + v |= node.l.charCodeAt(3) << 24; + } + } + } + } + treeData[ibuf+1] = v; + } else { + let offset = labelToOffsetMap.get(node.l); + if ( offset === undefined ) { + offset = charData.length; + for ( let i = 0; i < nChars; i++ ) { + charData.push(node.l.charCodeAt(i)); + } + labelToOffsetMap.set(node.l, offset); + } + treeData[ibuf+1] = offset; + } + // child nodes + if ( Array.isArray(node.c) === false ) { + treeData[ibuf+2] = 0; + return; + } + + const iarray = allocate(nChildren * 3); + treeData[ibuf+2] = iarray; + for ( let i = 0; i < nChildren; i++ ) { + storeNode(iarray + i * 3, node.c[i]); + } + }; + + // First 512 bytes are reserved for internal use + allocate(512 >> 2); + + const iRootRule = allocate(3); + storeNode(iRootRule, rootRule); + treeData[RULES_PTR_SLOT] = iRootRule; + + const iCharData = treeData.length << 2; + treeData[CHARDATA_PTR_SLOT] = iCharData; + + const byteLength = (treeData.length << 2) + (charData.length + 3 & ~3); + allocateBuffers(byteLength); + pslBuffer32.set(treeData); + pslBuffer8.set(charData, treeData.length << 2); + } + + fireChangedEvent(); +}; + +/******************************************************************************/ + +const setHostnameArg = function(hostname) { + const buf = pslBuffer8; + if ( hostname === hostnameArg ) { return buf[LABEL_INDICES_SLOT]; } + if ( hostname === null || hostname.length === 0 ) { + return (buf[LABEL_INDICES_SLOT] = 0); + } + hostname = hostname.toLowerCase(); + hostnameArg = hostname; + let n = hostname.length; + if ( n > 255 ) { n = 255; } + buf[LABEL_INDICES_SLOT] = n; + let i = n; + let j = LABEL_INDICES_SLOT + 1; + while ( i-- ) { + const c = hostname.charCodeAt(i); + 
if ( c === 0x2E /* '.' */ ) { + buf[j+0] = i + 1; + buf[j+1] = i; + j += 2; + } + buf[i] = c; + } + buf[j] = 0; + return n; +}; + +/******************************************************************************/ + +// Returns an offset to the start of the public suffix. +// +// WASM-able, because no information outside the buffer content is required. + +const getPublicSuffixPosJS = function() { + const buf8 = pslBuffer8; + const buf32 = pslBuffer32; + const iCharData = buf32[CHARDATA_PTR_SLOT]; + + let iNode = pslBuffer32[RULES_PTR_SLOT]; + let cursorPos = -1; + let iLabel = LABEL_INDICES_SLOT; + + // Label-lookup loop + for (;;) { + // Extract label indices + const labelBeg = buf8[iLabel+1]; + const labelLen = buf8[iLabel+0] - labelBeg; + // Match-lookup loop: binary search + let r = buf32[iNode+0] >>> 16; + if ( r === 0 ) { break; } + const iCandidates = buf32[iNode+2]; + let l = 0; + let iFound = 0; + while ( l < r ) { + const iCandidate = l + r >>> 1; + const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1); + const candidateLen = buf32[iCandidateNode+0] & 0x000000FF; + let d = labelLen - candidateLen; + if ( d === 0 ) { + const iCandidateChar = candidateLen <= 4 + ? iCandidateNode + 1 << 2 + : iCharData + buf32[iCandidateNode+1]; + for ( let i = 0; i < labelLen; i++ ) { + d = buf8[labelBeg+i] - buf8[iCandidateChar+i]; + if ( d !== 0 ) { break; } + } + } + if ( d < 0 ) { + r = iCandidate; + } else if ( d > 0 ) { + l = iCandidate + 1; + } else /* if ( d === 0 ) */ { + iFound = iCandidateNode; + break; + } + } + // 2. If no rules match, the prevailing rule is "*". + if ( iFound === 0 ) { + if ( buf8[iCandidates + 1 << 2] !== 0x2A /* '*' */ ) { break; } + iFound = iCandidates; + } + iNode = iFound; + // 5. If the prevailing rule is a exception rule, modify it by + // removing the leftmost label. 
+ if ( (buf32[iNode+0] & 0x00000200) !== 0 ) { + if ( iLabel > LABEL_INDICES_SLOT ) { + return iLabel - 2; + } + break; + } + if ( (buf32[iNode+0] & 0x00000100) !== 0 ) { + cursorPos = iLabel; + } + if ( labelBeg === 0 ) { break; } + iLabel += 2; + } + + return cursorPos; +}; + +let getPublicSuffixPosWASM; +let getPublicSuffixPos = getPublicSuffixPosJS; + +/******************************************************************************/ + +const getPublicSuffix = function(hostname) { + if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } + + const hostnameLen = setHostnameArg(hostname); + const buf8 = pslBuffer8; + if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { + return EMPTY_STRING; + } + + const cursorPos = getPublicSuffixPos(); + if ( cursorPos === -1 ) { + return EMPTY_STRING; + } + + const beg = buf8[cursorPos + 1]; + return beg === 0 ? hostnameArg : hostnameArg.slice(beg); +}; + +/******************************************************************************/ + +const getDomain = function(hostname) { + if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } + + const hostnameLen = setHostnameArg(hostname); + const buf8 = pslBuffer8; + if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { + return EMPTY_STRING; + } + + const cursorPos = getPublicSuffixPos(); + if ( cursorPos === -1 || buf8[cursorPos + 1] === 0 ) { + return EMPTY_STRING; + } + + // 7. The registered or registrable domain is the public suffix plus one + // additional label. + const beg = buf8[cursorPos + 3]; + return beg === 0 ? hostnameArg : hostnameArg.slice(beg); +}; + +/******************************************************************************/ + +const toSelfie = function() { + const selfie = { + magic: SELFIE_MAGIC, + byteLength: pslByteLength, + buffer: pslBuffer32 !== undefined + ? 
Array.from(new Uint32Array(pslBuffer32.buffer, 0, pslByteLength >>> 2)) + : null, + }; + return selfie; +}; + +const fromSelfie = function(selfie) { + if ( + selfie instanceof Object === false || + selfie.magic !== SELFIE_MAGIC || + typeof selfie.byteLength !== 'number' || + Array.isArray(selfie.buffer) === false + ) { + return false; + } + + allocateBuffers(selfie.byteLength); + pslBuffer32.set(selfie.buffer); + + // Important! + hostnameArg = ''; + pslBuffer8[LABEL_INDICES_SLOT] = 0; + + fireChangedEvent(); + + return true; +}; + +/******************************************************************************/ + +// The WASM module is entirely optional, the JS implementation will be +// used should the WASM module be unavailable for whatever reason. + +const enableWASM = (function() { + // The directory from which the current script was fetched should also + // contain the related WASM file. The script is fetched from a trusted + // location, and consequently so will be the related WASM file. + let workingDir; + { + const url = new URL(document.currentScript.src); + const match = /[^\/]+$/.exec(url.pathname); + if ( match !== null ) { + url.pathname = url.pathname.slice(0, match.index); + } + workingDir = url.href; + } + + let memory; + + return function() { + if ( getPublicSuffixPosWASM instanceof Function ) { + return Promise.resolve(true); + } + + if ( + typeof WebAssembly !== 'object' || + typeof WebAssembly.instantiateStreaming !== 'function' + ) { + return Promise.resolve(false); + } + + // The wasm code will work only if CPU is natively little-endian, + // as we use native uint32 array in our js code. + const uint32s = new Uint32Array(1); + const uint8s = new Uint8Array(uint32s.buffer); + uint32s[0] = 1; + if ( uint8s[0] !== 1 ) { + return Promise.resolve(false); + } + + return fetch( + workingDir + 'wasm/publicsuffixlist.wasm', + { mode: 'same-origin' } + ).then(response => { + const pageCount = pslBuffer8 !== undefined + ? 
pslBuffer8.byteLength + 0xFFFF >>> 16 + : 1; + memory = new WebAssembly.Memory({ initial: pageCount }); + return WebAssembly.instantiateStreaming( + response, + { imports: { memory: memory } } + ); + }).then(({ instance }) => { + const curPageCount = memory.buffer.byteLength; + const newPageCount = pslBuffer8 !== undefined + ? pslBuffer8.byteLength + 0xFFFF >>> 16 + : 0; + if ( newPageCount > curPageCount ) { + memory.grow(newPageCount - curPageCount); + } + const buf8 = new Uint8Array(memory.buffer); + const buf32 = new Uint32Array(memory.buffer); + if ( pslBuffer32 !== undefined ) { + buf32.set(pslBuffer32); + } + pslBuffer8 = buf8; + pslBuffer32 = buf32; + wasmMemory = memory; + getPublicSuffixPosWASM = instance.exports.getPublicSuffixPos; + getPublicSuffixPos = getPublicSuffixPosWASM; + memory = undefined; + return true; + }).catch(reason => { + console.info(reason); + return false; + }); + }; +})(); + +const disableWASM = function() { + if ( getPublicSuffixPosWASM instanceof Function ) { + getPublicSuffixPos = getPublicSuffixPosJS; + getPublicSuffixPosWASM = undefined; + } + if ( wasmMemory !== undefined ) { + const buf8 = new Uint8Array(pslByteLength); + const buf32 = new Uint32Array(buf8.buffer); + buf32.set(pslBuffer32); + pslBuffer8 = buf8; + pslBuffer32 = buf32; + wasmMemory = undefined; + } +}; + +/******************************************************************************/ + +context = context || window; + +context.publicSuffixList = { + version: '2.0', + parse, + getDomain, + getPublicSuffix, + toSelfie, fromSelfie, + disableWASM, enableWASM, +}; + +if ( typeof module !== 'undefined' ) { + module.exports = context.publicSuffixList; +} else if ( typeof exports !== 'undefined' ) { + exports = context.publicSuffixList; +} + +/******************************************************************************/ + +// <<<<<<<< end of anonymous namespace +})(this); diff --git a/src/lib/publicsuffixlist/wasm/README.md b/src/lib/publicsuffixlist/wasm/README.md 
new file mode 100644 index 000000000..5c1c4839f --- /dev/null +++ b/src/lib/publicsuffixlist/wasm/README.md @@ -0,0 +1,29 @@ +### For code reviewers + +All `wasm` files in that directory where created by compiling the +corresponding `wat` file using the command (using +`publicsuffixlist.wat`/`publicsuffixlist.wasm` as example): + + wat2wasm publicsuffixlist.wat -o publicsuffixlist.wasm + +Assuming: + +- The command is executed from within the present directory. + +### `wat2wasm` tool + +The `wat2wasm` tool can be downloaded from an official WebAssembly project: +. + +### `wat2wasm` tool online + +You can also use the following online `wat2wasm` tool: +. + +Just paste the whole content of the `wat` file to compile into the WAT pane. +Click "Download" button to retrieve the resulting `wasm` file. + +### See also + +For the curious, the following online tool allows you to find out the machine +code as a result from the WASM code: https://mbebenita.github.io/WasmExplorer/ diff --git a/src/lib/publicsuffixlist/wasm/publicsuffixlist.wasm b/src/lib/publicsuffixlist/wasm/publicsuffixlist.wasm new file mode 100644 index 0000000000000000000000000000000000000000..40e76e65025eb613fbc53afa1f722a01192dcf78 GIT binary patch literal 400 zcmW+yOHRW;47Htvq?x2mT8Xtis|u;&3Y|p(fe@9r03WreQ=&qfBG^!lz=BgDaVI?4 z%(MNT{p^AJU4sCC3Elx7XoTsidD(2+-BGh>Hro#Zj0hn;#^KYVo$nu>SC6;*<#P2t z-|PVLKf+(a(ZT&hV*&y99neuL=)OqE9VDV@G-a9qga>WBmW1&X$>y^X8OM|bed@sW zBp}eiQa@cxKy)AW?^f}@F+L~EB<-Uur6-pO-86*r`jwytOW9>`4 z40#}RH@XUY)cR8xV2oko;6f)Bk{8%zmmv#+UMQ53sVEEH=>%eT ra$XXrTI9@PII?o|swC+rTy^;A2c{qs3_C(1T`t^Dj=WHC<;eU4jyE~G literal 0 HcmV?d00001 diff --git a/src/lib/publicsuffixlist/wasm/publicsuffixlist.wat b/src/lib/publicsuffixlist/wasm/publicsuffixlist.wat new file mode 100644 index 000000000..7ae2942ba --- /dev/null +++ b/src/lib/publicsuffixlist/wasm/publicsuffixlist.wat @@ -0,0 +1,317 @@ +;; +;; uBlock Origin - a browser extension to block requests. 
+;; Copyright (C) 2019-present Raymond Hill
+;;
+;; License: pick the one which suits you:
+;;   GPL v3 see
+;;   APL v2 see
+;;
+;; Home: https://github.com/gorhill/publicsuffixlist.js
+;; File: publicsuffixlist.wat
+;;
+;; Description: WebAssembly implementation for core lookup method in
+;;              publicsuffixlist.js
+;;
+;; How to compile:
+;;
+;;     wat2wasm publicsuffixlist.wat -o publicsuffixlist.wasm
+;;
+;; The `wat2wasm` tool can be downloaded from an official WebAssembly
+;; project:
+;; https://github.com/WebAssembly/wabt/releases
+
+
+(module
+;;
+;; module start
+;;
+
+;; The module owns no memory of its own: it works on the memory object
+;; supplied by the host as imports.memory (minimum size: 1 page).
+(memory (import "imports" "memory") 1)
+
+;;
+;; Tree encoding in array buffer:
+;;
+;;  Node:
+;;  + u8: length of char data
+;;  + u8: flags => bit 0: is_publicsuffix, bit 1: is_exception
+;;  + u16: length of array of children
+;;  + u32: char data or offset to char data
+;;  + u32: offset to array of children
+;;  = 12 bytes
+;;
+;; Slots used by the JS side. The first column is the index when the buffer
+;; is viewed as 32-bit integers, the second is the equivalent byte offset,
+;; which is what the code below uses.
+;;
+;;                                      // i32 /  i8
+;; const HOSTNAME_SLOT       = 0;       // jshint ignore:line
+;; const LABEL_INDICES_SLOT  = 256;     //  -- / 256
+;; const RULES_PTR_SLOT      = 100;     // 100 / 400
+;; const CHARDATA_PTR_SLOT   = 101;     // 101 / 404
+;; const EMPTY_STRING        = '';
+;; const SELFIE_MAGIC        = 2;
+;;
+
+;;
+;; Public functions
+;;
+
+;;
+;; int getPublicSuffixPos()
+;;
+;; Walks the rule tree one hostname label at a time and reports where the
+;; public suffix starts.
+;;
+;; Inputs are read from the imported memory (byte offsets):
+;;   256..  label indices, 2 bytes per label: [+0] = label end,
+;;          [+1] = label begin. Both are offsets of hostname bytes in the
+;;          same memory. Entries are consumed in order until the one whose
+;;          label begin is 0 has been processed.
+;;   400    i32: root node, expressed in 32-bit units (converted to a byte
+;;          offset below)
+;;   404    i32: byte offset of the character data area
+;;
+;; Returns (signed i32):
+;;   - the value of $iLabel -- a byte offset into the label indices, not a
+;;     position within the hostname -- recorded at the deepest node flagged
+;;     is_publicsuffix; or
+;;   - $iLabel - 2, i.e. the label entry processed just before, when a node
+;;     flagged is_exception is reached on any entry but the first; or
+;;   - -1 when no node flagged is_publicsuffix was matched.
+;; NOTE(review): the caller presumably turns the returned entry offset into
+;; a hostname position by reading the label-begin byte -- confirm in
+;; publicsuffixlist.js.
+;;
+;; The `;;` comments inside the body mirror the JavaScript implementation.
+;; That code indexes 32-bit views (buf32) while this one works with byte
+;; offsets, hence the extra `<< 2` conversions and `offset=N` immediates.
+;;
+(func (export "getPublicSuffixPos")
+    (result i32)                    ;; result = label indices offset, -1 = miss
+    (local $iCharData i32)          ;; offset to start of character data
+    (local $iNode i32)              ;; offset to current node
+    (local $iLabel i32)             ;; offset to label indices
+    (local $cursorPos i32)          ;; $iLabel at last is_publicsuffix node, else -1
+    (local $labelBeg i32)
+    (local $labelLen i32)
+    ;; NOTE(review): $nCandidates is never read or written in this function.
+    (local $nCandidates i32)
+    (local $iCandidates i32)
+    (local $iFound i32)
+    (local $l i32)
+    (local $r i32)
+    (local $d i32)
+    (local $iCandidate i32)
+    (local $iCandidateNode i32)
+    (local $candidateLen i32)
+    (local $iCandidateChar i32)
+    (local $_1 i32)                 ;; scratch
+    (local $_2 i32)                 ;; scratch
+    (local $_3 i32)                 ;; scratch
+    ;;
+    ;; const iCharData = buf32[CHARDATA_PTR_SLOT];
+    i32.const 404
+    i32.load
+    set_local $iCharData
+    ;; let iNode = pslBuffer32[RULES_PTR_SLOT];
+    ;; (stored in 32-bit units: shift left by 2 to get a byte offset)
+    i32.const 400
+    i32.load
+    i32.const 2
+    i32.shl
+    set_local $iNode
+    ;; let iLabel = LABEL_INDICES_SLOT;
+    i32.const 256
+    set_local $iLabel
+    ;; let cursorPos = -1;
+    i32.const -1
+    set_local $cursorPos
+    ;; label-lookup loop
+    ;; for (;;) {
+    block $labelLookupDone loop $labelLookup
+        ;; // Extract label indices
+        ;; const labelBeg = buf8[iLabel+1];
+        ;; const labelLen = buf8[iLabel+0] - labelBeg;
+        get_local $iLabel
+        i32.load8_u
+        get_local $iLabel
+        i32.load8_u offset=1
+        tee_local $labelBeg
+        i32.sub
+        set_local $labelLen
+        ;; // Match-lookup loop: binary search
+        ;; let r = buf32[iNode+0] >>> 16;
+        ;; if ( r === 0 ) { break; }
+        ;; (the u16 at node offset 2 is the number of children; a node
+        ;; without children ends the walk)
+        get_local $iNode
+        i32.load16_u offset=2
+        tee_local $r
+        i32.eqz
+        br_if $labelLookupDone
+        ;; const iCandidates = buf32[iNode+2];
+        ;; (third u32 of the node, converted to a byte offset)
+        get_local $iNode
+        i32.load offset=8
+        i32.const 2
+        i32.shl
+        set_local $iCandidates
+        ;; let l = 0;
+        ;; let iFound = 0;
+        i32.const 0
+        tee_local $l
+        set_local $iFound
+        ;; while ( l < r ) {
+        ;; The search key is the length difference first and, only when the
+        ;; lengths are equal, the first differing byte.
+        block $binarySearchDone loop $binarySearch
+            get_local $l
+            get_local $r
+            i32.ge_u
+            br_if $binarySearchDone
+            ;; const iCandidate = l + r >>> 1;
+            get_local $l
+            get_local $r
+            i32.add
+            i32.const 1
+            i32.shr_u
+            tee_local $iCandidate
+            ;; const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1);
+            ;; In bytes: $_1 = iCandidate * 4, and $_1 + ($_1 << 1) is
+            ;; iCandidate * 12, i.e. one 12-byte node per candidate.
+            i32.const 2
+            i32.shl
+            tee_local $_1
+            get_local $_1
+            i32.const 1
+            i32.shl
+            i32.add
+            get_local $iCandidates
+            i32.add
+            tee_local $iCandidateNode
+            ;; const candidateLen = buf32[iCandidateNode+0] & 0x000000FF;
+            i32.load8_u
+            set_local $candidateLen
+            ;; let d = labelLen - candidateLen;
+            get_local $labelLen
+            get_local $candidateLen
+            i32.sub
+            tee_local $d
+            ;; if ( d === 0 ) {
+            i32.eqz
+            if
+                ;; const iCandidateChar = candidateLen <= 4
+                get_local $candidateLen
+                i32.const 4
+                i32.le_u
+                if
+                    ;; ? iCandidateNode + 1 << 2
+                    ;; (up to 4 chars are read from the node's second u32
+                    ;; itself, at byte offset +4)
+                    get_local $iCandidateNode
+                    i32.const 4
+                    i32.add
+                    set_local $iCandidateChar
+                else
+                    ;; : buf32[CHARDATA_PTR_SLOT] + buf32[iCandidateNode+1];
+                    ;; (otherwise that u32 is an offset into the character
+                    ;; data area)
+                    get_local $iCharData
+                    get_local $iCandidateNode
+                    i32.load offset=4
+                    i32.add
+                    set_local $iCandidateChar
+                end
+                ;; for ( let i = 0; i < labelLen; i++ ) {
+                ;; $_1 = cursor in the label, $_3 = end of the label,
+                ;; $_2 = cursor in the candidate's chars.
+                ;; NOTE(review): the end-of-label test only happens after the
+                ;; first comparison, so this loop relies on labelLen > 0.
+                get_local $labelBeg
+                tee_local $_1
+                get_local $labelLen
+                i32.add
+                set_local $_3
+                get_local $iCandidateChar
+                set_local $_2
+                block $findDiffDone loop $findDiff
+                    ;; d = buf8[labelBeg+i] - buf8[iCandidateChar+i];
+                    ;; if ( d !== 0 ) { break; }
+                    get_local $_1
+                    i32.load8_u
+                    get_local $_2
+                    i32.load8_u
+                    i32.sub
+                    tee_local $d
+                    br_if $findDiffDone
+                    get_local $_1
+                    i32.const 1
+                    i32.add
+                    tee_local $_1
+                    get_local $_3
+                    i32.eq
+                    br_if $findDiffDone
+                    get_local $_2
+                    i32.const 1
+                    i32.add
+                    set_local $_2
+                    br $findDiff
+                ;; }
+                end end
+            ;; }
+            end
+            ;; if ( d < 0 ) {
+            ;;     r = iCandidate;
+            get_local $d
+            i32.const 0
+            i32.lt_s
+            if
+                get_local $iCandidate
+                set_local $r
+                br $binarySearch
+            end
+            ;; } else if ( d > 0 ) {
+            ;;     l = iCandidate + 1;
+            get_local $d
+            i32.const 0
+            i32.gt_s
+            if
+                get_local $iCandidate
+                i32.const 1
+                i32.add
+                set_local $l
+                br $binarySearch
+            end
+            ;; } else /* if ( d === 0 ) */ {
+            ;;     iFound = iCandidateNode;
+            ;;     break;
+            ;; }
+            ;; (no branch back to $binarySearch here: execution falls off the
+            ;; end of the loop, which is the `break`)
+            get_local $iCandidateNode
+            set_local $iFound
+        end end
+        ;; }
+        ;; // 2. If no rules match, the prevailing rule is "*".
+        ;; if ( iFound === 0 ) {
+        ;;     if ( buf8[iCandidates + 1 << 2] !== 0x2A /* '*' */ ) { break; }
+        ;;     iFound = iCandidates;
+        ;; }
+        ;; (only the first char byte of the first child is tested)
+        get_local $iFound
+        i32.eqz
+        if
+            get_local $iCandidates
+            i32.load8_u offset=4
+            i32.const 0x2A
+            i32.ne
+            br_if $labelLookupDone
+            get_local $iCandidates
+            set_local $iFound
+        end
+        ;; iNode = iFound;
+        get_local $iFound
+        tee_local $iNode
+        ;; // 5. If the prevailing rule is a exception rule, modify it by
+        ;; //    removing the leftmost label.
+        ;; if ( (buf32[iNode+0] & 0x00000200) !== 0 ) {
+        ;;     if ( iLabel > LABEL_INDICES_SLOT ) {
+        ;;         return iLabel - 2;
+        ;;     }
+        ;;     break;
+        ;; }
+        ;; (the flags byte sits at node offset 1; it is kept in $_1 for the
+        ;; is_publicsuffix test further down)
+        i32.load8_u offset=1
+        tee_local $_1
+        i32.const 0x02
+        i32.and
+        if
+            get_local $iLabel
+            i32.const 256
+            i32.gt_u
+            if
+                get_local $iLabel
+                i32.const -2
+                i32.add
+                return
+            end
+            br $labelLookupDone
+        end
+        ;; if ( (buf32[iNode+0] & 0x00000100) !== 0 ) {
+        ;;     cursorPos = iLabel;
+        ;; }
+        get_local $_1
+        i32.const 0x01
+        i32.and
+        if
+            get_local $iLabel
+            set_local $cursorPos
+        end
+        ;; if ( labelBeg === 0 ) { break; }
+        get_local $labelBeg
+        i32.eqz
+        br_if $labelLookupDone
+        ;; iLabel += 2;
+        get_local $iLabel
+        i32.const 2
+        i32.add
+        set_local $iLabel
+        br $labelLookup
+    end end
+    get_local $cursorPos
+)
+
+;;
+;; module end
+;;
+)