Revisit the nodejs API

This commit is contained in:
Raymond Hill 2021-08-08 09:17:14 -04:00
parent 65f0909ba0
commit 7cd583a301
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 159 additions and 179 deletions

View File

@ -28,70 +28,69 @@ and also lists of domain names or hosts file format (i.e. block lists from [The
## Usage
At the moment, there can be only one instance of the static network filtering
engine, whose API must be imported as follows:
engine ("SNFE"), whose proxy API must be imported as follows:
```js
import { FilteringContext, pslInit, useRawLists } from '@gorhill/ubo-core';
import { StaticNetFilteringEngine } from '@gorhill/ubo-core';
```
If you must import as a NodeJS module:
```js
const { FilteringContext, pslInit, useRawLists } = await import('@gorhill/ubo-core');
const { StaticNetFilteringEngine } = await import('@gorhill/ubo-core');
```
uBO's SNFE works best with a properly initialized Public Suffix List database,
since it needs to evaluate whether a network request to match is either 1st-
or 3rd-party to the context in which it is fired:
Create an instance of SNFE:
```js
await pslInit();
const snfe = await StaticNetFilteringEngine.create();
```
Now feed the SNFE with filter lists -- `useRawLists()` accepts an array of
Feed the SNFE with filter lists -- `useLists()` accepts an array of
objects (or promises resolving to such objects) which expose the raw text of a list
through the `raw` property, and optionally the name of the list through the
`name` property (how you fetch the lists is up to you):
```js
const snfe = await useRawLists([
await snfe.useLists([
fetch('easylist').then(raw => ({ name: 'easylist', raw })),
fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })),
]);
```
`useRawLists()` returns a reference to the SNFE, which you can use later to
match network requests. First we need a filtering context instance, which is
required as an argument to match network requests:
```js
const fctxt = new FilteringContext();
```
Now we are ready to match network requests:
```js
// Not blocked
fctxt.setDocOriginFromURL('https://www.bloomberg.com/');
fctxt.setURL('https://www.bloomberg.com/tophat/assets/v2.6.1/that.css');
fctxt.setType('stylesheet');
if ( snfe.matchRequest(fctxt) !== 0 ) {
if ( snfe.matchRequest({
originURL: 'https://www.bloomberg.com/',
url: 'https://www.bloomberg.com/tophat/assets/v2.6.1/that.css',
type: 'stylesheet'
}) !== 0 ) {
console.log(snfe.toLogData());
}
// Blocked
fctxt.setDocOriginFromURL('https://www.bloomberg.com/');
fctxt.setURL('https://securepubads.g.doubleclick.net/tag/js/gpt.js');
fctxt.setType('script');
if ( snfe.matchRequest(fctxt) !== 0 ) {
if ( snfe.matchRequest({
originURL: 'https://www.bloomberg.com/',
url: 'https://securepubads.g.doubleclick.net/tag/js/gpt.js',
type: 'script'
}) !== 0 ) {
console.log(snfe.toLogData());
}
// Unblocked
fctxt.setDocOriginFromURL('https://www.bloomberg.com/');
fctxt.setURL('https://sourcepointcmp.bloomberg.com/ccpa.js');
fctxt.setType('script');
if ( snfe.matchRequest(fctxt) !== 0 ) {
if ( snfe.matchRequest({
originURL: 'https://www.bloomberg.com/',
url: 'https://sourcepointcmp.bloomberg.com/ccpa.js',
type: 'script'
}) !== 0 ) {
console.log(snfe.toLogData());
}
```
It is possible to pre-parse filter lists and save the intermediate results for
later use -- useful to speed up the loading of filter lists. This will be
documented eventually, but if you feel adventurous, you can look at the code
and use this capability now if you figure out the details.

View File

@ -51,12 +51,67 @@ function loadJSON(path) {
return JSON.parse(readFileSync(resolve(__dirname, path), 'utf8'));
}
function compileList(list, compiler, writer, options = {}) {
const lineIter = new LineIterator(list.raw);
/******************************************************************************/
/**
 * Ask both the public suffix list and the static network filtering engine
 * to enable their WebAssembly code paths.
 *
 * @returns {Promise<boolean>} `true` only when both `enableWASM()` calls
 *   resolved to exactly `true`. Any error raised along the way is logged
 *   with `console.log()` and reported as `false`.
 */
async function enableWASM() {
    // Handed to both `enableWASM()` implementations: loads `<path>.wasm.json`
    // through `require()`, wraps the result in a Uint8Array and compiles it.
    const fetchWasmModule = path => {
        const require = createRequire(import.meta.url); // jshint ignore:line
        const bytes = new Uint8Array(require(`${path}.wasm.json`));
        return globals.WebAssembly.compile(bytes);
    };

    let outcomes;
    try {
        const pslReady = globals.publicSuffixList.enableWASM(
            fetchWasmModule,
            './lib/publicsuffixlist/wasm/'
        );
        const snfeReady = snfe.enableWASM(fetchWasmModule, './js/wasm/');
        outcomes = await Promise.all([ pslReady, snfeReady ]);
    } catch(reason) {
        console.log(reason);
        return false;
    }

    // Strict comparison: a merely truthy outcome does not count as success.
    for ( const outcome of outcomes ) {
        if ( outcome !== true ) { return false; }
    }
    return true;
}
/******************************************************************************/
/**
 * Populate the public suffix list (PSL) exposed as `globals.publicSuffixList`.
 *
 * Sources are tried in this order:
 *   1. `raw`, when it is a non-blank string;
 *   2. the serialized PSL read from `build/publicsuffixlist.json`;
 *   3. the raw list loaded from `./data/effective_tld_names.json`.
 *
 * @param {string} [raw] - Text of a public suffix list.
 * @returns {object|undefined} `globals.publicSuffixList` once populated, or
 *   `undefined` when no usable source could be found.
 */
function pslInit(raw) {
    if ( typeof raw === 'string' && raw.trim() !== '' ) {
        globals.publicSuffixList.parse(raw, globals.punycode.toASCII);
        return globals.publicSuffixList;
    }

    // Use serialized version if available
    let serialized = null;
    try {
        // Use loadJSON() because require() would keep the string in memory.
        serialized = loadJSON('build/publicsuffixlist.json');
    } catch (error) {
        if ( process.env.npm_lifecycle_event !== 'install' ) {
            // This should never happen except during package installation.
            console.error(error);
        }
    }
    if ( serialized !== null ) {
        globals.publicSuffixList.fromSelfie(serialized);
        return globals.publicSuffixList;
    }

    // Last resort: the raw list shipped with the package.
    const require = createRequire(import.meta.url); // jshint ignore:line
    const bundled = require('./data/effective_tld_names.json');
    if ( typeof bundled !== 'string' || bundled.trim() === '' ) {
        console.error('Unable to populate public suffix list');
        return;
    }
    // The bundled list is raw text, so it has to be parsed like the
    // caller-supplied text above; without this the PSL is returned empty.
    globals.publicSuffixList.parse(bundled, globals.punycode.toASCII);
    return globals.publicSuffixList;
}
/******************************************************************************/
function compileList({ name, raw }, compiler, writer, options = {}) {
const lineIter = new LineIterator(raw);
const events = Array.isArray(options.events) ? options.events : undefined;
if ( list.name ) {
writer.properties.set('name', list.name);
if ( name ) {
writer.properties.set('name', name);
}
const { parser } = compiler;
@ -81,168 +136,93 @@ function compileList(list, compiler, writer, options = {}) {
});
}
}
}
/**
 * Switch the public suffix list and the static network filtering engine
 * over to their WebAssembly implementations.
 *
 * @returns {Promise<boolean>} `true` when both `enableWASM()` calls resolved
 *   to exactly `true`; `false` otherwise, including when an error was raised
 *   (the error is logged with `console.log()`).
 */
async function enableWASM() {
    // Callback given to both engines: requires `<path>.wasm.json`, wraps the
    // result in a Uint8Array and hands it to `WebAssembly.compile()`.
    const compileFromJSON = path => {
        const require = createRequire(import.meta.url); // jshint ignore:line
        const bytes = new Uint8Array(require(`${path}.wasm.json`));
        return globals.WebAssembly.compile(bytes);
    };

    let statuses;
    try {
        const pslStatus = globals.publicSuffixList.enableWASM(
            compileFromJSON,
            './lib/publicsuffixlist/wasm/'
        );
        const snfeStatus = snfe.enableWASM(compileFromJSON, './js/wasm/');
        statuses = await Promise.all([ pslStatus, snfeStatus ]);
    } catch(reason) {
        console.log(reason);
        return false;
    }

    // Only a strict `true` from each side counts as success.
    for ( const status of statuses ) {
        if ( status !== true ) { return false; }
    }
    return true;
}
/**
 * Populate the public suffix list (PSL) exposed as `globals.publicSuffixList`.
 *
 * When `raw` is a non-blank string it is parsed directly. Otherwise the
 * serialized PSL from `build/publicsuffixlist.json` is used if it can be
 * loaded, and failing that the raw list from
 * `./data/effective_tld_names.json` is parsed.
 *
 * @param {string} [raw] - Text of a public suffix list.
 * @returns {object|undefined} `globals.publicSuffixList`, or `undefined`
 *   when no usable list text could be obtained.
 */
function pslInit(raw) {
    const hasText = text => typeof text === 'string' && text.trim() !== '';

    let listText = raw;
    if ( hasText(listText) === false ) {
        const require = createRequire(import.meta.url); // jshint ignore:line

        // Prefer the serialized version when it is available.
        let selfie = null;
        try {
            // loadJSON() rather than require(): the latter would keep the
            // string in memory.
            selfie = loadJSON('build/publicsuffixlist.json');
        } catch (error) {
            // Expected to fail only while the package is being installed,
            // in which case the error is not worth reporting.
            const installing = process.env.npm_lifecycle_event === 'install';
            if ( installing === false ) {
                console.error(error);
            }
        }
        if ( selfie !== null ) {
            globals.publicSuffixList.fromSelfie(selfie);
            return globals.publicSuffixList;
        }

        listText = require('./data/effective_tld_names.json');
        if ( hasText(listText) === false ) {
            console.error('Unable to populate public suffix list');
            return;
        }
    }

    globals.publicSuffixList.parse(listText, globals.punycode.toASCII);
    return globals.publicSuffixList;
}
/**
 * Create a filter-list compiler bound to the module's filtering engine.
 *
 * @param {object} parser - Passed through unchanged to `snfe.createCompiler()`.
 * @returns {*} Whatever `snfe.createCompiler()` returns.
 */
function createCompiler(parser) {
    const compiler = snfe.createCompiler(parser);
    return compiler;
}
/**
 * Replace the engine's content with already-compiled filter lists.
 *
 * @param {Array<object|Promise<object>>} lists - Entries (or promises of
 *   entries) whose `compiled` property is fed to `CompiledListReader`.
 * @returns {Promise<object>} The module's `snfe` engine. When `lists` is not
 *   a non-empty array, the engine is only reset.
 */
async function useCompiledLists(lists) {
    // Whatever was loaded before is discarded, even if `lists` is unusable.
    reset();

    const nothingToLoad = Array.isArray(lists) === false || lists.length === 0;
    if ( nothingToLoad ) { return snfe; }

    // Each entry is fed to the engine as soon as it is available.
    const pending = Array.from(lists, entry => {
        const settled = entry instanceof Promise ? entry : Promise.resolve(entry);
        return settled.then(resolved => {
            snfe.fromCompiled(new CompiledListReader(resolved.compiled));
        });
    });
    await Promise.all(pending);

    // Commit the newly loaded filters.
    snfe.freeze();
    snfe.optimize();
    return snfe;
}
/**
 * Replace the engine's content with filters compiled from raw list text.
 *
 * @param {Array<object|Promise<object>>} lists - Entries (or promises of
 *   entries) handed to `compileList()`.
 * @param {object} [options] - Passed through unchanged to `compileList()`.
 * @returns {Promise<object>} The module's `snfe` engine. When `lists` is not
 *   a non-empty array, the engine is only reset.
 */
async function useRawLists(lists, options = {}) {
    // Whatever was loaded before is discarded, even if `lists` is unusable.
    reset();

    const nothingToLoad = Array.isArray(lists) === false || lists.length === 0;
    if ( nothingToLoad ) { return snfe; }

    // A single compiler is shared by all the lists.
    const sharedCompiler = createCompiler(new StaticFilteringParser());

    // Each list is compiled and fed to the engine as soon as it is available.
    const pending = Array.from(lists, entry => {
        const settled = entry instanceof Promise ? entry : Promise.resolve(entry);
        return settled.then(resolved => {
            const writer = new CompiledListWriter();
            compileList(resolved, sharedCompiler, writer, options);
            snfe.fromCompiled(new CompiledListReader(writer.toString()));
        });
    });
    await Promise.all(pending);

    // Commit the newly loaded filters.
    snfe.freeze();
    snfe.optimize();
    return snfe;
}
// Remove all filters from the engine by delegating to `snfe.reset()`.
// NOTE(review): `writer` is not declared anywhere in this function as shown;
// confirm that the `return` statement below really belongs to `reset()`.
function reset() {
snfe.reset();
return writer.toString();
}
/******************************************************************************/
// Module-level state.
// `pslInitialized` starts out false and is set to true once `pslInit()` has
// returned a truthy value.
let pslInitialized = false;
// The one `StaticNetFilteringEngine` instance, assigned by its constructor;
// null until then.
let staticNetFilteringEngineInstance = null;
/**
 * Replace the engine's content with the given filter lists.
 *
 * An entry whose `compiled` property is a non-empty string is loaded as is.
 * Any other entry is first compiled through `compileList()`.
 *
 * @param {Array<object|Promise<object>>} lists - Entries, or promises of
 *   entries.
 * @param {object} [options] - Passed through unchanged to `compileList()`.
 * @returns {Promise<object>} The module's `snfe` engine. When `lists` is not
 *   a non-empty array, the engine is only reset.
 */
async function useLists(lists, options = {}) {
    // Whatever was loaded before is discarded, even if `lists` is unusable.
    snfe.reset();

    const nothingToLoad = Array.isArray(lists) === false || lists.length === 0;
    if ( nothingToLoad ) { return snfe; }

    // Created on first use, then shared: not needed at all when every list
    // comes pre-compiled.
    let sharedCompiler = null;

    const toCompiledText = entry => {
        const { compiled } = entry;
        if ( typeof compiled === 'string' && compiled !== '' ) {
            return compiled;
        }
        const writer = new CompiledListWriter();
        if ( sharedCompiler === null ) {
            sharedCompiler = snfe.createCompiler(new StaticFilteringParser());
        }
        return compileList(entry, sharedCompiler, writer, options);
    };

    // Each list is fed to the engine as soon as it is available.
    const pending = Array.from(lists, entry => {
        const settled = entry instanceof Promise ? entry : Promise.resolve(entry);
        return settled.then(resolved => {
            snfe.fromCompiled(new CompiledListReader(toCompiledText(resolved)));
        });
    });
    await Promise.all(pending);

    // Commit the newly loaded filters.
    snfe.freeze();
    snfe.optimize();
    return snfe;
}
/******************************************************************************/
// Module-level filtering context, re-filled from the request details on each
// `matchRequest()` call.
const fctx = new FilteringContext();
// The one `StaticNetFilteringEngine` instance, assigned by its constructor;
// null until then.
let snfeInstance = null;
// Proxy object through which callers use the module-level static network
// filtering engine (`snfe`). Only one instance may exist: the constructor
// throws when an instance was already created.
class StaticNetFilteringEngine {
constructor() {
if ( staticNetFilteringEngineInstance !== null ) {
if ( snfeInstance !== null ) {
throw new Error('Only a single instance is supported.');
}
staticNetFilteringEngineInstance = this;
this._context = new FilteringContext();
snfeInstance = this;
}
// Load the given filter lists into the engine.
async useLists(lists) {
await useRawLists(lists);
return useLists(lists);
}
// Match a network request described by its `url`, `originURL` and `type`;
// returns whatever `snfe.matchRequest()` returns.
matchRequest({ url, originURL, type }) {
this._context.setDocOriginFromURL(originURL);
this._context.setURL(url);
this._context.setType(type);
return snfe.matchRequest(this._context);
matchRequest(details) {
return snfe.matchRequest(fctx.fromDetails(details));
}
// Forwards to `snfe.toLogData()`.
toLogData() {
return snfe.toLogData();
}
}
StaticNetFilteringEngine.initialize = async function initialize() {
if ( !pslInitialized ) {
if ( !pslInit() ) {
// Forwards to `snfe.createCompiler()`.
createCompiler(parser) {
return snfe.createCompiler(parser);
}
// Forwards all arguments to the module-level `compileList()`.
compileList(...args) {
return compileList(...args);
}
// Factory: constructs the instance, then, unless `noPSL` is `true`, runs
// `pslInit()` and throws when it returns a falsy value.
static async create({ noPSL } = {}) {
const instance = new StaticNetFilteringEngine();
if ( noPSL !== true && !pslInit() ) {
throw new Error('Failed to initialize public suffix list.');
}
pslInitialized = true;
return instance;
}
};
}
/******************************************************************************/
@ -255,11 +235,7 @@ if ( typeof module !== 'undefined' && typeof exports !== 'undefined' ) {
}
export {
FilteringContext,
StaticNetFilteringEngine,
enableWASM,
pslInit,
createCompiler,
useCompiledLists,
useRawLists,
StaticNetFilteringEngine,
};

View File

@ -52,16 +52,14 @@ async function main() {
console.log(ex);
}
await StaticNetFilteringEngine.initialize();
const engine = new StaticNetFilteringEngine();
const engine = await StaticNetFilteringEngine.create();
await engine.useLists([
fetch('easylist').then(raw => ({ name: 'easylist', raw })),
fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })),
]);
let result = null;
let result = 0;
// Tests
// Not blocked

View File

@ -150,6 +150,13 @@ const FilteringContext = class {
return this;
}
fromDetails({ originURL, url, type }) {
this.setDocOriginFromURL(originURL);
this.setURL(url);
this.setType(type);
return this;
}
duplicate() {
return (new FilteringContext(this));
}