Change compiled list format to use saner block id management

Just use self-describing, readable section identifiers instead
of difficult-to-manage arbitrary integers.
This commit is contained in:
Raymond Hill 2021-12-07 11:15:14 -05:00
parent 8309cc548e
commit 72bb89495b
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
9 changed files with 59 additions and 66 deletions

View File

@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 41, // Increase when compiled format changes
selfieMagic: 41, // Increase when selfie format changes
compiledMagic: 42, // Increase when compiled format changes
selfieMagic: 42, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
@ -189,13 +189,6 @@ const µBlock = { // jshint ignore:line
compiledFormatChanged: false,
selfieIsInvalid: false,
compiledCosmeticSection: 200,
compiledScriptletSection: 300,
compiledHTMLSection: 400,
compiledHTTPHeaderSection: 500,
compiledSentinelSection: 1000,
compiledBadSubsection: 1,
restoreBackupSettings: {
lastRestoreFile: '',
lastRestoreTime: 0,

View File

@ -38,9 +38,6 @@ const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15;
const COMPILED_SPECIFIC_SECTION = 0;
const COMPILED_GENERIC_SECTION = 1;
/******************************************************************************/
/******************************************************************************/
@ -398,7 +395,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
return;
}
writer.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION);
writer.select('COSMETIC_FILTERS:GENERIC');
const type = compiled.charCodeAt(0);
let key;
@ -501,7 +498,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
return;
}
writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
writer.select('COSMETIC_FILTERS:SPECIFIC');
// https://github.com/chrisaljoudi/uBlock/issues/497
// All generic exception filters are stored as hostname-based filter
@ -531,7 +528,7 @@ FilterContainer.prototype.compileSpecificSelector = function(
return;
}
writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
writer.select('COSMETIC_FILTERS:SPECIFIC');
// https://github.com/chrisaljoudi/uBlock/issues/145
let unhide = exception ? 1 : 0;
@ -564,13 +561,13 @@ FilterContainer.prototype.compileTemporary = function(parser) {
FilterContainer.prototype.fromCompiledContent = function(reader, options) {
if ( options.skipCosmetic ) {
this.skipCompiledContent(reader, COMPILED_SPECIFIC_SECTION);
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION);
this.skipCompiledContent(reader, 'SPECIFIC');
this.skipCompiledContent(reader, 'GENERIC');
return;
}
// Specific cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
reader.select('COSMETIC_FILTERS:SPECIFIC');
while ( reader.next() ) {
this.acceptedCount += 1;
const fingerprint = reader.fingerprint();
@ -606,12 +603,12 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
}
if ( options.skipGenericCosmetic ) {
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION);
this.skipCompiledContent(reader, 'GENERIC');
return;
}
// Generic cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION);
reader.select('COSMETIC_FILTERS:GENERIC');
while ( reader.next() ) {
this.acceptedCount += 1;
const fingerprint = reader.fingerprint();
@ -675,7 +672,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
/******************************************************************************/
FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) {
reader.select(µb.compiledCosmeticSection + sectionId);
reader.select(`COSMETIC_FILTERS:${sectionId}`);
while ( reader.next() ) {
this.acceptedCount += 1;
this.discardedCount += 1;

View File

@ -316,7 +316,7 @@ htmlFilteringEngine.compile = function(parser, writer) {
return;
}
writer.select(µb.compiledHTMLSection);
writer.select('HTML_FILTERS');
// TODO: Mind negated hostnames, they are currently discarded.
@ -345,7 +345,7 @@ htmlFilteringEngine.fromCompiledContent = function(reader) {
// Don't bother loading filters if stream filtering is not supported.
if ( µb.canFilterResponseData === false ) { return; }
reader.select(µb.compiledHTMLSection);
reader.select('HTML_FILTERS');
while ( reader.next() ) {
acceptedCount += 1;

View File

@ -90,7 +90,7 @@ httpheaderFilteringEngine.freeze = function() {
};
httpheaderFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledHTTPHeaderSection);
writer.select('HTTPHEADER_FILTERS');
const { compiled, exception } = parser.result;
const headerName = compiled.slice(15, -1);
@ -136,7 +136,7 @@ httpheaderFilteringEngine.compileTemporary = function(parser) {
// 15 -1
httpheaderFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledHTTPHeaderSection);
reader.select('HTTPHEADER_FILTERS');
while ( reader.next() ) {
acceptedCount += 1;

View File

@ -28,18 +28,18 @@
/******************************************************************************/
const reBlockStart = /^#block-start-(\d+)\n/gm;
const reBlockStart = /^#block-start-([\w:]+)\n/gm;
let listEntries = Object.create(null);
const extractBlocks = function(content, begId, endId) {
const extractBlocks = function(content, ...ids) {
reBlockStart.lastIndex = 0;
const out = [];
let match = reBlockStart.exec(content);
while ( match !== null ) {
const beg = match.index + match[0].length;
const blockId = parseInt(match[1], 10);
if ( blockId >= begId && blockId < endId ) {
const end = content.indexOf('#block-end-' + match[1], beg);
const id = match[1];
if ( ids.includes(id) ) {
const end = content.indexOf(`#block-end-${id}`, beg);
out.push(content.slice(beg, end));
reBlockStart.lastIndex = end;
}
@ -58,7 +58,7 @@ const fromNetFilter = function(details) {
for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
const content = extractBlocks(entry.content, 100, 101);
const content = extractBlocks(entry.content, 'NETWORK_FILTERS:GOOD');
let pos = 0;
for (;;) {
pos = content.indexOf(compiledFilter, pos);
@ -159,9 +159,15 @@ const fromCosmeticFilter = function(details) {
for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
let content = extractBlocks(entry.content, 200, 1000),
isProcedural,
found;
const content = extractBlocks(
entry.content,
'COSMETIC_FILTERS:GENERIC',
'COSMETIC_FILTERS:SPECIFIC',
'SCRIPTLET_FILTERS',
'HTML_FILTERS',
'HTTPHEADER_FILTERS'
);
let found;
let pos = 0;
while ( (pos = content.indexOf(needle, pos)) !== -1 ) {
let beg = content.lastIndexOf('\n', pos);
@ -216,9 +222,9 @@ const fromCosmeticFilter = function(details) {
case 8:
// HTML filtering
// Response header filtering
case 64:
case 64: {
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b010) !== 0;
const isProcedural = (fargs[2] & 0b010) !== 0;
if (
isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector
@ -237,6 +243,7 @@ const fromCosmeticFilter = function(details) {
}
found = fargs[1] + prefix + selector;
break;
}
// Scriptlet injection
case 32:
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }

View File

@ -249,7 +249,7 @@ scriptletFilteringEngine.freeze = function() {
};
scriptletFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledScriptletSection);
writer.select('SCRIPTLET_FILTERS');
// Only exception filters are allowed to be global.
const { raw, exception } = parser.result;
@ -295,7 +295,7 @@ scriptletFilteringEngine.compileTemporary = function(parser) {
// 4 -1
scriptletFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledScriptletSection);
reader.select('SCRIPTLET_FILTERS');
while ( reader.next() ) {
acceptedCount += 1;

View File

@ -85,12 +85,13 @@ class CompiledListReader {
this.line = '';
this.blocks = new Map();
this.properties = new Map();
const reBlockStart = new RegExp(`^${blockStartPrefix}(\\d+)\\n`, 'gm');
const reBlockStart = new RegExp(`^${blockStartPrefix}([\\w:]+)\\n`, 'gm');
let match = reBlockStart.exec(raw);
while ( match !== null ) {
let beg = match.index + match[0].length;
let end = raw.indexOf(blockEndPrefix + match[1], beg);
this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
const sectionId = match[1];
const beg = match.index + match[0].length;
const end = raw.indexOf(blockEndPrefix + sectionId, beg);
this.blocks.set(sectionId, raw.slice(beg, end));
reBlockStart.lastIndex = end;
match = reBlockStart.exec(raw);
}
@ -130,15 +131,6 @@ class CompiledListReader {
}
}
CompiledListWriter.prototype.NETWORK_SECTION =
CompiledListReader.prototype.NETWORK_SECTION = 100;
CompiledListWriter.blockStartPrefix =
CompiledListReader.blockStartPrefix = blockStartPrefix;
CompiledListWriter.blockEndPrefix =
CompiledListReader.blockEndPrefix = blockEndPrefix;
/******************************************************************************/
export {

View File

@ -111,10 +111,8 @@ const typeNameToTypeValue = {
'inline-font': 17 << TypeBitsOffset,
'inline-script': 18 << TypeBitsOffset,
'cname': 19 << TypeBitsOffset,
// 'unused': 20 << TypeBitsOffset,
// 'unused': 21 << TypeBitsOffset,
'webrtc': 22 << TypeBitsOffset,
'unsupported': 23 << TypeBitsOffset,
'webrtc': 20 << TypeBitsOffset,
'unsupported': 21 << TypeBitsOffset,
};
const otherTypeBitValue = typeNameToTypeValue.other;
@ -169,8 +167,6 @@ const typeValueToTypeName = [
const MAX_TOKEN_LENGTH = 7;
const COMPILED_BAD_SECTION = 1;
// Four upper bits of token hash are reserved for built-in predefined
// token hashes, which should never end up being used when tokenizing
// any arbitrary string.
@ -1779,12 +1775,10 @@ registerFilterClass(FilterCompositeAll);
const FilterHostnameDict = class {
static getCount(idata) {
const itrie = filterData[idata+1];
if ( itrie === 0 ) {
return filterRefs[filterData[idata+3]].length;
if ( itrie !== 0 ) {
return Array.from(destHNTrieContainer.trieIterator(itrie)).length;
}
return Array.from(
destHNTrieContainer.trieIterator(filterData[idata+1])
).length;
return filterRefs[filterData[idata+3]].length;
}
static match(idata) {
@ -2640,6 +2634,12 @@ class FilterCompiler {
return this;
}
start(/* writer */) {
}
finish(/* writer */) {
}
clone() {
return new FilterCompiler(this.parser, this);
}
@ -3105,8 +3105,8 @@ class FilterCompiler {
writer.select(
this.badFilter
? writer.NETWORK_SECTION + COMPILED_BAD_SECTION
: writer.NETWORK_SECTION
? 'NETWORK_FILTERS:BAD'
: 'NETWORK_FILTERS:GOOD'
);
// Reminder:
@ -3715,7 +3715,7 @@ FilterContainer.prototype.createCompiler = function(parser) {
/******************************************************************************/
FilterContainer.prototype.fromCompiled = function(reader) {
reader.select(reader.NETWORK_SECTION);
reader.select('NETWORK_FILTERS:GOOD');
while ( reader.next() ) {
this.acceptedCount += 1;
if ( this.goodFilters.has(reader.line) ) {
@ -3725,7 +3725,7 @@ FilterContainer.prototype.fromCompiled = function(reader) {
}
}
reader.select(reader.NETWORK_SECTION + COMPILED_BAD_SECTION);
reader.select('NETWORK_FILTERS:BAD');
while ( reader.next() ) {
this.badFilters.add(reader.line);
}

View File

@ -979,6 +979,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
compiler.start(writer);
while ( lineIter.eot() === false ) {
let line = lineIter.next();
@ -1013,6 +1015,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
}
}
compiler.finish(writer);
// https://github.com/uBlockOrigin/uBlock-issues/issues/1365
// Embed version into compiled list itself: it is encoded as the
// first digits followed by a whitespace.