Add some more watermark removal code
This commit is contained in:
parent
1545d76803
commit
1b391da815
|
@ -90,12 +90,9 @@ PLUGIN_VERSION = ".".join([str(x)for x in PLUGIN_VERSION_TUPLE])
|
||||||
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
|
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import sys, os, re
|
import sys, os
|
||||||
import time
|
import time
|
||||||
import zipfile
|
|
||||||
import traceback
|
import traceback
|
||||||
from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
|
|
||||||
class DeDRMError(Exception):
|
class DeDRMError(Exception):
|
||||||
|
@ -211,55 +208,31 @@ class DeDRM(FileTypePlugin):
|
||||||
# This is called after the DRM is removed (or if no DRM was present)
|
# This is called after the DRM is removed (or if no DRM was present)
|
||||||
# It does stuff like de-obfuscating fonts (by calling checkFonts)
|
# It does stuff like de-obfuscating fonts (by calling checkFonts)
|
||||||
# or removing watermarks.
|
# or removing watermarks.
|
||||||
path_to_ebook = self.checkFonts(path_to_ebook)
|
|
||||||
path_to_ebook = self.removeCDPwatermarkFromEPUB(path_to_ebook)
|
|
||||||
|
|
||||||
return path_to_ebook
|
|
||||||
|
|
||||||
def removeCDPwatermarkFromEPUB(self, path_to_ebook):
|
|
||||||
# "META-INF/cdp.info" is a watermark file used by some Tolino vendors.
|
|
||||||
# We don't want that in our eBooks, so lets remove that file.
|
|
||||||
try:
|
try:
|
||||||
infile = ZipFile(open(path_to_ebook, 'rb'))
|
import calibre_plugins.dedrm.prefs as prefs
|
||||||
namelist = infile.namelist()
|
dedrmprefs = prefs.DeDRM_Prefs()
|
||||||
if 'META-INF/cdp.info' not in namelist:
|
|
||||||
|
if dedrmprefs["deobfuscate_fonts"] is True:
|
||||||
|
# Deobfuscate fonts
|
||||||
|
path_to_ebook = self.checkFonts(path_to_ebook) or path_to_ebook
|
||||||
|
|
||||||
|
if dedrmprefs["remove_watermarks"] is True:
|
||||||
|
import calibre_plugins.dedrm.epubwatermark as watermark
|
||||||
|
|
||||||
|
# Remove Tolino's CDP watermark file
|
||||||
|
path_to_ebook = watermark.removeCDPwatermark(self, path_to_ebook) or path_to_ebook
|
||||||
|
|
||||||
|
# Remove watermarks (currently just Amazon) from the OPF file
|
||||||
|
path_to_ebook = watermark.removeOPFwatermarks(self, path_to_ebook) or path_to_ebook
|
||||||
|
|
||||||
|
# Remove watermarks (currently just Adobe's resource ID) from all HTML and XHTML files
|
||||||
|
path_to_ebook = watermark.removeHTMLwatermarks(self, path_to_ebook) or path_to_ebook
|
||||||
|
|
||||||
return path_to_ebook
|
return path_to_ebook
|
||||||
|
|
||||||
namelist.remove("mimetype")
|
|
||||||
namelist.remove("META-INF/cdp.info")
|
|
||||||
|
|
||||||
output = self.temporary_file(".epub").name
|
|
||||||
|
|
||||||
kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
|
|
||||||
with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
|
|
||||||
for path in (["mimetype"] + namelist):
|
|
||||||
|
|
||||||
data = infile.read(path)
|
|
||||||
|
|
||||||
zi = ZipInfo(path)
|
|
||||||
oldzi = infile.getinfo(path)
|
|
||||||
try:
|
|
||||||
zi.compress_type = oldzi.compress_type
|
|
||||||
if path == "mimetype":
|
|
||||||
zi.compress_type = ZIP_STORED
|
|
||||||
zi.date_time = oldzi.date_time
|
|
||||||
zi.comment = oldzi.comment
|
|
||||||
zi.extra = oldzi.extra
|
|
||||||
zi.internal_attr = oldzi.internal_attr
|
|
||||||
zi.external_attr = oldzi.external_attr
|
|
||||||
zi.create_system = oldzi.create_system
|
|
||||||
if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
|
|
||||||
# If the file name or the comment contains any non-ASCII char, set the UTF8-flag
|
|
||||||
zi.flag_bits |= 0x800
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
outf.writestr(zi, data)
|
|
||||||
|
|
||||||
print("{0} v{1}: Successfully removed cdp.info watermark".format(PLUGIN_NAME, PLUGIN_VERSION))
|
|
||||||
return output
|
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
print("Error while checking settings")
|
||||||
return path_to_ebook
|
return path_to_ebook
|
||||||
|
|
||||||
def checkFonts(self, path_to_ebook):
|
def checkFonts(self, path_to_ebook):
|
||||||
|
@ -267,10 +240,6 @@ class DeDRM(FileTypePlugin):
|
||||||
# It checks if there's fonts that need to be deobfuscated
|
# It checks if there's fonts that need to be deobfuscated
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import calibre_plugins.dedrm.prefs as prefs
|
|
||||||
dedrmprefs = prefs.DeDRM_Prefs()
|
|
||||||
|
|
||||||
if dedrmprefs["deobfuscate_fonts"] is True:
|
|
||||||
import calibre_plugins.dedrm.epubfontdecrypt as epubfontdecrypt
|
import calibre_plugins.dedrm.epubfontdecrypt as epubfontdecrypt
|
||||||
|
|
||||||
output = self.temporary_file(".epub").name
|
output = self.temporary_file(".epub").name
|
||||||
|
@ -283,10 +252,10 @@ class DeDRM(FileTypePlugin):
|
||||||
else:
|
else:
|
||||||
print("{0} v{1}: Error during font deobfuscation".format(PLUGIN_NAME, PLUGIN_VERSION))
|
print("{0} v{1}: Error during font deobfuscation".format(PLUGIN_NAME, PLUGIN_VERSION))
|
||||||
raise DeDRMError("Font deobfuscation failed")
|
raise DeDRMError("Font deobfuscation failed")
|
||||||
else:
|
|
||||||
return path_to_ebook
|
|
||||||
except:
|
except:
|
||||||
print("{0} v{1}: Error during font deobfuscation".format(PLUGIN_NAME, PLUGIN_VERSION))
|
print("{0} v{1}: Error during font deobfuscation".format(PLUGIN_NAME, PLUGIN_VERSION))
|
||||||
|
traceback.print_exc()
|
||||||
return path_to_ebook
|
return path_to_ebook
|
||||||
|
|
||||||
def ePubDecrypt(self,path_to_ebook):
|
def ePubDecrypt(self,path_to_ebook):
|
||||||
|
|
|
@ -83,6 +83,7 @@ class ConfigWidget(QWidget):
|
||||||
self.tempdedrmprefs['adobewineprefix'] = self.dedrmprefs['adobewineprefix']
|
self.tempdedrmprefs['adobewineprefix'] = self.dedrmprefs['adobewineprefix']
|
||||||
self.tempdedrmprefs['kindlewineprefix'] = self.dedrmprefs['kindlewineprefix']
|
self.tempdedrmprefs['kindlewineprefix'] = self.dedrmprefs['kindlewineprefix']
|
||||||
self.tempdedrmprefs['deobfuscate_fonts'] = self.dedrmprefs['deobfuscate_fonts']
|
self.tempdedrmprefs['deobfuscate_fonts'] = self.dedrmprefs['deobfuscate_fonts']
|
||||||
|
self.tempdedrmprefs['remove_watermarks'] = self.dedrmprefs['remove_watermarks']
|
||||||
|
|
||||||
# Start Qt Gui dialog layout
|
# Start Qt Gui dialog layout
|
||||||
layout = QVBoxLayout(self)
|
layout = QVBoxLayout(self)
|
||||||
|
@ -146,6 +147,11 @@ class ConfigWidget(QWidget):
|
||||||
self.chkFontObfuscation.setChecked(self.tempdedrmprefs["deobfuscate_fonts"])
|
self.chkFontObfuscation.setChecked(self.tempdedrmprefs["deobfuscate_fonts"])
|
||||||
button_layout.addWidget(self.chkFontObfuscation)
|
button_layout.addWidget(self.chkFontObfuscation)
|
||||||
|
|
||||||
|
self.chkRemoveWatermarks = QtGui.QCheckBox(_("Remove watermarks"))
|
||||||
|
self.chkRemoveWatermarks.setToolTip("Tries to remove watermarks from files")
|
||||||
|
self.chkRemoveWatermarks.setChecked(self.tempdedrmprefs["remove_watermarks"])
|
||||||
|
button_layout.addWidget(self.chkRemoveWatermarks)
|
||||||
|
|
||||||
self.resize(self.sizeHint())
|
self.resize(self.sizeHint())
|
||||||
|
|
||||||
def kindle_serials(self):
|
def kindle_serials(self):
|
||||||
|
@ -209,6 +215,7 @@ class ConfigWidget(QWidget):
|
||||||
self.dedrmprefs.set('kindlewineprefix', self.tempdedrmprefs['kindlewineprefix'])
|
self.dedrmprefs.set('kindlewineprefix', self.tempdedrmprefs['kindlewineprefix'])
|
||||||
self.dedrmprefs.set('configured', True)
|
self.dedrmprefs.set('configured', True)
|
||||||
self.dedrmprefs.set('deobfuscate_fonts', self.chkFontObfuscation.isChecked())
|
self.dedrmprefs.set('deobfuscate_fonts', self.chkFontObfuscation.isChecked())
|
||||||
|
self.dedrmprefs.set('remove_watermarks', self.chkRemoveWatermarks.isChecked())
|
||||||
self.dedrmprefs.writeprefs()
|
self.dedrmprefs.writeprefs()
|
||||||
|
|
||||||
def load_resource(self, name):
|
def load_resource(self, name):
|
||||||
|
|
|
@ -0,0 +1,244 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# epubwatermark.py
|
||||||
|
# Copyright © 2021 NoDRM
|
||||||
|
|
||||||
|
# Revision history:
|
||||||
|
# 1.0 - Initial version
|
||||||
|
|
||||||
|
# Released under the terms of the GNU General Public Licence, version 3
|
||||||
|
# <http://www.gnu.org/licenses/>
|
||||||
|
|
||||||
|
"""
|
||||||
|
Removes various watermarks from EPUB files
|
||||||
|
"""
|
||||||
|
|
||||||
|
import traceback
|
||||||
|
from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
|
||||||
|
from contextlib import closing
|
||||||
|
from lxml import etree
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Runs a RegEx over all HTML/XHTML files to remove watermarks.
|
||||||
|
def removeHTMLwatermarks(object, path_to_ebook):
    """Strip Adobe ADEPT resource-ID ``<meta>`` tags from an EPUB's HTML/XHTML files.

    Every archive member whose name ends in ``.html`` or ``.xhtml`` is decoded
    as UTF-8 and searched for self-closing ``<meta>`` tags named
    ``Adept.resource`` or ``Adept.expected.resource`` that carry a
    ``urn:uuid:`` value (in either attribute order). If at least one member
    changes, the whole archive is rewritten to a new temporary file with the
    cleaned members substituted.

    Args:
        object: Plugin instance. Only ``object.temporary_file(".epub").name``
            is used, to obtain the path of the repackaged EPUB.
        path_to_ebook: Filesystem path of the EPUB to inspect.

    Returns:
        The path of the repackaged EPUB if any member was modified, otherwise
        ``path_to_ebook`` unchanged. Any failure is reported through
        ``traceback.print_exc()`` and also returns ``path_to_ebook``.
    """
    try:
        # Opening by path inside ``with`` guarantees the input handle is
        # released on every return path.
        with ZipFile(path_to_ebook, 'r') as inf:
            namelist = inf.namelist()

            # Member name -> rewritten text, for members that actually changed.
            modified = {}

            for file in namelist:
                if not file.endswith(('.html', '.xhtml')):
                    continue

                try:
                    file_str = inf.read(file).decode("utf-8")

                    # Remove Adobe ADEPT watermarks.
                    # Match optional newline at the beginning, then a "meta" tag with
                    # name = "Adept.expected.resource" or "Adept.resource" and either
                    # a "value" or a "content" attribute with an Adobe UUID.
                    str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>', '', file_str)
                    str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>', '', str_new)
                except Exception:
                    # Best effort: a member that cannot be read or decoded is
                    # left untouched and the scan continues with the next one.
                    traceback.print_exc()
                    continue

                if file_str != str_new:
                    modified[file] = str_new

            if not modified:
                # No file modified, return original
                return path_to_ebook

            # Re-package with modified files. "mimetype" is pulled out so it can
            # be written first; a missing entry raises ValueError, which is
            # handled below by returning the original book.
            namelist.remove("mimetype")

            output = object.temporary_file(".epub").name
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with ZipFile(output, 'w', **kwds) as outf:
                for path in ["mimetype"] + namelist:
                    data = modified.get(path)
                    if data is None:
                        data = inf.read(path)

                    # Carry the original entry's metadata over to the new entry.
                    oldzi = inf.getinfo(path)
                    zi = ZipInfo(path)
                    # The mimetype entry is always written uncompressed.
                    zi.compress_type = ZIP_STORED if path == "mimetype" else oldzi.compress_type
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    # If the file name or the comment contains any non-ASCII char,
                    # set the UTF8-flag. The comment is bytes, so its items are
                    # already integers and must not go through ord().
                    if any(ord(c) >= 128 for c in path) or any(b >= 128 for b in zi.comment):
                        zi.flag_bits |= 0x800

                    outf.writestr(zi, data)

    except Exception:
        traceback.print_exc()
        return path_to_ebook

    print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(len(modified)))
    return output
|
||||||
|
|
||||||
|
|
||||||
|
# Finds the main OPF file, then uses RegEx to remove watermarks
|
||||||
|
def removeOPFwatermarks(object, path_to_ebook):
    """Strip Amazon hex watermark ``<meta>`` tags from an EPUB's main OPF file.

    The OPF is located through the first ``rootfile`` element in
    ``META-INF/container.xml`` that has a ``full-path`` attribute. Its text is
    decoded as UTF-8 and searched for self-closing ``<meta>`` tags named
    ``Watermark`` or ``Watermark_(hex)`` with a hexadecimal ``content`` value
    (in either attribute order). If the OPF changes, the whole archive is
    rewritten to a new temporary file with the cleaned OPF substituted.

    Args:
        object: Plugin instance. Only ``object.temporary_file(".epub").name``
            is used, to obtain the path of the repackaged EPUB.
        path_to_ebook: Filesystem path of the EPUB to inspect.

    Returns:
        The path of the repackaged EPUB if a watermark was removed, otherwise
        ``path_to_ebook`` unchanged. Any failure (including an archive with no
        ``mimetype`` entry) is reported through ``traceback.print_exc()`` and
        also returns ``path_to_ebook``.
    """
    container_ns = 'urn:oasis:names:tc:opendocument:xmlns:container'

    try:
        # Opening by path inside ``with`` guarantees the input handle is
        # released on every return path.
        with ZipFile(path_to_ebook, 'r') as inf:
            container = etree.fromstring(inf.read("META-INF/container.xml"))
            rootfiles = container.find('{%s}rootfiles' % container_ns).findall('{%s}rootfile' % container_ns)

            # Path of the main content OPF file, or None when no rootfile
            # declares one.
            opf_path = None
            for rootfile in rootfiles:
                opf_path = rootfile.get("full-path", None)
                if opf_path is not None:
                    break

            if opf_path is None:
                # No OPF found - no watermark
                return path_to_ebook

            opf_str = inf.read(opf_path).decode("utf-8")

            # Remove Amazon hex watermarks.
            # Match optional newline at the beginning, then spaces, then a "meta"
            # tag with name = "Watermark" or "Watermark_(hex)" and a "content"
            # attribute.
            opf_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"Watermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', opf_str)
            opf_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"Watermark(_\(hex\))?\"\s*\/>', '', opf_str_new)

            if opf_str == opf_str_new:
                # OPF didn't change - no watermark
                return path_to_ebook

            # Re-package without watermark. "mimetype" is pulled out so it can
            # be written first; this now happens inside the try block so that a
            # missing entry returns the original book instead of raising
            # ValueError to the caller.
            namelist = inf.namelist()
            namelist.remove("mimetype")

            output = object.temporary_file(".epub").name
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with ZipFile(output, 'w', **kwds) as outf:
                for path in ["mimetype"] + namelist:
                    if path == opf_path:
                        # Found OPF, replacing ...
                        data = opf_str_new
                    else:
                        data = inf.read(path)

                    # Carry the original entry's metadata over to the new entry.
                    oldzi = inf.getinfo(path)
                    zi = ZipInfo(path)
                    # The mimetype entry is always written uncompressed.
                    zi.compress_type = ZIP_STORED if path == "mimetype" else oldzi.compress_type
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    # If the file name or the comment contains any non-ASCII char,
                    # set the UTF8-flag. The comment is bytes, so its items are
                    # already integers and must not go through ord().
                    if any(ord(c) >= 128 for c in path) or any(b >= 128 for b in zi.comment):
                        zi.flag_bits |= 0x800

                    outf.writestr(zi, data)

    except Exception:
        traceback.print_exc()
        return path_to_ebook

    print("Watermark: Successfully stripped Amazon watermark from OPF file.")
    return output
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def removeCDPwatermark(object, path_to_ebook):
    """Remove the ``META-INF/cdp.info`` watermark file from an EPUB.

    "META-INF/cdp.info" is a watermark file used by some Tolino vendors.
    If the archive contains it, every other member is copied into a new
    temporary EPUB and that file is left out.

    Args:
        object: Plugin instance. Only ``object.temporary_file(".epub").name``
            is used, to obtain the path of the repackaged EPUB.
        path_to_ebook: Filesystem path of the EPUB to inspect.

    Returns:
        The path of the repackaged EPUB if ``META-INF/cdp.info`` was present,
        otherwise ``path_to_ebook`` unchanged. Any failure is reported through
        ``traceback.print_exc()`` and also returns ``path_to_ebook``.
    """
    try:
        # Opening by path inside ``with`` guarantees the input handle is
        # released on every return path.
        with ZipFile(path_to_ebook, 'r') as infile:
            namelist = infile.namelist()
            if 'META-INF/cdp.info' not in namelist:
                return path_to_ebook

            # "mimetype" is pulled out so it can be written first; a missing
            # entry raises ValueError, which is handled below by returning the
            # original book.
            namelist.remove("mimetype")
            namelist.remove("META-INF/cdp.info")

            output = object.temporary_file(".epub").name

            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with ZipFile(output, 'w', **kwds) as outf:
                for path in ["mimetype"] + namelist:
                    data = infile.read(path)

                    # Carry the original entry's metadata over to the new entry.
                    oldzi = infile.getinfo(path)
                    zi = ZipInfo(path)
                    # The mimetype entry is always written uncompressed.
                    zi.compress_type = ZIP_STORED if path == "mimetype" else oldzi.compress_type
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    # If the file name or the comment contains any non-ASCII char,
                    # set the UTF8-flag. The comment is bytes, so its items are
                    # already integers and must not go through ord().
                    if any(ord(c) >= 128 for c in path) or any(b >= 128 for b in zi.comment):
                        zi.flag_bits |= 0x800

                    outf.writestr(zi, data)

        print("Watermark: Successfully removed cdp.info watermark")
        return output

    except Exception:
        traceback.print_exc()
        return path_to_ebook
|
|
@ -20,6 +20,7 @@ class DeDRM_Prefs():
|
||||||
|
|
||||||
self.dedrmprefs.defaults['configured'] = False
|
self.dedrmprefs.defaults['configured'] = False
|
||||||
self.dedrmprefs.defaults['deobfuscate_fonts'] = True
|
self.dedrmprefs.defaults['deobfuscate_fonts'] = True
|
||||||
|
self.dedrmprefs.defaults['remove_watermarks'] = False
|
||||||
self.dedrmprefs.defaults['bandnkeys'] = {}
|
self.dedrmprefs.defaults['bandnkeys'] = {}
|
||||||
self.dedrmprefs.defaults['adeptkeys'] = {}
|
self.dedrmprefs.defaults['adeptkeys'] = {}
|
||||||
self.dedrmprefs.defaults['ereaderkeys'] = {}
|
self.dedrmprefs.defaults['ereaderkeys'] = {}
|
||||||
|
|
Loading…
Reference in New Issue