From 17ccc4d1b94ad033e34a80d95d3d7c69e6a7aaff Mon Sep 17 00:00:00 2001 From: NoDRM Date: Mon, 15 Nov 2021 17:59:48 +0100 Subject: [PATCH] Add IETF and Adobe font deobfuscation code --- DeDRM_plugin/__init__.py | 42 ++++- DeDRM_plugin/config.py | 9 +- DeDRM_plugin/epubfontdecrypt.py | 313 ++++++++++++++++++++++++++++++++ DeDRM_plugin/prefs.py | 1 + 4 files changed, 356 insertions(+), 9 deletions(-) create mode 100644 DeDRM_plugin/epubfontdecrypt.py diff --git a/DeDRM_plugin/__init__.py b/DeDRM_plugin/__init__.py index bf2741f..892e464 100644 --- a/DeDRM_plugin/__init__.py +++ b/DeDRM_plugin/__init__.py @@ -200,6 +200,31 @@ class DeDRM(FileTypePlugin): traceback.print_exc() raise + def checkFonts(self, path_to_ebook): + # This is called after the normal DRM removal is done. + # It checks if there's fonts that need to be deobfuscated + + import calibre_plugins.dedrm.prefs as prefs + dedrmprefs = prefs.DeDRM_Prefs() + + if dedrmprefs["deobfuscate_fonts"] is True: + import calibre_plugins.dedrm.epubfontdecrypt as epubfontdecrypt + + output = self.temporary_file(".epub").name + ret = epubfontdecrypt.decryptFontsBook(path_to_ebook, output) + + if (ret == 0): + print("Font deobfuscation successful") + return output + elif (ret == 1): + print("No font obfuscation found") + return path_to_ebook + else: + print("Errors during font deobfuscation!") + raise DeDRMError("Font deobfuscation failed") + else: + return path_to_ebook + def ePubDecrypt(self,path_to_ebook): # Create a TemporaryPersistent file to work with. # Check original epub archive for zip errors. @@ -245,7 +270,7 @@ class DeDRM(FileTypePlugin): if result == 0: # Decryption was successful. # Return the modified PersistentTemporary file to calibre. 
- return of.name + return self.checkFonts(of.name) print("{0} v{1}: Failed to decrypt with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname_masked,time.time()-self.starttime)) @@ -304,7 +329,7 @@ class DeDRM(FileTypePlugin): print("{0} v{1}: Exception saving a new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION, time.time()-self.starttime)) traceback.print_exc() # Return the modified PersistentTemporary file to calibre. - return of.name + return self.checkFonts(of.name) print("{0} v{1}: Failed to decrypt with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) except Exception as e: @@ -347,7 +372,7 @@ class DeDRM(FileTypePlugin): of.close() if result == 0: print("{0} v{1}: Decrypted with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime)) - return of.name + return self.checkFonts(of.name) except: print("{0} v{1}: Exception when decrypting after {2:.1f} seconds - trying other keys".format(PLUGIN_NAME, PLUGIN_VERSION, time.time()-self.starttime)) traceback.print_exc() @@ -376,7 +401,7 @@ class DeDRM(FileTypePlugin): # Decryption was successful. # Return the modified PersistentTemporary file to calibre. print("{0} v{1}: Decrypted with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime)) - return of.name + return self.checkFonts(of.name) print("{0} v{1}: Failed to decrypt with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime)) @@ -444,7 +469,7 @@ class DeDRM(FileTypePlugin): traceback.print_exc() print("{0} v{1}: Decrypted with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) # Return the modified PersistentTemporary file to calibre. 
- return of.name + return self.checkFonts(of.name) print("{0} v{1}: Failed to decrypt with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) except Exception as e: @@ -457,9 +482,10 @@ class DeDRM(FileTypePlugin): raise DeDRMError("{0} v{1}: Ultimately failed to decrypt after {2:.1f} seconds. Read the FAQs at Harper's repository: https://github.com/apprenticeharper/DeDRM_tools/blob/master/FAQs.md".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) # Not a Barnes & Noble nor an Adobe Adept - # Import the fixed epub. + # Probably a DRM-free EPUB, but we should still check for fonts. print("{0} v{1}: “{2}” is neither an Adobe Adept nor a Barnes & Noble encrypted ePub".format(PLUGIN_NAME, PLUGIN_VERSION, os.path.basename(path_to_ebook))) - raise DeDRMError("{0} v{1}: Couldn't decrypt after {2:.1f} seconds. DRM free perhaps?".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) + return self.checkFonts(inf.name) + #raise DeDRMError("{0} v{1}: Couldn't decrypt after {2:.1f} seconds. DRM free perhaps?".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime)) def PDFDecrypt(self,path_to_ebook): import calibre_plugins.dedrm.prefs as prefs @@ -501,7 +527,7 @@ class DeDRM(FileTypePlugin): # If we end up here, we didn't find a key with a matching UUID, so lets just try all of them. - + # Attempt to decrypt epub with each encryption key (generated or provided). 
for keyname, userkeyhex in dedrmprefs['adeptkeys'].items(): userkey = codecs.decode(userkeyhex,'hex') diff --git a/DeDRM_plugin/config.py b/DeDRM_plugin/config.py index 0b7b257..73df759 100755 --- a/DeDRM_plugin/config.py +++ b/DeDRM_plugin/config.py @@ -9,7 +9,7 @@ __license__ = 'GPL v3' import os, traceback, json, codecs from PyQt5.Qt import (Qt, QWidget, QHBoxLayout, QVBoxLayout, QLabel, QLineEdit, - QGroupBox, QPushButton, QListWidget, QListWidgetItem, + QGroupBox, QPushButton, QListWidget, QListWidgetItem, QCheckBox, QAbstractItemView, QIcon, QDialog, QDialogButtonBox, QUrl, QCheckBox) @@ -51,6 +51,7 @@ class ConfigWidget(QWidget): self.tempdedrmprefs['serials'] = list(self.dedrmprefs['serials']) self.tempdedrmprefs['adobewineprefix'] = self.dedrmprefs['adobewineprefix'] self.tempdedrmprefs['kindlewineprefix'] = self.dedrmprefs['kindlewineprefix'] + self.tempdedrmprefs['deobfuscate_fonts'] = self.dedrmprefs['deobfuscate_fonts'] # Start Qt Gui dialog layout layout = QVBoxLayout(self) @@ -109,6 +110,11 @@ class ConfigWidget(QWidget): button_layout.addWidget(self.adept_button) button_layout.addWidget(self.kindle_key_button) + self.chkFontObfuscation = QtGui.QCheckBox(_("Deobfuscate EPUB fonts")) + self.chkFontObfuscation.setToolTip("Deobfuscates fonts in EPUB files after DRM removal") + self.chkFontObfuscation.setChecked(self.tempdedrmprefs["deobfuscate_fonts"]) + button_layout.addWidget(self.chkFontObfuscation) + self.resize(self.sizeHint()) def kindle_serials(self): @@ -171,6 +177,7 @@ class ConfigWidget(QWidget): self.dedrmprefs.set('adobewineprefix', self.tempdedrmprefs['adobewineprefix']) self.dedrmprefs.set('kindlewineprefix', self.tempdedrmprefs['kindlewineprefix']) self.dedrmprefs.set('configured', True) + self.dedrmprefs.set('deobfuscate_fonts', self.chkFontObfuscation.isChecked()) self.dedrmprefs.writeprefs() def load_resource(self, name): diff --git a/DeDRM_plugin/epubfontdecrypt.py b/DeDRM_plugin/epubfontdecrypt.py new file mode 100644 index 
0000000..6ddaa87
--- /dev/null
+++ b/DeDRM_plugin/epubfontdecrypt.py
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# epubfontdecrypt.py
+# Copyright © 2021 by noDRM
+
+# Released under the terms of the GNU General Public Licence, version 3
+#
+
+
+# Revision history:
+#   1 - Initial release
+
+"""
+Decrypts / deobfuscates font files in EPUB files
+"""
+
+__license__ = 'GPL v3'
+__version__ = "1"
+
+import os
+import traceback
+import zlib
+import zipfile
+from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
+from contextlib import closing
+import xml.etree.ElementTree as etree
+import itertools
+import hashlib
+import binascii
+
+
+# XML namespaces of the documents this module reads.
+NS_XMLENC = 'http://www.w3.org/2001/04/xmlenc#'
+NS_CONTAINER = 'urn:oasis:names:tc:opendocument:xmlns:container'
+NS_OPF = 'http://www.idpf.org/2007/opf'
+NS_DC = 'http://purl.org/dc/elements/1.1/'
+
+# EncryptionMethod algorithm URIs that mark an obfuscated font.
+ALGORITHM_IETF = "http://www.idpf.org/2008/embedding"
+ALGORITHM_ADOBE = "http://ns.adobe.com/pdf/enc#RC"
+
+# Number of leading bytes each scheme XORs with its key.
+OBFUSCATED_LENGTH_IETF = 1040
+OBFUSCATED_LENGTH_ADOBE = 1024
+
+# Characters removed from an identifier before a key is derived from it.
+STRIP_WHITESPACE = str.maketrans('', '', ' \t\r\n')
+
+
+def _qname(namespace, tag):
+    """Return tag in ElementTree's namespace-qualified {uri}tag form."""
+    return '{%s}%s' % (namespace, tag)
+
+
+class Decryptor(object):
+    """Deobfuscates the fonts listed in an EPUB's META-INF/encryption.xml.
+
+    obfuscationkeyIETF and obfuscationkeyAdobe are the key bytes for the two
+    supported schemes, or None if no key could be derived for a scheme.
+    encryption is the content of META-INF/encryption.xml.
+    """
+
+    def __init__(self, obfuscationkeyIETF, obfuscationkeyAdobe, encryption):
+        self.obfuscation_key_Adobe = obfuscationkeyAdobe
+        self.obfuscation_key_IETF = obfuscationkeyIETF
+
+        self._encryption = etree.fromstring(encryption)
+        self._obfuscatedIETF = set()
+        self._obfuscatedAdobe = set()
+        self._other = set()
+        self._has_remaining_xml = False
+
+        # xml.etree.ElementTree elements have no getparent(), so loop over
+        # the <EncryptedData> children of the root instead of over their
+        # <CipherReference> descendants: the parent needed to remove an
+        # entry is then always the root itself.
+        reference_path = '%s/%s' % (_qname(NS_XMLENC, 'CipherData'),
+                                    _qname(NS_XMLENC, 'CipherReference'))
+        handled = []
+        for entry in self._encryption.findall(_qname(NS_XMLENC, 'EncryptedData')):
+            reference = entry.find(reference_path)
+            path = None if reference is None else reference.get('URI', None)
+            if path is None:
+                # Nothing that can be matched to a file; keep the entry.
+                self._has_remaining_xml = True
+                continue
+
+            method = entry.find(_qname(NS_XMLENC, 'EncryptionMethod'))
+            algorithm = None if method is None else method.get('Algorithm', None)
+
+            if algorithm == ALGORITHM_IETF:
+                # Font files obfuscated with the IETF algorithm
+                self._obfuscatedIETF.add(path)
+                key = self.obfuscation_key_IETF
+            elif algorithm == ALGORITHM_ADOBE:
+                # Font files obfuscated with the Adobe algorithm.
+                self._obfuscatedAdobe.add(path)
+                key = self.obfuscation_key_Adobe
+            else:
+                # Other unsupported type.
+                self._other.add(path)
+                key = None
+
+            if key is None:
+                # The file stays obfuscated / encrypted, so its entry stays.
+                self._has_remaining_xml = True
+            else:
+                handled.append(entry)
+
+        for entry in handled:
+            self._encryption.remove(entry)
+
+    def check_if_remaining(self):
+        """Return True if encryption.xml still has entries that must be kept."""
+        return self._has_remaining_xml
+
+    def get_xml(self):
+        """Return encryption.xml without the handled entries, as a string."""
+        # ElementTree may rename namespace prefixes (ns0, ns1, ...); the
+        # result is equivalent for namespace-aware readers.
+        return ('<?xml version="1.0" encoding="UTF-8"?>\n'
+                + etree.tostring(self._encryption, encoding="unicode"))
+
+    def decompress(self, bytes):
+        """Try to inflate bytes as a raw deflate stream.
+
+        Returns (inflated data, True) on success and (bytes, False) if the
+        input is not a deflate stream.
+        """
+        dc = zlib.decompressobj(-15)
+        try:
+            decompressed_bytes = dc.decompress(bytes)
+            ex = dc.decompress(b'Z') + dc.flush()
+            if ex:
+                decompressed_bytes = decompressed_bytes + ex
+        except zlib.error:
+            # possibly not compressed by zip - just return bytes
+            return bytes, False
+        return decompressed_bytes, True
+
+    def decrypt(self, path, data):
+        """Return data deobfuscated if path is a font with a known key.
+
+        Any other file is returned unchanged.
+        """
+        if path in self._obfuscatedIETF and self.obfuscation_key_IETF is not None:
+            # de-obfuscate according to the IETF standard
+            return self._deobfuscate(self.obfuscation_key_IETF, OBFUSCATED_LENGTH_IETF, data)
+        if path in self._obfuscatedAdobe and self.obfuscation_key_Adobe is not None:
+            # de-obfuscate according to the Adobe standard
+            return self._deobfuscate(self.obfuscation_key_Adobe, OBFUSCATED_LENGTH_ADOBE, data)
+        # Not encrypted or obfuscated
+        return data
+
+    def _deobfuscate(self, key, length, data):
+        """XOR the first length bytes of data with key.
+
+        Inflating is attempted before the XOR and, if that did not work,
+        again on the result.
+        """
+        data, was_decomp = self.decompress(data)
+        out = self.deobfuscate_single_data(key, data[:length]) + data[length:]
+        if not was_decomp:
+            out, was_decomp = self.decompress(out)
+        return out
+
+    def deobfuscate_single_data(self, key, data):
+        """Return data XORed with key, repeating key as often as needed."""
+        return bytes([c ^ k for c, k in zip(data, itertools.cycle(key))])
+
+
+def _find_opf_path(inf):
+    """Return the path of the first rootfile named in container.xml, or None."""
+    try:
+        container = etree.fromstring(inf.read("META-INF/container.xml"))
+    except (KeyError, etree.ParseError):
+        return None
+    rootfiles = container.find(_qname(NS_CONTAINER, "rootfiles"))
+    if rootfiles is None:
+        return None
+    for rootfile in rootfiles.findall(_qname(NS_CONTAINER, "rootfile")):
+        path = rootfile.get("full-path", None)
+        if path is not None:
+            return path
+    return None
+
+
+def _identifiers(package):
+    """Return the <dc:identifier> elements of an OPF package (None allowed)."""
+    if package is None:
+        return []
+    metadata = package.find(_qname(NS_OPF, "metadata"))
+    if metadata is None:
+        return []
+    return metadata.findall(_qname(NS_DC, "identifier"))
+
+
+def _find_unique_identifier(package):
+    """Return (text, name) of the identifier the IETF key is derived from.
+
+    name is the package's unique-identifier attribute (None if absent) and
+    text the content of the <dc:identifier> whose id equals it. Without
+    that attribute the first identifier that has text is used. text is
+    None if no identifier qualifies.
+    """
+    name = None if package is None else package.get("unique-identifier")
+    for identifier in _identifiers(package):
+        if identifier.text and (name is None or name == identifier.get("id")):
+            return identifier.text, name
+    return None, name
+
+
+def _find_adobe_uid(package):
+    """Return the hex digits of the UUID the Adobe key is built from, or None."""
+    uid = None
+    for identifier in _identifiers(package):
+        text = identifier.text or ""
+        if identifier.get(_qname(NS_OPF, "scheme")) == "UUID":
+            uid = text[9:] if text.startswith("urn:uuid:") else text
+            break
+        if text.startswith("urn:uuid:"):
+            uid = text[9:]
+            break
+    if uid is None:
+        return None
+    uid = uid.translate(STRIP_WHITESPACE).replace('-', '')
+    if len(uid) < 16 or not all(c in "0123456789abcdefABCDEF" for c in uid):
+        # Malformed UUID, no key can be derived from it.
+        return None
+    return uid
+
+
+def _copy_zipinfo(oldzi, zi):
+    """Copy time-stamp, comment and attributes from oldzi to zi."""
+    zi.date_time = oldzi.date_time
+    zi.comment = oldzi.comment
+    zi.extra = oldzi.extra
+    zi.internal_attr = oldzi.internal_attr
+    # external attributes are dependent on the create system, so copy both.
+    zi.external_attr = oldzi.external_attr
+    zi.create_system = oldzi.create_system
+    # ZipInfo.comment is a bytes object, so its items are integers already.
+    if any(ord(c) >= 128 for c in zi.filename) or any(b >= 128 for b in zi.comment):
+        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
+        zi.flag_bits |= 0x800
+
+
+def decryptFontsBook(inpath, outpath):
+    """Write a copy of the EPUB inpath with deobfuscated fonts to outpath.
+
+    Returns 0 if outpath was written, 1 if the book has no encryption.xml or
+    no OPF (outpath is not written), and 2 if writing failed.
+    """
+    # ZipFile only closes files it opened itself, so hand it the paths.
+    with ZipFile(inpath, 'r') as inf:
+        # namelist() is kept as a list (minus duplicates) so that the
+        # entries can be written in their original order.
+        namelist = list(dict.fromkeys(inf.namelist()))
+        if 'META-INF/encryption.xml' not in namelist:
+            print("{0:s} has no obfuscated fonts".format(os.path.basename(inpath)))
+            return 1
+
+        # Font key handling:
+
+        font_master_key = None
+        adobe_master_encryption_key = None
+
+        # If path is None, we didn't find an OPF, so we probably don't have a font key.
+        # If path is set, it's the path to the main content OPF file.
+        path = _find_opf_path(inf)
+        if path is None:
+            print("No OPF for font obfuscation found")
+            return 1
+
+        try:
+            package = etree.fromstring(inf.read(path))
+        except (KeyError, etree.ParseError):
+            package = None
+
+        ## IETF font key algorithm:
+        print("Checking {0} for IETF font obfuscation keys ... ".format(path), end='')
+        identifier, secret_key_name = _find_unique_identifier(package)
+        if identifier is not None:
+            if secret_key_name is None:
+                print("found '%s'" % (identifier))
+            else:
+                print("found '%s' (%s)" % (identifier, secret_key_name))
+
+            # Trim / remove forbidden characters from the key, then hash it:
+            identifier = identifier.translate(STRIP_WHITESPACE)
+            font_master_key = hashlib.sha1(identifier.encode('utf-8')).digest()
+        else:
+            print("not found")
+
+        ## Adobe font key algorithm
+        print("Checking {0} for Adobe font obfuscation keys ... ".format(path), end='')
+        uid = _find_adobe_uid(package)
+        if uid is not None:
+            print("found '{0}'".format(uid))
+            # The key is the first 32 hex digits of the UUID written twice.
+            adobe_master_encryption_key = binascii.unhexlify((uid + uid)[:32])
+        else:
+            print("not found")
+
+        # Begin decrypting.
+
+        try:
+            encryption = inf.read('META-INF/encryption.xml')
+            decryptor = Decryptor(font_master_key, adobe_master_encryption_key, encryption)
+            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
+            with ZipFile(outpath, 'w', **kwds) as outf:
+
+                # Mimetype needs to be the first entry, so remove it from the list
+                # wherever it is, then add it at the beginning.
+                names = [name for name in namelist if name != "mimetype"]
+                if "mimetype" in namelist:
+                    names.insert(0, "mimetype")
+
+                for path in names:
+                    data = inf.read(path)
+                    zi = ZipInfo(path)
+                    zi.compress_type = ZIP_DEFLATED
+
+                    if path == "mimetype":
+                        # mimetype must not be compressed
+                        zi.compress_type = ZIP_STORED
+
+                    elif path == "META-INF/encryption.xml":
+                        # Check if there's still other entries not related to fonts
+                        if decryptor.check_if_remaining():
+                            data = decryptor.get_xml()
+                            print("There's remaining entries in encryption.xml, adding file ...")
+                        else:
+                            # No remaining entries, no need for that file.
+                            continue
+
+                    else:
+                        data = decryptor.decrypt(path, data)
+
+                    # get the file info, including time-stamp
+                    _copy_zipinfo(inf.getinfo(path), zi)
+                    outf.writestr(zi, data)
+        except Exception:
+            print("Could not decrypt fonts in {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
+            return 2
+    return 0
+
diff --git a/DeDRM_plugin/prefs.py b/DeDRM_plugin/prefs.py
index 3e8d78b..e1d8cc6 100755
--- a/DeDRM_plugin/prefs.py
+++ b/DeDRM_plugin/prefs.py
@@ -19,6 +19,7 @@ class DeDRM_Prefs():
         self.dedrmprefs = JSONConfig(JSON_PATH)
 
         self.dedrmprefs.defaults['configured'] = False
+        self.dedrmprefs.defaults['deobfuscate_fonts'] = True
         self.dedrmprefs.defaults['bandnkeys'] = {}
         self.dedrmprefs.defaults['adeptkeys'] = {}
         self.dedrmprefs.defaults['ereaderkeys'] = {}