Add IETF and Adobe font deobfuscation code

2021-11-15 17:59:48 +01:00 · 2021-11-15 17:59:48 +01:00 · 17ccc4d1b9
parent 30425c1ec8
commit 17ccc4d1b9
4 changed files with 356 additions and 9 deletions
--- a/DeDRM_plugin/init.py
+++ b/DeDRM_plugin/init.py
@ -200,6 +200,31 @@ class DeDRM(FileTypePlugin):
            traceback.print_exc()
            raise

+    def checkFonts(self, path_to_ebook):
+        """Run the optional font deobfuscation pass on an EPUB.
+
+        Called after the normal DRM removal is done. When the
+        "deobfuscate_fonts" preference is not exactly True the input path is
+        returned untouched. Otherwise the book is handed to
+        epubfontdecrypt.decryptFontsBook(), which writes to a fresh temporary
+        ".epub" file.
+
+        Returns the temporary file's path when decryptFontsBook() reports 0,
+        and the original path when it reports 1. Any other status raises
+        DeDRMError.
+        """
+        import calibre_plugins.dedrm.prefs as prefs
+
+        # Guard clause: feature switched off, nothing to do.
+        if prefs.DeDRM_Prefs()["deobfuscate_fonts"] is not True:
+            return path_to_ebook
+
+        import calibre_plugins.dedrm.epubfontdecrypt as epubfontdecrypt
+
+        deobfuscated_path = self.temporary_file(".epub").name
+        status = epubfontdecrypt.decryptFontsBook(path_to_ebook, deobfuscated_path)
+
+        if status == 0:
+            print("Font deobfuscation successful")
+            return deobfuscated_path
+
+        if status == 1:
+            print("No font obfuscation found")
+            return path_to_ebook
+
+        print("Errors during font deobfuscation!")
+        raise DeDRMError("Font deobfuscation failed")
+
    def ePubDecrypt(self,path_to_ebook):
        # Create a TemporaryPersistent file to work with.
        # Check original epub archive for zip errors.
@ -245,7 +270,7 @@ class DeDRM(FileTypePlugin):
                if  result == 0:
                    # Decryption was successful.
                    # Return the modified PersistentTemporary file to calibre.
-                    return of.name
+                    return self.checkFonts(of.name)

                print("{0} v{1}: Failed to decrypt with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname_masked,time.time()-self.starttime))

@ -304,7 +329,7 @@ class DeDRM(FileTypePlugin):
                                print("{0} v{1}: Exception saving a new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION, time.time()-self.starttime))
                                traceback.print_exc()
                            # Return the modified PersistentTemporary file to calibre.
-                            return of.name
+                            return self.checkFonts(of.name)

                        print("{0} v{1}: Failed to decrypt with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))
                except Exception as e:
@ -347,7 +372,7 @@ class DeDRM(FileTypePlugin):
                        of.close()
                        if result == 0:
                            print("{0} v{1}: Decrypted with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime))
-                            return of.name
+                            return self.checkFonts(of.name)
                    except:
                        print("{0} v{1}: Exception when decrypting after {2:.1f} seconds - trying other keys".format(PLUGIN_NAME, PLUGIN_VERSION, time.time()-self.starttime))
                        traceback.print_exc()
@ -376,7 +401,7 @@ class DeDRM(FileTypePlugin):
                    # Decryption was successful.
                    # Return the modified PersistentTemporary file to calibre.
                    print("{0} v{1}: Decrypted with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime))
-                    return of.name
+                    return self.checkFonts(of.name)

                print("{0} v{1}: Failed to decrypt with key {2:s} after {3:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,keyname,time.time()-self.starttime))

@ -444,7 +469,7 @@ class DeDRM(FileTypePlugin):
                                traceback.print_exc()
                            print("{0} v{1}: Decrypted with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))
                            # Return the modified PersistentTemporary file to calibre.
-                            return of.name
+                            return self.checkFonts(of.name)

                        print("{0} v{1}: Failed to decrypt with new default key after {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))
                except Exception as e:
@ -457,9 +482,10 @@ class DeDRM(FileTypePlugin):
            raise DeDRMError("{0} v{1}: Ultimately failed to decrypt after {2:.1f} seconds. Read the FAQs at Harper's repository: https://github.com/apprenticeharper/DeDRM_tools/blob/master/FAQs.md".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))

        # Not a Barnes & Noble nor an Adobe Adept
-        # Import the fixed epub.
+        # Probably a DRM-free EPUB, but we should still check for fonts.
        print("{0} v{1}: “{2}” is neither an Adobe Adept nor a Barnes & Noble encrypted ePub".format(PLUGIN_NAME, PLUGIN_VERSION, os.path.basename(path_to_ebook)))
-        raise DeDRMError("{0} v{1}: Couldn't decrypt after {2:.1f} seconds. DRM free perhaps?".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))
+        return self.checkFonts(inf.name)
+        #raise DeDRMError("{0} v{1}: Couldn't decrypt after {2:.1f} seconds. DRM free perhaps?".format(PLUGIN_NAME, PLUGIN_VERSION,time.time()-self.starttime))

    def PDFDecrypt(self,path_to_ebook):
        import calibre_plugins.dedrm.prefs as prefs
--- a/DeDRM_plugin/config.py
+++ b/DeDRM_plugin/config.py
@ -9,7 +9,7 @@ __license__ = 'GPL v3'
 import os, traceback, json, codecs

 from PyQt5.Qt import (Qt, QWidget, QHBoxLayout, QVBoxLayout, QLabel, QLineEdit,
-                      QGroupBox, QPushButton, QListWidget, QListWidgetItem,
+                      QGroupBox, QPushButton, QListWidget, QListWidgetItem, QCheckBox,
                      QAbstractItemView, QIcon, QDialog, QDialogButtonBox, QUrl, 
                      QCheckBox)

@ -51,6 +51,7 @@ class ConfigWidget(QWidget):
        self.tempdedrmprefs['serials'] = list(self.dedrmprefs['serials'])
        self.tempdedrmprefs['adobewineprefix'] = self.dedrmprefs['adobewineprefix']
        self.tempdedrmprefs['kindlewineprefix'] = self.dedrmprefs['kindlewineprefix']
+        self.tempdedrmprefs['deobfuscate_fonts'] = self.dedrmprefs['deobfuscate_fonts']

        # Start Qt Gui dialog layout
        layout = QVBoxLayout(self)
@ -109,6 +110,11 @@ class ConfigWidget(QWidget):
        button_layout.addWidget(self.adept_button)
        button_layout.addWidget(self.kindle_key_button)

+        self.chkFontObfuscation = QtGui.QCheckBox(_("Deobfuscate EPUB fonts"))
+        self.chkFontObfuscation.setToolTip("Deobfuscates fonts in EPUB files after DRM removal")
+        self.chkFontObfuscation.setChecked(self.tempdedrmprefs["deobfuscate_fonts"])
+        button_layout.addWidget(self.chkFontObfuscation)
+
        self.resize(self.sizeHint())

    def kindle_serials(self):
@ -171,6 +177,7 @@ class ConfigWidget(QWidget):
        self.dedrmprefs.set('adobewineprefix', self.tempdedrmprefs['adobewineprefix'])
        self.dedrmprefs.set('kindlewineprefix', self.tempdedrmprefs['kindlewineprefix'])
        self.dedrmprefs.set('configured', True)
+        self.dedrmprefs.set('deobfuscate_fonts', self.chkFontObfuscation.isChecked())
        self.dedrmprefs.writeprefs()

    def load_resource(self, name):
--- a/DeDRM_plugin/epubfontdecrypt.py
+++ b/DeDRM_plugin/epubfontdecrypt.py
@ -0,0 +1,313 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# epubfontdecrypt.py
+# Copyright © 2021 by noDRM
+
+# Released under the terms of the GNU General Public Licence, version 3
+# <http://www.gnu.org/licenses/>
+
+
+# Revision history:
+#   1 - Initial release
+
+"""
+Decrypts / deobfuscates font files in EPUB files
+"""
+
+__license__ = 'GPL v3'
+__version__ = "1"
+
+import os
+import traceback
+import zlib
+import zipfile
+from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
+from contextlib import closing
+import xml.etree.ElementTree as etree
+import itertools
+import hashlib
+import binascii
+
+
+class Decryptor(object):
+    """De-obfuscates font files listed in an EPUB's "META-INF/encryption.xml".
+
+    The constructor parses the encryption manifest and sorts every referenced
+    path into one of three groups: fonts obfuscated with the IETF algorithm,
+    fonts obfuscated with the Adobe algorithm, and entries that use any other
+    algorithm. Entries that can be handled with the supplied keys are removed
+    from the parsed manifest; everything else is kept so that it can be
+    written back with get_xml().
+
+    Only calls that exist in the standard-library ElementTree are used: this
+    module imports xml.etree.ElementTree, which offers neither
+    Element.getparent() nor tostring(pretty_print=...).
+    """
+
+    _ALGORITHM_IETF = "http://www.idpf.org/2008/embedding"
+    _ALGORITHM_ADOBE = "http://ns.adobe.com/pdf/enc#RC"
+    # Number of leading bytes each algorithm XORs with its key.
+    _SPAN_IETF = 1040
+    _SPAN_ADOBE = 1024
+
+    def __init__(self, obfuscationkeyIETF, obfuscationkeyAdobe, encryption):
+        """Parse the manifest and classify every referenced file.
+
+        obfuscationkeyIETF  -- key bytes for the IETF algorithm, or None
+        obfuscationkeyAdobe -- key bytes for the Adobe algorithm, or None
+        encryption          -- raw content of META-INF/encryption.xml
+        """
+        enc = lambda tag: '{%s}%s' % ('http://www.w3.org/2001/04/xmlenc#', tag)
+        self.obfuscation_key_Adobe = obfuscationkeyAdobe
+        self.obfuscation_key_IETF = obfuscationkeyIETF
+
+        self._encryption = etree.fromstring(encryption)
+        # Paths are stored UTF-8 encoded; decrypt() encodes before looking up.
+        self._obfuscatedIETF = set()
+        self._obfuscatedAdobe = set()
+        self._other = set()
+
+        self._json_elements_to_remove = []
+        self._has_remaining_xml = False
+
+        reference_expr = './%s/%s' % (enc('CipherData'), enc('CipherReference'))
+
+        # Walk the <EncryptedData> children of the root directly, so the
+        # owning element is always at hand without needing getparent().
+        for encrypted_data in self._encryption.findall(enc('EncryptedData')):
+            method = encrypted_data.find(enc('EncryptionMethod'))
+            algorithm = method.get('Algorithm', None) if method is not None else None
+
+            if algorithm == self._ALGORITHM_IETF:
+                bucket, key = self._obfuscatedIETF, self.obfuscation_key_IETF
+            elif algorithm == self._ALGORITHM_ADOBE:
+                bucket, key = self._obfuscatedAdobe, self.obfuscation_key_Adobe
+            else:
+                # Other unsupported type: always stays in the manifest.
+                bucket, key = self._other, None
+
+            has_path = False
+            for reference in encrypted_data.findall(reference_expr):
+                path = reference.get('URI', None)
+                if path is None:
+                    continue
+                bucket.add(path.encode('utf-8'))
+                has_path = True
+
+            if not has_path:
+                # Entries without a usable URI are neither counted nor removed.
+                continue
+
+            if key is None:
+                self._has_remaining_xml = True
+            else:
+                self._json_elements_to_remove.append(encrypted_data)
+
+        # Remove only after the loop so the tree is not changed mid-iteration.
+        for encrypted_data in self._json_elements_to_remove:
+            self._encryption.remove(encrypted_data)
+
+    def check_if_remaining(self):
+        """Return True when the manifest still holds entries that were not handled."""
+        return self._has_remaining_xml
+
+    def get_xml(self):
+        """Return the remaining manifest as a str with an XML declaration."""
+        # etree.indent() only exists on newer Python versions; pretty-printing
+        # is cosmetic, so it is skipped when unavailable.
+        indent = getattr(etree, "indent", None)
+        if indent is not None:
+            indent(self._encryption)
+        # With encoding="utf-8", tostring() returns bytes and adds no
+        # declaration of its own, so exactly one is prepended here.
+        body = etree.tostring(self._encryption, encoding="utf-8").decode("utf-8")
+        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + body
+
+    def decompress(self, bytes):
+        """Try to inflate a raw deflate stream.
+
+        Returns (data, True) when inflating worked and (input, False) when
+        zlib rejected the input. The parameter name shadows the builtin only
+        inside this method and is kept for interface compatibility.
+        """
+        dc = zlib.decompressobj(-15)
+        try:
+            decompressed_bytes = dc.decompress(bytes)
+            ex = dc.decompress(b'Z') + dc.flush()
+            if ex:
+                decompressed_bytes = decompressed_bytes + ex
+        except zlib.error:
+            # possibly not compressed by zip - just return bytes
+            return bytes, False
+        return decompressed_bytes, True
+
+    def decrypt(self, path, data):
+        """Return the de-obfuscated content for *path*.
+
+        Data is returned unchanged when *path* is not a known obfuscated font
+        or when the key for its algorithm is missing.
+        """
+        encoded_path = path.encode('utf-8')
+
+        if encoded_path in self._obfuscatedIETF and self.obfuscation_key_IETF is not None:
+            # de-obfuscate according to the IETF standard
+            return self._deobfuscate(self.obfuscation_key_IETF, self._SPAN_IETF, data)
+
+        if encoded_path in self._obfuscatedAdobe and self.obfuscation_key_Adobe is not None:
+            # de-obfuscate according to the Adobe standard
+            return self._deobfuscate(self.obfuscation_key_Adobe, self._SPAN_ADOBE, data)
+
+        # Not encrypted or obfuscated
+        return data
+
+    def _deobfuscate(self, key, span, data):
+        """XOR the first *span* bytes with *key*, inflating before or after if possible."""
+        data, was_decomp = self.decompress(data)
+        # Slicing covers both cases: files shorter than span are XORed whole.
+        out = self.deobfuscate_single_data(key, data[:span]) + data[span:]
+        if not was_decomp:
+            # The font may have been deflated before it was obfuscated.
+            out, was_decomp = self.decompress(out)
+        return out
+
+    def deobfuscate_single_data(self, key, data):
+        """XOR *data* with *key*, repeating the key as often as needed."""
+        if not key:
+            # An empty key would make zip() yield nothing and drop the data.
+            return data
+        return bytes([c ^ k for c, k in zip(data, itertools.cycle(key))])
+
+
+
+def decryptFontsBook(inpath, outpath):
+    """De-obfuscate the fonts of the EPUB at *inpath* and write it to *outpath*.
+
+    Returns 0 when a new EPUB was written, 1 when there is nothing to do
+    (no "META-INF/encryption.xml", or no OPF path could be found; *outpath*
+    is not created in that case) and 2 when an exception occurred while
+    writing the new file.
+    """
+    # Passing the path lets ZipFile own and close the underlying file handle.
+    with ZipFile(inpath, 'r') as inf:
+        # Keep archive order but drop duplicate names. This must be a list:
+        # it is concatenated with ["mimetype"] further down.
+        namelist = list(dict.fromkeys(inf.namelist()))
+        if 'META-INF/encryption.xml' not in namelist:
+            print("{0:s} has no obfuscated fonts".format(os.path.basename(inpath)))
+            return 1
+
+        # Font key handling: both keys are derived from identifiers in the OPF.
+        opf_path = _find_opf_path(inf)
+        if opf_path is None:
+            # Without an OPF we probably don't have a font key.
+            print("No OPF for font obfuscation found")
+            return 1
+
+        try:
+            package = etree.fromstring(inf.read(opf_path))
+        except Exception:
+            # Best effort: an unreadable OPF only means that no key is found.
+            package = None
+
+        print("Checking {0} for IETF font obfuscation keys ... ".format(opf_path), end='')
+        font_master_key = _ietf_font_key(package)
+
+        print("Checking {0} for Adobe font obfuscation keys ... ".format(opf_path), end='')
+        adobe_master_encryption_key = _adobe_font_key(package)
+
+        # Begin decrypting.
+        try:
+            encryption = inf.read('META-INF/encryption.xml')
+            decryptor = Decryptor(font_master_key, adobe_master_encryption_key, encryption)
+            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
+            with ZipFile(outpath, 'w', **kwds) as outf:
+
+                # Mimetype needs to be the first entry, so take it out of the
+                # list wherever it is, then put it at the beginning.
+                ordered = [name for name in namelist if name != "mimetype"]
+                if "mimetype" in namelist:
+                    ordered = ["mimetype"] + ordered
+
+                for path in ordered:
+                    data = inf.read(path)
+
+                    if path == "META-INF/encryption.xml":
+                        # Check if there's still other entries not related to fonts
+                        if not decryptor.check_if_remaining():
+                            # No remaining entries, no need for that file.
+                            continue
+                        data = decryptor.get_xml()
+                        print("There's remaining entries in encryption.xml, adding file ...")
+                    elif path != "mimetype":
+                        data = decryptor.decrypt(path, data)
+
+                    zi = ZipInfo(path)
+                    # mimetype must not be compressed
+                    zi.compress_type = ZIP_STORED if path == "mimetype" else ZIP_DEFLATED
+                    _copy_zip_metadata(inf.getinfo(path), zi)
+                    outf.writestr(zi, data)
+        except Exception:
+            print("Could not decrypt fonts in {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
+            return 2
+    return 0
+
+
+def _opf_tag(tag):
+    """Return *tag* qualified with the OPF package namespace."""
+    return '{%s}%s' % ('http://www.idpf.org/2007/opf', tag)
+
+
+def _dc_tag(tag):
+    """Return *tag* qualified with the Dublin Core elements namespace."""
+    return '{%s}%s' % ('http://purl.org/dc/elements/1.1/', tag)
+
+
+def _find_opf_path(inf):
+    """Return the first rootfile "full-path" from META-INF/container.xml, or None."""
+    cont_tag = lambda tag: '{%s}%s' % ('urn:oasis:names:tc:opendocument:xmlns:container', tag)
+    try:
+        container = etree.fromstring(inf.read("META-INF/container.xml"))
+    except Exception:
+        # Missing or unparsable container.xml: treated as "no OPF".
+        return None
+
+    rootfiles = container.find(cont_tag("rootfiles"))
+    if rootfiles is None:
+        return None
+    for rootfile in rootfiles.findall(cont_tag("rootfile")):
+        full_path = rootfile.get("full-path", None)
+        if full_path is not None:
+            return full_path
+    return None
+
+
+def _package_identifiers(package):
+    """Return the dc:identifier elements of a parsed OPF (empty list if none)."""
+    if package is None:
+        return []
+    metadata = package.find(_opf_tag("metadata"))
+    if metadata is None:
+        return []
+    return metadata.findall(_dc_tag("identifier"))
+
+
+def _ietf_font_key(package):
+    """Return the SHA-1 based IETF font key from the OPF, or None; prints the outcome."""
+    secret_key_name = None if package is None else package.get("unique-identifier")
+
+    # Use the identifier named by "unique-identifier", wherever it sits in the
+    # metadata; without that attribute, fall back to the first identifier.
+    font_master_key = None
+    for identifier in _package_identifiers(package):
+        if secret_key_name is None or secret_key_name == identifier.get("id"):
+            font_master_key = identifier.text
+            break
+
+    if font_master_key is None:
+        print("not found")
+        return None
+
+    if secret_key_name is None:
+        print("found '%s'" % (font_master_key))
+    else:
+        print("found '%s' (%s)" % (font_master_key, secret_key_name))
+
+    # Trim / remove forbidden characters (space, tab, CR, LF), then hash it.
+    font_master_key = font_master_key.translate({0x20: None, 0x09: None, 0x0D: None, 0x0A: None})
+    return hashlib.sha1(font_master_key.encode('utf-8')).digest()
+
+
+def _adobe_font_key(package):
+    """Return the 16-byte Adobe font key from a UUID identifier, or None; prints the outcome."""
+    uid = None
+    for identifier in _package_identifiers(package):
+        text = (identifier.text or "").strip()
+        if not text:
+            # An empty identifier cannot carry a UUID; keep looking.
+            continue
+        has_prefix = text.startswith("urn:uuid:")
+        if has_prefix or identifier.get(_opf_tag("scheme")) == "UUID":
+            uid = text[9:] if has_prefix else text
+            break
+
+    key = None
+    if uid is not None:
+        uid = uid.translate({0x20: None, 0x09: None, 0x0D: None, 0x0A: None, ord('-'): None})
+        malformed = len(uid) < 16 or not all(c in "0123456789abcdefABCDEF" for c in uid)
+        if not malformed:
+            print("found '{0}'".format(uid))
+            # The key is the first 32 hex digits of the UUID repeated twice.
+            key = binascii.unhexlify((uid + uid)[:32])
+
+    if key is None:
+        print("not found")
+    return key
+
+
+def _copy_zip_metadata(oldzi, zi):
+    """Copy time-stamp, comment, extra field and attributes from *oldzi* to *zi*."""
+    zi.date_time = oldzi.date_time
+    zi.comment = oldzi.comment
+    zi.extra = oldzi.extra
+    zi.internal_attr = oldzi.internal_attr
+    # external attributes are dependent on the create system, so copy both.
+    zi.external_attr = oldzi.external_attr
+    zi.create_system = oldzi.create_system
+    # ZipInfo.comment is bytes, so iterate it as integers via bytearray.
+    if any(ord(c) >= 128 for c in zi.filename) or any(b >= 128 for b in bytearray(zi.comment)):
+        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
+        zi.flag_bits |= 0x800
+
--- a/DeDRM_plugin/prefs.py
+++ b/DeDRM_plugin/prefs.py
@ -19,6 +19,7 @@ class DeDRM_Prefs():
        self.dedrmprefs = JSONConfig(JSON_PATH)

        self.dedrmprefs.defaults['configured'] = False
+        self.dedrmprefs.defaults['deobfuscate_fonts'] = True
        self.dedrmprefs.defaults['bandnkeys'] = {}
        self.dedrmprefs.defaults['adeptkeys'] = {}
        self.dedrmprefs.defaults['ereaderkeys'] = {}