# DeDRM_tools/DeDRM_plugin/zipfix.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# zipfix.py
# Copyright © 2010-2020 by some_updates, DiapDealer and Apprentice Alf

# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>

# Revision history:
#   1.0 - Initial release
#   1.1 - Updated to handle zip file metadata correctly
#   2.0 - Added Python 3 compatibility for calibre 5.0

"""
Re-write zip (or ePub) fixing problems with file names (and mimetype entry).
"""
from __future__ import print_function

__license__ = 'GPL v3'
# NOTE(review): the revision history in this file lists 2.0 as the latest
# revision, but the value below still reports 1.1 - confirm which is intended.
__version__ = "1.1"

import sys
import zlib
import calibre_plugins.dedrm.zipfilerugged as zipfilerugged
import os
import os.path
import getopt
from struct import unpack


# Byte offsets inside a member's local file header, measured from the start of
# that header (ZipInfo.header_offset). The two length fields are each read as a
# 2-byte little-endian unsigned value ('<H').
# Offset of the file-name length field.
_FILENAME_LEN_OFFSET = 26
# Offset of the extra-field length field.
_EXTRA_LEN_OFFSET = 28
# Offset at which the file-name bytes begin; the extra field and then the
# member data follow the name.
_FILENAME_OFFSET = 30
# Largest slice of compressed input handed to zlib in one decompress() call.
_MAX_SIZE = 64 * 1024
# Text written as the content of the 'mimetype' member of an ePub archive.
_MIMETYPE = 'application/epub+zip'

class ZipInfo(zipfilerugged.ZipInfo):
    """zipfilerugged.ZipInfo whose constructor also accepts ``compress_type``.

    All positional and remaining keyword arguments are forwarded unchanged to
    the base-class constructor. ``compress_type`` is optional: when it is
    omitted, whatever the base class initialised is left untouched.
    """

    def __init__(self, *args, **kwargs):
        # Remember whether the caller supplied the keyword at all, so that an
        # explicit value (whatever it is) is honoured while an omitted one
        # no longer triggers an UnboundLocalError.
        has_compress_type = 'compress_type' in kwargs
        compress_type = kwargs.pop('compress_type', None)
        super(ZipInfo, self).__init__(*args, **kwargs)
        if has_compress_type:
            self.compress_type = compress_type

class fixZip:
    """Re-write a zip (or ePub) archive member by member into a new archive.

    The input is opened twice: once through zipfilerugged (central directory
    view) and once as a raw binary file, so that a member whose normal read
    fails can be recovered straight from its local file header.
    """

    def __init__(self, zinput, zoutput):
        """Open the archives.

        zinput  -- path of the archive to read; it is treated as an ePub when
                   '.epub' occurs anywhere in the lower-cased path
        zoutput -- path of the archive to write
        """
        self.ztype = 'epub' if '.epub' in zinput.lower() else 'zip'
        self.inzip = zipfilerugged.ZipFile(zinput, 'r')
        self.outzip = zipfilerugged.ZipFile(zoutput, 'w')
        # open the input zip for reading only as a raw file
        self.bzf = open(zinput, 'rb')

    def _read_u16(self, offset):
        # Read one little-endian unsigned 16-bit value at an absolute offset
        # of the raw input file.
        self.bzf.seek(offset)
        value, = unpack('<H', self.bzf.read(2))
        return value

    def getlocalname(self, zi):
        """Return the member's file name as stored in its local header.

        zi -- info object whose ``header_offset`` locates the local header

        The name is returned exactly as read from the binary file (bytes on
        Python 3); no decoding is applied.
        """
        header_start = zi.header_offset
        name_length = self._read_u16(header_start + _FILENAME_LEN_OFFSET)
        self.bzf.seek(header_start + _FILENAME_OFFSET)
        return self.bzf.read(name_length)

    def uncompress(self, cmpdata):
        """Inflate a raw deflate stream and return the decompressed bytes.

        cmpdata -- the compressed bytes of one member (no zlib header; the
                   decompressor is created with wbits=-15)

        The input is fed to zlib in slices of at most _MAX_SIZE bytes and
        flush() is called exactly once, after the last slice, because the
        decompress object must not be fed again once it has been flushed.
        """
        dc = zlib.decompressobj(-15)
        pieces = []
        for start in range(0, len(cmpdata), _MAX_SIZE):
            pieces.append(dc.decompress(cmpdata[start:start + _MAX_SIZE]))
        pieces.append(dc.flush())
        # bytes throughout: zlib returns bytes, so joining with a str literal
        # would raise TypeError on Python 3
        return b''.join(pieces)

    def getfiledata(self, zi):
        """Read one member's data directly from the raw input file.

        zi -- info object providing ``header_offset``, ``compress_type``,
              ``file_size`` and ``compress_size``

        Returns the uncompressed data for stored and deflated members, or
        None when the member uses any other compression type.
        """
        # get file name length and extra data length to find start of file data
        header_start = zi.header_offset
        name_length = self._read_u16(header_start + _FILENAME_LEN_OFFSET)
        extra_length = self._read_u16(header_start + _EXTRA_LEN_OFFSET)
        self.bzf.seek(header_start + _FILENAME_OFFSET + name_length + extra_length)

        # if not compressed we are good to go
        if zi.compress_type == zipfilerugged.ZIP_STORED:
            return self.bzf.read(zi.file_size)

        # if compressed we must decompress it using zlib
        if zi.compress_type == zipfilerugged.ZIP_DEFLATED:
            return self.uncompress(self.bzf.read(zi.compress_size))

        return None

    def _write_mimetype(self):
        # Write the 'mimetype' member to the output archive, uncompressed.
        # first get a ZipInfo with current time and no compression
        mimeinfo = ZipInfo(b'mimetype', compress_type=zipfilerugged.ZIP_STORED)
        mimeinfo.internal_attr = 1  # text file
        try:
            # if the mimetype is present, get its info, including time-stamp
            oldmimeinfo = self.inzip.getinfo('mimetype')
            # copy across useful fields
            mimeinfo.date_time = oldmimeinfo.date_time
            mimeinfo.comment = oldmimeinfo.comment
            mimeinfo.extra = oldmimeinfo.extra
            mimeinfo.internal_attr = oldmimeinfo.internal_attr
            mimeinfo.external_attr = oldmimeinfo.external_attr
            mimeinfo.create_system = oldmimeinfo.create_system
        except Exception:
            # best effort: without a usable old entry the defaults set above
            # are kept (KeyboardInterrupt/SystemExit are no longer swallowed)
            pass
        self.outzip.writestr(mimeinfo, _MIMETYPE.encode('ascii'))

    def fix(self):
        """Copy every member of the input archive to the output archive.

        For an ePub the 'mimetype' member is written first, uncompressed, and
        the input's own 'mimetype' member is skipped. A member whose normal
        read raises a zipfilerugged error is recovered from its local header
        and written under the name found there. All three file handles are
        closed when the method finishes, whether or not it succeeded.
        """
        try:
            # if epub write mimetype file first, with no compression
            if self.ztype == 'epub':
                self._write_mimetype()

            # write the rest of the files
            for zinfo in self.inzip.infolist():
                if self.ztype == 'epub' and zinfo.filename == "mimetype":
                    continue

                try:
                    data = self.inzip.read(zinfo.filename)
                except (zipfilerugged.BadZipfile, zipfilerugged.error):
                    # a tuple is required here: "A or B" evaluates to A alone
                    local_name = self.getlocalname(zinfo)
                    data = self.getfiledata(zinfo)
                    # NOTE(review): local_name is the raw header bytes while
                    # names elsewhere in this method are compared as str -
                    # confirm zipfilerugged accepts a bytes filename here.
                    zinfo.filename = local_name

                # create new ZipInfo with only the useful attributes from the old info
                nzinfo = ZipInfo(zinfo.filename, zinfo.date_time,
                                 compress_type=zinfo.compress_type)
                nzinfo.comment = zinfo.comment
                nzinfo.extra = zinfo.extra
                nzinfo.internal_attr = zinfo.internal_attr
                nzinfo.external_attr = zinfo.external_attr
                nzinfo.create_system = zinfo.create_system
                self.outzip.writestr(nzinfo, data)
        finally:
            # release every handle even when copying failed part-way
            try:
                self.bzf.close()
            finally:
                try:
                    self.inzip.close()
                finally:
                    self.outzip.close()


def usage():
    """Print the command-line synopsis for this script to standard output."""
    help_text = (
        "usage: zipfix.py inputzip outputzip\n"
        "     inputzip is the source zipfile to fix\n"
        "     outputzip is the fixed zip archive\n"
        "    "
    )
    print(help_text)


def repairBook(infile, outfile):
    """Repair the archive at *infile*, writing the result to *outfile*.

    Returns 0 on success, 1 when *infile* does not exist, and 2 when the
    repair raised an exception (the exception is printed, not propagated).
    """
    if os.path.exists(infile):
        try:
            fixZip(infile, outfile).fix()
        except Exception as err:
            print("Error Occurred ", err)
            return 2
        return 0

    print("Error: Input Zip File does not exist")
    return 1


def main(argv=sys.argv):
    """Command-line entry point.

    argv -- argument list; it must hold exactly three items (program name,
            input path, output path), otherwise the usage text is printed

    Returns 1 on a wrong argument count, else the result of repairBook().
    """
    if len(argv) == 3:
        return repairBook(argv[1], argv[2])

    usage()
    return 1


if __name__ == '__main__':
    # Executed as a script: use main()'s return value as the exit status.
    raise SystemExit(main())
tools v2.2 2010-11-11 15:11:36 -07:00			`#!/usr/bin/env python`
tools v5.5 Plugins now include unaltered stand-alone scripts, so no longer need to keep separate copies. 2012-12-19 06:48:11 -07:00			`# -- coding: utf-8 --`
tools v2.2 2010-11-11 15:11:36 -07:00
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`# zipfix.py`
			`# Copyright © 2010-2020 by some_updates, DiapDealer and Apprentice Alf`
tools v6.0.8 2013-10-02 12:59:40 -06:00
			`# Released under the terms of the GNU General Public Licence, version 3`
			`# <http://www.gnu.org/licenses/>`

			`# Revision history:`
			`# 1.0 - Initial release`
			`# 1.1 - Updated to handle zip file metadata correctly`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`# 2.0 - Added Python 3 compatibility for calibre 5.0`
tools v6.0.8 2013-10-02 12:59:40 -06:00
			`"""`
			`Re-write zip (or ePub) fixing problems with file names (and mimetype entry).`
			`"""`
Use print() function in both Python 2 and Python 3 Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3. 2019-06-24 10:49:38 -06:00			`from __future__ import print_function`
tools v6.0.0 The first unified calibre plugin 2013-03-20 04:23:54 -06:00
			`__license__ = 'GPL v3'`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`__version__ = "1.1"`

			`import sys`
			`import zlib`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`import calibre_plugins.dedrm.zipfilerugged as zipfilerugged`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`import os`
			`import os.path`
			`import getopt`
			`from struct import unpack`


			`_FILENAME_LEN_OFFSET = 26`
			`_EXTRA_LEN_OFFSET = 28`
			`_FILENAME_OFFSET = 30`
			`_MAX_SIZE = 64 * 1024`
			`_MIMETYPE = 'application/epub+zip'`

			`class ZipInfo(zipfilerugged.ZipInfo):`
			`def __init__(self, args, *kwargs):`
			`if 'compress_type' in kwargs:`
			`compress_type = kwargs.pop('compress_type')`
			`super(ZipInfo, self).__init__(args, *kwargs)`
			`self.compress_type = compress_type`

			`class fixZip:`
			`def __init__(self, zinput, zoutput):`
			`self.ztype = 'zip'`
			`if zinput.lower().find('.epub') >= 0 :`
			`self.ztype = 'epub'`
			`self.inzip = zipfilerugged.ZipFile(zinput,'r')`
			`self.outzip = zipfilerugged.ZipFile(zoutput,'w')`
			`# open the input zip for reading only as a raw file`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`self.bzf = open(zinput,'rb')`
tools v6.0.8 2013-10-02 12:59:40 -06:00
			`def getlocalname(self, zi):`
			`local_header_offset = zi.header_offset`
			`self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)`
			`leninfo = self.bzf.read(2)`
			`local_name_length, = unpack('<H', leninfo)`
			`self.bzf.seek(local_header_offset + _FILENAME_OFFSET)`
			`local_name = self.bzf.read(local_name_length)`
			`return local_name`

			`def uncompress(self, cmpdata):`
			`dc = zlib.decompressobj(-15)`
			`data = ''`
			`while len(cmpdata) > 0:`
			`if len(cmpdata) > _MAX_SIZE :`
			`newdata = cmpdata[0:_MAX_SIZE]`
			`cmpdata = cmpdata[_MAX_SIZE:]`
			`else:`
			`newdata = cmpdata`
			`cmpdata = ''`
			`newdata = dc.decompress(newdata)`
			`unprocessed = dc.unconsumed_tail`
			`if len(unprocessed) == 0:`
			`newdata += dc.flush()`
			`data += newdata`
			`cmpdata += unprocessed`
			`unprocessed = ''`
			`return data`

			`def getfiledata(self, zi):`
			`# get file name length and exta data length to find start of file data`
			`local_header_offset = zi.header_offset`

			`self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)`
			`leninfo = self.bzf.read(2)`
			`local_name_length, = unpack('<H', leninfo)`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`self.bzf.seek(local_header_offset + _EXTRA_LEN_OFFSET)`
			`exinfo = self.bzf.read(2)`
			`extra_field_length, = unpack('<H', exinfo)`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`self.bzf.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)`
			`data = None`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`# if not compressed we are good to go`
			`if zi.compress_type == zipfilerugged.ZIP_STORED:`
			`data = self.bzf.read(zi.file_size)`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`# if compressed we must decompress it using zlib`
			`if zi.compress_type == zipfilerugged.ZIP_DEFLATED:`
			`cmpdata = self.bzf.read(zi.compress_size)`
			`data = self.uncompress(cmpdata)`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`return data`
tools v2.2 2010-11-11 15:11:36 -07:00
tools v5.4 2012-11-07 06:14:25 -07:00
tools v2.2 2010-11-11 15:11:36 -07:00
tools v6.0.8 2013-10-02 12:59:40 -06:00			`def fix(self):`
			`# get the zipinfo for each member of the input archive`
			`# and copy member over to output archive`
			`# if problems exist with local vs central filename, fix them`

			`# if epub write mimetype file first, with no compression`
			`if self.ztype == 'epub':`
			`# first get a ZipInfo with current time and no compression`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`mimeinfo = ZipInfo(b'mimetype',compress_type=zipfilerugged.ZIP_STORED)`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`mimeinfo.internal_attr = 1 # text file`
			`try:`
			`# if the mimetype is present, get its info, including time-stamp`
			`oldmimeinfo = self.inzip.getinfo('mimetype')`
			`# copy across useful fields`
			`mimeinfo.date_time = oldmimeinfo.date_time`
			`mimeinfo.comment = oldmimeinfo.comment`
			`mimeinfo.extra = oldmimeinfo.extra`
			`mimeinfo.internal_attr = oldmimeinfo.internal_attr`
			`mimeinfo.external_attr = oldmimeinfo.external_attr`
			`mimeinfo.create_system = oldmimeinfo.create_system`
			`except:`
			`pass`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`self.outzip.writestr(mimeinfo, _MIMETYPE.encode('ascii'))`
tools v6.0.8 2013-10-02 12:59:40 -06:00
			`# write the rest of the files`
			`for zinfo in self.inzip.infolist():`
			`if zinfo.filename != "mimetype" or self.ztype != 'epub':`
			`data = None`
			`try:`
			`data = self.inzip.read(zinfo.filename)`
			`except zipfilerugged.BadZipfile or zipfilerugged.error:`
			`local_name = self.getlocalname(zinfo)`
			`data = self.getfiledata(zinfo)`
			`zinfo.filename = local_name`

			`# create new ZipInfo with only the useful attributes from the old info`
			`nzinfo = ZipInfo(zinfo.filename, zinfo.date_time, compress_type=zinfo.compress_type)`
			`nzinfo.comment=zinfo.comment`
			`nzinfo.extra=zinfo.extra`
			`nzinfo.internal_attr=zinfo.internal_attr`
			`nzinfo.external_attr=zinfo.external_attr`
			`nzinfo.create_system=zinfo.create_system`
			`self.outzip.writestr(nzinfo,data)`

			`self.bzf.close()`
			`self.inzip.close()`
			`self.outzip.close()`


			`def usage():`
Use print() function in both Python 2 and Python 3 Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3. 2019-06-24 10:49:38 -06:00			`print("""usage: zipfix.py inputzip outputzip`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`inputzip is the source zipfile to fix`
			`outputzip is the fixed zip archive`
Use print() function in both Python 2 and Python 3 Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3. 2019-06-24 10:49:38 -06:00			`""")`
tools v6.0.8 2013-10-02 12:59:40 -06:00

			`def repairBook(infile, outfile):`
			`if not os.path.exists(infile):`
Use print() function in both Python 2 and Python 3 Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3. 2019-06-24 10:49:38 -06:00			`print("Error: Input Zip File does not exist")`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`return 1`
tools v2.2 2010-11-11 15:11:36 -07:00			`try:`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`fr = fixZip(infile, outfile)`
			`fr.fix()`
			`return 0`
Starting on Version 7.0 using the work done by others. Completely untested. I will be testing things, but I thought I'd get this base version up for others to give pull requests. THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0 2020-09-26 14:22:47 -06:00			`except Exception as e:`
Use print() function in both Python 2 and Python 3 Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3. 2019-06-24 10:49:38 -06:00			`print("Error Occurred ", e)`
tools v6.0.8 2013-10-02 12:59:40 -06:00			`return 2`


			`def main(argv=sys.argv):`
			`if len(argv)!=3:`
			`usage()`
			`return 1`
			`infile = argv[1]`
			`outfile = argv[2]`
			`return repairBook(infile, outfile)`


			`if __name__ == '__main__' :`
			`sys.exit(main())`