2020-09-27 04:54:49 -06:00
|
|
|
#!/usr/bin/env python3
|
2012-12-19 06:48:11 -07:00
|
|
|
# -*- coding: utf-8 -*-
|
2010-11-11 15:11:36 -07:00
|
|
|
|
2020-09-26 14:22:47 -06:00
|
|
|
# zipfix.py
|
2020-09-27 04:54:49 -06:00
|
|
|
# Copyright © 2010-2020 by Apprentice Harper et al.
|
2013-10-02 12:59:40 -06:00
|
|
|
|
|
|
|
# Released under the terms of the GNU General Public Licence, version 3
|
|
|
|
# <http://www.gnu.org/licenses/>
|
|
|
|
|
|
|
|
# Revision history:
|
|
|
|
# 1.0 - Initial release
|
|
|
|
# 1.1 - Updated to handle zip file metadata correctly
|
2020-09-27 04:54:49 -06:00
|
|
|
# 2.0 - Python 3 for calibre 5.0
|
2013-10-02 12:59:40 -06:00
|
|
|
|
|
|
|
"""
|
|
|
|
Re-write zip (or ePub) fixing problems with file names (and mimetype entry).
|
|
|
|
"""
|
2020-10-14 09:23:49 -06:00
|
|
|
|
2013-03-20 04:23:54 -06:00
|
|
|
|
|
|
|
__license__ = 'GPL v3'
# Matches the newest entry in this file's revision history
# ("2.0 - Python 3 for calibre 5.0"); the value had been left at "1.1".
__version__ = "2.0"
|
|
|
|
|
2021-12-29 01:26:29 -07:00
|
|
|
import sys, os
|
|
|
|
|
2021-12-29 03:36:59 -07:00
|
|
|
#@@CALIBRE_COMPAT_CODE@@
|
2021-12-29 01:26:29 -07:00
|
|
|
|
2013-10-02 12:59:40 -06:00
|
|
|
import zlib
|
2021-12-29 01:26:29 -07:00
|
|
|
import zipfilerugged
|
2022-08-06 05:53:03 -06:00
|
|
|
from zipfilerugged import ZipInfo, ZeroedZipInfo
|
2013-10-02 12:59:40 -06:00
|
|
|
import getopt
|
|
|
|
from struct import unpack
|
|
|
|
|
|
|
|
|
|
|
|
# Byte offsets, relative to the start of a member's local file header, that
# fixZip uses when it reads the raw archive directly.
_FILENAME_LEN_OFFSET = 26   # 2-byte little-endian length of the member name
_EXTRA_LEN_OFFSET = 28      # 2-byte little-endian length of the extra field
_FILENAME_OFFSET = 30       # first byte of the member name

# Largest slice of compressed data handed to the decompressor in one call.
_MAX_SIZE = 64 * 1024

# Payload written to the 'mimetype' entry of a repaired ePub.
_MIMETYPE = 'application/epub+zip'
|
|
|
|
|
|
|
|
|
|
|
|
class fixZip:
    """Copy a zip (or ePub) archive member by member into a new archive.

    Members that the zip reader refuses to read are recovered from the raw
    input file instead, using the name and data found at their local file
    header.  For ePubs an uncompressed 'mimetype' entry is written first.
    """

    # Attributes carried over from an existing 'mimetype' entry.
    _MIMETYPE_FIELDS = (
        'date_time', 'comment', 'extra', 'internal_attr', 'external_attr',
        'create_system', 'create_version', 'volume',
    )

    # Attributes carried over for every other member.
    _MEMBER_FIELDS = (
        'date_time', 'compress_type', 'comment', 'extra', 'internal_attr',
        'external_attr', 'create_system', 'create_version', 'volume',
    )

    def __init__(self, zinput, zoutput):
        """Open the input archive (as a zip and as a raw file) and the output.

        zinput  -- path of the archive to repair; it is handled as an ePub
                   when '.epub' occurs anywhere in the lower-cased path.
        zoutput -- path of the archive to create.

        If a later open fails, the handles opened so far are closed before
        the exception is re-raised.
        """
        self.ztype = 'epub' if '.epub' in zinput.lower() else 'zip'
        self.inzip = zipfilerugged.ZipFile(zinput, 'r')
        try:
            self.outzip = zipfilerugged.ZipFile(zoutput, 'w')
            try:
                # open the input zip for reading only as a raw file
                self.bzf = open(zinput, 'rb')
            except Exception:
                self.outzip.close()
                raise
        except Exception:
            self.inzip.close()
            raise

    def _read_u16(self, offset):
        """Return the little-endian unsigned 16-bit value at *offset* of the raw file."""
        self.bzf.seek(offset)
        value, = unpack('<H', self.bzf.read(2))
        return value

    def getlocalname(self, zi):
        """Return the member name (bytes) stored in *zi*'s local file header.

        Only zi.header_offset is used; the name is read from the raw file.
        """
        base = zi.header_offset
        name_length = self._read_u16(base + _FILENAME_LEN_OFFSET)
        self.bzf.seek(base + _FILENAME_OFFSET)
        return self.bzf.read(name_length)

    def uncompress(self, cmpdata):
        """Inflate *cmpdata*, a raw deflate stream, and return the bytes.

        The input is fed to the decompressor in slices of at most _MAX_SIZE
        bytes.  flush() is called exactly once, after the last slice, because
        the zlib documentation does not allow decompress() after flush().
        """
        dc = zlib.decompressobj(-15)  # negative wbits: no zlib header/trailer
        pieces = [
            dc.decompress(cmpdata[start:start + _MAX_SIZE])
            for start in range(0, len(cmpdata), _MAX_SIZE)
        ]
        pieces.append(dc.flush())
        return b''.join(pieces)

    def getfiledata(self, zi):
        """Return the uncompressed content of member *zi*, read from the raw file.

        Returns None when zi.compress_type is neither ZIP_STORED nor
        ZIP_DEFLATED.
        """
        # get file name length and extra data length to find start of file data
        base = zi.header_offset
        name_length = self._read_u16(base + _FILENAME_LEN_OFFSET)
        extra_length = self._read_u16(base + _EXTRA_LEN_OFFSET)
        self.bzf.seek(base + _FILENAME_OFFSET + name_length + extra_length)

        # if not compressed we are good to go
        if zi.compress_type == zipfilerugged.ZIP_STORED:
            return self.bzf.read(zi.file_size)

        # if compressed we must decompress it using zlib
        if zi.compress_type == zipfilerugged.ZIP_DEFLATED:
            return self.uncompress(self.bzf.read(zi.compress_size))

        return None

    def _write_mimetype(self):
        """Write the ePub 'mimetype' entry to the output, uncompressed."""
        # first get a ZipInfo with current time and no compression
        mimeinfo = ZipInfo(b'mimetype')
        mimeinfo.compress_type = zipfilerugged.ZIP_STORED
        mimeinfo.internal_attr = 1  # text file
        try:
            # if the mimetype is present, get its info, including time-stamp
            oldmimeinfo = self.inzip.getinfo(b'mimetype')
            # copy across useful fields
            for field in self._MIMETYPE_FIELDS:
                setattr(mimeinfo, field, getattr(oldmimeinfo, field))
        except Exception:
            # Best effort: with no usable old entry the defaults set above
            # (plus any fields already copied) are kept.
            pass

        # Python 3 has a bug where the external_attr is reset to `0o600 << 16`
        # if it's NULL, so we need a workaround:
        if mimeinfo.external_attr == 0:
            mimeinfo = ZeroedZipInfo(mimeinfo)

        self.outzip.writestr(mimeinfo, _MIMETYPE.encode('ascii'))

    def _copy_member(self, zinfo):
        """Copy one member to the output, recovering it from the raw file if needed."""
        try:
            data = self.inzip.read(zinfo.filename)
        except (zipfilerugged.BadZipfile, zipfilerugged.error):
            # The reader rejected the member: take the name and the data
            # from the local file header in the raw file instead.
            local_name = self.getlocalname(zinfo)
            data = self.getfiledata(zinfo)
            zinfo.filename = local_name

        # create new ZipInfo with only the useful attributes from the old info
        nzinfo = ZipInfo(zinfo.filename)
        for field in self._MEMBER_FIELDS:
            setattr(nzinfo, field, getattr(zinfo, field))
        nzinfo.flag_bits = zinfo.flag_bits & 0x800  # preserve UTF-8 flag

        # Python 3 has a bug where the external_attr is reset to `0o600 << 16`
        # if it's NULL, so we need a workaround:
        if nzinfo.external_attr == 0:
            nzinfo = ZeroedZipInfo(nzinfo)

        self.outzip.writestr(nzinfo, data)

    def _close(self):
        """Close the raw file and both archives, trying each even if one fails."""
        try:
            self.bzf.close()
        finally:
            try:
                self.inzip.close()
            finally:
                self.outzip.close()

    def fix(self):
        """Write the repaired archive and close every file handle.

        Gets the zipinfo for each member of the input archive and copies the
        member over to the output archive; if problems exist with local vs
        central filename, the local one is used.  The handles are closed
        whether or not the copy succeeds.
        """
        try:
            # if epub write mimetype file first, with no compression
            if self.ztype == 'epub':
                self._write_mimetype()

            # write the rest of the files
            for zinfo in self.inzip.infolist():
                # NOTE(review): the name is compared against bytes; confirm
                # that zipfilerugged reports member names as bytes.
                if zinfo.filename != b"mimetype" or self.ztype != 'epub':
                    self._copy_member(zinfo)
        finally:
            self._close()
|
|
|
|
|
|
|
|
|
|
|
|
def usage():
    """Print the command-line synopsis for zipfix.py to standard output."""
    synopsis = """usage: zipfix.py inputzip outputzip
inputzip is the source zipfile to fix
outputzip is the fixed zip archive
"""
    print(synopsis)
|
2013-10-02 12:59:40 -06:00
|
|
|
|
|
|
|
|
|
|
|
def repairBook(infile, outfile):
    """Repair the archive at *infile*, writing the result to *outfile*.

    Returns 0 on success, 1 when *infile* does not exist, and 2 when the
    repair raises; in the last two cases a message is printed.
    """
    if os.path.exists(infile):
        try:
            fixZip(infile, outfile).fix()
        except Exception as err:
            print("Error Occurred ", err)
            return 2
        return 0

    print("Error: Input Zip File does not exist")
    return 1
|
|
|
|
|
|
|
|
|
|
|
|
def main(argv=sys.argv):
    """Command-line entry point: expects [program, inputzip, outputzip].

    Prints the usage text and returns 1 for any other argument count;
    otherwise returns the result of repairBook().
    """
    if len(argv) == 3:
        _, source, target = argv
        return repairBook(source, target)

    usage()
    return 1
|
|
|
|
|
|
|
|
|
|
|
|
# Run as a script: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|