Fix tons of PDF-related issues
This commit is contained in:
parent
52cf3faa59
commit
88b0966961
|
@ -73,8 +73,9 @@ List of changes since the fork of Apprentice Harper's repository:
|
||||||
- Fix some more Calibre-6 bugs in the Obok plugin (should fix #114).
|
- Fix some more Calibre-6 bugs in the Obok plugin (should fix #114).
|
||||||
- Fix a bug where invalid Adobe keys could cause the plugin to stop trying subsequent keys (partially fixes #109).
|
- Fix a bug where invalid Adobe keys could cause the plugin to stop trying subsequent keys (partially fixes #109).
|
||||||
- Fix DRM removal sometimes resetting the ZIP's internal "external_attr" value on Calibre 5 and newer.
|
- Fix DRM removal sometimes resetting the ZIP's internal "external_attr" value on Calibre 5 and newer.
|
||||||
- Fix PDF decryption issues on Calibre 4 (hopefully fixes #104).
|
- Fix tons of PDF decryption issues (hopefully fixes #104 and other PDF-related issues).
|
||||||
- Small Python 2 / Calibre 4 bugfix for Obok.
|
- Small Python 2 / Calibre 4 bugfix for Obok.
|
||||||
- Removing ancient AlfCrypto machine code libraries, moving all encryption / decryption to Python code.
|
- Removing ancient AlfCrypto machine code libraries, moving all encryption / decryption to Python code.
|
||||||
- General cleanup and removal of dead code.
|
- General cleanup and removal of dead code.
|
||||||
- Fix a bug where ADE account keys weren't automatically imported from the DeACSM plugin when importing a PDF file.
|
- Fix a bug where ADE account keys weren't automatically imported from the DeACSM plugin when importing a PDF file.
|
||||||
|
- Re-enable cross-reference streams (XRef streams) in exported PDF files when the input PDF contains them, since the file corruption bug is hopefully fixed. Please open a bug report if you encounter new issues with PDF files.
|
||||||
|
|
|
@ -117,7 +117,7 @@ def SHA256(message):
|
||||||
# 1 = only if present in input
|
# 1 = only if present in input
|
||||||
# 2 = always
|
# 2 = always
|
||||||
|
|
||||||
GEN_XREF_STM = 0
|
GEN_XREF_STM = 1
|
||||||
|
|
||||||
# This is the value for the current document
|
# This is the value for the current document
|
||||||
gen_xref_stm = False # will be set in PDFSerializer
|
gen_xref_stm = False # will be set in PDFSerializer
|
||||||
|
@ -565,7 +565,11 @@ class PSBaseParser(object):
|
||||||
while 1:
|
while 1:
|
||||||
self.fillbuf()
|
self.fillbuf()
|
||||||
if eol:
|
if eol:
|
||||||
c = bytes([self.buf[self.charpos]])
|
if sys.version_info[0] == 2:
|
||||||
|
c = self.buf[self.charpos]
|
||||||
|
else:
|
||||||
|
c = bytes([self.buf[self.charpos]])
|
||||||
|
|
||||||
# handle '\r\n'
|
# handle '\r\n'
|
||||||
if c == b'\n':
|
if c == b'\n':
|
||||||
linebuf += c
|
linebuf += c
|
||||||
|
@ -575,10 +579,17 @@ class PSBaseParser(object):
|
||||||
if m:
|
if m:
|
||||||
linebuf += self.buf[self.charpos:m.end(0)]
|
linebuf += self.buf[self.charpos:m.end(0)]
|
||||||
self.charpos = m.end(0)
|
self.charpos = m.end(0)
|
||||||
if bytes([linebuf[-1]]) == b'\r':
|
if sys.version_info[0] == 2:
|
||||||
eol = True
|
if linebuf[-1] == b'\r':
|
||||||
|
eol = True
|
||||||
|
else:
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
break
|
if bytes([linebuf[-1]]) == b'\r':
|
||||||
|
eol = True
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
else:
|
else:
|
||||||
linebuf += self.buf[self.charpos:]
|
linebuf += self.buf[self.charpos:]
|
||||||
self.charpos = len(self.buf)
|
self.charpos = len(self.buf)
|
||||||
|
@ -954,9 +965,14 @@ class PDFStream(PDFObject):
|
||||||
for i in range(0, len(data), columns+1):
|
for i in range(0, len(data), columns+1):
|
||||||
pred = data[i]
|
pred = data[i]
|
||||||
ent1 = data[i+1:i+1+columns]
|
ent1 = data[i+1:i+1+columns]
|
||||||
if pred == 2:
|
if sys.version_info[0] == 2:
|
||||||
ent1 = b''.join(bytes([(a+b) & 255]) \
|
if pred == '\x02':
|
||||||
for (a,b) in zip(ent0,ent1))
|
ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
|
||||||
|
for (a,b) in zip(ent0,ent1))
|
||||||
|
else:
|
||||||
|
if pred == 2:
|
||||||
|
ent1 = b''.join(bytes([(a+b) & 255]) \
|
||||||
|
for (a,b) in zip(ent0,ent1))
|
||||||
buf += ent1
|
buf += ent1
|
||||||
ent0 = ent1
|
ent0 = ent1
|
||||||
data = buf
|
data = buf
|
||||||
|
@ -1070,8 +1086,6 @@ class PDFXRef(object):
|
||||||
return (None, pos)
|
return (None, pos)
|
||||||
|
|
||||||
|
|
||||||
## PDFXRefStream
|
|
||||||
##
|
|
||||||
class PDFXRefStream(object):
|
class PDFXRefStream(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -1404,7 +1418,10 @@ class PDFDocument(object):
|
||||||
x = ARC4.new(hash).decrypt(Odata) # 4
|
x = ARC4.new(hash).decrypt(Odata) # 4
|
||||||
if R >= 3:
|
if R >= 3:
|
||||||
for i in range(1,19+1):
|
for i in range(1,19+1):
|
||||||
k = b''.join(bytes([c ^ i]) for c in hash )
|
if sys.version_info[0] == 2:
|
||||||
|
k = b''.join(chr(ord(c) ^ i) for c in hash )
|
||||||
|
else:
|
||||||
|
k = b''.join(bytes([c ^ i]) for c in hash )
|
||||||
x = ARC4.new(k).decrypt(x)
|
x = ARC4.new(k).decrypt(x)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1462,7 +1479,10 @@ class PDFDocument(object):
|
||||||
hash.update(docid[0]) # 3
|
hash.update(docid[0]) # 3
|
||||||
x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
|
x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
|
||||||
for i in range(1,19+1):
|
for i in range(1,19+1):
|
||||||
k = b''.join(bytes([c ^ i]) for c in key )
|
if sys.version_info[0] == 2:
|
||||||
|
k = b''.join(chr(ord(c) ^ i) for c in key )
|
||||||
|
else:
|
||||||
|
k = b''.join(bytes([c ^ i]) for c in key )
|
||||||
x = ARC4.new(k).decrypt(x)
|
x = ARC4.new(k).decrypt(x)
|
||||||
u1 = x+x # 32bytes total
|
u1 = x+x # 32bytes total
|
||||||
if R == 2:
|
if R == 2:
|
||||||
|
@ -1490,8 +1510,8 @@ class PDFDocument(object):
|
||||||
|
|
||||||
# check owner pass:
|
# check owner pass:
|
||||||
retval = self.check_owner_password(password, docid, param)
|
retval = self.check_owner_password(password, docid, param)
|
||||||
if retval is True or retval is not None:
|
if retval is True or (retval is not False and retval is not None):
|
||||||
#print("Owner pass is valid - " + str(retval))
|
#print("Owner pass is valid")
|
||||||
if retval is True:
|
if retval is True:
|
||||||
self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
|
self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
|
||||||
else:
|
else:
|
||||||
|
@ -1500,7 +1520,7 @@ class PDFDocument(object):
|
||||||
if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False:
|
if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False:
|
||||||
# That's not the owner password. Check if it's the user password.
|
# That's not the owner password. Check if it's the user password.
|
||||||
retval = self.check_user_password(password, docid, param)
|
retval = self.check_user_password(password, docid, param)
|
||||||
if retval is True or retval is not None:
|
if retval is True or (retval is not False and retval is not None):
|
||||||
#print("User pass is valid")
|
#print("User pass is valid")
|
||||||
if retval is True:
|
if retval is True:
|
||||||
self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
|
self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
|
||||||
|
@ -1723,7 +1743,11 @@ class PDFDocument(object):
|
||||||
data = data[16:]
|
data = data[16:]
|
||||||
plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
|
plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
|
||||||
# remove pkcs#5 aes padding
|
# remove pkcs#5 aes padding
|
||||||
cutter = -1 * plaintext[-1]
|
if sys.version_info[0] == 2:
|
||||||
|
cutter = -1 * ord(plaintext[-1])
|
||||||
|
else:
|
||||||
|
cutter = -1 * plaintext[-1]
|
||||||
|
|
||||||
plaintext = plaintext[:cutter]
|
plaintext = plaintext[:cutter]
|
||||||
return plaintext
|
return plaintext
|
||||||
|
|
||||||
|
@ -2199,7 +2223,11 @@ class PDFSerializer(object):
|
||||||
elif isinstance(obj, bytearray):
|
elif isinstance(obj, bytearray):
|
||||||
self.write(b'(%s)' % self.escape_string(obj))
|
self.write(b'(%s)' % self.escape_string(obj))
|
||||||
elif isinstance(obj, bytes):
|
elif isinstance(obj, bytes):
|
||||||
self.write(b'(%s)' % self.escape_string(obj))
|
# I'm not 100% sure if this is correct, but it seems to fix some PDFs ...
|
||||||
|
# If this breaks other PDFs, revert to writing a literal string: b'(%s)' % self.escape_string(obj)
|
||||||
|
self.write(b'<%s>' % binascii.hexlify(obj).upper())
|
||||||
|
print("ineptpdf.py: Unknown bytes element found - guessing.")
|
||||||
|
print("If this PDF is corrupted and/or doesn't work, please open a bug report.")
|
||||||
elif isinstance(obj, str):
|
elif isinstance(obj, str):
|
||||||
self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
|
self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
|
||||||
elif isinstance(obj, bool):
|
elif isinstance(obj, bool):
|
||||||
|
@ -2226,6 +2254,20 @@ class PDFSerializer(object):
|
||||||
self.write(b'(deleted)')
|
self.write(b'(deleted)')
|
||||||
else:
|
else:
|
||||||
data = obj.get_decdata()
|
data = obj.get_decdata()
|
||||||
|
|
||||||
|
# Fix length:
|
||||||
|
# We've decompressed and then recompressed the PDF stream.
|
||||||
|
# Depending on the algorithm, the implementation, and the compression level,
|
||||||
|
# the resulting recompressed stream is unlikely to have the same length as the original.
|
||||||
|
# So we need to update the PDF object to contain the new proper length.
|
||||||
|
|
||||||
|
# Without this change, all PDFs exported by this plugin are slightly corrupted -
|
||||||
|
# even though most, if not all, PDF readers can correct that on the fly.
|
||||||
|
|
||||||
|
if 'Length' in obj.dic:
|
||||||
|
obj.dic['Length'] = len(data)
|
||||||
|
|
||||||
|
|
||||||
self.serialize_object(obj.dic)
|
self.serialize_object(obj.dic)
|
||||||
self.write(b'stream\n')
|
self.write(b'stream\n')
|
||||||
self.write(data)
|
self.write(data)
|
||||||
|
|
Loading…
Reference in New Issue