Add support for empty arrays (<>) in PDF objects. Fixes #183.
This commit is contained in:
parent
6c8051eded
commit
06648eeb1c
|
@ -81,3 +81,4 @@ List of changes since the fork of Apprentice Harper's repository:
|
||||||
- Re-enable Xrefs in exported PDF files since the file corruption bug is hopefully fixed. Please open bug reports if you encounter new issues with PDF files.
|
- Re-enable Xrefs in exported PDF files since the file corruption bug is hopefully fixed. Please open bug reports if you encounter new issues with PDF files.
|
||||||
- Fix a bug that would sometimes cause corrupted keys to be added when adding them through the config dialog (fixes #145, #134, #119, #116, #115, #109).
|
- Fix a bug that would sometimes cause corrupted keys to be added when adding them through the config dialog (fixes #145, #134, #119, #116, #115, #109).
|
||||||
- Update the README (fixes #136) to indicate that Apprentice Harper's version is no longer being updated.
|
- Update the README (fixes #136) to indicate that Apprentice Harper's version is no longer being updated.
|
||||||
|
- Fix a bug where PDFs containing an empty array (`<>`) inside a PDF object failed to decrypt (fixes #183).
|
||||||
|
|
|
@ -270,6 +270,11 @@ END_STRING = re.compile(br'[()\\]')
|
||||||
OCT_STRING = re.compile(br'[0-7]')
|
OCT_STRING = re.compile(br'[0-7]')
|
||||||
ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }
|
ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }
|
||||||
|
|
||||||
|
class EmptyArrayValue(object):
    """Placeholder token for an empty ``<>`` sequence inside a PDF object.

    The parser adds an instance of this class as a token when it
    encounters ``<>``. According to the parser's own comment, a dummy
    object is needed there because the surrounding code would otherwise
    remove the whole (empty) value.
    """

    def __str__(self):
        # Textual form of the empty value, as written in the PDF source.
        return "<>"

    def __repr__(self):
        # Readable form for debugging output and token dumps.
        return "EmptyArrayValue()"
|
||||||
|
|
||||||
|
|
||||||
class PSBaseParser(object):
|
class PSBaseParser(object):
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
@ -519,6 +524,13 @@ class PSBaseParser(object):
|
||||||
if c == b'<':
|
if c == b'<':
|
||||||
self.add_token(KEYWORD_DICT_BEGIN)
|
self.add_token(KEYWORD_DICT_BEGIN)
|
||||||
i += 1
|
i += 1
|
||||||
|
if c == b'>':
|
||||||
|
# Empty array ("<>") without any contents. It is unclear why a PDF would contain one.
|
||||||
|
# We need to add some dummy Python object that will serialize to
|
||||||
|
# nothing, otherwise the code removes the whole array.
|
||||||
|
self.add_token(EmptyArrayValue())
|
||||||
|
i += 1
|
||||||
|
|
||||||
return (self.parse_main, i)
|
return (self.parse_main, i)
|
||||||
|
|
||||||
def parse_wclose(self, s, i):
|
def parse_wclose(self, s, i):
|
||||||
|
@ -544,7 +556,6 @@ class PSBaseParser(object):
|
||||||
else:
|
else:
|
||||||
token = HEX_PAIR.sub(lambda m: bytes([int(m.group(0), 16)]),
|
token = HEX_PAIR.sub(lambda m: bytes([int(m.group(0), 16)]),
|
||||||
SPC.sub(b'', self.token))
|
SPC.sub(b'', self.token))
|
||||||
|
|
||||||
self.add_token(token)
|
self.add_token(token)
|
||||||
return (self.parse_main, j)
|
return (self.parse_main, j)
|
||||||
|
|
||||||
|
@ -1591,7 +1602,13 @@ class PDFDocument(object):
|
||||||
|
|
||||||
def initialize_ebx_ignoble(self, keyb64, docid, param):
|
def initialize_ebx_ignoble(self, keyb64, docid, param):
|
||||||
self.is_printable = self.is_modifiable = self.is_extractable = True
|
self.is_printable = self.is_modifiable = self.is_extractable = True
|
||||||
|
|
||||||
|
try:
|
||||||
key = keyb64.decode('base64')[:16]
|
key = keyb64.decode('base64')[:16]
|
||||||
|
# This will probably always raise an error, but I'm not 100% sure, so let's leave the old code in.
|
||||||
|
except AttributeError:
|
||||||
|
key = codecs.decode(keyb64.encode("ascii"), 'base64')[:16]
|
||||||
|
|
||||||
|
|
||||||
length = int_value(param.get('Length', 0)) / 8
|
length = int_value(param.get('Length', 0)) / 8
|
||||||
rights = codecs.decode(str_value(param.get('ADEPT_LICENSE')), "base64")
|
rights = codecs.decode(str_value(param.get('ADEPT_LICENSE')), "base64")
|
||||||
|
@ -2225,11 +2242,7 @@ class PDFSerializer(object):
|
||||||
elif isinstance(obj, bytearray):
|
elif isinstance(obj, bytearray):
|
||||||
self.write(b'(%s)' % self.escape_string(obj))
|
self.write(b'(%s)' % self.escape_string(obj))
|
||||||
elif isinstance(obj, bytes):
|
elif isinstance(obj, bytes):
|
||||||
# I'm not 100% sure if this is correct, but it seems to fix some PDFs ...
|
|
||||||
# If needed, revert that change.
|
|
||||||
self.write(b'<%s>' % binascii.hexlify(obj).upper())
|
self.write(b'<%s>' % binascii.hexlify(obj).upper())
|
||||||
print("ineptpdf.py: Unknown bytes element found - guessing.")
|
|
||||||
print("If this PDF is corrupted and/or doesn't work, please open a bug report.")
|
|
||||||
elif isinstance(obj, str):
|
elif isinstance(obj, str):
|
||||||
self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
|
self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
|
||||||
elif isinstance(obj, bool):
|
elif isinstance(obj, bool):
|
||||||
|
|
Loading…
Reference in New Issue