ineptpdf 7.2
This commit is contained in:
parent
4f19f5ac11
commit
b92458c8c2
207
ineptpdf.pyw
207
ineptpdf.pyw
|
@ -1,7 +1,7 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
|
|
||||||
# ineptpdf7.pyw
|
# ineptpdf72.pyw
|
||||||
# ineptpdf, version 7
|
# ineptpdf, version 7.2
|
||||||
|
|
||||||
# To run this program install Python 2.6 from http://www.python.org/download/
|
# To run this program install Python 2.6 from http://www.python.org/download/
|
||||||
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
|
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
|
||||||
|
@ -17,9 +17,12 @@
|
||||||
# 6 - changed to adeptkey4.der format for 1.7.2 support (anon)
|
# 6 - changed to adeptkey4.der format for 1.7.2 support (anon)
|
||||||
# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
|
# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
|
||||||
# 7 - Get cross reference streams and object streams working for input.
|
# 7 - Get cross reference streams and object streams working for input.
|
||||||
# Not yet supported on output but this only affects file size,
|
# Not yet supported on output but this only affects file size,
|
||||||
# not functionality. (by anon2)
|
# not functionality. (by anon2)
|
||||||
|
# 7.1 - Correct a problem when an old trailer is not followed by startxref
|
||||||
|
# 7.2 - Correct malformed Mac OS resource forks for Stanza
|
||||||
|
# - Support for cross ref streams on output (decreases file size)
|
||||||
|
#
|
||||||
"""
|
"""
|
||||||
Decrypt Adobe ADEPT-encrypted PDF files.
|
Decrypt Adobe ADEPT-encrypted PDF files.
|
||||||
"""
|
"""
|
||||||
|
@ -56,6 +59,15 @@ except ImportError:
|
||||||
class ADEPTError(Exception):
|
class ADEPTError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Do we generate cross reference streams on output?
|
||||||
|
# 0 = never
|
||||||
|
# 1 = only if present in input
|
||||||
|
# 2 = always
|
||||||
|
|
||||||
|
GEN_XREF_STM = 1
|
||||||
|
|
||||||
|
# This is the value for the current document
|
||||||
|
gen_xref_stm = False # will be set in PDFSerializer
|
||||||
|
|
||||||
###
|
###
|
||||||
### ASN.1 parsing code from tlslite
|
### ASN.1 parsing code from tlslite
|
||||||
|
@ -298,6 +310,7 @@ END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
|
||||||
END_STRING = re.compile(r'[()\134]')
|
END_STRING = re.compile(r'[()\134]')
|
||||||
OCT_STRING = re.compile(r'[0-7]')
|
OCT_STRING = re.compile(r'[0-7]')
|
||||||
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
||||||
|
|
||||||
class PSBaseParser(object):
|
class PSBaseParser(object):
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
@ -644,7 +657,7 @@ class PSStackParser(PSBaseParser):
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
return
|
return
|
||||||
|
|
||||||
def nextobject(self):
|
def nextobject(self, direct=False):
|
||||||
'''
|
'''
|
||||||
Yields a list of objects: keywords, literals, strings,
|
Yields a list of objects: keywords, literals, strings,
|
||||||
numbers, arrays and dictionaries. Arrays and dictionaries
|
numbers, arrays and dictionaries. Arrays and dictionaries
|
||||||
|
@ -689,6 +702,8 @@ class PSStackParser(PSBaseParser):
|
||||||
if self.context:
|
if self.context:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
if direct:
|
||||||
|
return self.pop(1)[0]
|
||||||
self.flush()
|
self.flush()
|
||||||
obj = self.results.pop(0)
|
obj = self.results.pop(0)
|
||||||
return obj
|
return obj
|
||||||
|
@ -714,13 +729,13 @@ class PDFNotImplementedError(PSException): pass
|
||||||
##
|
##
|
||||||
class PDFObjRef(PDFObject):
|
class PDFObjRef(PDFObject):
|
||||||
|
|
||||||
def __init__(self, doc, objid, _):
|
def __init__(self, doc, objid, genno):
|
||||||
if objid == 0:
|
if objid == 0:
|
||||||
if STRICT:
|
if STRICT:
|
||||||
raise PDFValueError('PDF object id cannot be 0.')
|
raise PDFValueError('PDF object id cannot be 0.')
|
||||||
self.doc = doc
|
self.doc = doc
|
||||||
self.objid = objid
|
self.objid = objid
|
||||||
#self.genno = genno # Never used.
|
self.genno = genno
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
@ -863,6 +878,7 @@ class PDFStream(PDFObject):
|
||||||
self.rawdata = rawdata
|
self.rawdata = rawdata
|
||||||
self.decipher = decipher
|
self.decipher = decipher
|
||||||
self.data = None
|
self.data = None
|
||||||
|
self.decdata = None
|
||||||
self.objid = None
|
self.objid = None
|
||||||
self.genno = None
|
self.genno = None
|
||||||
return
|
return
|
||||||
|
@ -873,8 +889,12 @@ class PDFStream(PDFObject):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<PDFStream(%r): raw=%d, %r>' % \
|
if self.rawdata:
|
||||||
(self.objid, len(self.rawdata), self.dic)
|
return '<PDFStream(%r): raw=%d, %r>' % \
|
||||||
|
(self.objid, len(self.rawdata), self.dic)
|
||||||
|
else:
|
||||||
|
return '<PDFStream(%r): data=%d, %r>' % \
|
||||||
|
(self.objid, len(self.data), self.dic)
|
||||||
|
|
||||||
def decode(self):
|
def decode(self):
|
||||||
assert self.data is None and self.rawdata is not None
|
assert self.data is None and self.rawdata is not None
|
||||||
|
@ -882,6 +902,8 @@ class PDFStream(PDFObject):
|
||||||
if self.decipher:
|
if self.decipher:
|
||||||
# Handle encryption
|
# Handle encryption
|
||||||
data = self.decipher(self.objid, self.genno, data)
|
data = self.decipher(self.objid, self.genno, data)
|
||||||
|
if gen_xref_stm:
|
||||||
|
self.decdata = data # keep decrypted data
|
||||||
if 'Filter' not in self.dic:
|
if 'Filter' not in self.dic:
|
||||||
self.data = data
|
self.data = data
|
||||||
self.rawdata = None
|
self.rawdata = None
|
||||||
|
@ -940,13 +962,8 @@ class PDFStream(PDFObject):
|
||||||
return self.rawdata
|
return self.rawdata
|
||||||
|
|
||||||
def get_decdata(self):
|
def get_decdata(self):
|
||||||
if self.data is not None:
|
if self.decdata is not None:
|
||||||
# Data has already been decrypted and decoded. This is the case
|
return self.decdata
|
||||||
# for object streams. Note: this data is wrong to put in the
|
|
||||||
# output because it should be stored decrypted but
|
|
||||||
# uncompressed. This can be done by storing the intermediate
|
|
||||||
# data. For now object streams are useless in the output.
|
|
||||||
return self.data
|
|
||||||
data = self.rawdata
|
data = self.rawdata
|
||||||
if self.decipher and data:
|
if self.decipher and data:
|
||||||
# Handle encryption
|
# Handle encryption
|
||||||
|
@ -1024,7 +1041,7 @@ class PDFXRef(object):
|
||||||
try:
|
try:
|
||||||
(_,kwd) = parser.nexttoken()
|
(_,kwd) = parser.nexttoken()
|
||||||
assert kwd is self.KEYWORD_TRAILER
|
assert kwd is self.KEYWORD_TRAILER
|
||||||
(_,dic) = parser.nextobject()
|
(_,dic) = parser.nextobject(direct=True)
|
||||||
except PSEOF:
|
except PSEOF:
|
||||||
x = parser.pop(1)
|
x = parser.pop(1)
|
||||||
if not x:
|
if not x:
|
||||||
|
@ -1138,6 +1155,7 @@ class PDFDocument(object):
|
||||||
for xref in self.xrefs:
|
for xref in self.xrefs:
|
||||||
trailer = xref.trailer
|
trailer = xref.trailer
|
||||||
if not trailer: continue
|
if not trailer: continue
|
||||||
|
|
||||||
# If there's encryption info, remember it.
|
# If there's encryption info, remember it.
|
||||||
if 'Encrypt' in trailer:
|
if 'Encrypt' in trailer:
|
||||||
#assert not self.encryption
|
#assert not self.encryption
|
||||||
|
@ -1315,9 +1333,9 @@ class PDFDocument(object):
|
||||||
# raise PDFSyntaxError('Cannot locate objid=%r' % objid)
|
# raise PDFSyntaxError('Cannot locate objid=%r' % objid)
|
||||||
return None
|
return None
|
||||||
if stmid:
|
if stmid:
|
||||||
# Later try to introduce PDFObjStmRef's
|
if gen_xref_stm:
|
||||||
# return PDFObjStmRef(objid, stmid, index)
|
return PDFObjStmRef(objid, stmid, index)
|
||||||
# Stuff from pdfminer
|
# Stuff from pdfminer: extract objects from object stream
|
||||||
stream = stream_value(self.getobj(stmid))
|
stream = stream_value(self.getobj(stmid))
|
||||||
if stream.dic.get('Type') is not LITERAL_OBJSTM:
|
if stream.dic.get('Type') is not LITERAL_OBJSTM:
|
||||||
if STRICT:
|
if STRICT:
|
||||||
|
@ -1368,10 +1386,13 @@ class PDFDocument(object):
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
class PDFObjStmRef(object):
|
class PDFObjStmRef(object):
|
||||||
|
maxindex = 0
|
||||||
def __init__(self, objid, stmid, index):
|
def __init__(self, objid, stmid, index):
|
||||||
self.objid = objid
|
self.objid = objid
|
||||||
self.stmid = stmid
|
self.stmid = stmid
|
||||||
self.index = index
|
self.index = index
|
||||||
|
if index > PDFObjStmRef.maxindex:
|
||||||
|
PDFObjStmRef.maxindex = index
|
||||||
|
|
||||||
|
|
||||||
## PDFParser
|
## PDFParser
|
||||||
|
@ -1477,6 +1498,9 @@ class PDFParser(PSStackParser):
|
||||||
raise PDFNoValidXRef('Unexpected EOF')
|
raise PDFNoValidXRef('Unexpected EOF')
|
||||||
if isinstance(token, int):
|
if isinstance(token, int):
|
||||||
# XRefStream: PDF-1.5
|
# XRefStream: PDF-1.5
|
||||||
|
if GEN_XREF_STM == 1:
|
||||||
|
global gen_xref_stm
|
||||||
|
gen_xref_stm = True
|
||||||
self.seek(pos)
|
self.seek(pos)
|
||||||
self.reset()
|
self.reset()
|
||||||
xref = PDFXRefStream()
|
xref = PDFXRefStream()
|
||||||
|
@ -1562,6 +1586,8 @@ class PDFObjStrmParser(PDFParser):
|
||||||
|
|
||||||
class PDFSerializer(object):
|
class PDFSerializer(object):
|
||||||
def __init__(self, inf, keypath):
|
def __init__(self, inf, keypath):
|
||||||
|
global GEN_XREF_STM, gen_xref_stm
|
||||||
|
gen_xref_stm = GEN_XREF_STM > 1
|
||||||
self.version = inf.read(8)
|
self.version = inf.read(8)
|
||||||
inf.seek(0)
|
inf.seek(0)
|
||||||
self.doc = doc = PDFDocument()
|
self.doc = doc = PDFDocument()
|
||||||
|
@ -1586,62 +1612,93 @@ class PDFSerializer(object):
|
||||||
doc = self.doc
|
doc = self.doc
|
||||||
objids = self.objids
|
objids = self.objids
|
||||||
xrefs = {}
|
xrefs = {}
|
||||||
xrefstm = {}
|
|
||||||
maxobj = max(objids)
|
maxobj = max(objids)
|
||||||
trailer = dict(self.trailer)
|
trailer = dict(self.trailer)
|
||||||
trailer['Size'] = maxobj + 1
|
trailer['Size'] = maxobj + 1
|
||||||
for objid in objids:
|
for objid in objids:
|
||||||
obj = doc.getobj(objid)
|
obj = doc.getobj(objid)
|
||||||
if isinstance(obj, PDFObjStmRef):
|
if isinstance(obj, PDFObjStmRef):
|
||||||
xrefstm[objid] = obj
|
xrefs[objid] = obj
|
||||||
continue
|
continue
|
||||||
if obj is not None:
|
if obj is not None:
|
||||||
xrefs[objid] = self.tell()
|
try:
|
||||||
self.serialize_indirect(objid, obj)
|
genno = obj.genno
|
||||||
|
except AttributeError:
|
||||||
|
genno = 0
|
||||||
|
xrefs[objid] = (self.tell(), genno)
|
||||||
|
self.serialize_indirect(objid, genno, obj)
|
||||||
startxref = self.tell()
|
startxref = self.tell()
|
||||||
self.write('xref\n')
|
|
||||||
self.write('0 %d\n' % (maxobj + 1,))
|
if not gen_xref_stm:
|
||||||
for objid in xrange(0, maxobj + 1):
|
self.write('xref\n')
|
||||||
if objid in xrefs:
|
self.write('0 %d\n' % (maxobj + 1,))
|
||||||
self.write("%010d %05d n \n" % (xrefs[objid], 0))
|
for objid in xrange(0, maxobj + 1):
|
||||||
else:
|
if objid in xrefs:
|
||||||
self.write("%010d %05d f \n" % (0, 65535))
|
self.write("%010d %05d n \n" % xrefs[objid])
|
||||||
self.write('trailer\n')
|
else:
|
||||||
self.serialize_object(trailer)
|
self.write("%010d %05d f \n" % (0, 65535))
|
||||||
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
|
|
||||||
if not xrefstm:
|
self.write('trailer\n')
|
||||||
return
|
self.serialize_object(trailer)
|
||||||
index = []
|
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
|
||||||
first = None
|
|
||||||
prev = None
|
else: # Generate crossref stream.
|
||||||
data = []
|
|
||||||
for objid in sorted(xrefstm):
|
# Calculate size of entries
|
||||||
if first is None:
|
maxoffset = max(startxref, maxobj)
|
||||||
first = objid
|
maxindex = PDFObjStmRef.maxindex
|
||||||
elif objid != prev + 1:
|
# TODO - max genno should also be taken into account
|
||||||
index.extend((first, prev - first + 1))
|
fl2 = 2
|
||||||
first = objid
|
power = 65536
|
||||||
prev = objid
|
while maxoffset >= power:
|
||||||
stmid = xrefstm[objid].stmid
|
fl2 += 1
|
||||||
data.append(struct.pack('>BHB', 2, stmid, 0))
|
power *= 256
|
||||||
index.extend((first, prev - first + 1))
|
fl3 = 1
|
||||||
data = zlib.compress(''.join(data))
|
power = 256
|
||||||
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
|
while maxindex >= power:
|
||||||
'W': [1, 2, 1], 'Length': len(data), 'Prev': startxref,
|
fl3 += 1
|
||||||
'Filter': LITERALS_FLATE_DECODE[0],}
|
power *= 256
|
||||||
obj = PDFStream(dic, data)
|
|
||||||
self.write('\n')
|
index = []
|
||||||
trailer['XRefStm'] = startxrefstm = self.tell()
|
first = None
|
||||||
self.serialize_indirect(maxobj + 1, obj)
|
prev = None
|
||||||
trailer['Prev'] = startxref
|
data = []
|
||||||
startxref = self.tell()
|
for objid in sorted(xrefs):
|
||||||
self.write('xref\n')
|
if first is None:
|
||||||
self.write('%d 1\n' % (maxobj + 1,))
|
first = objid
|
||||||
self.write("%010d %05d n \n" % (startxrefstm, 0))
|
elif objid != prev + 1:
|
||||||
self.write('trailer\n')
|
index.extend((first, prev - first + 1))
|
||||||
self.serialize_object(trailer)
|
first = objid
|
||||||
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
|
prev = objid
|
||||||
|
objref = xrefs[objid]
|
||||||
|
if isinstance(objref, PDFObjStmRef):
|
||||||
|
f1 = 2
|
||||||
|
f2 = objref.stmid
|
||||||
|
f3 = objref.index
|
||||||
|
else:
|
||||||
|
f1 = 1
|
||||||
|
f2 = objref[0]
|
||||||
|
f3 = objref[1]
|
||||||
|
|
||||||
|
data.append(struct.pack('>B', f1))
|
||||||
|
data.append(struct.pack('>L', f2)[-fl2:])
|
||||||
|
data.append(struct.pack('>L', f3)[-fl3:])
|
||||||
|
index.extend((first, prev - first + 1))
|
||||||
|
data = zlib.compress(''.join(data))
|
||||||
|
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
|
||||||
|
'W': [1, fl2, fl3], 'Length': len(data),
|
||||||
|
'Filter': LITERALS_FLATE_DECODE[0],}
|
||||||
|
obj = PDFStream(dic, data)
|
||||||
|
trailer['XRefStm'] = startxrefstm = self.tell()
|
||||||
|
self.serialize_indirect(maxobj + 1, 0, obj)
|
||||||
|
startxref = self.tell()
|
||||||
|
self.write('xref\n')
|
||||||
|
self.write('%d 1\n' % (maxobj + 1,))
|
||||||
|
self.write("%010d %05d n \n" % (startxrefstm, 0))
|
||||||
|
self.write('trailer\n')
|
||||||
|
self.serialize_object(trailer)
|
||||||
|
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
|
||||||
|
|
||||||
def write(self, data):
|
def write(self, data):
|
||||||
self.outf.write(data)
|
self.outf.write(data)
|
||||||
self.last = data[-1:]
|
self.last = data[-1:]
|
||||||
|
@ -1661,6 +1718,12 @@ class PDFSerializer(object):
|
||||||
|
|
||||||
def serialize_object(self, obj):
|
def serialize_object(self, obj):
|
||||||
if isinstance(obj, dict):
|
if isinstance(obj, dict):
|
||||||
|
# Correct malformed Mac OS resource forks for Stanza
|
||||||
|
if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
|
||||||
|
and isinstance(obj['Type'], int):
|
||||||
|
obj['Subtype'] = obj['Type']
|
||||||
|
del obj['Type']
|
||||||
|
# end - hope this doesn't have bad effects
|
||||||
self.write('<<')
|
self.write('<<')
|
||||||
for key, val in obj.items():
|
for key, val in obj.items():
|
||||||
self.write('/%s' % key)
|
self.write('/%s' % key)
|
||||||
|
@ -1690,8 +1753,8 @@ class PDFSerializer(object):
|
||||||
### so we don't need these any more. Therefore leave them out
|
### so we don't need these any more. Therefore leave them out
|
||||||
### of the output. Later we could try to use object streams in
|
### of the output. Later we could try to use object streams in
|
||||||
### the output again to get smaller output.
|
### the output again to get smaller output.
|
||||||
if obj.dic.get('Type') == LITERAL_OBJSTM:
|
if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
|
||||||
self.write('(deleted)')
|
self.write('(deleted)')
|
||||||
else:
|
else:
|
||||||
data = obj.get_decdata()
|
data = obj.get_decdata()
|
||||||
self.serialize_object(obj.dic)
|
self.serialize_object(obj.dic)
|
||||||
|
@ -1704,8 +1767,8 @@ class PDFSerializer(object):
|
||||||
self.write(' ')
|
self.write(' ')
|
||||||
self.write(data)
|
self.write(data)
|
||||||
|
|
||||||
def serialize_indirect(self, objid, obj):
|
def serialize_indirect(self, objid, genno, obj):
|
||||||
self.write('%d 0 obj' % (objid,))
|
self.write('%d %d obj' % (objid, genno))
|
||||||
self.serialize_object(obj)
|
self.serialize_object(obj)
|
||||||
if self.last.isalnum():
|
if self.last.isalnum():
|
||||||
self.write('\n')
|
self.write('\n')
|
||||||
|
@ -1843,4 +1906,4 @@ def gui_main():
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
sys.exit(cli_main())
|
sys.exit(cli_main())
|
||||||
sys.exit(gui_main())
|
sys.exit(gui_main())
|
Loading…
Reference in New Issue