DeDRM_tools/dedrm_src/ineptpdf.py

#! /usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import with_statement

# ineptpdf.pyw, version 8.0.6
# Copyright © 2009-2010 by i♥cabbages

# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>

# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf
# Modified 2015-2017 by Apprentice Harper

# Windows users: Before running this program, you must first install Python 2.7
#   from <http://www.python.org/download/> and PyCrypto from
#   <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
#   install the version for Python 2.7).  Save this script file as
#   ineptpdf.pyw and double-click on it to run it.
#
# Mac OS X users: Save this script file as ineptpdf.pyw.  You can run this
#   program from the command line (pythonw ineptpdf.pyw) or by double-clicking
#   it when it has been associated with PythonLauncher.

# Revision history:
#   1 - Initial release
#   2 - Improved determination of key-generation algorithm
#   3 - Correctly handle PDF >=1.5 cross-reference streams
#   4 - Removal of ciando's personal ID
#   5 - Automated decryption of a complete directory
#   6.1 - backward compatibility for 1.7.1 and old adeptkey.der
#   7 - Get cross reference streams and object streams working for input.
#       Not yet supported on output but this only affects file size,
#       not functionality. (anon2)
#   7.1 - Correct a problem when an old trailer is not followed by startxref
#   7.2 - Correct malformed Mac OS resource forks for Stanza (anon2)
#       - Support for cross ref streams on output (decreases file size)
#   7.3 - Correct bug in trailer with cross ref stream that caused the error
#         "The root object is missing or invalid" in Adobe Reader. (anon2)
#   7.4 - Force all generation numbers in output file to be 0, like in v6.
#         Fallback code for wrong xref improved (search till last trailer
#         instead of first) (anon2)
#   7.5 - allow support for OpenSSL to replace pycrypto on all platforms
#         implemented ARC4 interface to OpenSSL
#         fixed minor typos
#   7.6 - backported AES and other fixes from version 8.4.48
#   7.7 - On Windows try PyCrypto first and OpenSSL next
#   7.8 - Modify interface to allow use of import
#   7.9 - Bug fix for some session key errors when len(bookkey) > length required
#   7.10 - Various tweaks to fix minor problems.
#   7.11 - More tweaks to fix minor problems.
#   7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code
#   7.13 - Fixed erroneous mentions of ineptepub
#   7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility
#   8.0  - Work if TkInter is missing
#   8.0.1 - Broken Metadata fix.
#   8.0.2 - Add additional check on DER file sanity
#   8.0.3 - Remove erroneous check on DER file sanity
#   8.0.4 - Completely remove erroneous check on DER file sanity
#   8.0.5 - Do not process DRM-free documents
#   8.0.6 - Replace use of float by Decimal for greater precision, and import tkFileDialog


"""
Decrypts Adobe ADEPT-encrypted PDF files.
"""

__license__ = 'GPL v3'
__version__ = "8.0.6"

import sys
import os
import re
import zlib
import struct
import hashlib
from decimal import *
from itertools import chain, islice
import xml.etree.ElementTree as etree

# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
# encoded using "replace" before writing them.
class SafeUnbuffered:
    '''
    Thin proxy around a file-like object that flushes after every write
    and encodes unicode text (with "replace" error handling) before
    handing it to the wrapped stream.
    '''

    def __init__(self, stream):
        # self.stream must be assigned first: __getattr__ below relies on it.
        self.stream = stream
        declared = stream.encoding
        # Streams without a declared encoding fall back to utf-8.
        self.encoding = "utf-8" if declared is None else declared

    def write(self, data):
        '''Write data to the wrapped stream and flush it immediately.'''
        payload = data
        if isinstance(payload, unicode):
            payload = payload.encode(self.encoding, "replace")
        target = self.stream
        target.write(payload)
        target.flush()

    def __getattr__(self, attr):
        # Any attribute not found on the wrapper is looked up on the stream.
        return getattr(self.stream, attr)

iswindows = sys.platform.startswith('win')
isosx = sys.platform.startswith('darwin')

def unicode_argv():
    '''
    Return the command-line arguments as a list of unicode strings.

    On non-Windows platforms sys.argv is decoded with sys.stdin's encoding
    (utf-8 when that is not set).  On Windows the arguments are re-read
    through the Win32 API, see below.
    '''
    if not iswindows:
        codec = sys.stdin.encoding
        if codec is None:
            codec = "utf-8"
        decoded = []
        for arg in sys.argv:
            if type(arg) == unicode:
                decoded.append(arg)
            else:
                decoded.append(unicode(arg, codec))
        return decoded

    # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
    # strings.
    #
    # Versions 2.x of Python don't support Unicode in sys.argv on
    # Windows, with the underlying Windows API instead replacing multi-byte
    # characters with '?'.
    from ctypes import POINTER, byref, cdll, c_int, windll
    from ctypes.wintypes import LPCWSTR, LPWSTR

    GetCommandLineW = cdll.kernel32.GetCommandLineW
    GetCommandLineW.argtypes = []
    GetCommandLineW.restype = LPCWSTR

    CommandLineToArgvW = windll.shell32.CommandLineToArgvW
    CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
    CommandLineToArgvW.restype = POINTER(LPWSTR)

    argc = c_int(0)
    argv = CommandLineToArgvW(GetCommandLineW(), byref(argc))
    if argc.value <= 0:
        return [u"ineptpdf.py"]
    # Remove Python executable and commands if present: keep only as many
    # trailing entries as sys.argv itself holds.
    first = argc.value - len(sys.argv)
    return [argv[idx] for idx in xrange(first, argc.value)]


class ADEPTError(Exception):
    '''Error type raised by this module's crypto helpers.'''


import hashlib

def SHA256(message):
    '''Return the raw (binary) SHA-256 digest of message.'''
    return hashlib.sha256(message).digest()


def _load_crypto_libcrypto():
    '''
    Build ARC4, RSA and AES helper classes on top of the OpenSSL libcrypto
    shared library, accessed through ctypes.

    Returns a tuple (ARC4, RSA, AES) of classes.
    Raises ADEPTError when the shared library cannot be located.
    '''
    from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
        Structure, c_ulong, create_string_buffer, cast
    from ctypes.util import find_library

    # The library is looked up as 'libeay32' on Windows, 'crypto' elsewhere.
    if sys.platform.startswith('win'):
        libcrypto = find_library('libeay32')
    else:
        libcrypto = find_library('crypto')

    if libcrypto is None:
        raise ADEPTError('libcrypto not found')
    libcrypto = CDLL(libcrypto)

    AES_MAXNR = 14

    RSA_NO_PADDING = 3

    c_char_pp = POINTER(c_char_p)
    c_int_p = POINTER(c_int)

    class AES_KEY(Structure):
        _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
    AES_KEY_p = POINTER(AES_KEY)

    class RC4_KEY(Structure):
        _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)]
    RC4_KEY_p = POINTER(RC4_KEY)

    # Opaque handle type; only ever used through a pointer.  Named
    # differently from the Python wrapper class RSA defined further down so
    # the two are not confused.
    class RSA_STRUCT(Structure):
        pass
    RSA_p = POINTER(RSA_STRUCT)

    def F(restype, name, argtypes):
        '''Look up a libcrypto function and attach its ctypes signature.'''
        func = getattr(libcrypto, name)
        func.restype = restype
        func.argtypes = argtypes
        return func

    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])

    RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p])
    RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p])

    d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
                          [RSA_p, c_char_pp, c_long])
    RSA_size = F(c_int, 'RSA_size', [RSA_p])
    RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
                            [c_int, c_char_p, c_char_p, RSA_p, c_int])
    RSA_free = F(None, 'RSA_free', [RSA_p])

    class RSA(object):
        '''RSA private key loaded from a DER blob; decrypts without padding.'''

        def __init__(self, der):
            # Set first so that __del__ is safe even if parsing fails below.
            self._rsa = None
            buf = create_string_buffer(der)
            pp = c_char_pp(cast(buf, c_char_p))
            rsa = d2i_RSAPrivateKey(None, pp, len(der))
            # A NULL result comes back from ctypes as a falsy pointer
            # object, not as None, so it must be tested for truth.
            if not rsa:
                raise ADEPTError('Error parsing ADEPT user key DER')
            self._rsa = rsa

        def decrypt(self, from_):
            '''
            Decrypt from_ with the private key (RSA_NO_PADDING).
            Returns bytes 1..dlen of the result, i.e. without its first byte.
            Raises ADEPTError if OpenSSL reports a failure.
            '''
            rsa = self._rsa
            to = create_string_buffer(RSA_size(rsa))
            dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
                                       RSA_NO_PADDING)
            if dlen < 0:
                raise ADEPTError('RSA decryption failed')
            return to[1:dlen]

        def __del__(self):
            # getattr guards against a partially constructed instance.
            rsa = getattr(self, '_rsa', None)
            if rsa:
                RSA_free(rsa)
                self._rsa = None

    class ARC4(object):
        '''RC4 stream cipher; create instances with ARC4.new(key).'''

        @classmethod
        def new(cls, userkey):
            self = ARC4()
            self._blocksize = len(userkey)
            key = self._key = RC4_KEY()
            RC4_set_key(key, self._blocksize, userkey)
            return self
        def __init__(self):
            self._blocksize = 0
            self._key = None
        def decrypt(self, data):
            out = create_string_buffer(len(data))
            RC4_crypt(self._key, len(data), data, out)
            return out.raw

    class AES(object):
        '''AES-CBC decryptor; create instances with AES.new(key, mode, iv).'''

        MODE_CBC = 0
        @classmethod
        def new(cls, userkey, mode, iv):
            self = AES()
            self._blocksize = len(userkey)
            # mode is ignored since CBCMODE is only thing supported/used so far
            self._mode = mode
            if self._blocksize not in (16, 24, 32):
                raise ADEPTError('AES improper key used')
            keyctx = self._keyctx = AES_KEY()
            # AES_cbc_encrypt updates the IV buffer it is given.  Keep a
            # private mutable copy so the caller's (immutable) string is
            # never written to by the C library.
            self._iv = create_string_buffer(iv, len(iv))
            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
            if rv < 0:
                raise ADEPTError('Failed to initialize AES key')
            return self
        def __init__(self):
            self._blocksize = 0
            self._keyctx = None
            self._iv = 0
            self._mode = 0
        def decrypt(self, data):
            out = create_string_buffer(len(data))
            # AES_cbc_encrypt is declared with no return value (restype
            # None above), so there is no status code to check here.
            AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0)
            return out.raw

    return (ARC4, RSA, AES)


def _load_crypto_pycrypto():
    '''
    Build ARC4, RSA and AES helper classes on top of PyCrypto.

    Returns a tuple (ARC4, RSA, AES) of classes.  The Crypto imports below
    raise ImportError when PyCrypto is not installed.
    '''
    from Crypto.PublicKey import RSA as _RSA
    from Crypto.Cipher import ARC4 as _ARC4
    from Crypto.Cipher import AES as _AES

    # ASN.1 parsing code from tlslite
    class ASN1Error(Exception):
        pass

    class ASN1Parser(object):
        # Parses one ASN.1 element (tag, length, value) from a sequence of
        # integer byte values; see RSA.__init__ below for how it is fed.
        class Parser(object):
            # Cursor over a sequence of integer byte values.
            def __init__(self, bytes):
                self.bytes = bytes
                self.index = 0

            def get(self, length):
                # Read `length` bytes as one big-endian unsigned integer and
                # advance the cursor; fails if that would run past the end.
                if self.index + length > len(self.bytes):
                    raise ASN1Error("Error decoding ASN.1")
                x = 0
                for count in range(length):
                    x <<= 8
                    x |= self.bytes[self.index]
                    self.index += 1
                return x

            def getFixBytes(self, lengthBytes):
                # Return the next `lengthBytes` items as a slice and advance.
                # Note: no bounds check here; a short slice is returned as-is.
                bytes = self.bytes[self.index : self.index+lengthBytes]
                self.index += lengthBytes
                return bytes

            def getVarBytes(self, lengthLength):
                # Length-prefixed read: the length itself is `lengthLength`
                # bytes wide.
                lengthBytes = self.get(lengthLength)
                return self.getFixBytes(lengthBytes)

            def getFixList(self, length, lengthList):
                # Read `lengthList` integers, each `length` bytes wide.
                l = [0] * lengthList
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l

            def getVarList(self, length, lengthLength):
                # Like getFixList, but the total byte count is read first and
                # must be a multiple of the item width.
                lengthList = self.get(lengthLength)
                if lengthList % length != 0:
                    raise ASN1Error("Error decoding ASN.1")
                lengthList = int(lengthList/length)
                l = [0] * lengthList
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l

            def startLengthCheck(self, lengthLength):
                # Remember an expected length (read from the input) and the
                # position at which counting starts.
                self.lengthCheck = self.get(lengthLength)
                self.indexCheck = self.index

            def setLengthCheck(self, length):
                # Same as startLengthCheck, with the length given by the caller.
                self.lengthCheck = length
                self.indexCheck = self.index

            def stopLengthCheck(self):
                # Fail unless exactly the expected number of bytes was consumed.
                if (self.index - self.indexCheck) != self.lengthCheck:
                    raise ASN1Error("Error decoding ASN.1")

            def atLengthCheck(self):
                # False while bytes remain, True when exactly at the expected
                # end, error when the cursor has overshot.
                if (self.index - self.indexCheck) < self.lengthCheck:
                    return False
                elif (self.index - self.indexCheck) == self.lengthCheck:
                    return True
                else:
                    raise ASN1Error("Error decoding ASN.1")

        def __init__(self, bytes):
            p = self.Parser(bytes)
            # Skip the one-byte tag; only length and value are kept.
            p.get(1)
            self.length = self._getASN1Length(p)
            self.value = p.getFixBytes(self.length)

        def getChild(self, which):
            # Walk the elements nested in self.value one after another and
            # return a parser for element number `which` (0-based).
            p = self.Parser(self.value)
            for x in range(which+1):
                markIndex = p.index
                p.get(1)
                length = self._getASN1Length(p)
                p.getFixBytes(length)
            return ASN1Parser(p.bytes[markIndex:p.index])

        def _getASN1Length(self, p):
            # Values up to 127 are the length itself; otherwise the low seven
            # bits give the number of following bytes that hold the length.
            firstLength = p.get(1)
            if firstLength<=127:
                return firstLength
            else:
                lengthLength = firstLength & 0x7F
                return p.get(lengthLength)

    class ARC4(object):
        # Wrapper giving PyCrypto's ARC4 the interface used by this module.
        @classmethod
        def new(cls, userkey):
            self = ARC4()
            self._arc4 = _ARC4.new(userkey)
            return self
        def __init__(self):
            self._arc4 = None
        def decrypt(self, data):
            return self._arc4.decrypt(data)

    class AES(object):
        # Wrapper giving PyCrypto's AES the interface used by this module.
        MODE_CBC = _AES.MODE_CBC
        @classmethod
        def new(cls, userkey, mode, iv):
            self = AES()
            self._aes = _AES.new(userkey, mode, iv)
            return self
        def __init__(self):
            self._aes = None
        def decrypt(self, data):
            return self._aes.decrypt(data)

    class RSA(object):
        def __init__(self, der):
            # Turn the DER byte string into a list of integer byte values.
            key = ASN1Parser([ord(x) for x in der])
            # Children 1..3 of the outer element are used as key material
            # (presumably modulus, public exponent and private exponent of
            # an RSAPrivateKey structure -- TODO confirm).
            key = [key.getChild(x).value for x in xrange(1, 4)]
            key = [self.bytesToNumber(v) for v in key]
            self._rsa = _RSA.construct(key)

        def bytesToNumber(self, bytes):
            # Big-endian conversion of a sequence of byte values to a long.
            total = 0L
            for byte in bytes:
                total = (total << 8) + byte
            return total

        def decrypt(self, data):
            return self._rsa.decrypt(data)

    return (ARC4, RSA, AES)

def _load_crypto():
    '''
    Try the available crypto back ends in order and return the
    (ARC4, RSA, AES) classes of the first one that loads.

    libcrypto is tried first, then PyCrypto; on Windows the order is
    reversed.  If no back end loads, (None, None, None) is returned.
    '''
    loaders = (_load_crypto_libcrypto, _load_crypto_pycrypto)
    if sys.platform.startswith('win'):
        loaders = (_load_crypto_pycrypto, _load_crypto_libcrypto)
    for load in loaders:
        try:
            ARC4, RSA, AES = load()
        except (ImportError, ADEPTError):
            # This back end is unavailable; fall through to the next one.
            continue
        return (ARC4, RSA, AES)
    return (None, None, None)
ARC4, RSA, AES = _load_crypto()


try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO


# Do we generate cross reference streams on output?
# 0 = never
# 1 = only if present in input
# 2 = always

GEN_XREF_STM = 1

# This is the value for the current document
gen_xref_stm = False # will be set in PDFSerializer

# PDF parsing routines from pdfminer, with changes for EBX_HANDLER

#  Utilities

def choplist(n, seq):
    '''
    Generator yielding successive n-tuples taken from seq.
    Left-over items that do not fill a complete group are dropped.
    '''
    pending = []
    for element in seq:
        pending.append(element)
        if len(pending) != n:
            continue
        yield tuple(pending)
        pending = []

def nunpack(s, default=0):
    '''
    Unpacks up to 4 bytes big endian.

    s       -- byte string of length 0 to 4
    default -- value returned when s is empty

    Returns the unsigned integer encoded by s.
    Raises TypeError when s is longer than 4 bytes (the original code
    returned the exception object instead of raising it).
    '''
    length = len(s)
    if not length:
        return default
    if length > 4:
        raise TypeError('invalid length: %d' % length)
    # Left-pad with zero bytes to a full 32-bit big-endian word.
    return struct.unpack('>L', b'\x00' * (4 - length) + s)[0]


STRICT = 0


#  PS Exceptions

class PSException(Exception):
    '''Base class of the PS* exceptions below.'''


class PSEOF(PSException):
    '''End of input reached.'''


class PSSyntaxError(PSException):
    '''Syntax problem in the parsed data.'''


class PSTypeError(PSException):
    '''An object had an unexpected type.'''


class PSValueError(PSException):
    '''An object had an unexpected value.'''


#  Basic PostScript Types


# PSLiteral
class PSObject(object):
    '''Common base class of the PS/PDF object types.'''


class PSLiteral(PSObject):
    '''
    PS literals (e.g. "/Name").
    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Non-alphanumeric characters are shown as '#' plus two hex digits.
        escaped = ''.join(
            ch if ch.isalnum() else '#%02x' % ord(ch)
            for ch in self.name)
        return '/%s' % escaped

# PSKeyword
class PSKeyword(PSObject):
    '''
    PS keywords (e.g. "showpage").
    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # A keyword prints as its bare name.
        text = self.name
        return text

# PSSymbolTable
class PSSymbolTable(object):

    '''
    Symbol table that stores PSLiteral or PSKeyword.

    Each distinct name maps to exactly one object, created on first use
    by calling the class given to the constructor.
    '''

    def __init__(self, classe):
        self.dic = {}
        self.classe = classe

    def intern(self, name):
        '''Return the unique symbol object for name, creating it if needed.'''
        try:
            return self.dic[name]
        except KeyError:
            symbol = self.classe(name)
            self.dic[name] = symbol
            return symbol

# One shared symbol table per symbol kind.
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
# Short aliases for interning a literal / a keyword by name.
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
# Pre-interned keywords for the bracketing tokens.
KEYWORD_BRACE_BEGIN = KWD('{')
KEYWORD_BRACE_END = KWD('}')
KEYWORD_ARRAY_BEGIN = KWD('[')
KEYWORD_ARRAY_END = KWD(']')
KEYWORD_DICT_BEGIN = KWD('<<')
KEYWORD_DICT_END = KWD('>>')


def literal_name(x):
    '''
    Return the name of the PSLiteral x.
    For any other object: raise PSTypeError when STRICT is set,
    otherwise return str(x).
    '''
    if isinstance(x, PSLiteral):
        return x.name
    if STRICT:
        raise PSTypeError('Literal required: %r' % x)
    return str(x)

def keyword_name(x):
    '''
    Return the name of the PSKeyword x.
    For any other object: raise PSTypeError when STRICT is set,
    otherwise return str(x).
    '''
    if isinstance(x, PSKeyword):
        return x.name
    if STRICT:
        raise PSTypeError('Keyword required: %r' % x)
    return str(x)


##  PSBaseParser
##
# Character-class patterns used by the tokenizer state functions.
EOL = re.compile(r'[\r\n]')                     # a line terminator
SPC = re.compile(r'\s')                         # one whitespace character
NONSPC = re.compile(r'\S')                      # one non-whitespace character
HEX = re.compile(r'[0-9a-fA-F]')                # one hexadecimal digit
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')  # first char that ends a /Name run
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')  # first char that is neither hex nor space
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')      # two hex digits, else any single char
END_NUMBER = re.compile(r'[^0-9]')              # first non-digit
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')  # first char that ends a keyword
END_STRING = re.compile(r'[()\134]')            # parenthesis or backslash (octal 134)
OCT_STRING = re.compile(r'[0-7]')               # one octal digit
# Character following a backslash inside a string -> resulting character code.
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }

class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a state machine: self.parse1 holds the current state
    function.  Every state function takes the current buffer s and an index
    i into it, and returns a pair (next state function, next index).
    Finished tokens are appended to self.tokens as (position, object).
    '''
    BUFSIZ = 4096

    def __init__(self, fp):
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        '''Hook with no effect in this class.'''
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        '''Return the current parse position (buffer start + offset in it).'''
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        '''
        Debugging aid: seek the file to pos (the current parse position
        when pos is not given) and restore the file position afterwards.
        The statement that printed the data there is commented out.
        '''
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        '''
        Read the next BUFSIZ chunk once the current buffer is used up.
        Raises PSEOF when nothing more can be read.
        '''
        if self.charpos < len(self.buf): return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        '''Initial state: skip whitespace and dispatch on the first character.'''
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        # File position at which the token that starts here begins.
        self.tokenstart = self.bufpos+j
        if c == '%':
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':
            self.token = c
            return (self.parse_decimal, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':
            self.token = ''
            return (self.parse_wclose, j+1)
        # Any other character is a one-character keyword.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        '''Queue obj together with the position where its token started.'''
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        '''Collect a comment up to the end of line; no token is produced.'''
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        '''Collect a /Name literal; '#' switches to hex-escape handling.'''
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        '''Collect up to two hex digits after '#' and append the character.'''
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        '''
        Collect an integer.  A '.' switches to parse_decimal; text that
        int() cannot convert is dropped without producing a token.
        '''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            self.token += c
            return (self.parse_decimal, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            pass
        return (self.parse_main, j)

    def parse_decimal(self, s, i):
        '''
        Collect the fractional digits of a real number and queue it as a
        Decimal.  Text that Decimal() cannot convert (for instance a lone
        '.') is dropped without producing a token, as parse_number does.
        '''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_decimal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        try:
            self.add_token(Decimal(self.token))
        except (InvalidOperation, ValueError):
            # Decimal signals bad syntax with InvalidOperation (available
            # through the module's "from decimal import *").
            pass
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        '''Collect a keyword; 'true' and 'false' become Python booleans.'''
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        '''
        Collect a (...) string.  self.paren counts open parentheses so
        that nested pairs stay part of the string.
        '''
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren:
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)

    def parse_string_1(self, s, i):
        '''
        Handle the character(s) after a backslash inside a string: up to
        three octal digits, or one of the escapes listed in ESC_STRING.
        Any other escaped character is skipped.
        '''
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            self.token += chr(int(self.oct, 8))
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        '''After '<': a hex string, or '<<' which opens a dictionary.'''
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        '''After '>': a second '>' closes a dictionary.'''
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        '''Collect hex digits and whitespace, then decode them to a string.'''
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # Whitespace is removed first; HEX_PAIR then converts pairs of
        # digits (or a single left-over digit) to characters.
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                                                 SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        '''
        Return the next (position, token) pair, running the state machine
        until at least one token is queued.  fillbuf() raises PSEOF when
        the input ends first.
        '''
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.
        Returns (position of the line start, line including terminator).
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # A '\n' may follow, possibly in the next chunk.
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backward. This is used to locate
        the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s: break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # No terminator left in this chunk: keep the rest for
                    # joining with the preceding chunk.
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = ''
        return


##  PSStackParser
##
class PSStackParser(PSBaseParser):

    '''
    Parser that assembles the tokens of PSBaseParser into objects.
    Arrays and dictionaries are built on a stack of nested contexts.
    '''

    def __init__(self, fp):
        PSBaseParser.__init__(self, fp)
        self.reset()

    def reset(self):
        '''Discard all partially built objects and pending results.'''
        self.context = []
        self.curtype = None
        self.curstack = []
        self.results = []

    def seek(self, pos):
        PSBaseParser.seek(self, pos)
        self.reset()

    def push(self, *objs):
        '''Append objs to the stack of the innermost open construct.'''
        self.curstack.extend(objs)

    def pop(self, n):
        '''Remove and return the last n entries of the current stack.'''
        taken = self.curstack[-n:]
        del self.curstack[-n:]
        return taken

    def popall(self):
        '''Remove and return every entry of the current stack.'''
        taken, self.curstack = self.curstack, []
        return taken

    def add_results(self, *objs):
        self.results.extend(objs)

    def start_type(self, pos, type):
        '''Open a nested construct, saving the enclosing state.'''
        self.context.append((pos, self.curtype, self.curstack))
        self.curtype = type
        self.curstack = []

    def end_type(self, type):
        '''
        Close the innermost construct, which must be of the given type
        (PSTypeError otherwise), and restore the enclosing state.
        Returns (start position, list of collected objects).
        '''
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        collected = [entry[1] for entry in self.curstack]
        (pos, self.curtype, self.curstack) = self.context.pop()
        return (pos, collected)

    def do_keyword(self, pos, token):
        '''Hook called for tokens not handled by nextobject itself.'''
        return

    def nextobject(self, direct=False):
        '''
        Yields a list of objects: keywords, literals, strings,
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python sequence and dictionaries.
        '''
        plain_types = (int, Decimal, bool, str, PSLiteral)
        while not self.results:
            (pos, token) = self.nexttoken()
            if isinstance(token, plain_types):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT: raise
            elif token == KEYWORD_DICT_BEGIN:
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        print("Incomplete dictionary construct")
                        # Pad with an empty value so keys and values pair up.
                        objs.append("") # this isn't necessary.
                        # temporary fix. is this due to rental books?
                        # raise PSSyntaxError(
                        #     'Invalid dictionary construct: %r' % objs)
                    mapping = dict((literal_name(key), value)
                                   for (key, value) in choplist(2, objs))
                    self.push((pos, mapping))
                except PSTypeError:
                    if STRICT: raise
            else:
                self.do_keyword(pos, token)
            # Still inside an array or dictionary: keep reading tokens.
            if self.context:
                continue
            if direct:
                return self.pop(1)[0]
            self.flush()
        return self.results.pop(0)


# Pre-interned name literals; each tuple pairs a long name with its short form.
# NOTE(review): presumably matched against stream filter names -- the code
# using them is outside this part of the file.
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))


##  PDF Objects
##
class PDFObject(PSObject):
    '''Base class of the PDF object types.'''


class PDFException(PSException):
    '''Base class of the PDF-level exceptions.'''


class PDFTypeError(PDFException):
    '''A PDF object had an unexpected type.'''


class PDFValueError(PDFException):
    '''A PDF object had an unexpected value.'''


class PDFNotImplementedError(PSException):
    '''Unsupported feature.  Derives from PSException, not PDFException.'''


##  PDFObjRef
##
class PDFObjRef(PDFObject):

    '''Indirect reference to the object objid/genno of a document.'''

    def __init__(self, doc, objid, genno):
        # Object id 0 is rejected only when STRICT is set.
        if objid == 0 and STRICT:
            raise PDFValueError('PDF object id cannot be 0.')
        self.doc = doc
        self.objid = objid
        self.genno = genno

    def __repr__(self):
        ids = (self.objid, self.genno)
        return '<PDFObjRef:%d %d>' % ids

    def resolve(self):
        '''Fetch the referenced object from the owning document.'''
        return self.doc.getobj(self.objid)


# resolve
def resolve1(x):
    '''
    Resolve an object. If this is an array or dictionary,
    it may still contain some indirect objects inside.
    '''
    target = x
    # Follow chains of references until a non-reference is reached.
    while isinstance(target, PDFObjRef):
        target = target.resolve()
    return target

def resolve_all(x):
    '''
    Recursively resolve X and all the internals.
    Make sure there is no indirect reference within the nested object.
    This procedure might be slow.

    Lists are rebuilt as new lists; dictionaries are updated in place.
    '''
    target = x
    while isinstance(target, PDFObjRef):
        target = target.resolve()
    if isinstance(target, list):
        return [resolve_all(item) for item in target]
    if isinstance(target, dict):
        for (key, value) in target.iteritems():
            target[key] = resolve_all(value)
    return target

def decipher_all(decipher, objid, genno, x):
    '''
    Recursively decipher X.

    Strings are passed to decipher(objid, genno, string); lists and
    dictionaries are rebuilt with their members deciphered; any other
    value is returned unchanged.
    '''
    if isinstance(x, str):
        return decipher(objid, genno, x)

    def recurse(item):
        return decipher_all(decipher, objid, genno, item)

    if isinstance(x, list):
        return [recurse(item) for item in x]
    if isinstance(x, dict):
        deciphered = {}
        for (key, value) in x.iteritems():
            deciphered[key] = recurse(value)
        return deciphered
    return x


# Type checking
def int_value(x):
    '''
    Resolve X and return it if it is an int.
    Otherwise raise PDFTypeError in STRICT mode, or return 0.
    '''
    value = resolve1(x)
    if isinstance(value, int):
        return value
    if STRICT:
        raise PDFTypeError('Integer required: %r' % value)
    return 0

def decimal_value(x):
    '''
    Resolve X and return it if it is a Decimal.
    Otherwise raise PDFTypeError in STRICT mode, or return 0.0.
    '''
    value = resolve1(x)
    if isinstance(value, Decimal):
        return value
    if STRICT:
        raise PDFTypeError('Decimal required: %r' % value)
    return 0.0

def num_value(x):
    '''
    Resolve X and return it if it is an int or a Decimal.
    Otherwise raise PDFTypeError in STRICT mode, or return 0.
    '''
    value = resolve1(x)
    if isinstance(value, (int, Decimal)):
        return value
    if STRICT:
        raise PDFTypeError('Int or Float required: %r' % value)
    return 0

def str_value(x):
    '''
    Resolve X and return it if it is a str.
    Otherwise raise PDFTypeError in STRICT mode, or return ''.
    '''
    value = resolve1(x)
    if isinstance(value, str):
        return value
    if STRICT:
        raise PDFTypeError('String required: %r' % value)
    return ''

def list_value(x):
    '''
    Resolve X and return it if it is a list or a tuple.
    Otherwise raise PDFTypeError in STRICT mode, or return [].
    '''
    value = resolve1(x)
    if isinstance(value, (list, tuple)):
        return value
    if STRICT:
        raise PDFTypeError('List required: %r' % value)
    return []

def dict_value(x):
    '''
    Resolve X and return it if it is a dict.
    Otherwise raise PDFTypeError in STRICT mode, or return {}.
    '''
    value = resolve1(x)
    if isinstance(value, dict):
        return value
    if STRICT:
        raise PDFTypeError('Dict required: %r' % value)
    return {}

def stream_value(x):
    '''
    Resolve X and return it if it is a PDFStream.
    Otherwise raise PDFTypeError in STRICT mode, or return an empty stream.
    '''
    value = resolve1(x)
    if isinstance(value, PDFStream):
        return value
    if STRICT:
        raise PDFTypeError('PDFStream required: %r' % value)
    return PDFStream({}, '')

# ascii85decode(data)
def ascii85decode(data):
    '''
    Decode ASCII85 text and return the decoded string.

    Characters '!'..'u' are base-85 digits collected in groups of five,
    'z' stands for four zero bytes, '~' ends the data (flushing a partial
    group); every other character is ignored.
    '''
    chunks = []
    count = 0   # digits collected for the current group
    acc = 0     # base-85 value of the current group
    for ch in data:
        if '!' <= ch <= 'u':
            count += 1
            acc = acc * 85 + (ord(ch) - 33)
            if count == 5:
                chunks.append(struct.pack('>L', acc))
                count = acc = 0
        elif ch == 'z':
            assert count == 0
            chunks.append('\0\0\0\0')
        elif ch == '~':
            if count:
                # Pad the group with the highest digit (84) up to five
                # digits: closed form of repeating acc = acc*85 + 84.
                acc = (acc + 1) * 85 ** (5 - count) - 1
                chunks.append(struct.pack('>L', acc)[:count - 1])
            break
    return ''.join(chunks)


##  PDFStream type
class PDFStream(PDFObject):
    def __init__(self, dic, rawdata, decipher=None):
        '''
        Create a stream object.

        dic      -- the stream dictionary.
        rawdata  -- the bytes read from the file for this stream.
        decipher -- optional callable(objid, genno, data) used by decode()
                    and get_decdata() to decrypt the data.
        '''
        length = int_value(dic.get('Length', 0))
        # quick and dirty fix for false length attribute,
        # might not work if the pdf stream parser has a problem
        #
        # getattr() is used so that a callable without a __name__
        # attribute is simply treated as "not decrypt_aes".
        if decipher is not None and \
           getattr(decipher, '__name__', None) == 'decrypt_aes':
            # Drop any trailing partial 16-byte block.
            rawdata = rawdata[:16 * (len(rawdata) // 16)]
        elif rawdata[length:] in ('\r', '\n', '\r\n'):
            # Only a line terminator follows the declared length: strip it.
            rawdata = rawdata[:length]

        self.dic = dic
        self.rawdata = rawdata
        self.decipher = decipher
        self.data = None      # decoded data, filled in by decode()
        self.decdata = None   # decrypted-only data, see decode()
        self.objid = None
        self.genno = None

    def set_objid(self, objid, genno):
        '''Record the object id and generation number of this stream.'''
        (self.objid, self.genno) = (objid, genno)

    def __repr__(self):
        '''
        Describe the stream by its raw size before decode() has run and by
        its decoded size afterwards.
        '''
        # Test against None rather than truthiness: an empty, not yet
        # decoded stream has rawdata == '' and data is None, so falling
        # through to len(self.data) would raise TypeError.
        if self.rawdata is not None:
            return '<PDFStream(%r): raw=%d, %r>' % \
                   (self.objid, len(self.rawdata), self.dic)
        return '<PDFStream(%r): data=%d, %r>' % \
               (self.objid, len(self.data), self.dic)

    def decode(self):
        '''
        Turn self.rawdata into self.data and set self.rawdata to None.

        The raw data is first decrypted with self.decipher (if set), then
        run through each filter named in the dictionary's 'Filter' entry.
        Raises PDFNotImplementedError for /Crypt, unknown filters and
        predictors other than 12, and PDFValueError when predictor 12 has
        no 'Columns' parameter.
        '''
        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
            if gen_xref_stm:
                self.decdata = data # keep decrypted data
        if 'Filter' not in self.dic:
            # No filters: the (decrypted) data is the final data.
            self.data = data
            self.rawdata = None
            ##print self.dict
            return
        filters = self.dic['Filter']
        if not isinstance(filters, list):
            filters = [ filters ]
        for f in filters:
            if f in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                data = zlib.decompress(data)
            elif f in LITERALS_LZW_DECODE:
                data = ''.join(LZWDecoder(StringIO(data)).run())
            elif f in LITERALS_ASCII85_DECODE:
                data = ascii85decode(data)
            elif f == LITERAL_CRYPT:
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % f)
            # apply predictors
            # NOTE(review): this section sits inside the filter loop, so it
            # runs once after every filter -- confirm that is intended.
            if 'DP' in self.dic:
                params = self.dic['DP']
            else:
                params = self.dic.get('DecodeParms', {})
            if 'Predictor' in params:
                pred = int_value(params['Predictor'])
                if pred:
                    if pred != 12:
                        raise PDFNotImplementedError(
                            'Unsupported predictor: %r' % pred)
                    if 'Columns' not in params:
                        raise PDFValueError(
                            'Columns undefined for predictor=12')
                    columns = int_value(params['Columns'])
                    buf = ''
                    # ent0 is the previously reconstructed row (all zeros
                    # before the first row).
                    ent0 = '\x00' * columns
                    # Each row is one tag byte followed by `columns` bytes.
                    for i in xrange(0, len(data), columns+1):
                        pred = data[i]
                        ent1 = data[i+1:i+1+columns]
                        if pred == '\x02':
                            # Tag 2: bytes are stored as deltas from the
                            # same column of the previous row.
                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
                                               for (a,b) in zip(ent0,ent1))
                        buf += ent1
                        ent0 = ent1
                    data = buf
        self.data = data
        self.rawdata = None
        return

    def get_data(self):
        '''Return the decoded stream data, decoding it on first use.'''
        if self.data is not None:
            return self.data
        self.decode()
        return self.data

    def get_rawdata(self):
        '''Return the raw stream data; this is None once decode() has run.'''
        return self.rawdata

    def get_decdata(self):
        '''
        Return the decrypted (but not filter-decoded) stream data.

        The copy kept by decode() is used when there is one; otherwise the
        raw data is decrypted with self.decipher, or returned as is when
        there is no decipher or no raw data.
        '''
        kept = self.decdata
        if kept is not None:
            return kept
        raw = self.rawdata
        if not (self.decipher and raw):
            return raw
        # Handle encryption
        return self.decipher(self.objid, self.genno, raw)


##  PDF Exceptions
##
# Raised when the file content does not parse as expected.
class PDFSyntaxError(PDFException): pass
# Raised while locating or reading a cross-reference section;
# PDFParser.read_xref() catches it and falls back to scanning the file.
class PDFNoValidXRef(PDFSyntaxError): pass
# Raised for problems with the document's encryption settings.
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass

# some predefined literals and keywords.
# Interned literals that /Type entries are compared against (by identity).
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')


##  XRefs
##

##  PDFXRef
##
class PDFXRef(object):

    def __init__(self):
        '''Create an empty table; load() fills in self.offsets.'''
        # Maps objid -> (genno, pos) once loaded.
        self.offsets = None

    def __repr__(self):
        '''Show the number of objects in the table (0 before load()).'''
        # self.offsets is None until load() runs; len(None) would raise.
        return '<PDFXRef: objs=%d>' % len(self.offsets or ())

    def objids(self):
        '''Return an iterator over the object ids present in the table.'''
        return iter(self.offsets)

    def load(self, parser):
        '''
        Read a classic cross-reference table, then its trailer.

        Lines are read from PARSER until one starts with 'trailer'.  Each
        subsection header is "start count" and is followed by COUNT entry
        lines "pos genno use"; only entries whose use flag is 'n' are
        stored, as self.offsets[objid] = (genno, pos).

        Raises PDFNoValidXRef on any malformed or missing line so that the
        caller's fallback scan can take over.
        '''
        self.offsets = {}
        while 1:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith('trailer'):
                # Rewind so load_trailer() sees the keyword itself.
                parser.seek(pos)
                break
            f = line.strip().split(' ')
            if len(f) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in xrange(start, start+nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                f = line.strip().split(' ')
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
                if use != 'n': continue
                try:
                    self.offsets[objid] = (int(genno), int(pos))
                except ValueError:
                    # Non-numeric fields must surface as PDFNoValidXRef,
                    # not as a bare ValueError that skips the fallback.
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
    def load_trailer(self, parser):
        '''
        Read the 'trailer' keyword and the dictionary after it from PARSER
        and store the dictionary as self.trailer.

        If the parser hits end of file first, whatever object is left on
        its stack is used; PDFNoValidXRef is raised when there is none.
        '''
        try:
            (_, keyword) = parser.nexttoken()
            assert keyword is self.KEYWORD_TRAILER
            (_, trailer_dict) = parser.nextobject(direct=True)
        except PSEOF:
            leftover = parser.pop(1)
            if not leftover:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_, trailer_dict) = leftover[0]
        self.trailer = dict_value(trailer_dict)

    def getpos(self, objid):
        '''
        Return (None, pos) for OBJID, POS being its stored file offset.
        Raises KeyError when OBJID is not in the table.
        '''
        # A KeyError from the lookup propagates to the caller unchanged.
        (_genno, pos) = self.offsets[objid]
        return (None, pos)


##  PDFXRefStream
##
class PDFXRefStream(object):

    def __init__(self):
        self.index = None
        self.data = None
        self.entlen = None
        self.fl1 = self.fl2 = self.fl3 = None
        return

    def __repr__(self):
        '''Show the (first, size) subsections held in self.index.'''
        return '<PDFXRef: objids=%s>' % self.index

    def objids(self):
        for first, size in self.index:
            for objid in xrange(first, first + size):
                yield objid

    def load(self, parser, debug=0):
        '''
        Read a cross-reference stream object from PARSER.

        Sets self.index (list of (first, size) pairs), the field widths
        self.fl1..fl3, self.entlen (their sum), self.data (the decoded
        stream) and self.trailer (the stream dictionary).

        Raises PDFNoValidXRef when the object is not an /XRef stream or
        lacks the mandatory /Size or /W entries.  DEBUG is unused.
        '''
        (_,objid) = parser.nexttoken() # ignored
        (_,genno) = parser.nexttoken() # ignored
        (_,kwd) = parser.nexttoken()
        (_,stream) = parser.nextobject()
        # .get() so that a missing /Type is reported as an invalid xref
        # instead of escaping as a bare KeyError.
        if not isinstance(stream, PDFStream) or \
           stream.dic.get('Type') is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        try:
            size = stream.dic['Size']
            widths = stream.dic['W']
        except KeyError:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        # Without /Index there is one subsection covering 0..size-1.
        index = stream.dic.get('Index', (0,size))
        self.index = zip(islice(index, 0, None, 2),
                         islice(index, 1, None, 2))
        (self.fl1, self.fl2, self.fl3) = widths
        self.data = stream.get_data()
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.dic
        return

    def getpos(self, objid):
        offset = 0
        for first, size in self.index:
            if first <= objid  and objid < (first + size):
                break
            offset += size
        else:
            raise KeyError(objid)
        i = self.entlen * ((objid - first) + offset)
        ent = self.data[i:i+self.entlen]
        f1 = nunpack(ent[:self.fl1], 1)
        if f1 == 1:
            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
            genno = nunpack(ent[self.fl1+self.fl2:])
            return (None, pos)
        elif f1 == 2:
            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
            index = nunpack(ent[self.fl1+self.fl2:])
            return (objid, index)
        # this is a free object
        raise KeyError(objid)


##  PDFDocument
##
##  A PDFDocument object represents a PDF document.
##  Since a PDF file is usually pretty big, normally it is not loaded
##  at once. Rather it is parsed dynamically as processing goes.
##  A PDF parser is associated with the document.
##
class PDFDocument(object):

    def __init__(self):
        '''Create an empty document with no parser attached yet.'''
        self.xrefs = []
        self.objs = {}          # objid -> object already fetched by getobj()
        self.parsed_objs = {}   # stream id -> objects parsed from that stream
        self.root = self.catalog = None
        self.parser = self.encryption = self.decipher = None

    # set_parser(parser)
    #   Associates the document with an (already initialized) parser object.
    def set_parser(self, parser):
        '''
        Associate the document with an (already initialized) parser object.

        Reads the cross-reference sections through PARSER, remembers the
        encryption information of any trailer that has an 'Encrypt' entry
        and sets the root from the first trailer that has a 'Root' entry.
        Does nothing when a parser is already attached.

        Raises PDFSyntaxError when no trailer contains 'Root'.
        '''
        if self.parser: return
        self.parser = parser
        # The document is set to be temporarily ready during collecting
        # all the basic information about the document, e.g.
        # the header, the encryption information, and the access rights
        # for the document.
        self.ready = True
        # Retrieve the information of each header that was appended
        # (maybe multiple times) at the end of the document.
        self.xrefs = parser.read_xref()
        for xref in self.xrefs:
            trailer = xref.trailer
            if not trailer: continue

            # If there's an encryption info, remember it.
            if 'Encrypt' in trailer:
                #assert not self.encryption
                try:
                    self.encryption = (list_value(trailer['ID']),
                                   dict_value(trailer['Encrypt']))
                # fix for bad files: fall back to a dummy document id.
                # "except Exception" rather than a bare except so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                except Exception:
                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
                                       dict_value(trailer['Encrypt']))
            if 'Root' in trailer:
                self.set_root(dict_value(trailer['Root']))
                break
        else:
            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
        # The document is set to be non-ready again, until all the
        # proper initialization (asking the password key and
        # verifying the access permission, so on) is finished.
        self.ready = False
        return

    # set_root(root)
    #   Set the Root dictionary of the document.
    #   Each PDF file must have exactly one /Root dictionary.
    def set_root(self, root):
        '''
        Store ROOT as the document root and its dictionary as the catalog.
        In STRICT mode a catalog whose 'Type' is not /Catalog raises
        PDFSyntaxError.
        '''
        self.root = root
        catalog = dict_value(root)
        self.catalog = catalog
        if catalog.get('Type') is not LITERAL_CATALOG and STRICT:
            raise PDFSyntaxError('Catalog not found!')
    # initialize(password='')
    #   Perform the initialization with a given password.
    #   This step is mandatory even if there's no password associated
    #   with the document.
    def initialize(self, password=''):
        '''
        Set up decryption with PASSWORD by dispatching on the 'Filter'
        entry of the encryption dictionary.

        Raises PDFEncryptionError when the document is not encrypted
        (after marking it ready and fully permitted) or when the filter
        is not one of Adobe.APS, Standard or EBX_HANDLER.
        '''
        if not self.encryption:
            self.is_printable = self.is_modifiable = self.is_extractable = True
            self.ready = True
            raise PDFEncryptionError('Document is not encrypted.')
        (docid, param) = self.encryption
        handlers = {
            'Adobe.APS': self.initialize_adobe_ps,
            'Standard': self.initialize_standard,
            'EBX_HANDLER': self.initialize_ebx,
        }
        handler = handlers.get(literal_name(param['Filter']))
        if handler is None:
            raise PDFEncryptionError('Unknown filter: param=%r' % param)
        return handler(password, docid, param)

    def initialize_adobe_ps(self, password, docid, param):
        '''
        Set up AES decryption for the Adobe.APS filter using the key
        derived by genkey_adobe_ps().  PASSWORD and DOCID are not used.
        '''
        key = self.genkey_adobe_ps(param)
        self.decrypt_key = key
        self.genkey = self.genkey_v4
        self.decipher = self.decrypt_aes
        self.ready = True

    def genkey_adobe_ps(self, param):
        '''
        Derive the document key for the Adobe.APS filter.

        Picks the principal key whose name occurs in the decoded PDRLLic
        entry, uses its SHA256 to AES-decrypt entry 9 of the EDCData list
        and returns the first 16 bytes of the result.  The PDRLPol entry
        is decrypted as well, purely as a consistency check.

        Raises ADEPTError when no principal key matches or when either
        decryption does not yield valid padding.
        '''
        # nice little offline principal keys dictionary
        # global static principal key for German Onleihe / Bibliothek Digital
        principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')}
        self.is_printable = self.is_modifiable = self.is_extractable = True
        edcdata = str_value(param.get('EDCData')).decode('base64')
        pdrllic = str_value(param.get('PDRLLic')).decode('base64')
        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
        edclist = edcdata.split('\n')
        # principal key request: give up only after every known key has
        # been tried, not on the first one that does not match.
        principalkey = None
        for (name, value) in principalkeys.items():
            if name in pdrllic:
                principalkey = value
                break
        if principalkey is None:
            raise ADEPTError('Cannot find principal key for this pdf')
        if len(edclist) < 10:
            # edclist[2] and edclist[9] are needed below.
            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
        shakey = SHA256(principalkey)
        ivector = 16 * chr(0)
        plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
        # A correct decryption ends with a full block of padding bytes 16.
        if plaintext[-16:] != 16 * chr(16):
            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
        pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
        if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16:
            raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
        return plaintext[:16]

    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
                       '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
    # experimental aes pw support
    def initialize_standard(self, password, docid, param):
        '''
        Set up decryption for the Standard security handler.

        Derives the document key from PASSWORD, the first element of
        DOCID and the O/P/R/Length entries of PARAM, checks it against
        the U entry, records the permission bits of P as is_* attributes
        and selects the key-generation and decipher methods from V.

        Raises PDFEncryptionError for an unsupported V or R,
        PDFNotImplementedError for unsupported V4 key lengths and
        ADEPTError when the password does not authenticate.
        '''
        V = int_value(param.get('V', 0))
        if (V <=0 or V > 4):
            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
        length = int_value(param.get('Length', 40)) # Key length (bits)
        O = str_value(param['O'])
        R = int_value(param['R']) # Revision
        if 5 <= R:
            raise PDFEncryptionError('Unknown revision: %r' % R)
        U = str_value(param['U'])
        P = int_value(param['P'])
        # EncryptMetadata defaults to true.
        # NOTE(review): assumes the parser delivers the PDF booleans as
        # Python bools; a string spelling is accepted as well -- confirm.
        encrypt_metadata = True
        flag = resolve1(param.get('EncryptMetadata', True))
        if isinstance(flag, bool):
            encrypt_metadata = flag
        elif isinstance(flag, str):
            encrypt_metadata = (flag.lower() != 'false')
        self.is_printable = bool(P & 4)
        self.is_modifiable = bool(P & 8)
        self.is_extractable = bool(P & 16)
        self.is_annotationable = bool(P & 32)
        self.is_formsenabled = bool(P & 256)
        self.is_textextractable = bool(P & 512)
        self.is_assemblable = bool(P & 1024)
        self.is_formprintable = bool(P & 2048)
        # Algorithm 3.2
        password = (password+self.PASSWORD_PADDING)[:32] # 1
        md5 = hashlib.md5(password) # 2
        md5.update(O) # 3
        md5.update(struct.pack('<l', P)) # 4
        md5.update(docid[0]) # 5
        # aes special handling if metadata isn't encrypted
        # (step 6 of the algorithm, revision 4 or greater only)
        if R >= 4 and not encrypt_metadata:
            md5.update('ffffffff'.decode('hex'))
        keylen = length // 8
        if R >= 3:
            # 8: revision 3 or greater re-hashes the key 50 times
            for _ in xrange(50):
                md5 = hashlib.md5(md5.digest()[:keylen])
        key = md5.digest()[:keylen]
        if R == 2:
            # Algorithm 3.4
            u1 = ARC4.new(key).decrypt(password)
            is_authenticated = (u1 == U)
        elif R >= 3:
            # Algorithm 3.5
            md5 = hashlib.md5(self.PASSWORD_PADDING) # 2
            md5.update(docid[0]) # 3
            x = ARC4.new(key).decrypt(md5.digest()[:16]) # 4
            for i in xrange(1,19+1):
                k = ''.join( chr(ord(c) ^ i) for c in key )
                x = ARC4.new(k).decrypt(x)
            u1 = x+x # 32bytes total
            is_authenticated = (u1[:16] == U[:16])
        else:
            raise PDFEncryptionError('Unknown revision: %r' % R)
        if not is_authenticated:
            raise ADEPTError('Password is not correct.')
        self.decrypt_key = key
        # genkey method
        if V == 3:
            self.genkey = self.genkey_v3
        else:
            self.genkey = self.genkey_v2
        if V != 4:
            # rc4
            self.decipher = self.decrypt_rc4  # XXX may be AES
        elif length == 128:
            # aes: the per-object key additionally hashes in 'sAlT',
            # which is what genkey_v4 does.
            self.genkey = self.genkey_v4
            self.decipher = self.decrypt_aes
        elif length == 256:
            raise PDFNotImplementedError('AES256 encryption is currently unsupported')
        else:
            # Do not silently continue without any decipher installed.
            raise PDFNotImplementedError(
                'Unsupported key length for V=4: %r' % length)
        self.ready = True
        return

    def initialize_ebx(self, password, docid, param):
        self.is_printable = self.is_modifiable = self.is_extractable = True
        rsa = RSA(password)
        length = int_value(param.get('Length', 0)) / 8
        rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
        rights = zlib.decompress(rights, -15)
        rights = etree.fromstring(rights)
        expr = './/{http://ns.adobe.com/adept}encryptedKey'
        bookkey = ''.join(rights.findtext(expr)).decode('base64')
        bookkey = rsa.decrypt(bookkey)
        if bookkey[0] != '\x02':
            raise ADEPTError('error decrypting book session key')
        index = bookkey.index('\0') + 1
        bookkey = bookkey[index:]
        ebx_V = int_value(param.get('V', 4))
        ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
        # added because of improper booktype / decryption book session key errors
        if length > 0:
            if len(bookkey) == length:
                if ebx_V == 3:
                    V = 3
                else:
                    V = 2
            elif len(bookkey) == length + 1:
                V = ord(bookkey[0])
                bookkey = bookkey[1:]
            else:
                print "ebx_V is %d  and ebx_type is %d" % (ebx_V, ebx_type)
                print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
                print "bookkey[0] is %d" % ord(bookkey[0])
                raise ADEPTError('error decrypting book session key - mismatched length')
        else:
            # proper length unknown try with whatever you have
            print "ebx_V is %d  and ebx_type is %d" % (ebx_V, ebx_type)
            print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
            print "bookkey[0] is %d" % ord(bookkey[0])
            if ebx_V == 3:
                V = 3
            else:
                V = 2
        self.decrypt_key = bookkey
        self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
        self.decipher = self.decrypt_rc4
        self.ready = True
        return

    # genkey functions
    def genkey_v2(self, objid, genno):
        '''
        Return the per-object key: the MD5 of the document key followed by
        the low 3 bytes of OBJID and the low 2 bytes of GENNO, cut to
        min(len(document key) + 5, 16) bytes.
        '''
        suffix = struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2]
        keylen = min(len(self.decrypt_key) + 5, 16)
        return hashlib.md5(self.decrypt_key + suffix).digest()[:keylen]

    def genkey_v3(self, objid, genno):
        '''
        Return the per-object key of the V 3 scheme: OBJID and GENNO are
        XOR-masked, their low bytes interleaved and appended to the
        document key together with 'sAlT'; the MD5 of that is cut to
        min(len(document key) + 5, 16) bytes.
        '''
        oid = struct.pack('<L', objid ^ 0x3569ac)
        gen = struct.pack('<L', genno ^ 0xca96)
        mixed = ''.join((oid[0], gen[0], oid[1], gen[1], oid[2]))
        seed = self.decrypt_key + mixed + 'sAlT'
        keylen = min(len(self.decrypt_key) + 5, 16)
        return hashlib.md5(seed).digest()[:keylen]

    # aes v2 and v4 algorithm
    def genkey_v4(self, objid, genno):
        '''
        Return the per-object key used with AES: like genkey_v2, but with
        'sAlT' appended to the hashed material.
        '''
        suffix = struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2]
        keylen = min(len(self.decrypt_key) + 5, 16)
        seed = self.decrypt_key + suffix + 'sAlT'
        return hashlib.md5(seed).digest()[:keylen]

    def decrypt_aes(self, objid, genno, data):
        '''
        AES-CBC decrypt DATA with the per-object key.  The first 16 bytes
        of DATA are the initialisation vector; the padding announced by
        the last plaintext byte is stripped from the result.
        '''
        cipher = AES.new(self.genkey(objid, genno), AES.MODE_CBC, data[:16])
        plaintext = cipher.decrypt(data[16:])
        # remove pkcs#5 aes padding
        padlen = ord(plaintext[-1])
        return plaintext[:-padlen]

    def decrypt_aes256(self, objid, genno, data):
        '''
        AES-CBC decrypt DATA with the per-object key; performs the same
        steps as decrypt_aes (leading 16-byte IV, trailing padding removed).
        '''
        (ivector, body) = (data[:16], data[16:])
        key = self.genkey(objid, genno)
        plaintext = AES.new(key, AES.MODE_CBC, ivector).decrypt(body)
        # remove pkcs#5 aes padding
        return plaintext[:-ord(plaintext[-1])]

    def decrypt_rc4(self, objid, genno, data):
        '''RC4-decrypt DATA with the per-object key.'''
        cipher = ARC4.new(self.genkey(objid, genno))
        return cipher.decrypt(data)


    KEYWORD_OBJ = PSKeywordTable.intern('obj')

    def getobj(self, objid):
        '''
        Return the object with id OBJID, or None when no cross-reference
        section knows it.

        Objects already fetched come from the self.objs cache.  Otherwise
        the first xref that can locate OBJID decides how it is read:
        from an object stream, or by parsing at a file offset (in which
        case the result is passed through decipher_all() when a decipher
        is set).  When gen_xref_stm is set, an object living in an object
        stream is not parsed; a PDFObjStmRef is returned instead and is
        not cached.

        Raises PDFException when the document is not ready and
        PDFSyntaxError for malformed object data.
        '''
        if not self.ready:
            raise PDFException('PDFDocument not initialized')
        #assert self.xrefs
        if objid in self.objs:
            genno = 0
            obj = self.objs[objid]
        else:
            # Ask each xref in turn; the first one that knows objid wins.
            for xref in self.xrefs:
                try:
                    (stmid, index) = xref.getpos(objid)
                    break
                except KeyError:
                    pass
            else:
                #if STRICT:
                #    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
                return None
            if stmid:
                # The object is stored inside object stream `stmid`.
                if gen_xref_stm:
                    return PDFObjStmRef(objid, stmid, index)
                # Stuff from pdfminer: extract objects from object stream
                stream = stream_value(self.getobj(stmid))
                if stream.dic.get('Type') is not LITERAL_OBJSTM:
                    if STRICT:
                        raise PDFSyntaxError('Not a stream object: %r' % stream)
                try:
                    n = stream.dic['N']
                except KeyError:
                    if STRICT:
                        raise PDFSyntaxError('N is not defined: %r' % stream)
                    n = 0

                if stmid in self.parsed_objs:
                    objs = self.parsed_objs[stmid]
                else:
                    # Parse the whole stream once and cache the result.
                    parser = PDFObjStrmParser(stream.get_data(), self)
                    objs = []
                    try:
                        while 1:
                            (_,obj) = parser.nextobject()
                            objs.append(obj)
                    except PSEOF:
                        pass
                    self.parsed_objs[stmid] = objs
                genno = 0
                # The first n*2 parsed items are skipped -- presumably the
                # (objid, offset) number pairs of the stream header.
                i = n*2+index
                try:
                    obj = objs[i]
                except IndexError:
                    raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, 0)
            else:
                # `index` is a file offset: expect "objid genno obj" there.
                self.parser.seek(index)
                (_,objid1) = self.parser.nexttoken() # objid
                (_,genno) = self.parser.nexttoken() # genno
                #assert objid1 == objid, (objid, objid1)
                (_,kwd) = self.parser.nexttoken()
        # #### hack around malformed pdf files
        #        assert objid1 == objid, (objid, objid1)
##                if objid1 != objid:
##                    x = []
##                    while kwd is not self.KEYWORD_OBJ:
##                        (_,kwd) = self.parser.nexttoken()
##                        x.append(kwd)
##                    if x:
##                        objid1 = x[-2]
##                        genno = x[-1]
##
                if kwd is not self.KEYWORD_OBJ:
                    raise PDFSyntaxError(
                        'Invalid object spec: offset=%r' % index)
                (_,obj) = self.parser.nextobject()
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, genno)
                if self.decipher:
                    obj = decipher_all(self.decipher, objid, genno, obj)
            self.objs[objid] = obj
        return obj


class PDFObjStmRef(object):
    '''
    Placeholder for an object stored inside an object stream: it records
    the object id, the id of the containing stream and the index within it.
    The class attribute maxindex tracks the largest index seen so far.
    '''
    maxindex = 0

    def __init__(self, objid, stmid, index):
        (self.objid, self.stmid, self.index) = (objid, stmid, index)
        PDFObjStmRef.maxindex = max(PDFObjStmRef.maxindex, index)


##  PDFParser
##
class PDFParser(PSStackParser):

    def __init__(self, doc, fp):
        '''
        Create a parser reading from FP and attach it to document DOC
        (which makes DOC read its cross-reference data immediately).
        '''
        PSStackParser.__init__(self, fp)
        self.doc = doc
        doc.set_parser(self)

    def __repr__(self):
        '''Return a fixed tag; it is what %r shows in error messages.'''
        return '<PDFParser>'

    KEYWORD_R = PSKeywordTable.intern('R')
    KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
    KEYWORD_STREAM = PSKeywordTable.intern('stream')
    KEYWORD_XREF = PSKeywordTable.intern('xref')
    KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
    def do_keyword(self, pos, token):
        '''
        Handle keyword TOKEN found at file position POS.

        'xref'/'startxref' move the top stack entry to the results,
        'endobj' moves the top four, 'R' replaces the two preceding
        entries by a PDFObjRef, 'stream' reads the stream body and
        pushes a PDFStream; any other keyword is pushed unchanged.
        '''
        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            self.add_results(*self.pop(1))
            return
        if token is self.KEYWORD_ENDOBJ:
            self.add_results(*self.pop(4))
            return

        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
                ((_,objid), (_,genno)) = self.pop(2)
                (objid, genno) = (int(objid), int(genno))
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
                # A malformed reference is dropped silently.
                pass
            return

        if token is self.KEYWORD_STREAM:
            # stream object
            ((_,dic),) = self.pop(1)
            dic = dict_value(dic)
            try:
                objlen = int_value(dic['Length'])
            except KeyError:
                if STRICT:
                    raise PDFSyntaxError('/Length is undefined: %r' % dic)
                objlen = 0
            self.seek(pos)
            try:
                (_, line) = self.nextline()  # 'stream'
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                return
            # The data starts right after the 'stream' line.
            pos += len(line)
            self.fp.seek(pos)
            data = self.fp.read(objlen)
            self.seek(pos+objlen)
            # /Length may be too small: keep appending lines until the
            # one containing 'endstream' is reached.
            while 1:
                try:
                    (linepos, line) = self.nextline()
                except PSEOF:
                    if STRICT:
                        raise PDFSyntaxError('Unexpected EOF')
                    break
                if 'endstream' in line:
                    i = line.index('endstream')
                    objlen += i
                    data += line[:i]
                    break
                objlen += len(line)
                data += line
            self.seek(pos+objlen)
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))
            return

        # others
        self.push((pos, token))
        return

    def find_xref(self):
        '''
        Return the offset of the last cross-reference section.

        The file is scanned backwards for the 'startxref' line; the
        non-empty line that follows it in the file holds the offset.

        Raises PDFNoValidXRef when 'startxref' is missing or is not
        followed by a number, so that read_xref() can fall back to
        scanning the whole file.
        '''
        # search the last xref table by scanning the file backwards.
        prev = None
        for line in self.revreadlines():
            line = line.strip()
            if line == 'startxref': break
            if line:
                prev = line
        else:
            raise PDFNoValidXRef('Unexpected EOF')
        try:
            return int(prev)
        except (TypeError, ValueError):
            # prev is None (nothing after 'startxref') or not a number.
            raise PDFNoValidXRef('Invalid startxref offset: %r' % prev)

    # read xref table
    def read_xref_from(self, start, xrefs):
        '''
        Read the cross-reference section at offset START, append it to
        XREFS and follow its 'XRefStm' and 'Prev' trailer entries
        recursively.

        A section starting with an integer is read as a cross-reference
        stream, one starting with 'xref' as a classic table; anything
        else raises PDFNoValidXRef.
        '''
        global gen_xref_stm
        self.seek(start)
        self.reset()
        try:
            (pos, token) = self.nexttoken()
        except PSEOF:
            raise PDFNoValidXRef('Unexpected EOF')
        if not isinstance(token, int):
            if token is not self.KEYWORD_XREF:
                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
                                     (pos, token))
            self.nextline()
            xref = PDFXRef()
        else:
            # XRefStream: PDF-1.5
            if GEN_XREF_STM == 1:
                gen_xref_stm = True
            # Rewind so the xref stream loader sees the object header.
            self.seek(pos)
            self.reset()
            xref = PDFXRefStream()
        xref.load(self)
        xrefs.append(xref)
        trailer = xref.trailer
        # 'XRefStm' first, then the previous xref section.
        for key in ('XRefStm', 'Prev'):
            if key in trailer:
                self.read_xref_from(int_value(trailer[key]), xrefs)

    # read xref tables and trailers
    def read_xref(self):
        '''
        Return the list of cross-reference sections of the file.

        Normally the chain starting at the offset given by find_xref() is
        read.  If that raises PDFNoValidXRef, the whole file is scanned
        for "<objid> <genno> obj" lines to rebuild the offsets and the
        last 'trailer' line seen supplies the trailer; the original
        exception is re-raised when no object is found at all.
        '''
        xrefs = []
        trailerpos = None
        try:
            pos = self.find_xref()
            self.read_xref_from(pos, xrefs)
        except PDFNoValidXRef:
            # fallback
            self.seek(0)
            pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
            offsets = {}
            xref = PDFXRef()
            while 1:
                try:
                    (pos, line) = self.nextline()
                except PSEOF:
                    break
                if line.startswith('trailer'):
                    trailerpos = pos # remember last trailer
                m = pat.match(line)
                if not m: continue
                (objid, genno) = m.groups()
                # Generation numbers are recorded as 0 here.
                offsets[int(objid)] = (0, pos)
            # Bare raise: re-raises the PDFNoValidXRef being handled.
            if not offsets: raise
            xref.offsets = offsets
            # The rebuilt table is only used when a trailer was found.
            if trailerpos:
                self.seek(trailerpos)
                xref.load_trailer(self)
                xrefs.append(xref)
        return xrefs

##  PDFObjStrmParser
##
class PDFObjStrmParser(PDFParser):
    """PDFParser variant that parses an in-memory data string.

    The data is wrapped in a StringIO and handed straight to
    PSStackParser.__init__ (PDFParser's own initializer is not called);
    ``doc`` is kept so that indirect references can be bound to it.
    """

    KEYWORD_R = KWD('R')

    def __init__(self, data, doc):
        PSStackParser.__init__(self, StringIO(data))
        self.doc = doc

    def flush(self):
        """Move everything currently on the stack into the results."""
        pending = self.popall()
        self.add_results(*pending)

    def do_keyword(self, pos, token):
        """Handle a keyword token.

        ``R`` turns the two preceding stack entries (object id and
        generation number) into a PDFObjRef; any other keyword is pushed
        unchanged.
        """
        if token is not self.KEYWORD_R:
            # others
            self.push((pos, token))
            return
        # reference to indirect object
        try:
            ((_, raw_id), (_, raw_gen)) = self.pop(2)
            ref = PDFObjRef(self.doc, int(raw_id), int(raw_gen))
            self.push((pos, ref))
        except PSSyntaxError:
            # a malformed reference is dropped silently
            pass

###
### My own code, for which there is none else to blame

class PDFSerializer(object):
    """Write a parsed PDFDocument back out as a new PDF file.

    The constructor parses the input and initializes the document with the
    user key; dump() then writes every collected object followed by either
    a classic xref table or a cross-reference stream, depending on the
    module-level ``gen_xref_stm`` flag.
    """

    # Ciando download URLs carry a five-digit referrer ID; compiled once
    # here instead of on every escape_string() call.
    _CIANDO_ID_RE = re.compile(
        r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')

    def __init__(self, inf, userkey):
        """Parse ``inf`` and collect the object ids and trailer to write.

        inf     -- seekable input file object, opened for binary reading
        userkey -- key data passed to PDFDocument.initialize()
        """
        global GEN_XREF_STM, gen_xref_stm
        # A setting above 1 forces xref-stream output from the start.
        gen_xref_stm = GEN_XREF_STM > 1
        self.version = inf.read(8)
        inf.seek(0)
        self.doc = doc = PDFDocument()
        parser = PDFParser(doc, inf)
        doc.initialize(userkey)
        self.objids = objids = set()
        # Walk the xrefs in reverse so that, after the loop, ``trailer``
        # is the trailer of the first entry in doc.xrefs.
        for xref in reversed(doc.xrefs):
            trailer = xref.trailer
            for objid in xref.objids():
                objids.add(objid)
        trailer = dict(trailer)
        # The output has a single xref section, so links to older or
        # hybrid sections are dropped.
        trailer.pop('Prev', None)
        trailer.pop('XRefStm', None)
        if 'Encrypt' in trailer:
            # Neither the Encrypt entry nor the object it refers to is
            # written to the output.
            objids.remove(trailer.pop('Encrypt').objid)
        self.trailer = trailer

    def dump(self, outf):
        """Serialize the whole document to ``outf``.

        outf -- output file object supporting write() and tell()
        """
        self.outf = outf
        self.write(self.version)
        self.write('\n%\xe2\xe3\xcf\xd3\n')
        doc = self.doc
        objids = self.objids
        # objid -> (file offset, genno), or a PDFObjStmRef
        xrefs = {}
        maxobj = max(objids)
        trailer = dict(self.trailer)
        trailer['Size'] = maxobj + 1
        for objid in objids:
            obj = doc.getobj(objid)
            if isinstance(obj, PDFObjStmRef):
                # Not written on its own; only recorded for the xref.
                xrefs[objid] = obj
                continue
            if obj is not None:
                try:
                    genno = obj.genno
                except AttributeError:
                    genno = 0
                xrefs[objid] = (self.tell(), genno)
                self.serialize_indirect(objid, obj)
        startxref = self.tell()

        if not gen_xref_stm:
            self.write('xref\n')
            self.write('0 %d\n' % (maxobj + 1,))
            for objid in xrange(0, maxobj + 1):
                if objid in xrefs:
                    # force the genno to be 0
                    self.write("%010d 00000 n \n" % xrefs[objid][0])
                else:
                    self.write("%010d %05d f \n" % (0, 65535))

            self.write('trailer\n')
            self.serialize_object(trailer)
            self.write('\nstartxref\n%d\n%%%%EOF' % startxref)

        else: # Generate crossref stream.

            # Calculate size of entries: field 2 needs at least two bytes
            # and field 3 at least one, each widened until the largest
            # value fits.
            maxoffset = max(startxref, maxobj)
            maxindex = PDFObjStmRef.maxindex
            fl2 = 2
            power = 65536
            while maxoffset >= power:
                fl2 += 1
                power *= 256
            fl3 = 1
            power = 256
            while maxindex >= power:
                fl3 += 1
                power *= 256

            # Flat list of (first objid, count) pairs, one per contiguous
            # run of object ids.
            index = []
            first = None
            prev = None
            data = []
            # Put the xrefstream's reference in itself
            startxref = self.tell()
            maxobj += 1
            xrefs[maxobj] = (startxref, 0)
            for objid in sorted(xrefs):
                if first is None:
                    first = objid
                elif objid != prev + 1:
                    index.extend((first, prev - first + 1))
                    first = objid
                prev = objid
                objref = xrefs[objid]
                if isinstance(objref, PDFObjStmRef):
                    f1 = 2
                    f2 = objref.stmid
                    f3 = objref.index
                else:
                    f1 = 1
                    f2 = objref[0]
                    # we force all generation numbers to be 0
                    # f3 = objref[1]
                    f3 = 0

                # Pack big-endian and keep only the low-order bytes that
                # the chosen field widths call for.
                data.append(struct.pack('>B', f1))
                data.append(struct.pack('>L', f2)[-fl2:])
                data.append(struct.pack('>L', f3)[-fl3:])
            index.extend((first, prev - first + 1))
            data = zlib.compress(''.join(data))
            dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
                   'W': [1, fl2, fl3], 'Length': len(data),
                   'Filter': LITERALS_FLATE_DECODE[0],
                   'Root': trailer['Root'],}
            if 'Info' in trailer:
                dic['Info'] = trailer['Info']
            xrefstm = PDFStream(dic, data)
            self.serialize_indirect(maxobj, xrefstm)
            self.write('startxref\n%d\n%%%%EOF' % startxref)

    def write(self, data):
        """Write ``data`` and remember its last character in self.last."""
        self.outf.write(data)
        # Later writes consult self.last to decide whether a separating
        # space or newline is needed.
        self.last = data[-1:]

    def tell(self):
        """Return the current position in the output file."""
        return self.outf.tell()

    def escape_string(self, string):
        """Escape ``string`` for use inside a ``(...)`` PDF literal string.

        Backslash, carriage return, line feed and both parentheses are
        escaped.  A string that starts with a ciando URL carrying a
        referrer ID is replaced wholesale by the plain ciando address.
        """
        string = string.replace('\\', '\\\\')
        # A raw CR inside a literal string is read back as LF, which
        # corrupts binary and UTF-16 string data; it has to be escaped
        # just like LF.
        string = string.replace('\r', r'\r')
        string = string.replace('\n', r'\n')
        string = string.replace('(', r'\(')
        string = string.replace(')', r'\)')
        # get rid of ciando id
        if self._CIANDO_ID_RE.match(string):
            return 'http://www.ciando.com'
        return string

    def serialize_object(self, obj):
        """Write one object of any supported kind to the output.

        Dicts and lists are written recursively.  Numbers, booleans and
        references are preceded by a space when the previous character
        written is alphanumeric, so that adjacent tokens do not merge.
        """
        if isinstance(obj, dict):
            # Correct malformed Mac OS resource forks for Stanza
            if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
                   and isinstance(obj['Type'], int):
                obj['Subtype'] = obj['Type']
                del obj['Type']
            # end - hope this doesn't have bad effects
            self.write('<<')
            for key, val in obj.items():
                self.write('/%s' % key)
                self.serialize_object(val)
            self.write('>>')
        elif isinstance(obj, list):
            self.write('[')
            for val in obj:
                self.serialize_object(val)
            self.write(']')
        elif isinstance(obj, str):
            self.write('(%s)' % self.escape_string(obj))
        elif isinstance(obj, bool):
            # Must be tested before the integer branch: bool is a
            # subclass of int.
            if self.last.isalnum():
                self.write(' ')
            self.write(str(obj).lower())
        elif isinstance(obj, Decimal):
            if self.last.isalnum():
                self.write(' ')
            # str() switches to exponent notation for small values
            # (Decimal('0.0000001') -> '1E-7'), which is not valid PDF
            # number syntax; always write fixed-point instead.
            self.write(format(obj, 'f'))
        elif isinstance(obj, (int, long)):
            if self.last.isalnum():
                self.write(' ')
            self.write(str(obj))
        elif isinstance(obj, PDFObjRef):
            if self.last.isalnum():
                self.write(' ')
            # generation numbers are forced to 0 throughout the output
            self.write('%d %d R' % (obj.objid, 0))
        elif isinstance(obj, PDFStream):
            ### If we don't generate cross ref streams the object streams
            ### are no longer useful, as we have extracted all objects from
            ### them. Therefore leave them out from the output.
            if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
                self.write('(deleted)')
            else:
                data = obj.get_decdata()
                self.serialize_object(obj.dic)
                self.write('stream\n')
                self.write(data)
                self.write('\nendstream')
        else:
            data = str(obj)
            if data[0].isalnum() and self.last.isalnum():
                self.write(' ')
            self.write(data)

    def serialize_indirect(self, objid, obj):
        """Write ``obj`` as indirect object ``objid`` with generation 0."""
        self.write('%d 0 obj' % (objid,))
        self.serialize_object(obj)
        if self.last.isalnum():
            self.write('\n')
        self.write('endobj\n')


def decryptBook(userkey, inpath, outpath):
    """Decrypt the PDF at ``inpath`` and write the result to ``outpath``.

    userkey -- key data handed to PDFSerializer
    inpath  -- path of the encrypted input PDF
    outpath -- path of the PDF to create

    Returns 0 on success and 2 when writing the output fails (the error
    is printed).  Raises ADEPTError when no crypto backend was loaded.
    Errors raised while parsing the input or setting up decryption are
    not caught here and propagate to the caller.
    """
    if RSA is None:
        raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
    with open(inpath, 'rb') as inf:
        serializer = PDFSerializer(inf, userkey)
        # hope this will fix the 'bad file descriptor' problem
        with open(outpath, 'wb') as outf:
            # help construct to make sure the method runs to the end
            try:
                serializer.dump(outf)
            except Exception as e:
                # Exceptions raised without arguments (a bare assert, for
                # example) have empty args; fall back to the class name
                # so the handler itself cannot fail with IndexError.
                detail = e.args[0] if e.args else type(e).__name__
                print(u"error writing pdf: {0}".format(detail))
                return 2
    return 0


def cli_main():
    """Command-line entry point.

    Expects exactly three arguments: key file, input PDF, output PDF.
    Returns 1 after printing a usage line when the argument count is
    wrong; otherwise returns the status from decryptBook().
    """
    sys.stdout=SafeUnbuffered(sys.stdout)
    sys.stderr=SafeUnbuffered(sys.stderr)
    argv=unicode_argv()
    progname = os.path.basename(argv[0])
    if len(argv) != 4:
        print(u"usage: {0} <keyfile.der> <inbook.pdf> <outbook.pdf>".format(progname))
        return 1
    keypath, inpath, outpath = argv[1:]
    # Close the key file deterministically instead of relying on garbage
    # collection of the anonymous file object.
    with open(keypath, 'rb') as keyfile:
        userkey = keyfile.read()
    result = decryptBook(userkey, inpath, outpath)
    if result == 0:
        print(u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)))
    return result


def gui_main():
    """Tk-based entry point.

    Falls back to cli_main() when the Tk modules cannot be imported.
    Returns 1 after showing an error box when no crypto backend was
    loaded, and 0 once the main window has been closed.
    """
    try:
        import Tkinter
        import Tkconstants
        import tkFileDialog
        import tkMessageBox
    except Exception:
        # Any import problem means no usable GUI; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return cli_main()

    class DecryptionDialog(Tkinter.Frame):
        """Frame with key/input/output path entries and a Decrypt button."""

        def __init__(self, root):
            Tkinter.Frame.__init__(self, root, border=5)
            self.status = Tkinter.Label(self, text=u"Select files for decryption")
            self.status.pack(fill=Tkconstants.X, expand=1)
            body = Tkinter.Frame(self)
            body.pack(fill=Tkconstants.X, expand=1)
            sticky = Tkconstants.E + Tkconstants.W
            body.grid_columnconfigure(1, weight=2)
            Tkinter.Label(body, text=u"Key file").grid(row=0)
            self.keypath = Tkinter.Entry(body, width=30)
            self.keypath.grid(row=0, column=1, sticky=sticky)
            # Pre-fill the key path when a key file sits in the current
            # directory.
            if os.path.exists(u"adeptkey.der"):
                self.keypath.insert(0, u"adeptkey.der")
            button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
            button.grid(row=0, column=2)
            Tkinter.Label(body, text=u"Input file").grid(row=1)
            self.inpath = Tkinter.Entry(body, width=30)
            self.inpath.grid(row=1, column=1, sticky=sticky)
            button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
            button.grid(row=1, column=2)
            Tkinter.Label(body, text=u"Output file").grid(row=2)
            self.outpath = Tkinter.Entry(body, width=30)
            self.outpath.grid(row=2, column=1, sticky=sticky)
            button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
            button.grid(row=2, column=2)
            buttons = Tkinter.Frame(self)
            buttons.pack()
            button = Tkinter.Button(
                buttons, text=u"Decrypt", width=10, command=self.decrypt)
            button.pack(side=Tkconstants.LEFT)
            Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
            button = Tkinter.Button(
                buttons, text=u"Quit", width=10, command=self.quit)
            button.pack(side=Tkconstants.RIGHT)

        def get_keypath(self):
            """Ask for the key file and put the choice in the key entry."""
            keypath = tkFileDialog.askopenfilename(
                parent=None, title=u"Select Adobe Adept \'.der\' key file",
                defaultextension=u".der",
                filetypes=[('Adobe Adept DER-encoded files', '.der'),
                           ('All Files', '.*')])
            if keypath:
                keypath = os.path.normpath(keypath)
                self.keypath.delete(0, Tkconstants.END)
                self.keypath.insert(0, keypath)
            return

        def get_inpath(self):
            """Ask for the input PDF and put the choice in the input entry."""
            inpath = tkFileDialog.askopenfilename(
                parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt",
                defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
            if inpath:
                inpath = os.path.normpath(inpath)
                self.inpath.delete(0, Tkconstants.END)
                self.inpath.insert(0, inpath)
            return

        def get_outpath(self):
            """Ask for the output PDF and put the choice in the output entry."""
            outpath = tkFileDialog.asksaveasfilename(
                parent=None, title=u"Select unencrypted PDF file to produce",
                defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
            if outpath:
                outpath = os.path.normpath(outpath)
                self.outpath.delete(0, Tkconstants.END)
                self.outpath.insert(0, outpath)
            return

        def decrypt(self):
            """Validate the three paths, run decryptBook, report the result."""
            keypath = self.keypath.get()
            inpath = self.inpath.get()
            outpath = self.outpath.get()
            if not keypath or not os.path.exists(keypath):
                self.status['text'] = u"Specified key file does not exist"
                return
            if not inpath or not os.path.exists(inpath):
                self.status['text'] = u"Specified input file does not exist"
                return
            if not outpath:
                self.status['text'] = u"Output file not specified"
                return
            if inpath == outpath:
                self.status['text'] = u"Must have different input and output files"
                return
            # Close the key file deterministically.
            with open(keypath, 'rb') as keyfile:
                userkey = keyfile.read()
            self.status['text'] = u"Decrypting..."
            try:
                decrypt_status = decryptBook(userkey, inpath, outpath)
            except Exception as e:
                # Exceptions raised without arguments have empty args;
                # show the class name rather than failing with IndexError.
                detail = e.args[0] if e.args else type(e).__name__
                self.status['text'] = u"Error; {0}".format(detail)
                return
            if decrypt_status == 0:
                self.status['text'] = u"File successfully decrypted"
            else:
                self.status['text'] = u"There was an error decrypting the file."


    root = Tkinter.Tk()
    if RSA is None:
        root.withdraw()
        tkMessageBox.showerror(
            "INEPT PDF",
            "This script requires OpenSSL or PyCrypto, which must be installed "
            "separately.  Read the top-of-script comment for details.")
        return 1
    root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__))
    root.resizable(True, False)
    root.minsize(370, 0)
    DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
    root.mainloop()
    return 0


if __name__ == '__main__':
    # Any command-line argument selects the CLI; otherwise start the GUI.
    entry_point = cli_main if len(sys.argv) > 1 else gui_main
    sys.exit(entry_point())
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								#! /usr/bin/python
-												tools v5.5

Plugins now include unaltered stand-alone scripts, so no longer need to keep separate copies.

											
										
										
											2012-12-19 06:48:11 -07:00
+								# -*- coding: utf-8 -*-
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v3.2

First appearance of combined windows python app

											
										
										
											2011-01-17 00:24:53 -07:00
+								from __future__ import with_statement
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								# ineptpdf.pyw, version 8.0.6
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								# Copyright © 2009-2010 by i♥cabbages
-												ineptpdf 7.5

											
										
										
											2010-12-01 00:16:54 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								# Released under the terms of the GNU General Public Licence, version 3
 								# <http://www.gnu.org/licenses/>
 								# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								# Modified 2015-2017 by Apprentice Harper
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
-												Update to DeDRM to try to fix Linux python problem, and improve Adobe logging

											
										
										
											2016-01-10 23:44:44 -07:00
+								# Windows users: Before running this program, you must first install Python 2.7
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								#   from <http://www.python.org/download/> and PyCrypto from
 								#   <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
-												Update to DeDRM to try to fix Linux python problem, and improve Adobe logging

											
										
										
											2016-01-10 23:44:44 -07:00
+								#   install the version for Python 2.7).  Save this script file as
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								#   ineptpdf.pyw and double-click on it to run it.
 								#
 								# Mac OS X users: Save this script file as ineptpdf.pyw.  You can run this
 								#   program from the command line (pythonw ineptpdf.pyw) or by double-clicking
 								#   it when it has been associated with PythonLauncher.
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								# Revision history:
 								#   1 - Initial release
 								#   2 - Improved determination of key-generation algorithm
 								#   3 - Correctly handle PDF >=1.5 cross-reference streams
 								#   4 - Removal of ciando's personal ID
 								#   5 - Automated decryption of a complete directory
 								#   6.1 - backward compatibility for 1.7.1 and old adeptkey.der
 								#   7 - Get cross reference streams and object streams working for input.
 								#       Not yet supported on output but this only effects file size,
 								#       not functionality. (anon2)
 								#   7.1 - Correct a problem when an old trailer is not followed by startxref
 								#   7.2 - Correct malformed Mac OS resource forks for Stanza (anon2)
 								#       - Support for cross ref streams on output (decreases file size)
 								#   7.3 - Correct bug in trailer with cross ref stream that caused the error
 								#         "The root object is missing or invalid" in Adobe Reader. (anon2)
 								#   7.4 - Force all generation numbers in output file to be 0, like in v6.
 								#         Fallback code for wrong xref improved (search till last trailer
 								#         instead of first) (anon2)
 								#   7.5 - allow support for OpenSSL to replace pycrypto on all platforms
 								#         implemented ARC4 interface to OpenSSL
 								#         fixed minor typos
 								#   7.6 - backported AES and other fixes from version 8.4.48
 								#   7.7 - On Windows try PyCrypto first and OpenSSL next
 								#   7.8 - Modify interface to allow use of import
 								#   7.9 - Bug fix for some session key errors when len(bookkey) > length required
 								#   7.10 - Various tweaks to fix minor problems.
 								#   7.11 - More tweaks to fix minor problems.
 								#   7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code
 								#   7.13 - Fixed erroneous mentions of ineptepub
 								#   7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility
 								#   8.0  - Work if TkInter is missing
 								#   8.0.1 - Broken Metadata fix.
-												Update to DeDRM to try to fix Linux python problem, and improve Adobe logging

											
										
										
											2016-01-10 23:44:44 -07:00
+								#   8.0.2 - Add additional check on DER file sanity
-												Regression bug fixes

											
										
										
											2016-01-14 10:15:43 -07:00
+								#   8.0.3 - Remove erroneous check on DER file sanity
-												Completely remove erroneous check

											
										
										
											2016-01-14 23:30:54 -07:00
+								#   8.0.4 - Completely remove erroneous check on DER file sanity
-												Change to ineptpdf.py, so that we throw an exception for DRM-free PDFs, rather than processing them.

											
										
										
											2016-08-09 23:40:48 -06:00
+								#   8.0.5 - Do not process DRM-free documents
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								#   8.0.6 - Replace use of float by Decimal for greater precision, and import tkFileDialog
-												Update to DeDRM to try to fix Linux python problem, and improve Adobe logging

											
										
										
											2016-01-10 23:44:44 -07:00
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								"""
 								Decrypts Adobe ADEPT-encrypted PDF files.
 								"""
 								__license__ = 'GPL v3'
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								__version__ = "8.0.6"
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
 								import sys
 								import os
 								import re
 								import zlib
 								import struct
 								import hashlib
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								from decimal import *
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								from itertools import chain, islice
 								import xml.etree.ElementTree as etree
-												tools v5.5

Plugins now include unaltered stand-alone scripts, so no longer need to keep separate copies.

											
										
										
											2012-12-19 06:48:11 -07:00
 								# Wrap a stream so that output gets flushed immediately
 								# and also make sure that any unicode strings get
 								# encoded using "replace" before writing them.
 								class SafeUnbuffered:
 								    def __init__(self, stream):
 								        self.stream = stream
 								        self.encoding = stream.encoding
 								        if self.encoding == None:
 								            self.encoding = "utf-8"
 								    def write(self, data):
 								        if isinstance(data,unicode):
 								            data = data.encode(self.encoding,"replace")
 								        self.stream.write(data)
 								        self.stream.flush()
 								    def __getattr__(self, attr):
 								        return getattr(self.stream, attr)
 								iswindows = sys.platform.startswith('win')
 								isosx = sys.platform.startswith('darwin')
 								def unicode_argv():
 								    if iswindows:
 								        # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
 								        # strings.
 								        # Versions 2.x of Python don't support Unicode in sys.argv on
 								        # Windows, with the underlying Windows API instead replacing multi-byte
 								        # characters with '?'.
 								        from ctypes import POINTER, byref, cdll, c_int, windll
 								        from ctypes.wintypes import LPCWSTR, LPWSTR
 								        GetCommandLineW = cdll.kernel32.GetCommandLineW
 								        GetCommandLineW.argtypes = []
 								        GetCommandLineW.restype = LPCWSTR
 								        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
 								        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
 								        CommandLineToArgvW.restype = POINTER(LPWSTR)
 								        cmd = GetCommandLineW()
 								        argc = c_int(0)
 								        argv = CommandLineToArgvW(cmd, byref(argc))
 								        if argc.value > 0:
 								            # Remove Python executable and commands if present
 								            start = argc.value - len(sys.argv)
 								            return [argv[i] for i in
 								                    xrange(start, argc.value)]
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        return [u"ineptpdf.py"]
-												tools v5.5

Plugins now include unaltered stand-alone scripts, so no longer need to keep separate copies.

											
										
										
											2012-12-19 06:48:11 -07:00
+								    else:
 								        argvencoding = sys.stdin.encoding
 								        if argvencoding == None:
 								            argvencoding = "utf-8"
 								        return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
 								class ADEPTError(Exception):
 								    pass
 								import hashlib
 								def SHA256(message):
 								    ctx = hashlib.sha256()
 								    ctx.update(message)
 								    return ctx.digest()
 								def _load_crypto_libcrypto():
 								    from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
 								        Structure, c_ulong, create_string_buffer, cast
 								    from ctypes.util import find_library
 								    if sys.platform.startswith('win'):
 								        libcrypto = find_library('libeay32')
 								    else:
 								        libcrypto = find_library('crypto')
 								    if libcrypto is None:
 								        raise ADEPTError('libcrypto not found')
 								    libcrypto = CDLL(libcrypto)
 								    AES_MAXNR = 14
 								    RSA_NO_PADDING = 3
 								    c_char_pp = POINTER(c_char_p)
 								    c_int_p = POINTER(c_int)
 								    class AES_KEY(Structure):
 								        _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
 								    AES_KEY_p = POINTER(AES_KEY)
 								    class RC4_KEY(Structure):
 								        _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)]
 								    RC4_KEY_p = POINTER(RC4_KEY)
 								    class RSA(Structure):
 								        pass
 								    RSA_p = POINTER(RSA)
 								    def F(restype, name, argtypes):
 								        func = getattr(libcrypto, name)
 								        func.restype = restype
 								        func.argtypes = argtypes
 								        return func
 								    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
 								    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
 								    RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p])
 								    RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p])
 								    d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
 								                          [RSA_p, c_char_pp, c_long])
 								    RSA_size = F(c_int, 'RSA_size', [RSA_p])
 								    RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
 								                            [c_int, c_char_p, c_char_p, RSA_p, c_int])
 								    RSA_free = F(None, 'RSA_free', [RSA_p])
 								    class RSA(object):
 								        def __init__(self, der):
 								            buf = create_string_buffer(der)
 								            pp = c_char_pp(cast(buf, c_char_p))
 								            rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
 								            if rsa is None:
 								                raise ADEPTError('Error parsing ADEPT user key DER')
 								        def decrypt(self, from_):
 								            rsa = self._rsa
 								            to = create_string_buffer(RSA_size(rsa))
 								            dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
 								                                       RSA_NO_PADDING)
 								            if dlen < 0:
 								                raise ADEPTError('RSA decryption failed')
 								            return to[1:dlen]
 								        def __del__(self):
 								            if self._rsa is not None:
 								                RSA_free(self._rsa)
 								                self._rsa = None
 								    class ARC4(object):
 								        @classmethod
 								        def new(cls, userkey):
 								            self = ARC4()
 								            self._blocksize = len(userkey)
 								            key = self._key = RC4_KEY()
 								            RC4_set_key(key, self._blocksize, userkey)
 								            return self
 								        def __init__(self):
 								            self._blocksize = 0
 								            self._key = None
 								        def decrypt(self, data):
 								            out = create_string_buffer(len(data))
 								            RC4_crypt(self._key, len(data), data, out)
 								            return out.raw
 								    class AES(object):
 								        MODE_CBC = 0
 								        @classmethod
 								        def new(cls, userkey, mode, iv):
 								            self = AES()
 								            self._blocksize = len(userkey)
 								            # mode is ignored since CBCMODE is only thing supported/used so far
 								            self._mode = mode
 								            if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
 								                raise ADEPTError('AES improper key used')
 								                return
 								            keyctx = self._keyctx = AES_KEY()
 								            self._iv = iv
 								            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
 								            if rv < 0:
 								                raise ADEPTError('Failed to initialize AES key')
 								            return self
 								        def __init__(self):
 								            self._blocksize = 0
 								            self._keyctx = None
 								            self._iv = 0
 								            self._mode = 0
 								        def decrypt(self, data):
 								            out = create_string_buffer(len(data))
 								            rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0)
 								            if rv == 0:
 								                raise ADEPTError('AES decryption failed')
 								            return out.raw
 								    return (ARC4, RSA, AES)
 								def _load_crypto_pycrypto():
 								    '''
 								    Build the ARC4, RSA and AES wrapper classes on top of PyCrypto.
 								    Returns the tuple (ARC4, RSA, AES). The Crypto imports are done
 								    inside the function, so a missing PyCrypto reaches the caller as
 								    ImportError.
 								    '''
 								    from Crypto.PublicKey import RSA as _RSA
 								    from Crypto.Cipher import ARC4 as _ARC4
 								    from Crypto.Cipher import AES as _AES
 								    # ASN.1 parsing code from tlslite
 								    class ASN1Error(Exception):
 								        pass
 								    class ASN1Parser(object):
 								        '''
 								        Minimal reader for one ASN.1 element held as a sequence of
 								        integer byte values: skips the tag byte, decodes the length and
 								        keeps the content in self.value.
 								        '''
 								        class Parser(object):
 								            '''Cursor over a sequence of integer byte values.'''
 								            def __init__(self, bytes):
 								                self.bytes = bytes
 								                self.index = 0
 								            def get(self, length):
 								                # Read `length` bytes as one big-endian unsigned integer.
 								                if self.index + length > len(self.bytes):
 								                    raise ASN1Error("Error decoding ASN.1")
 								                x = 0
 								                for count in range(length):
 								                    x <<= 8
 								                    x |= self.bytes[self.index]
 								                    self.index += 1
 								                return x
 								            def getFixBytes(self, lengthBytes):
 								                # Slice out the next lengthBytes values and advance past them.
 								                bytes = self.bytes[self.index : self.index+lengthBytes]
 								                self.index += lengthBytes
 								                return bytes
 								            def getVarBytes(self, lengthLength):
 								                # Length-prefixed read: the prefix itself is lengthLength bytes.
 								                lengthBytes = self.get(lengthLength)
 								                return self.getFixBytes(lengthBytes)
 								            def getFixList(self, length, lengthList):
 								                # Read lengthList integers of `length` bytes each.
 								                l = [0] * lengthList
 								                for x in range(lengthList):
 								                    l[x] = self.get(length)
 								                return l
 								            def getVarList(self, length, lengthLength):
 								                # Like getFixList, but the total byte count is read first and
 								                # must be a whole multiple of the element size.
 								                lengthList = self.get(lengthLength)
 								                if lengthList % length != 0:
 								                    raise ASN1Error("Error decoding ASN.1")
 								                lengthList = int(lengthList/length)
 								                l = [0] * lengthList
 								                for x in range(lengthList):
 								                    l[x] = self.get(length)
 								                return l
 								            def startLengthCheck(self, lengthLength):
 								                # Read an expected length and remember where counting starts.
 								                self.lengthCheck = self.get(lengthLength)
 								                self.indexCheck = self.index
 								            def setLengthCheck(self, length):
 								                # Same as startLengthCheck with a caller-supplied length.
 								                self.lengthCheck = length
 								                self.indexCheck = self.index
 								            def stopLengthCheck(self):
 								                # The bytes consumed since the check began must match exactly.
 								                if (self.index - self.indexCheck) != self.lengthCheck:
 								                    raise ASN1Error("Error decoding ASN.1")
 								            def atLengthCheck(self):
 								                # False while short of the expected length, True exactly at
 								                # it, and an error once the cursor has overrun it.
 								                if (self.index - self.indexCheck) < self.lengthCheck:
 								                    return False
 								                elif (self.index - self.indexCheck) == self.lengthCheck:
 								                    return True
 								                else:
 								                    raise ASN1Error("Error decoding ASN.1")
 								        def __init__(self, bytes):
 								            p = self.Parser(bytes)
 								            # The tag byte is read and discarded.
 								            p.get(1)
 								            self.length = self._getASN1Length(p)
 								            self.value = p.getFixBytes(self.length)
 								        def getChild(self, which):
 								            # Walk over `which`+1 child elements of self.value and wrap the
 								            # last one (tag, length and content) in a new ASN1Parser.
 								            p = self.Parser(self.value)
 								            for x in range(which+1):
 								                markIndex = p.index
 								                p.get(1)
 								                length = self._getASN1Length(p)
 								                p.getFixBytes(length)
 								            return ASN1Parser(p.bytes[markIndex:p.index])
 								        def _getASN1Length(self, p):
 								            # Values up to 127 are the length itself; otherwise the low
 								            # seven bits give the number of following length bytes.
 								            firstLength = p.get(1)
 								            if firstLength<=127:
 								                return firstLength
 								            else:
 								                lengthLength = firstLength & 0x7F
 								                return p.get(lengthLength)
 								    class ARC4(object):
 								        '''Thin wrapper exposing new()/decrypt() over PyCrypto ARC4.'''
 								        @classmethod
 								        def new(cls, userkey):
 								            self = ARC4()
 								            self._arc4 = _ARC4.new(userkey)
 								            return self
 								        def __init__(self):
 								            self._arc4 = None
 								        def decrypt(self, data):
 								            return self._arc4.decrypt(data)
 								    class AES(object):
 								        '''Thin wrapper exposing new()/decrypt() over PyCrypto AES.'''
 								        MODE_CBC = _AES.MODE_CBC
 								        @classmethod
 								        def new(cls, userkey, mode, iv):
 								            self = AES()
 								            self._aes = _AES.new(userkey, mode, iv)
 								            return self
 								        def __init__(self):
 								            self._aes = None
 								        def decrypt(self, data):
 								            return self._aes.decrypt(data)
 								    class RSA(object):
 								        '''RSA key built from a DER blob via the ASN.1 parser above.'''
 								        def __init__(self, der):
 								            key = ASN1Parser([ord(x) for x in der])
 								            # Children 1..3 of the outer element are handed to
 								            # _RSA.construct in that order.
 								            key = [key.getChild(x).value for x in xrange(1, 4)]
 								            key = [self.bytesToNumber(v) for v in key]
 								            self._rsa = _RSA.construct(key)
 								        def bytesToNumber(self, bytes):
 								            # Big-endian fold of integer byte values into one long.
 								            total = long(0)
 								            for byte in bytes:
 								                total = (total << 8) + byte
 								            return total
 								        def decrypt(self, data):
 								            return self._rsa.decrypt(data)
 								    return (ARC4, RSA, AES)
 								def _load_crypto():
 								    '''
 								    Return (ARC4, RSA, AES) from the first crypto backend that loads.
 								    PyCrypto is tried first on Windows and libcrypto first elsewhere.
 								    A backend that raises ImportError or ADEPTError is skipped; when
 								    none loads, the result is (None, None, None).
 								    '''
 								    if sys.platform.startswith('win'):
 								        loaders = (_load_crypto_pycrypto, _load_crypto_libcrypto)
 								    else:
 								        loaders = (_load_crypto_libcrypto, _load_crypto_pycrypto)
 								    for load in loaders:
 								        try:
 								            arc4, rsa, aes = load()
 								        except (ImportError, ADEPTError):
 								            continue
 								        return (arc4, rsa, aes)
 								    return (None, None, None)
 								# Bind the crypto wrappers once at import time. All three are None
 								# when _load_crypto() found no usable backend.
 								ARC4, RSA, AES = _load_crypto()
 								# Use cStringIO when it can be imported, else the StringIO module.
 								try:
 								    from cStringIO import StringIO
 								except ImportError:
 								    from StringIO import StringIO
 								# Do we generate cross reference streams on output?
 								# 0 = never
 								# 1 = only if present in input
 								# 2 = always
 								GEN_XREF_STM = 1
 								# This is the value for the current document
 								gen_xref_stm = False # will be set in PDFSerializer
 								# PDF parsing routines from pdfminer, with changes for EBX_HANDLER
 								#  Utilities
 								def choplist(n, seq):
 								    '''
 								    Yield consecutive n-tuples taken from seq.
 								    A trailing group with fewer than n items is discarded.
 								    '''
 								    chunk = []
 								    for item in seq:
 								        chunk.append(item)
 								        if len(chunk) != n:
 								            continue
 								        yield tuple(chunk)
 								        chunk = []
 								def nunpack(s, default=0):
 								    '''
 								    Unpack a big-endian unsigned integer from 0 to 4 bytes.
 								    s: the packed bytes.
 								    default: value returned when s is empty.
 								    Raises TypeError when s is longer than 4 bytes.
 								    '''
 								    l = len(s)
 								    if not l:
 								        return default
 								    elif l == 1:
 								        return ord(s)
 								    elif l == 2:
 								        return struct.unpack('>H', s)[0]
 								    elif l == 3:
 								        # Pad to four bytes so the value fits the '>L' format.
 								        return struct.unpack('>L', b'\x00'+s)[0]
 								    elif l == 4:
 								        return struct.unpack('>L', s)[0]
 								    else:
 								        # Raise the error: returning the exception object would hand
 								        # callers a TypeError instance where they expect a number.
 								        raise TypeError('invalid length: %d' % l)
 								# When true, the type-checking helpers in this file raise on a
 								# mismatch instead of returning a fallback value.
 								STRICT = 0
 								#  PS Exceptions
 								# PSException is the common base; the others subclass it directly.
 								class PSException(Exception): pass
 								# Raised by PSBaseParser.fillbuf when the input has no more data.
 								class PSEOF(PSException): pass
 								class PSSyntaxError(PSException): pass
 								# Raised by literal_name/keyword_name (under STRICT) and by
 								# PSStackParser.end_type on a mismatched closing delimiter.
 								class PSTypeError(PSException): pass
 								class PSValueError(PSException): pass
 								#  Basic PostScript Types
 								# PSLiteral
 								class PSObject(object):
 								    '''Common base class of the PostScript object types.'''
 								class PSLiteral(PSObject):
 								    '''
 								    A PostScript literal name such as "/Name".
 								    Caution: do not instantiate directly; obtain instances through
 								    PSLiteralTable.intern() instead.
 								    '''
 								    def __init__(self, name):
 								        self.name = name
 								    def __repr__(self):
 								        # Every non-alphanumeric character is written as '#' followed
 								        # by its two-digit hex code.
 								        escaped = [ch if ch.isalnum() else '#%02x' % ord(ch)
 								                   for ch in self.name]
 								        return '/%s' % ''.join(escaped)
 								# PSKeyword
 								class PSKeyword(PSObject):
 								    '''
 								    A PostScript keyword such as "showpage".
 								    Caution: do not instantiate directly; obtain instances through
 								    PSKeywordTable.intern() instead.
 								    '''
 								    def __init__(self, name):
 								        self.name = name
 								    def __repr__(self):
 								        # A keyword prints as its bare name.
 								        return self.name
 								# PSSymbolTable
 								class PSSymbolTable(object):
 								    '''
 								    Interning table for PSLiteral or PSKeyword objects: each distinct
 								    name maps to exactly one instance of the configured class.
 								    '''
 								    def __init__(self, classe):
 								        self.dic = {}
 								        self.classe = classe
 								    def intern(self, name):
 								        # Reuse the stored symbol for this name, creating and caching
 								        # it on first use.
 								        try:
 								            return self.dic[name]
 								        except KeyError:
 								            symbol = self.dic[name] = self.classe(name)
 								            return symbol
 								# One interning table per symbol kind, so that equal names resolve
 								# to the same object.
 								PSLiteralTable = PSSymbolTable(PSLiteral)
 								PSKeywordTable = PSSymbolTable(PSKeyword)
 								# Short aliases for interning a literal / a keyword.
 								LIT = PSLiteralTable.intern
 								KWD = PSKeywordTable.intern
 								# Interned delimiter keywords. The array and dict ones are compared
 								# against parsed tokens in PSStackParser.nextobject.
 								KEYWORD_BRACE_BEGIN = KWD('{')
 								KEYWORD_BRACE_END = KWD('}')
 								KEYWORD_ARRAY_BEGIN = KWD('[')
 								KEYWORD_ARRAY_END = KWD(']')
 								KEYWORD_DICT_BEGIN = KWD('<<')
 								KEYWORD_DICT_END = KWD('>>')
 								def literal_name(x):
 								    '''
 								    Return the name of a PSLiteral.
 								    For any other value: raise PSTypeError when STRICT is set,
 								    otherwise return str(x).
 								    '''
 								    if isinstance(x, PSLiteral):
 								        return x.name
 								    if STRICT:
 								        raise PSTypeError('Literal required: %r' % x)
 								    return str(x)
 								def keyword_name(x):
 								    '''
 								    Return the name of a PSKeyword.
 								    For any other value: raise PSTypeError when STRICT is set,
 								    otherwise return str(x).
 								    '''
 								    if isinstance(x, PSKeyword):
 								        return x.name
 								    if STRICT:
 								        raise PSTypeError('Keyword required: %r' % x)
 								    return str(x)
 								##  PSBaseParser
 								##
 								# Single-character classes used by the tokenizer states below.
 								EOL = re.compile(r'[\r\n]')
 								SPC = re.compile(r'\s')
 								NONSPC = re.compile(r'\S')
 								HEX = re.compile(r'[0-9a-fA-F]')
 								# END_* patterns match the first character that terminates the
 								# token kind named; the states search for them to find a token's end.
 								END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
 								END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
 								# Two hex digits, or failing that any single leftover character.
 								HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
 								END_NUMBER = re.compile(r'[^0-9]')
 								END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
 								# '(' , ')' or a backslash (\134 is the octal code of '\').
 								END_STRING = re.compile(r'[()\134]')
 								OCT_STRING = re.compile(r'[0-7]')
 								# Character after a backslash in a string -> code of the character
 								# it stands for.
 								ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
 								class PSBaseParser(object):
 								    '''
 								    Most basic PostScript parser that performs only basic tokenization.
 								    The tokenizer is a state machine: self.parse1 holds the current
 								    state method, and every state takes (buffer, index) and returns
 								    the next state together with the index to resume from. Finished
 								    tokens are queued in self.tokens as (position, object) pairs.
 								    '''
 								    BUFSIZ = 4096
 								    def __init__(self, fp):
 								        self.fp = fp
 								        self.seek(0)
 								        return
 								    def __repr__(self):
 								        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
 								    def flush(self):
 								        # Hook with no behaviour at this level.
 								        return
 								    def close(self):
 								        self.flush()
 								        return
 								    def tell(self):
 								        '''Return the absolute position of the next unread character.'''
 								        return self.bufpos+self.charpos
 								    def poll(self, pos=None, n=80):
 								        '''
 								        Debugging aid: seek to pos (default: the current parse
 								        position) and restore the file position afterwards. The
 								        statement that printed the data there is commented out.
 								        '''
 								        pos0 = self.fp.tell()
 								        # Compare with None so that an explicit pos=0 is honoured.
 								        if pos is None:
 								            pos = self.bufpos+self.charpos
 								        self.fp.seek(pos)
 								        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
 								        self.fp.seek(pos0)
 								        return
 								    def seek(self, pos):
 								        '''
 								        Seeks the parser to the given position.
 								        '''
 								        self.fp.seek(pos)
 								        # reset the status for nextline()
 								        self.bufpos = pos
 								        self.buf = ''
 								        self.charpos = 0
 								        # reset the status for nexttoken()
 								        self.parse1 = self.parse_main
 								        self.tokens = []
 								        return
 								    def fillbuf(self):
 								        '''
 								        Read the next BUFSIZ chunk once the current buffer is used up.
 								        Raises PSEOF when the file has no more data.
 								        '''
 								        if self.charpos < len(self.buf): return
 								        # fetch next chunk.
 								        self.bufpos = self.fp.tell()
 								        self.buf = self.fp.read(self.BUFSIZ)
 								        if not self.buf:
 								            raise PSEOF('Unexpected EOF')
 								        self.charpos = 0
 								        return
 								    def parse_main(self, s, i):
 								        '''
 								        Start state: skip whitespace and pick the state for the next
 								        token from its first character.
 								        '''
 								        m = NONSPC.search(s, i)
 								        if not m:
 								            return (self.parse_main, len(s))
 								        j = m.start(0)
 								        c = s[j]
 								        self.tokenstart = self.bufpos+j
 								        if c == '%':
 								            self.token = '%'
 								            return (self.parse_comment, j+1)
 								        if c == '/':
 								            self.token = ''
 								            return (self.parse_literal, j+1)
 								        if c in '-+' or c.isdigit():
 								            self.token = c
 								            return (self.parse_number, j+1)
 								        if c == '.':
 								            self.token = c
 								            return (self.parse_decimal, j+1)
 								        if c.isalpha():
 								            self.token = c
 								            return (self.parse_keyword, j+1)
 								        if c == '(':
 								            self.token = ''
 								            self.paren = 1
 								            return (self.parse_string, j+1)
 								        if c == '<':
 								            self.token = ''
 								            return (self.parse_wopen, j+1)
 								        if c == '>':
 								            self.token = ''
 								            return (self.parse_wclose, j+1)
 								        # Any other character becomes a one-character keyword.
 								        self.add_token(KWD(c))
 								        return (self.parse_main, j+1)
 								    def add_token(self, obj):
 								        '''Queue obj together with the position where its token began.'''
 								        self.tokens.append((self.tokenstart, obj))
 								        return
 								    def parse_comment(self, s, i):
 								        m = EOL.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_comment, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        # We ignore comments.
 								        #self.tokens.append(self.token)
 								        return (self.parse_main, j)
 								    def parse_literal(self, s, i):
 								        m = END_LITERAL.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_literal, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        c = s[j]
 								        if c == '#':
 								            # '#' introduces a hex-escaped character inside the name.
 								            self.hex = ''
 								            return (self.parse_literal_hex, j+1)
 								        self.add_token(LIT(self.token))
 								        return (self.parse_main, j)
 								    def parse_literal_hex(self, s, i):
 								        # Collect at most two hex digits, then append the character
 								        # they encode and go back to reading the literal.
 								        c = s[i]
 								        if HEX.match(c) and len(self.hex) < 2:
 								            self.hex += c
 								            return (self.parse_literal_hex, i+1)
 								        if self.hex:
 								            self.token += chr(int(self.hex, 16))
 								        return (self.parse_literal, i)
 								    def parse_number(self, s, i):
 								        m = END_NUMBER.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_number, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        c = s[j]
 								        if c == '.':
 								            # A decimal point hands the token over to parse_decimal.
 								            self.token += c
 								            return (self.parse_decimal, j+1)
 								        try:
 								            self.add_token(int(self.token))
 								        except ValueError:
 								            # A token that is not an integer (e.g. a lone sign) is dropped.
 								            pass
 								        return (self.parse_main, j)
 								    def parse_decimal(self, s, i):
 								        m = END_NUMBER.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_decimal, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        try:
 								            self.add_token(Decimal(self.token))
 								        except ArithmeticError:
 								            # Decimal signals an unparsable token such as "." or "+."
 								            # with decimal.InvalidOperation, an ArithmeticError
 								            # subclass. Drop the token, as parse_number does for a
 								            # bad integer, instead of aborting the whole parse.
 								            pass
 								        return (self.parse_main, j)
 								    def parse_keyword(self, s, i):
 								        m = END_KEYWORD.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_keyword, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        # 'true' and 'false' become Python booleans rather than keywords.
 								        if self.token == 'true':
 								            token = True
 								        elif self.token == 'false':
 								            token = False
 								        else:
 								            token = KWD(self.token)
 								        self.add_token(token)
 								        return (self.parse_main, j)
 								    def parse_string(self, s, i):
 								        m = END_STRING.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_string, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        c = s[j]
 								        if c == '\\':
 								            self.oct = ''
 								            return (self.parse_string_1, j+1)
 								        if c == '(':
 								            # Nested parentheses are part of the string while balanced.
 								            self.paren += 1
 								            self.token += c
 								            return (self.parse_string, j+1)
 								        if c == ')':
 								            self.paren -= 1
 								            if self.paren:
 								                self.token += c
 								                return (self.parse_string, j+1)
 								        self.add_token(self.token)
 								        return (self.parse_main, j+1)
 								    def parse_string_1(self, s, i):
 								        # Character(s) after a backslash: up to three octal digits,
 								        # or one of the letters in ESC_STRING; anything else is skipped.
 								        c = s[i]
 								        if OCT_STRING.match(c) and len(self.oct) < 3:
 								            self.oct += c
 								            return (self.parse_string_1, i+1)
 								        if self.oct:
 								            self.token += chr(int(self.oct, 8))
 								            return (self.parse_string, i)
 								        if c in ESC_STRING:
 								            self.token += chr(ESC_STRING[c])
 								        return (self.parse_string, i+1)
 								    def parse_wopen(self, s, i):
 								        # After '<': whitespace or a hex digit starts a hex string, a
 								        # second '<' opens a dictionary.
 								        c = s[i]
 								        if c.isspace() or HEX.match(c):
 								            return (self.parse_hexstring, i)
 								        if c == '<':
 								            self.add_token(KEYWORD_DICT_BEGIN)
 								            i += 1
 								        return (self.parse_main, i)
 								    def parse_wclose(self, s, i):
 								        # After '>': a second '>' closes a dictionary.
 								        c = s[i]
 								        if c == '>':
 								            self.add_token(KEYWORD_DICT_END)
 								            i += 1
 								        return (self.parse_main, i)
 								    def parse_hexstring(self, s, i):
 								        m = END_HEX_STRING.search(s, i)
 								        if not m:
 								            self.token += s[i:]
 								            return (self.parse_hexstring, len(s))
 								        j = m.start(0)
 								        self.token += s[i:j]
 								        # Strip whitespace, then turn each hex pair (or a single
 								        # leftover digit) into the character it encodes.
 								        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
 								                                                 SPC.sub('', self.token))
 								        self.add_token(token)
 								        return (self.parse_main, j)
 								    def nexttoken(self):
 								        '''
 								        Return the next (position, token) pair, running the state
 								        machine over further input until a token is queued.
 								        PSEOF from fillbuf propagates when the input runs out.
 								        '''
 								        while not self.tokens:
 								            self.fillbuf()
 								            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
 								        token = self.tokens.pop(0)
 								        return token
 								    def nextline(self):
 								        '''
 								        Fetches a next line that ends either with \\r or \\n.
 								        Returns (position of the line start, line text including its
 								        terminator).
 								        '''
 								        linebuf = ''
 								        linepos = self.bufpos + self.charpos
 								        eol = False
 								        while 1:
 								            self.fillbuf()
 								            if eol:
 								                c = self.buf[self.charpos]
 								                # handle '\r\n'
 								                if c == '\n':
 								                    linebuf += c
 								                    self.charpos += 1
 								                break
 								            m = EOL.search(self.buf, self.charpos)
 								            if m:
 								                linebuf += self.buf[self.charpos:m.end(0)]
 								                self.charpos = m.end(0)
 								                if linebuf[-1] == '\r':
 								                    # Look at one more character in case of '\r\n'.
 								                    eol = True
 								                else:
 								                    break
 								            else:
 								                linebuf += self.buf[self.charpos:]
 								                self.charpos = len(self.buf)
 								        return (linepos, linebuf)
 								    def revreadlines(self):
 								        '''
 								        Fetches a next line backward. This is used to locate
 								        the trailers at the end of a file.
 								        '''
 								        self.fp.seek(0, 2)
 								        pos = self.fp.tell()
 								        buf = ''
 								        while 0 < pos:
 								            prevpos = pos
 								            pos = max(0, pos-self.BUFSIZ)
 								            self.fp.seek(pos)
 								            s = self.fp.read(prevpos-pos)
 								            if not s: break
 								            while 1:
 								                n = max(s.rfind('\r'), s.rfind('\n'))
 								                if n == -1:
 								                    # No terminator left in this chunk: keep the
 								                    # fragment to prepend to the next line found.
 								                    buf = s + buf
 								                    break
 								                yield s[n:]+buf
 								                s = s[:n]
 								                buf = ''
 								        return
 								##  PSStackParser
 								##
 								class PSStackParser(PSBaseParser):
 								    '''
 								    Parser that assembles the tokens of PSBaseParser into objects.
 								    Plain tokens are pushed on self.curstack as (pos, token) pairs;
 								    '[' and '<<' save the current stack in self.context and start a
 								    new one, ']' and '>>' fold it into a list or dict.
 								    '''
 								    def __init__(self, fp):
 								        PSBaseParser.__init__(self, fp)
 								        self.reset()
 								        return
 								    def reset(self):
 								        '''Discard all partially built objects and pending results.'''
 								        self.context = []
 								        self.curtype = None
 								        self.curstack = []
 								        self.results = []
 								        return
 								    def seek(self, pos):
 								        PSBaseParser.seek(self, pos)
 								        self.reset()
 								        return
 								    def push(self, *objs):
 								        self.curstack.extend(objs)
 								        return
 								    def pop(self, n):
 								        '''Remove and return the last n entries of the current stack.'''
 								        objs = self.curstack[-n:]
 								        self.curstack[-n:] = []
 								        return objs
 								    def popall(self):
 								        '''Remove and return every entry of the current stack.'''
 								        objs = self.curstack
 								        self.curstack = []
 								        return objs
 								    def add_results(self, *objs):
 								        self.results.extend(objs)
 								        return
 								    def start_type(self, pos, type):
 								        # Save the enclosing state and start collecting into a new stack.
 								        self.context.append((pos, self.curtype, self.curstack))
 								        (self.curtype, self.curstack) = (type, [])
 								        return
 								    def end_type(self, type):
 								        '''
 								        Close the construct opened by start_type.
 								        Returns (start position, list of collected objects) and
 								        restores the enclosing state. Raises PSTypeError when the
 								        construct being closed is not of the given type.
 								        '''
 								        if self.curtype != type:
 								            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
 								        objs = [ obj for (_,obj) in self.curstack ]
 								        (pos, self.curtype, self.curstack) = self.context.pop()
 								        return (pos, objs)
 								    def do_keyword(self, pos, token):
 								        # Hook with no behaviour at this level.
 								        return
 								    def nextobject(self, direct=False):
 								        '''
 								        Consume tokens until an object is available and return it.
 								        Arrays are built as Python lists and dictionaries as dicts
 								        keyed by literal_name() of each key. With direct=True the
 								        top (pos, obj) entry of the stack is returned as soon as no
 								        array or dictionary is open; otherwise flush() is called and
 								        the first entry of self.results is returned once one exists.
 								        '''
 								        while not self.results:
 								            (pos, token) = self.nexttoken()
 								            ##print (pos,token), (self.curtype, self.curstack)
 								            if isinstance(token, (int, Decimal, bool, str, PSLiteral)):
 								                # normal token
 								                self.push((pos, token))
 								            elif token == KEYWORD_ARRAY_BEGIN:
 								                # begin array
 								                self.start_type(pos, 'a')
 								            elif token == KEYWORD_ARRAY_END:
 								                # end array
 								                try:
 								                    self.push(self.end_type('a'))
 								                except PSTypeError:
 								                    if STRICT: raise
 								            elif token == KEYWORD_DICT_BEGIN:
 								                # begin dictionary
 								                self.start_type(pos, 'd')
 								            elif token == KEYWORD_DICT_END:
 								                # end dictionary
 								                try:
 								                    (pos, objs) = self.end_type('d')
 								                    if len(objs) % 2 != 0:
 								                        # Call form prints the same text as the old
 								                        # print statement under Python 2.
 								                        print("Incomplete dictionary construct")
 								                        objs.append("") # this isn't necessary.
 								                        # temporary fix. is this due to rental books?
 								                        # raise PSSyntaxError(
 								                        #     'Invalid dictionary construct: %r' % objs)
 								                    d = dict((literal_name(k), v) \
 								                                 for (k,v) in choplist(2, objs))
 								                    self.push((pos, d))
 								                except PSTypeError:
 								                    if STRICT: raise
 								            else:
 								                self.do_keyword(pos, token)
 								            if self.context:
 								                # Still inside an array or dictionary: keep reading.
 								                continue
 								            else:
 								                if direct:
 								                    return self.pop(1)[0]
 								                self.flush()
 								        obj = self.results.pop(0)
 								        return obj
 								# Interned literals for filter names. Each LITERALS_* tuple pairs a
 								# long name with a short one so callers can test membership.
 								LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
 								LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
 								LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
 								LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
 								##  PDF Objects
 								##
 								# Base class of the PDF object types defined in this file.
 								class PDFObject(PSObject): pass
 								# PDF-level errors. PDFTypeError and PDFValueError derive from
 								# PDFException.
 								class PDFException(PSException): pass
 								class PDFTypeError(PDFException): pass
 								class PDFValueError(PDFException): pass
 								# Note: derives from PSException directly, not from PDFException, so
 								# an `except PDFException` clause does not catch it.
 								class PDFNotImplementedError(PSException): pass
 								##  PDFObjRef
 								##
 								class PDFObjRef(PDFObject):
 								    '''
 								    Indirect reference to object `objid` (generation `genno`) of the
 								    document `doc`.
 								    '''
 								    def __init__(self, doc, objid, genno):
 								        # Object id 0 is rejected only in STRICT mode.
 								        if objid == 0 and STRICT:
 								            raise PDFValueError('PDF object id cannot be 0.')
 								        self.doc = doc
 								        self.objid = objid
 								        self.genno = genno
 								    def __repr__(self):
 								        return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
 								    def resolve(self):
 								        '''Fetch the referenced object from the document by object id.'''
 								        return self.doc.getobj(self.objid)
 								# resolve
 								def resolve1(x):
 								    '''
 								    Follow indirect references until a direct object is reached.
 								    Only the top level is resolved: a returned array or dictionary
 								    may still hold indirect references.
 								    '''
 								    while True:
 								        if not isinstance(x, PDFObjRef):
 								            return x
 								        x = x.resolve()
 								def resolve_all(x):
 								    '''
 								    Resolve X and, recursively, everything nested inside it, so that
 								    no indirect reference remains. Lists are rebuilt; dictionaries
 								    are updated in place. This procedure might be slow.
 								    '''
 								    while isinstance(x, PDFObjRef):
 								        x = x.resolve()
 								    if isinstance(x, dict):
 								        # Only existing keys are reassigned, so the dict keeps its
 								        # size while it is being iterated.
 								        for (key, val) in x.iteritems():
 								            x[key] = resolve_all(val)
 								        return x
 								    if isinstance(x, list):
 								        return [ resolve_all(item) for item in x ]
 								    return x
 								def decipher_all(decipher, objid, genno, x):
 								    '''
 								    Apply decipher(objid, genno, s) to every string found in X.
 								    Lists and dictionaries are rebuilt with their members deciphered
 								    recursively (dictionary keys are kept as they are); any other
 								    value is returned unchanged.
 								    '''
 								    if isinstance(x, str):
 								        return decipher(objid, genno, x)
 								    if isinstance(x, list):
 								        return [decipher_all(decipher, objid, genno, item) for item in x]
 								    if isinstance(x, dict):
 								        return dict((key, decipher_all(decipher, objid, genno, val))
 								                    for (key, val) in x.iteritems())
 								    return x
 								# Type cheking
 								def int_value(x):
 								    '''
 								    Resolve x and return it when it is an int.
 								    Otherwise raise PDFTypeError under STRICT, else return 0.
 								    '''
 								    x = resolve1(x)
 								    if isinstance(x, int):
 								        return x
 								    if STRICT:
 								        raise PDFTypeError('Integer required: %r' % x)
 								    return 0
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								def decimal_value(x):
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    x = resolve1(x)
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								    if not isinstance(x, Decimal):
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        if STRICT:
-												Update PDF to use Decimal instead of float to handle very precise numbers. Update for changes to ActiveState Python. Fix a few copyright dates. Update version to 6.5.4. Minor changes to obok script for stand-alone use.

											
										
										
											2017-06-26 23:50:24 -06:00
+								            raise PDFTypeError('Decimal required: %r' % x)
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        return 0.0
 								    return x
 								def num_value(x):
 								    '''
 								    Pass X through resolve1() and return the result if it is an int or
 								    a Decimal.  Otherwise raise PDFTypeError when STRICT is set, else
 								    return 0.
 								    '''
 								    x = resolve1(x)
 								    if not isinstance(x, (int, Decimal)):
 								        if STRICT:
 								            raise PDFTypeError('Int or Float required: %r' % x)
 								        return 0
 								    return x
 								def str_value(x):
 								    '''
 								    Pass X through resolve1() and return the result if it is a str.
 								    Otherwise raise PDFTypeError when STRICT is set, else return ''.
 								    '''
 								    value = resolve1(x)
 								    if isinstance(value, str):
 								        return value
 								    if STRICT:
 								        raise PDFTypeError('String required: %r' % value)
 								    return ''
 								def list_value(x):
 								    '''
 								    Pass X through resolve1() and return the result if it is a list or
 								    a tuple.  Otherwise raise PDFTypeError when STRICT is set, else
 								    return a new empty list.
 								    '''
 								    value = resolve1(x)
 								    if isinstance(value, (list, tuple)):
 								        return value
 								    if STRICT:
 								        raise PDFTypeError('List required: %r' % value)
 								    return []
 								def dict_value(x):
 								    '''
 								    Pass X through resolve1() and return the result if it is a dict.
 								    Otherwise raise PDFTypeError when STRICT is set, else return a new
 								    empty dict.
 								    '''
 								    value = resolve1(x)
 								    if isinstance(value, dict):
 								        return value
 								    if STRICT:
 								        raise PDFTypeError('Dict required: %r' % value)
 								    return {}
 								def stream_value(x):
 								    '''
 								    Pass X through resolve1() and return the result if it is a
 								    PDFStream.  Otherwise raise PDFTypeError when STRICT is set, else
 								    return a new empty PDFStream.
 								    '''
 								    value = resolve1(x)
 								    if isinstance(value, PDFStream):
 								        return value
 								    if STRICT:
 								        raise PDFTypeError('PDFStream required: %r' % value)
 								    return PDFStream({}, '')
 								# ascii85decode(data)
 								def ascii85decode(data):
 								    '''
 								    Decode the ASCII85-encoded string DATA.
 								    Characters '!'..'u' are base-85 digits, five of which make one
 								    4-byte big-endian group.  'z' stands for four zero bytes, and '~'
 								    ends the input, flushing any partial group.  All other characters
 								    are ignored.
 								    '''
 								    pieces = []
 								    count = 0   # digits collected for the current group
 								    acc = 0     # numeric value of the current group
 								    for ch in data:
 								        if '!' <= ch <= 'u':
 								            count += 1
 								            acc = acc*85 + (ord(ch) - 33)
 								            if count == 5:
 								                pieces.append(struct.pack('>L', acc))
 								                count = acc = 0
 								        elif ch == 'z':
 								            assert count == 0
 								            pieces.append('\0\0\0\0')
 								        elif ch == '~':
 								            if count:
 								                # Pad the short group with the highest digit, then keep
 								                # only the bytes that the real digits account for.
 								                for _ in range(5 - count):
 								                    acc = acc*85 + 84
 								                pieces.append(struct.pack('>L', acc)[:count - 1])
 								            break
 								    return ''.join(pieces)
 								##  PDFStream type
 								class PDFStream(PDFObject):
 								    '''
 								    A PDF stream: its dictionary plus the stream bytes.
 								    rawdata holds the bytes as read from the file until decode() runs.
 								    After that, data holds the deciphered and filter-decoded bytes and
 								    rawdata is None.
 								    '''
 								    def __init__(self, dic, rawdata, decipher=None):
 								        '''
 								        dic      -- the stream dictionary
 								        rawdata  -- the stream bytes as read from the file
 								        decipher -- optional callable(objid, genno, data) used to
 								                    decrypt the bytes
 								        '''
 								        length = int_value(dic.get('Length', 0))
 								        eol = rawdata[length:]
 								        # quick and dirty fix for false length attribute,
 								        # might not work if the pdf stream parser has a problem
 								        if decipher is not None and decipher.__name__ == 'decrypt_aes':
 								            # Trim the data to a whole number of 16-byte blocks.
 								            if (len(rawdata) % 16) != 0:
 								                cutdiv = len(rawdata) // 16
 								                rawdata = rawdata[:16*cutdiv]
 								        else:
 								            # Drop a lone end-of-line marker found after /Length bytes.
 								            if eol in ('\r', '\n', '\r\n'):
 								                rawdata = rawdata[:length]
 								        self.dic = dic
 								        self.rawdata = rawdata
 								        self.decipher = decipher
 								        self.data = None
 								        self.decdata = None
 								        self.objid = None
 								        self.genno = None
 								        return
 								    def set_objid(self, objid, genno):
 								        '''Record the object id and generation number of this stream.'''
 								        self.objid = objid
 								        self.genno = genno
 								        return
 								    def __repr__(self):
 								        if self.rawdata:
 								            return '<PDFStream(%r): raw=%d, %r>' % \
 								                   (self.objid, len(self.rawdata), self.dic)
 								        # rawdata is empty or was consumed by decode().  data is still
 								        # None for an empty stream that was never decoded, so do not
 								        # call len() on it blindly.
 								        size = 0
 								        if self.data is not None:
 								            size = len(self.data)
 								        return '<PDFStream(%r): data=%d, %r>' % \
 								               (self.objid, size, self.dic)
 								    def decode(self):
 								        '''
 								        Decipher rawdata (when a decipher is set), run it through the
 								        filters named in the dictionary, store the result in data and
 								        clear rawdata.
 								        Flate, LZW and ASCII85 filters are handled, and only predictor
 								        12 is supported.  Anything else raises PDFNotImplementedError.
 								        '''
 								        assert self.data is None and self.rawdata is not None
 								        data = self.rawdata
 								        if self.decipher:
 								            # Handle encryption
 								            data = self.decipher(self.objid, self.genno, data)
 								            if gen_xref_stm:
 								                self.decdata = data # keep decrypted data
 								        if 'Filter' not in self.dic:
 								            self.data = data
 								            self.rawdata = None
 								            return
 								        filters = self.dic['Filter']
 								        if not isinstance(filters, list):
 								            filters = [ filters ]
 								        for f in filters:
 								            if f in LITERALS_FLATE_DECODE:
 								                # will get errors if the document is encrypted.
 								                data = zlib.decompress(data)
 								            elif f in LITERALS_LZW_DECODE:
 								                data = ''.join(LZWDecoder(StringIO(data)).run())
 								            elif f in LITERALS_ASCII85_DECODE:
 								                data = ascii85decode(data)
 								            elif f == LITERAL_CRYPT:
 								                raise PDFNotImplementedError('/Crypt filter is unsupported')
 								            else:
 								                raise PDFNotImplementedError('Unsupported filter: %r' % f)
 								            # apply predictors
 								            if 'DP' in self.dic:
 								                params = self.dic['DP']
 								            else:
 								                params = self.dic.get('DecodeParms', {})
 								            if 'Predictor' in params:
 								                pred = int_value(params['Predictor'])
 								                if pred:
 								                    if pred != 12:
 								                        raise PDFNotImplementedError(
 								                            'Unsupported predictor: %r' % pred)
 								                    if 'Columns' not in params:
 								                        raise PDFValueError(
 								                            'Columns undefined for predictor=12')
 								                    columns = int_value(params['Columns'])
 								                    buf = ''
 								                    ent0 = '\x00' * columns
 								                    # Each row is one tag byte followed by `columns`
 								                    # data bytes.
 								                    for i in xrange(0, len(data), columns+1):
 								                        rowtag = data[i]
 								                        ent1 = data[i+1:i+1+columns]
 								                        if rowtag == '\x02':
 								                            # Tag 2: add the previous row byte-wise.
 								                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
 								                                               for (a,b) in zip(ent0,ent1))
 								                        buf += ent1
 								                        ent0 = ent1
 								                    data = buf
 								        self.data = data
 								        self.rawdata = None
 								        return
 								    def get_data(self):
 								        '''Return the decoded bytes, decoding on first use.'''
 								        if self.data is None:
 								            self.decode()
 								        return self.data
 								    def get_rawdata(self):
 								        '''Return the undecoded bytes (None once decode() has run).'''
 								        return self.rawdata
 								    def get_decdata(self):
 								        '''
 								        Return the deciphered but still filter-encoded bytes.
 								        Uses the copy saved by decode() when there is one, otherwise
 								        deciphers rawdata on the fly.
 								        '''
 								        if self.decdata is not None:
 								            return self.decdata
 								        data = self.rawdata
 								        if self.decipher and data:
 								            # Handle encryption
 								            data = self.decipher(self.objid, self.genno, data)
 								        return data
 								##  PDF Exceptions
 								##
 								# Raised for malformed document structure, e.g. a missing /Root or an
 								# invalid object header.
 								class PDFSyntaxError(PDFException): pass
 								# Raised by the xref loaders when no usable cross-reference data is found.
 								class PDFNoValidXRef(PDFSyntaxError): pass
 								# Raised for unusable encryption settings, e.g. an unknown filter or a
 								# document that is not encrypted at all.
 								class PDFEncryptionError(PDFException): pass
 								# Subclass of PDFEncryptionError.  NOTE(review): initialize_standard raises
 								# ADEPTError for a wrong password instead -- confirm the intended use.
 								class PDFPasswordIncorrect(PDFEncryptionError): pass
 								# some predefined literals and keywords.
 								# Values obtained from PSLiteralTable.intern().  The code below compares
 								# them by identity ("is") with /Type entries of stream and catalog
 								# dictionaries.
 								LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
 								LITERAL_XREF = PSLiteralTable.intern('XRef')
 								LITERAL_PAGE = PSLiteralTable.intern('Page')
 								LITERAL_PAGES = PSLiteralTable.intern('Pages')
 								LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
 								##  XRefs
 								##
 								##  PDFXRef
 								##
 								class PDFXRef(object):
 								    '''
 								    A plain-text cross-reference table together with its trailer.
 								    After load(), offsets maps each in-use object id to a
 								    (generation number, file position) pair and trailer holds the
 								    trailer dictionary.
 								    '''
 								    def __init__(self):
 								        self.offsets = None
 								        return
 								    def __repr__(self):
 								        # offsets is None until load() has run.
 								        return '<PDFXRef: objs=%d>' % len(self.offsets or ())
 								    def objids(self):
 								        '''Return an iterator over the object ids in this table.'''
 								        return self.offsets.iterkeys()
 								    def load(self, parser):
 								        '''
 								        Read subsections from PARSER up to the "trailer" line, then
 								        load the trailer.  Raises PDFNoValidXRef on any malformed or
 								        truncated input.
 								        '''
 								        self.offsets = {}
 								        while 1:
 								            try:
 								                (pos, line) = parser.nextline()
 								            except PSEOF:
 								                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
 								            if not line:
 								                raise PDFNoValidXRef('Premature eof: %r' % parser)
 								            if line.startswith('trailer'):
 								                # Rewind so that load_trailer() sees the keyword itself.
 								                parser.seek(pos)
 								                break
 								            # Subsection header: "<first objid> <number of entries>"
 								            f = line.strip().split(' ')
 								            if len(f) != 2:
 								                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
 								            try:
 								                (start, nobjs) = map(int, f)
 								            except ValueError:
 								                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
 								            for objid in xrange(start, start+nobjs):
 								                try:
 								                    (_, line) = parser.nextline()
 								                except PSEOF:
 								                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
 								                # Entry: "<position> <generation> <n|f>"
 								                f = line.strip().split(' ')
 								                if len(f) != 3:
 								                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
 								                (pos, genno, use) = f
 								                if use != 'n': continue
 								                try:
 								                    self.offsets[objid] = (int(genno), int(pos))
 								                except ValueError:
 								                    # Non-numeric fields are a format error as well.
 								                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
 								        self.load_trailer(parser)
 								        return
 								    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
 								    def load_trailer(self, parser):
 								        '''Read the "trailer" keyword and its dictionary from PARSER.'''
 								        try:
 								            (_,kwd) = parser.nexttoken()
 								            assert kwd is self.KEYWORD_TRAILER
 								            (_,dic) = parser.nextobject(direct=True)
 								        except PSEOF:
 								            # Input ended early: use what is left on the parser stack.
 								            x = parser.pop(1)
 								            if not x:
 								                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
 								            (_,dic) = x[0]
 								        self.trailer = dict_value(dic)
 								        return
 								    def getpos(self, objid):
 								        '''
 								        Return (None, file position) for OBJID.  Raises KeyError when
 								        the id is not in this table.
 								        '''
 								        (_, pos) = self.offsets[objid]
 								        return (None, pos)
 								##  PDFXRefStream
 								##
 								class PDFXRefStream(object):
 								    '''
 								    Cross-reference data read from an /XRef stream.
 								    index is a list of (first objid, count) pairs, data holds the
 								    decoded stream bytes and fl1/fl2/fl3 are the three field widths of
 								    each entry taken from /W.
 								    '''
 								    def __init__(self):
 								        self.index = None
 								        self.data = None
 								        self.entlen = None
 								        self.fl1 = self.fl2 = self.fl3 = None
 								        return
 								    def __repr__(self):
 								        return '<PDFXRef: objids=%s>' % self.index
 								    def objids(self):
 								        '''Yield every object id covered by the index ranges.'''
 								        for (start, count) in self.index:
 								            stop = start + count
 								            for objid in xrange(start, stop):
 								                yield objid
 								    def load(self, parser, debug=0):
 								        '''
 								        Read an "objid genno obj <stream>" object from PARSER and take
 								        index, field widths, data and trailer from it.  Raises
 								        PDFNoValidXRef when the object is not an /XRef stream.
 								        '''
 								        # The three header tokens are read and then ignored.
 								        (_, _objid) = parser.nexttoken()
 								        (_, _genno) = parser.nexttoken()
 								        (_, _kwd) = parser.nexttoken()
 								        (_, stream) = parser.nextobject()
 								        if not (isinstance(stream, PDFStream) and
 								                stream.dic['Type'] is LITERAL_XREF):
 								            raise PDFNoValidXRef('Invalid PDF stream spec.')
 								        size = stream.dic['Size']
 								        # /Index is a flat sequence of first/count values.
 								        flat = stream.dic.get('Index', (0,size))
 								        firsts = islice(flat, 0, None, 2)
 								        counts = islice(flat, 1, None, 2)
 								        self.index = zip(firsts, counts)
 								        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
 								        self.data = stream.get_data()
 								        self.entlen = self.fl1+self.fl2+self.fl3
 								        self.trailer = stream.dic
 								        return
 								    def getpos(self, objid):
 								        '''
 								        Look up OBJID.  Returns (None, position) for an entry of type 1
 								        and (stream objid, index) for an entry of type 2.  Raises
 								        KeyError for any other entry or an id outside the index.
 								        '''
 								        preceding = 0   # entries in the ranges before the matching one
 								        for (first, size) in self.index:
 								            if first <= objid < (first + size):
 								                break
 								            preceding += size
 								        else:
 								            raise KeyError(objid)
 								        begin = self.entlen * ((objid - first) + preceding)
 								        ent = self.data[begin:begin+self.entlen]
 								        split1 = self.fl1
 								        split2 = self.fl1 + self.fl2
 								        kind = nunpack(ent[:split1], 1)
 								        if kind == 1:
 								            pos = nunpack(ent[split1:split2])
 								            _genno = nunpack(ent[split2:])
 								            return (None, pos)
 								        if kind == 2:
 								            stmid = nunpack(ent[split1:split2])
 								            stmindex = nunpack(ent[split2:])
 								            return (stmid, stmindex)
 								        # this is a free object
 								        raise KeyError(objid)
 								##  PDFDocument
 								##
 								##  A PDFDocument object represents a PDF document.
 								##  Since a PDF file is usually pretty big, normally it is not loaded
 								##  at once. Rather it is parsed dynamically as processing goes.
 								##  A PDF parser is associated with the document.
 								##
 								class PDFDocument(object):
 								    def __init__(self):
 								        '''Create an empty document with no parser attached yet.'''
 								        # Filled in by set_parser() / set_root().
 								        self.parser = None
 								        self.xrefs = []
 								        self.root = None
 								        self.catalog = None
 								        self.encryption = None
 								        # Set by one of the initialize_*() methods.
 								        self.decipher = None
 								        # Caches used by getobj().
 								        self.objs = {}
 								        self.parsed_objs = {}
 								        return
 								    # set_parser(parser)
 								    #   Associates the document with an (already initialized) parser object.
 								    def set_parser(self, parser):
 								        '''
 								        Attach PARSER, read its xrefs and pick up the encryption info
 								        and the /Root dictionary from the trailers.  Does nothing when
 								        a parser is already attached.  Raises PDFSyntaxError when no
 								        trailer has a /Root entry.
 								        '''
 								        if self.parser: return
 								        self.parser = parser
 								        # The document is set to be temporarily ready during collecting
 								        # all the basic information about the document, e.g.
 								        # the header, the encryption information, and the access rights
 								        # for the document.
 								        self.ready = True
 								        # Retrieve the information of each header that was appended
 								        # (maybe multiple times) at the end of the document.
 								        self.xrefs = parser.read_xref()
 								        for xref in self.xrefs:
 								            trailer = xref.trailer
 								            if not trailer: continue
 								            # If there's an encryption info, remember it.
 								            if 'Encrypt' in trailer:
 								                #assert not self.encryption
 								                try:
 								                    self.encryption = (list_value(trailer['ID']),
 								                                   dict_value(trailer['Encrypt']))
 								                # fix for bad files
 								                # Catch Exception rather than everything, so that
 								                # KeyboardInterrupt and SystemExit still propagate.
 								                except Exception:
 								                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
 								                                       dict_value(trailer['Encrypt']))
 								            if 'Root' in trailer:
 								                self.set_root(dict_value(trailer['Root']))
 								                break
 								        else:
 								            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
 								        # The document is set to be non-ready again, until all the
 								        # proper initialization (asking the password key and
 								        # verifying the access permission, so on) is finished.
 								        self.ready = False
 								        return
 								    # set_root(root)
 								    #   Set the Root dictionary of the document.
 								    #   Each PDF file must have exactly one /Root dictionary.
 								    def set_root(self, root):
 								        '''
 								        Store ROOT and use it as the catalog.  When STRICT is set,
 								        raise PDFSyntaxError unless its /Type is /Catalog.
 								        '''
 								        self.root = root
 								        self.catalog = dict_value(self.root)
 								        is_catalog = self.catalog.get('Type') is LITERAL_CATALOG
 								        if STRICT and not is_catalog:
 								            raise PDFSyntaxError('Catalog not found!')
 								        return
 								    # initialize(password='')
 								    #   Perform the initialization with a given password.
 								    #   This step is mandatory even if there's no password associated
 								    #   with the document.
 								    def initialize(self, password=''):
 								        '''
 								        Set up decryption with PASSWORD by dispatching on the /Filter
 								        name of the encryption dictionary.
 								        Raises PDFEncryptionError when the document is not encrypted
 								        or names an unknown filter.
 								        '''
 								        if not self.encryption:
 								            self.is_printable = self.is_modifiable = self.is_extractable = True
 								            self.ready = True
 								            # DRM-free documents are rejected rather than processed.
 								            raise PDFEncryptionError('Document is not encrypted.')
 								        (docid, param) = self.encryption
 								        handler = literal_name(param['Filter'])
 								        if handler == 'Adobe.APS':
 								            return self.initialize_adobe_ps(password, docid, param)
 								        if handler == 'Standard':
 								            return self.initialize_standard(password, docid, param)
 								        if handler == 'EBX_HANDLER':
 								            return self.initialize_ebx(password, docid, param)
 								        raise PDFEncryptionError('Unknown filter: param=%r' % param)
 								    def initialize_adobe_ps(self, password, docid, param):
 								        '''
 								        Set up decryption for the Adobe.APS filter: take the key from
 								        genkey_adobe_ps() and pair genkey_v4 with decrypt_aes.
 								        PASSWORD and DOCID are not used.
 								        '''
 								        # Derive the key first, so that a failure leaves genkey,
 								        # decipher and ready untouched.
 								        key = self.genkey_adobe_ps(param)
 								        self.decrypt_key = key
 								        self.decipher = self.decrypt_aes
 								        self.genkey = self.genkey_v4
 								        self.ready = True
 								        return
 								    def genkey_adobe_ps(self, param):
 								        '''
 								        Recover the document key for the Adobe.APS filter from PARAM.
 								        A built-in principal key is selected by looking for its name
 								        in /PDRLLic and is used to decrypt the offline key stored in
 								        /EDCData.  /PDRLPol is decrypted only as a sanity check.
 								        Returns the first 16 bytes of the decrypted offline key.
 								        Raises ADEPTError when no principal key matches or any of the
 								        decryption checks fails.
 								        '''
 								        # nice little offline principal keys dictionary
 								        # global static principal key for German Onleihe / Bibliothek Digital
 								        principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')}
 								        self.is_printable = self.is_modifiable = self.is_extractable = True
 								        edcdata = str_value(param.get('EDCData')).decode('base64')
 								        pdrllic = str_value(param.get('PDRLLic')).decode('base64')
 								        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
 								        edclist = edcdata.split('\n')
 								        # principal key request: take the first known name that occurs
 								        # in the licence, and fail only when none of them does.
 								        principalkey = None
 								        for (name, candidate) in principalkeys.iteritems():
 								            if name in pdrllic:
 								                principalkey = candidate
 								                break
 								        if principalkey is None:
 								            raise ADEPTError('Cannot find principal key for this pdf')
 								        # Entries 2 and 9 of the EDC data are read below.
 								        if len(edclist) < 10:
 								            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
 								        shakey = SHA256(principalkey)
 								        ivector = 16 * chr(0)
 								        plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
 								        # A correct key leaves a final block of sixteen 0x10 bytes.
 								        if plaintext[-16:] != 16 * chr(16):
 								            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
 								        pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
 								        # Only the padding byte of the policy is checked; the policy
 								        # text itself is not used.
 								        if not pdrlpol or not (1 <= ord(pdrlpol[-1]) <= 16):
 								            raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
 								        return plaintext[:16]
 								    # 32-byte string that initialize_standard appends to the password
 								    # before truncating it to 32 bytes (step 1 of its "Algorithm 3.2").
 								    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
 								                       '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
 								    # experimental aes pw support
 								    def initialize_standard(self, password, docid, param):
 								        '''
 								        Set up decryption for the Standard security handler.
 								        Derives the file key from PASSWORD, DOCID[0] and the /O, /P,
 								        /R and /Length entries of PARAM, checks it against /U, and
 								        selects the per-object key function and cipher from /V.
 								        Raises PDFEncryptionError for an unknown /V, /R or key length,
 								        PDFNotImplementedError for 256-bit AES, and ADEPTError when
 								        the password does not match.
 								        '''
 								        V = int_value(param.get('V', 0))
 								        if (V <= 0 or V > 4):
 								            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
 								        length = int_value(param.get('Length', 40)) # Key length (bits)
 								        keylen = length // 8                        # Key length (bytes)
 								        O = str_value(param['O'])
 								        R = int_value(param['R']) # Revision
 								        # Only revisions 2, 3 and 4 are handled below.
 								        if R < 2 or 5 <= R:
 								            raise PDFEncryptionError('Unknown revision: %r' % R)
 								        U = str_value(param['U'])
 								        P = int_value(param['P'])
 								        try:
 								            EncMetadata = str_value(param['EncryptMetadata'])
 								        except Exception:
 								            EncMetadata = 'True'
 								        self.is_printable = bool(P & 4)
 								        self.is_modifiable = bool(P & 8)
 								        self.is_extractable = bool(P & 16)
 								        self.is_annotationable = bool(P & 32)
 								        self.is_formsenabled = bool(P & 256)
 								        self.is_textextractable = bool(P & 512)
 								        self.is_assemblable = bool(P & 1024)
 								        self.is_formprintable = bool(P & 2048)
 								        # Algorithm 3.2
 								        password = (password+self.PASSWORD_PADDING)[:32] # 1
 								        md5 = hashlib.md5(password) # 2
 								        md5.update(O) # 3
 								        md5.update(struct.pack('<l', P)) # 4
 								        md5.update(docid[0]) # 5
 								        # aes special handling if metadata isn't encrypted
 								        if EncMetadata in ('False', 'false'):
 								            md5.update('ffffffff'.decode('hex'))
 								        if 3 <= R:
 								            # 8: revision 3 and later re-hash the digest 50 times
 								            for _ in xrange(50):
 								                md5 = hashlib.md5(md5.digest()[:keylen])
 								        key = md5.digest()[:keylen]
 								        if R == 2:
 								            # Algorithm 3.4: RC4 of the padding string, not of the
 								            # supplied password
 								            u1 = ARC4.new(key).decrypt(self.PASSWORD_PADDING)
 								            is_authenticated = (u1 == U)
 								        else:
 								            # Algorithm 3.5
 								            md5 = hashlib.md5(self.PASSWORD_PADDING) # 2
 								            md5.update(docid[0]) # 3
 								            x = ARC4.new(key).decrypt(md5.digest()[:16]) # 4
 								            for i in xrange(1,19+1):
 								                k = ''.join( chr(ord(c) ^ i) for c in key )
 								                x = ARC4.new(k).decrypt(x)
 								            # Only the first 16 bytes of /U are significant here.
 								            is_authenticated = (x == U[:16])
 								        if not is_authenticated:
 								            raise ADEPTError('Password is not correct.')
 								        self.decrypt_key = key
 								        # per-object key function and cipher
 								        if V == 3:
 								            self.genkey = self.genkey_v3
 								            self.decipher = self.decrypt_rc4
 								        elif V != 4:
 								            self.genkey = self.genkey_v2
 								            self.decipher = self.decrypt_rc4  # XXX may be AES
 								        elif length == 128:
 								            # AES object keys carry the extra 'sAlT' suffix, which
 								            # genkey_v4 adds.
 								            self.genkey = self.genkey_v4
 								            self.decipher = self.decrypt_aes
 								        elif length == 256:
 								            raise PDFNotImplementedError('AES256 encryption is currently unsupported')
 								        else:
 								            # Without a cipher the output would stay encrypted.
 								            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
 								        self.ready = True
 								        return
 								    def initialize_ebx(self, password, docid, param):
 								        self.is_printable = self.is_modifiable = self.is_extractable = True
 								        rsa = RSA(password)
 								        length = int_value(param.get('Length', 0)) / 8
 								        rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
 								        rights = zlib.decompress(rights, -15)
 								        rights = etree.fromstring(rights)
 								        expr = './/{http://ns.adobe.com/adept}encryptedKey'
 								        bookkey = ''.join(rights.findtext(expr)).decode('base64')
 								        bookkey = rsa.decrypt(bookkey)
 								        if bookkey[0] != '\x02':
 								            raise ADEPTError('error decrypting book session key')
 								        index = bookkey.index('\0') + 1
 								        bookkey = bookkey[index:]
 								        ebx_V = int_value(param.get('V', 4))
 								        ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
 								        # added because of improper booktype / decryption book session key errors
 								        if length > 0:
 								            if len(bookkey) == length:
 								                if ebx_V == 3:
 								                    V = 3
 								                else:
 								                    V = 2
 								            elif len(bookkey) == length + 1:
 								                V = ord(bookkey[0])
 								                bookkey = bookkey[1:]
 								            else:
 								                print "ebx_V is %d  and ebx_type is %d" % (ebx_V, ebx_type)
 								                print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
 								                print "bookkey[0] is %d" % ord(bookkey[0])
 								                raise ADEPTError('error decrypting book session key - mismatched length')
 								        else:
 								            # proper length unknown try with whatever you have
 								            print "ebx_V is %d  and ebx_type is %d" % (ebx_V, ebx_type)
 								            print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
 								            print "bookkey[0] is %d" % ord(bookkey[0])
 								            if ebx_V == 3:
 								                V = 3
 								            else:
 								                V = 2
 								        self.decrypt_key = bookkey
 								        self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
 								        self.decipher = self.decrypt_rc4
 								        self.ready = True
 								        return
 								    # genkey functions
 								    def genkey_v2(self, objid, genno):
 								        '''
 								        Return the per-object key: MD5 of decrypt_key followed by the
 								        low 3 bytes of OBJID and the low 2 bytes of GENNO (both
 								        little-endian), cut to len(decrypt_key)+5 bytes, 16 at most.
 								        '''
 								        suffix = struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2]
 								        digest = hashlib.md5(self.decrypt_key + suffix).digest()
 								        keylen = min(len(self.decrypt_key) + 5, 16)
 								        return digest[:keylen]
 								    def genkey_v3(self, objid, genno):
 								        '''
 								        Return the per-object key: MD5 of decrypt_key followed by
 								        interleaved bytes of the masked OBJID and GENNO and the text
 								        'sAlT', cut to len(decrypt_key)+5 bytes, 16 at most.
 								        '''
 								        obytes = struct.pack('<L', objid ^ 0x3569ac)
 								        gbytes = struct.pack('<L', genno ^ 0xca96)
 								        suffix = obytes[0] + gbytes[0] + obytes[1] + gbytes[1] + obytes[2] + 'sAlT'
 								        digest = hashlib.md5(self.decrypt_key + suffix).digest()
 								        keylen = min(len(self.decrypt_key) + 5, 16)
 								        return digest[:keylen]
 								    # aes v2 and v4 algorithm
 								    def genkey_v4(self, objid, genno):
 								        '''
 								        Return the per-object key: like genkey_v2, but with the text
 								        'sAlT' appended to the hashed data.
 								        '''
 								        suffix = struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2]
 								        digest = hashlib.md5(self.decrypt_key + suffix + 'sAlT').digest()
 								        keylen = min(len(self.decrypt_key) + 5, 16)
 								        return digest[:keylen]
 								    def decrypt_aes(self, objid, genno, data):
 								        '''
 								        Decrypt DATA with AES-CBC using the key for (OBJID, GENNO).
 								        The first 16 bytes of DATA are the IV.  The value of the last
 								        plaintext byte is taken as the number of padding bytes to cut.
 								        '''
 								        key = self.genkey(objid, genno)
 								        (ivector, ciphertext) = (data[:16], data[16:])
 								        plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(ciphertext)
 								        # remove pkcs#5 aes padding
 								        padlen = ord(plaintext[-1])
 								        return plaintext[:-padlen]
 								    def decrypt_aes256(self, objid, genno, data):
 								        '''
 								        Decrypt DATA with AES-CBC using the key for (OBJID, GENNO).
 								        Performs the same steps as decrypt_aes: IV from the first 16
 								        bytes, then the padding is cut according to the last byte.
 								        '''
 								        key = self.genkey(objid, genno)
 								        (ivector, ciphertext) = (data[:16], data[16:])
 								        cipher = AES.new(key,AES.MODE_CBC,ivector)
 								        plaintext = cipher.decrypt(ciphertext)
 								        # remove pkcs#5 aes padding
 								        padlen = ord(plaintext[-1])
 								        return plaintext[:-padlen]
 								    def decrypt_rc4(self, objid, genno, data):
 								        '''Decrypt DATA with RC4 using the key for (OBJID, GENNO).'''
 								        cipher = ARC4.new(self.genkey(objid, genno))
 								        return cipher.decrypt(data)
 								    # getobj() compares the third token of an object header with this
 								    # value by identity.
 								    KEYWORD_OBJ = PSKeywordTable.intern('obj')
 								    def getobj(self, objid):
 								        '''
 								        Return the object with id OBJID, reading and caching it on
 								        first use.
 								        Returns None when no xref knows the id.  When the object lives
 								        in an object stream and gen_xref_stm is set, an uncached
 								        PDFObjStmRef is returned instead of the object itself.
 								        Objects read directly from the file are deciphered, while
 								        objects taken from an object stream are not deciphered again.
 								        Raises PDFException when the document is not ready, and
 								        PDFSyntaxError for a bad object header or stream index.
 								        '''
 								        if not self.ready:
 								            raise PDFException('PDFDocument not initialized')
 								        #assert self.xrefs
 								        if objid in self.objs:
 								            return self.objs[objid]
 								        # The first xref that knows the id wins.
 								        for xref in self.xrefs:
 								            try:
 								                (stmid, index) = xref.getpos(objid)
 								                break
 								            except KeyError:
 								                pass
 								        else:
 								            #if STRICT:
 								            #    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
 								            return None
 								        if stmid:
 								            if gen_xref_stm:
 								                return PDFObjStmRef(objid, stmid, index)
 								            # Stuff from pdfminer: extract objects from object stream
 								            stream = stream_value(self.getobj(stmid))
 								            if stream.dic.get('Type') is not LITERAL_OBJSTM:
 								                if STRICT:
 								                    raise PDFSyntaxError('Not a stream object: %r' % stream)
 								            try:
 								                n = stream.dic['N']
 								            except KeyError:
 								                if STRICT:
 								                    raise PDFSyntaxError('N is not defined: %r' % stream)
 								                n = 0
 								            if stmid in self.parsed_objs:
 								                objs = self.parsed_objs[stmid]
 								            else:
 								                # Parse the whole stream once and remember the result.
 								                parser = PDFObjStrmParser(stream.get_data(), self)
 								                objs = []
 								                try:
 								                    while 1:
 								                        (_,obj) = parser.nextobject()
 								                        objs.append(obj)
 								                except PSEOF:
 								                    pass
 								                self.parsed_objs[stmid] = objs
 								            # The parsed list starts with 2*n header values, and the
 								            # objects themselves follow.
 								            i = n*2+index
 								            try:
 								                obj = objs[i]
 								            except IndexError:
 								                raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
 								            if isinstance(obj, PDFStream):
 								                obj.set_objid(objid, 0)
 								        else:
 								            self.parser.seek(index)
 								            # The object id found in the file is read but not checked
 								            # against OBJID.
 								            (_,objid1) = self.parser.nexttoken() # objid
 								            (_,genno) = self.parser.nexttoken() # genno
 								            (_,kwd) = self.parser.nexttoken()
 								            if kwd is not self.KEYWORD_OBJ:
 								                raise PDFSyntaxError(
 								                    'Invalid object spec: offset=%r' % index)
 								            (_,obj) = self.parser.nextobject()
 								            if isinstance(obj, PDFStream):
 								                obj.set_objid(objid, genno)
 								            if self.decipher:
 								                obj = decipher_all(self.decipher, objid, genno, obj)
 								        self.objs[objid] = obj
 								        return obj
 								class PDFObjStmRef(object):
 								    """Placeholder for an object that lives inside an object stream.
 								
 								    Records the object's id, the id of the containing stream and the
 								    object's index.  The class attribute ``maxindex`` tracks the
 								    largest index seen across all instances created so far.
 								    """
 								    maxindex = 0
 								    def __init__(self, objid, stmid, index):
 								        self.objid, self.stmid, self.index = objid, stmid, index
 								        # keep the class-wide high-water mark up to date
 								        PDFObjStmRef.maxindex = max(PDFObjStmRef.maxindex, index)
 								##  PDFParser
 								##
 								class PDFParser(PSStackParser):
 								    def __init__(self, doc, fp):
 								        """Create a parser reading from `fp` and register it with `doc`."""
 								        PSStackParser.__init__(self, fp)
 								        self.doc = doc
 								        # the document keeps a back-reference to its parser
 								        doc.set_parser(self)
 								    def __repr__(self):
 								        """Return the fixed debugging label for this parser."""
 								        label = '<PDFParser>'
 								        return label
 								    KEYWORD_R = PSKeywordTable.intern('R')
 								    KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
 								    KEYWORD_STREAM = PSKeywordTable.intern('stream')
 								    KEYWORD_XREF = PSKeywordTable.intern('xref')
 								    KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
 								    def do_keyword(self, pos, token):
 								        """Handle a keyword token found at position `pos`.
 								
 								        * ``xref`` / ``startxref``: move the top stack entry to the results.
 								        * ``endobj``: move the top four stack entries to the results.
 								        * ``R``: replace the two preceding entries (object id, generation)
 								          with a PDFObjRef; a PSSyntaxError while doing so is ignored.
 								        * ``stream``: read the stream body that follows the dictionary on
 								          top of the stack and push a PDFStream.
 								        * anything else is pushed onto the stack unchanged.
 								        """
 								        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
 								            self.add_results(*self.pop(1))
 								            return
 								        if token is self.KEYWORD_ENDOBJ:
 								            self.add_results(*self.pop(4))
 								            return
 								        if token is self.KEYWORD_R:
 								            # reference to indirect object
 								            try:
 								                ((_,objid), (_,genno)) = self.pop(2)
 								                (objid, genno) = (int(objid), int(genno))
 								                obj = PDFObjRef(self.doc, objid, genno)
 								                self.push((pos, obj))
 								            except PSSyntaxError:
 								                # deliberately best-effort: a malformed reference is dropped
 								                pass
 								            return
 								        if token is self.KEYWORD_STREAM:
 								            # stream object
 								            ((_,dic),) = self.pop(1)
 								            dic = dict_value(dic)
 								            try:
 								                objlen = int_value(dic['Length'])
 								            except KeyError:
 								                if STRICT:
 								                    raise PDFSyntaxError('/Length is undefined: %r' % dic)
 								                objlen = 0
 								            self.seek(pos)
 								            try:
 								                (_, line) = self.nextline()  # 'stream'
 								            except PSEOF:
 								                if STRICT:
 								                    raise PDFSyntaxError('Unexpected EOF')
 								                return
 								            # the stream data starts right after the 'stream' line
 								            pos += len(line)
 								            self.fp.seek(pos)
 								            data = self.fp.read(objlen)
 								            self.seek(pos+objlen)
 								            # /Length is not trusted: keep appending lines until the
 								            # 'endstream' marker is actually seen.
 								            while 1:
 								                try:
 								                    (linepos, line) = self.nextline()
 								                except PSEOF:
 								                    if STRICT:
 								                        raise PDFSyntaxError('Unexpected EOF')
 								                    break
 								                if 'endstream' in line:
 								                    i = line.index('endstream')
 								                    objlen += i
 								                    data += line[:i]
 								                    break
 								                objlen += len(line)
 								                data += line
 								            self.seek(pos+objlen)
 								            obj = PDFStream(dic, data, self.doc.decipher)
 								            self.push((pos, obj))
 								            return
 								        # others
 								        self.push((pos, token))
 								        return
-												tools v5.0

Introduction of alfcrypto library for speed
Reorganisation of archive plugins,apps,other

											
										
										
											2012-03-06 11:24:28 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    def find_xref(self):
 								        # search the last xref table by scanning the file backwards.
 								        prev = None
 								        for line in self.revreadlines():
 								            line = line.strip()
 								            if line == 'startxref': break
 								            if line:
 								                prev = line
 								        else:
 								            raise PDFNoValidXRef('Unexpected EOF')
 								        return int(prev)
-												tools v6.0.2

											
										
										
											2013-04-05 10:44:48 -06:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    # read xref table
 								    def read_xref_from(self, start, xrefs):
 								        """Load the cross-reference section at offset `start` into `xrefs`.
 								
 								        An integer token at `start` means a cross-reference stream
 								        (PDF-1.5); otherwise the token must be the 'xref' keyword.  The
 								        loaded section is appended to `xrefs`, then any sections named by
 								        the trailer's 'XRefStm' and 'Prev' entries are loaded recursively.
 								
 								        Raises PDFNoValidXRef on EOF or when no xref is found at `start`.
 								        """
 								        global gen_xref_stm
 								        self.seek(start)
 								        self.reset()
 								        try:
 								            (pos, token) = self.nexttoken()
 								        except PSEOF:
 								            raise PDFNoValidXRef('Unexpected EOF')
 								        if not isinstance(token, int):
 								            # classic xref table
 								            if token is not self.KEYWORD_XREF:
 								                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
 								                                     (pos, token))
 								            self.nextline()
 								            xref = PDFXRef()
 								        else:
 								            # XRefStream: PDF-1.5
 								            if GEN_XREF_STM == 1:
 								                gen_xref_stm = True
 								            # rewind so the stream object is parsed from its start
 								            self.seek(pos)
 								            self.reset()
 								            xref = PDFXRefStream()
 								        xref.load(self)
 								        xrefs.append(xref)
 								        trailer = xref.trailer
 								        # follow the hybrid-file stream first, then the previous section
 								        for link in ('XRefStm', 'Prev'):
 								            if link in trailer:
 								                self.read_xref_from(int_value(trailer[link]), xrefs)
 								        return
-												tools v6.0.2

											
										
										
											2013-04-05 10:44:48 -06:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    # read xref tables and trailers
 								    def read_xref(self):
 								        """Return the list of cross-reference sections of the file.
 								
 								        The sections are normally located through 'startxref'.  When
 								        that fails with PDFNoValidXRef, the whole file is scanned for
 								        'N G obj' lines to rebuild the offsets, and the last 'trailer'
 								        line seen is used to load the trailer.
 								
 								        Raises PDFNoValidXRef when the fallback scan finds no objects.
 								        """
 								        xrefs = []
 								        trailerpos = None
 								        try:
 								            pos = self.find_xref()
 								            self.read_xref_from(pos, xrefs)
 								        except PDFNoValidXRef as err:
 								            # fallback
 								            self.seek(0)
 								            pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
 								            offsets = {}
 								            xref = PDFXRef()
 								            while 1:
 								                try:
 								                    (pos, line) = self.nextline()
 								                except PSEOF:
 								                    break
 								                if line.startswith('trailer'):
 								                    trailerpos = pos # remember last trailer
 								                m = pat.match(line)
 								                if not m: continue
 								                # generation numbers are forced to 0
 								                offsets[int(m.group(1))] = (0, pos)
 								            if not offsets:
 								                # Re-raise explicitly: under Python 2 a bare 'raise' here
 								                # would re-raise the PSEOF handled in the loop above.
 								                raise err
 								            xref.offsets = offsets
 								            if trailerpos:
 								                self.seek(trailerpos)
 								                xref.load_trailer(self)
 								                xrefs.append(xref)
 								        return xrefs
-												tools v6.0.2

											
										
										
											2013-04-05 10:44:48 -06:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								##  PDFObjStrmParser
 								##
 								class PDFObjStrmParser(PDFParser):
 								    """PDFParser variant that parses objects from an in-memory string.
 								
 								    Unlike PDFParser it does not register itself with the document,
 								    and the only keyword it treats specially is 'R'.
 								    """
 								    def __init__(self, data, doc):
 								        # PDFParser.__init__ is bypassed on purpose: it would call
 								        # doc.set_parser(self).
 								        PSStackParser.__init__(self, StringIO(data))
 								        self.doc = doc
 								        return
 								    def flush(self):
 								        """Move everything currently on the stack to the results."""
 								        self.add_results(*self.popall())
 								        return
 								    KEYWORD_R = KWD('R')
 								    def do_keyword(self, pos, token):
 								        """Turn 'objid genno R' into a PDFObjRef; push other keywords."""
 								        if token is self.KEYWORD_R:
 								            # reference to indirect object
 								            try:
 								                ((_,objid), (_,genno)) = self.pop(2)
 								                (objid, genno) = (int(objid), int(genno))
 								                obj = PDFObjRef(self.doc, objid, genno)
 								                self.push((pos, obj))
 								            except PSSyntaxError:
 								                # deliberately best-effort: a malformed reference is dropped
 								                pass
 								            return
 								        # others
 								        self.push((pos, token))
 								        return
-												tools v5.0

Introduction of alfcrypto library for speed
Reorganisation of archive plugins,apps,other

											
										
										
											2012-03-06 11:24:28 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								###
 								### My own code, for which there is none else to blame
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								class PDFSerializer(object):
 								    def __init__(self, inf, userkey):
 								        """Parse the PDF in `inf` and prepare it for re-serialization.
 								
 								        Stores the first 8 bytes of the file as the version header,
 								        initializes a PDFDocument with `userkey`, collects the object
 								        ids of every xref section and keeps a copy of a trailer with the
 								        'Prev', 'XRefStm' and 'Encrypt' entries removed.  The object
 								        referenced by 'Encrypt' is dropped from the id set as well.
 								        """
 								        global GEN_XREF_STM, gen_xref_stm
 								        gen_xref_stm = GEN_XREF_STM > 1
 								        self.version = inf.read(8)
 								        inf.seek(0)
 								        doc = PDFDocument()
 								        self.doc = doc
 								        parser = PDFParser(doc, inf)
 								        doc.initialize(userkey)
 								        objids = set()
 								        self.objids = objids
 								        # after the loop `trailer` is the one of doc.xrefs[0]
 								        for xref in reversed(doc.xrefs):
 								            trailer = xref.trailer
 								            objids.update(xref.objids())
 								        trailer = dict(trailer)
 								        for stale in ('Prev', 'XRefStm'):
 								            trailer.pop(stale, None)
 								        if 'Encrypt' in trailer:
 								            objids.remove(trailer.pop('Encrypt').objid)
 								        self.trailer = trailer
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    def dump(self, outf):
 								        self.outf = outf
 								        self.write(self.version)
 								        self.write('\n%\xe2\xe3\xcf\xd3\n')
 								        doc = self.doc
 								        objids = self.objids
 								        xrefs = {}
 								        maxobj = max(objids)
 								        trailer = dict(self.trailer)
 								        trailer['Size'] = maxobj + 1
 								        for objid in objids:
 								            obj = doc.getobj(objid)
 								            if isinstance(obj, PDFObjStmRef):
 								                xrefs[objid] = obj
 								                continue
 								            if obj is not None:
 								                try:
 								                    genno = obj.genno
 								                except AttributeError:
 								                    genno = 0
 								                xrefs[objid] = (self.tell(), genno)
 								                self.serialize_indirect(objid, obj)
 								        startxref = self.tell()
-												tools v5.0

Introduction of alfcrypto library for speed
Reorganisation of archive plugins,apps,other

											
										
										
											2012-03-06 11:24:28 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        if not gen_xref_stm:
 								            self.write('xref\n')
 								            self.write('0 %d\n' % (maxobj + 1,))
 								            for objid in xrange(0, maxobj + 1):
 								                if objid in xrefs:
 								                    # force the genno to be 0
 								                    self.write("%010d 00000 n \n" % xrefs[objid][0])
 								                else:
 								                    self.write("%010d %05d f \n" % (0, 65535))
-												tools v5.0

Introduction of alfcrypto library for speed
Reorganisation of archive plugins,apps,other

											
										
										
											2012-03-06 11:24:28 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								            self.write('trailer\n')
 								            self.serialize_object(trailer)
 								            self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        else: # Generate crossref stream.
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								            # Calculate size of entries
 								            maxoffset = max(startxref, maxobj)
 								            maxindex = PDFObjStmRef.maxindex
 								            fl2 = 2
 								            power = 65536
 								            while maxoffset >= power:
 								                fl2 += 1
 								                power *= 256
 								            fl3 = 1
 								            power = 256
 								            while maxindex >= power:
 								                fl3 += 1
 								                power *= 256
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								            index = []
 								            first = None
 								            prev = None
 								            data = []
 								            # Put the xrefstream's reference in itself
 								            startxref = self.tell()
 								            maxobj += 1
 								            xrefs[maxobj] = (startxref, 0)
 								            for objid in sorted(xrefs):
 								                if first is None:
 								                    first = objid
 								                elif objid != prev + 1:
 								                    index.extend((first, prev - first + 1))
 								                    first = objid
 								                prev = objid
 								                objref = xrefs[objid]
 								                if isinstance(objref, PDFObjStmRef):
 								                    f1 = 2
 								                    f2 = objref.stmid
 								                    f3 = objref.index
 								                else:
 								                    f1 = 1
 								                    f2 = objref[0]
 								                    # we force all generation numbers to be 0
 								                    # f3 = objref[1]
 								                    f3 = 0
-												tools v3.2

First appearance of combined windows python app

											
										
										
											2011-01-17 00:24:53 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								                data.append(struct.pack('>B', f1))
 								                data.append(struct.pack('>L', f2)[-fl2:])
 								                data.append(struct.pack('>L', f3)[-fl3:])
 								            index.extend((first, prev - first + 1))
 								            data = zlib.compress(''.join(data))
 								            dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
 								                   'W': [1, fl2, fl3], 'Length': len(data),
 								                   'Filter': LITERALS_FLATE_DECODE[0],
 								                   'Root': trailer['Root'],}
 								            if 'Info' in trailer:
 								                dic['Info'] = trailer['Info']
 								            xrefstm = PDFStream(dic, data)
 								            self.serialize_indirect(maxobj, xrefstm)
 								            self.write('startxref\n%d\n%%%%EOF' % startxref)
 								    def write(self, data):
 								        """Write `data` to the output and remember its last character.
 								
 								        ``self.last`` is what the serializer inspects to decide whether
 								        a separator is needed before the next token.
 								        """
 								        sink = self.outf
 								        sink.write(data)
 								        self.last = data[-1:]
-												tools v3.2

First appearance of combined windows python app

											
										
										
											2011-01-17 00:24:53 -07:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    def tell(self):
 								        return self.outf.tell()
 								    def escape_string(self, string):
 								        """Escape `string` for use inside a PDF literal string '(...)'.
 								
 								        Backslash, parentheses, line feed and carriage return are
 								        escaped.  A string that starts with a ciando referrer URL is
 								        replaced by the bare ciando URL.
 								        """
 								        # backslash first, so the escapes added below are not doubled
 								        string = string.replace('\\', '\\\\')
 								        string = string.replace('\n', r'\n')
 								        # A bare CR (or CR LF) inside a literal string is read back as
 								        # a single LF, which would corrupt the value.
 								        string = string.replace('\r', r'\r')
 								        string = string.replace('(', r'\(')
 								        string = string.replace(')', r'\)')
 								        # get rid of ciando id
 								        regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
 								        if regularexp.match(string): return ('http://www.ciando.com')
 								        return string
 								    def serialize_object(self, obj):
 								        """Write the PDF syntax for `obj` to the output.
 								
 								        Handles dict, list, str, bool, int/long, Decimal, PDFObjRef and
 								        PDFStream; anything else is written as str(obj).  A space is
 								        inserted first when the previous output ended in an alphanumeric
 								        character and the next token would otherwise run into it.
 								        """
 								        if isinstance(obj, dict):
 								            # Correct malformed Mac OS resource forks for Stanza
 								            if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
 								                   and isinstance(obj['Type'], int):
 								                obj['Subtype'] = obj['Type']
 								                del obj['Type']
 								            # end - hope this doesn't have bad effects
 								            self.write('<<')
 								            for key, val in obj.items():
 								                self.write('/%s' % key)
 								                self.serialize_object(val)
 								            self.write('>>')
 								        elif isinstance(obj, list):
 								            self.write('[')
 								            for val in obj:
 								                self.serialize_object(val)
 								            self.write(']')
 								        elif isinstance(obj, str):
 								            self.write('(%s)' % self.escape_string(obj))
 								        elif isinstance(obj, bool):
 								            # must be tested before int: bool is a subclass of int
 								            if self.last.isalnum():
 								                self.write(' ')
 								            self.write(str(obj).lower())
 								        elif isinstance(obj, (int, long)):
 								            if self.last.isalnum():
 								                self.write(' ')
 								            self.write(str(obj))
 								        elif isinstance(obj, Decimal):
 								            if self.last.isalnum():
 								                self.write(' ')
 								            self.write(str(obj))
 								        elif isinstance(obj, PDFObjRef):
 								            if self.last.isalnum():
 								                self.write(' ')
 								            # generation number is always written as 0
 								            self.write('%d %d R' % (obj.objid, 0))
 								        elif isinstance(obj, PDFStream):
 								            ### If we don't generate cross ref streams the object streams
 								            ### are no longer useful, as we have extracted all objects from
 								            ### them. Therefore leave them out from the output.
 								            if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
 								                self.write('(deleted)')
 								            else:
 								                data = obj.get_decdata()
 								                self.serialize_object(obj.dic)
 								                self.write('stream\n')
 								                self.write(data)
 								                self.write('\nendstream')
 								        else:
 								            data = str(obj)
 								            # data[:1] instead of data[0]: an empty str(obj) must not raise
 								            if data[:1].isalnum() and self.last.isalnum():
 								                self.write(' ')
 								            self.write(data)
 								    def serialize_indirect(self, objid, obj):
 								        """Write `obj` as the indirect object 'objid 0 obj ... endobj'.
 								
 								        The generation number is always written as 0.
 								        """
 								        emit = self.write
 								        emit('%d 0 obj' % (objid,))
 								        self.serialize_object(obj)
 								        # keep 'endobj' from running into a trailing alphanumeric token
 								        if self.last.isalnum():
 								            emit('\n')
 								        emit('endobj\n')
 								def decryptBook(userkey, inpath, outpath):
 								    if RSA is None:
 								        raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
 								    with open(inpath, 'rb') as inf:
-												Change to ineptpdf.py, so that we throw an exception for DRM-free PDFs, rather than processing them.

											
										
										
											2016-08-09 23:40:48 -06:00
+								        #try:
 								        serializer = PDFSerializer(inf, userkey)
 								        #except:
 								        #    print u"Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath))
 								        #    return 2
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								        # hope this will fix the 'bad file descriptor' problem
 								        with open(outpath, 'wb') as outf:
 								            # help construct to make sure the method runs to the end
 								            try:
 								                serializer.dump(outf)
 								            except Exception, e:
 								                print u"error writing pdf: {0}".format(e.args[0])
 								                return 2
 								    return 0
-												tools v3.2

First appearance of combined windows python app

											
										
										
											2011-01-17 00:24:53 -07:00
-												tools v6.0.2

											
										
										
											2013-04-05 10:44:48 -06:00
-												tools v6.0.1

											
										
										
											2013-03-26 10:38:18 -06:00
+								def cli_main():
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    sys.stdout=SafeUnbuffered(sys.stdout)
 								    sys.stderr=SafeUnbuffered(sys.stderr)
-												tools v6.0.1

											
										
										
											2013-03-26 10:38:18 -06:00
+								    argv=unicode_argv()
-												tools v3.2

First appearance of combined windows python app

											
										
										
											2011-01-17 00:24:53 -07:00
+								    progname = os.path.basename(argv[0])
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
+								    if len(argv) != 4:
 								        print u"usage: {0} <keyfile.der> <inbook.pdf> <outbook.pdf>".format(progname)
 								        return 1
 								    keypath, inpath, outpath = argv[1:]
 								    userkey = open(keypath,'rb').read()
 								    result = decryptBook(userkey, inpath, outpath)
 								    if result == 0:
 								        print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
 								    return result
-												tools v6.0.2

											
										
										
											2013-04-05 10:44:48 -06:00
-												tools v6.0.8

											
										
										
											2013-10-02 12:59:40 -06:00
 								def gui_main():
 								    """Run the Tkinter front end; fall back to cli_main() without Tk.
 								
 								    Returns 0 when the window is closed, 1 when no RSA implementation
 								    is available, or whatever cli_main() returns if the Tk modules
 								    cannot be imported.
 								    """
 								    try:
 								        import Tkinter
 								        import Tkconstants
 								        import tkFileDialog
 								        import tkMessageBox
 								        import traceback
 								    except Exception:
 								        # no usable Tk installation: behave like the command-line tool
 								        return cli_main()
 								    class DecryptionDialog(Tkinter.Frame):
 								        """Form with key / input / output path entries and a Decrypt button."""
 								        def __init__(self, root):
 								            Tkinter.Frame.__init__(self, root, border=5)
 								            self.status = Tkinter.Label(self, text=u"Select files for decryption")
 								            self.status.pack(fill=Tkconstants.X, expand=1)
 								            body = Tkinter.Frame(self)
 								            body.pack(fill=Tkconstants.X, expand=1)
 								            sticky = Tkconstants.E + Tkconstants.W
 								            body.grid_columnconfigure(1, weight=2)
 								            Tkinter.Label(body, text=u"Key file").grid(row=0)
 								            self.keypath = Tkinter.Entry(body, width=30)
 								            self.keypath.grid(row=0, column=1, sticky=sticky)
 								            # pre-fill the key path when a key sits in the working directory
 								            if os.path.exists(u"adeptkey.der"):
 								                self.keypath.insert(0, u"adeptkey.der")
 								            button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
 								            button.grid(row=0, column=2)
 								            Tkinter.Label(body, text=u"Input file").grid(row=1)
 								            self.inpath = Tkinter.Entry(body, width=30)
 								            self.inpath.grid(row=1, column=1, sticky=sticky)
 								            button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
 								            button.grid(row=1, column=2)
 								            Tkinter.Label(body, text=u"Output file").grid(row=2)
 								            self.outpath = Tkinter.Entry(body, width=30)
 								            self.outpath.grid(row=2, column=1, sticky=sticky)
 								            button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
 								            button.grid(row=2, column=2)
 								            buttons = Tkinter.Frame(self)
 								            buttons.pack()
 								            button = Tkinter.Button(
 								                buttons, text=u"Decrypt", width=10, command=self.decrypt)
 								            button.pack(side=Tkconstants.LEFT)
 								            Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
 								            button = Tkinter.Button(
 								                buttons, text=u"Quit", width=10, command=self.quit)
 								            button.pack(side=Tkconstants.RIGHT)
 								        def get_keypath(self):
 								            """Ask for the key file and put its path in the key entry."""
 								            keypath = tkFileDialog.askopenfilename(
 								                parent=None, title=u"Select Adobe Adept \'.der\' key file",
 								                defaultextension=u".der",
 								                filetypes=[('Adobe Adept DER-encoded files', '.der'),
 								                           ('All Files', '.*')])
 								            if keypath:
 								                keypath = os.path.normpath(keypath)
 								                self.keypath.delete(0, Tkconstants.END)
 								                self.keypath.insert(0, keypath)
 								            return
 								        def get_inpath(self):
 								            """Ask for the encrypted PDF and put its path in the input entry."""
 								            inpath = tkFileDialog.askopenfilename(
 								                parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt",
 								                defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
 								            if inpath:
 								                inpath = os.path.normpath(inpath)
 								                self.inpath.delete(0, Tkconstants.END)
 								                self.inpath.insert(0, inpath)
 								            return
 								        def get_outpath(self):
 								            """Ask where to save the result and put the path in the output entry."""
 								            outpath = tkFileDialog.asksaveasfilename(
 								                parent=None, title=u"Select unencrypted PDF file to produce",
 								                defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
 								            if outpath:
 								                outpath = os.path.normpath(outpath)
 								                self.outpath.delete(0, Tkconstants.END)
 								                self.outpath.insert(0, outpath)
 								            return
 								        def decrypt(self):
 								            """Validate the three paths, run decryptBook and show the outcome."""
 								            keypath = self.keypath.get()
 								            inpath = self.inpath.get()
 								            outpath = self.outpath.get()
 								            if not keypath or not os.path.exists(keypath):
 								                self.status['text'] = u"Specified key file does not exist"
 								                return
 								            if not inpath or not os.path.exists(inpath):
 								                self.status['text'] = u"Specified input file does not exist"
 								                return
 								            if not outpath:
 								                self.status['text'] = u"Output file not specified"
 								                return
 								            if inpath == outpath:
 								                self.status['text'] = u"Must have different input and output files"
 								                return
 								            # close the key file promptly instead of leaking the handle
 								            with open(keypath,'rb') as keyfile:
 								                userkey = keyfile.read()
 								            self.status['text'] = u"Decrypting..."
 								            try:
 								                decrypt_status = decryptBook(userkey, inpath, outpath)
 								            except Exception as e:
 								                # an exception without args must not raise IndexError here
 								                reason = e.args[0] if e.args else e
 								                self.status['text'] = u"Error; {0}".format(reason)
 								                return
 								            if decrypt_status == 0:
 								                self.status['text'] = u"File successfully decrypted"
 								            else:
 								                self.status['text'] = u"There was an error decrypting the file."
 								    root = Tkinter.Tk()
 								    if RSA is None:
 								        root.withdraw()
 								        tkMessageBox.showerror(
 								            "INEPT PDF",
 								            "This script requires OpenSSL or PyCrypto, which must be installed "
 								            "separately.  Read the top-of-script comment for details.")
 								        return 1
 								    root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__))
 								    root.resizable(True, False)
 								    root.minsize(370, 0)
 								    DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
 								    root.mainloop()
 								    return 0
-												first Adobe Adept PDF tool

											
										
										
											2009-02-26 03:42:22 -07:00
 								if __name__ == '__main__':
 								    # with arguments run the command-line tool, otherwise open the GUI
 								    if len(sys.argv) > 1:
 								        sys.exit(cli_main())
 								    sys.exit(gui_main())