diff --git a/Calibre_Plugins/Ignobleepub ReadMe.txt b/Calibre_Plugins/Ignobleepub ReadMe.txt index 262ef40..3eb916b 100644 --- a/Calibre_Plugins/Ignobleepub ReadMe.txt +++ b/Calibre_Plugins/Ignobleepub ReadMe.txt @@ -1,4 +1,4 @@ -Ignoble Epub DeDRM - ignobleepub_v02.2_plugin.zip +Ignoble Epub DeDRM - ignobleepub_v02.4_plugin.zip All credit given to I♥Cabbages for the original standalone scripts. I had the much easier job of converting them to a calibre plugin. @@ -8,7 +8,7 @@ This plugin is meant to decrypt Barnes & Noble Epubs that are protected with Ado Installation: -Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (ignobleepub_v02.2_plugin.zip) and click the 'Add' button. Click 'Yes' in the the "Are you sure?" dialog. Click OK in the "Success" dialog. +Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (ignobleepub_v02.4_plugin.zip) and click the 'Add' button. Click 'Yes' in the "Are you sure?" dialog. Click OK in the "Success" dialog. Configuration: @@ -64,4 +64,4 @@ Now copy the output from the terminal window. On Windows, you must use the window menu (little icon at left of window bar) to select all the text and then to copy it. On Macintosh and Linux, just use the normal text select and copy commands. -Paste the information into a comment at my blog, describing your problem. \ No newline at end of file +Paste the information into a comment at my blog, describing your problem. 
diff --git a/Calibre_Plugins/Ineptpdf ReadMe.txt b/Calibre_Plugins/Ineptpdf ReadMe.txt index 9fcb58d..ab5a510 100644 --- a/Calibre_Plugins/Ineptpdf ReadMe.txt +++ b/Calibre_Plugins/Ineptpdf ReadMe.txt @@ -1,4 +1,4 @@ -Inept PDF Plugin - ineptpdf_v01.6_plugin.zip +Inept PDF Plugin - ineptpdf_v01.8_plugin.zip All credit given to I♥Cabbages for the original standalone scripts. I had the much easier job of converting them to a Calibre plugin. @@ -8,7 +8,7 @@ This plugin is meant to decrypt Adobe Digital Edition PDFs that are protected wi Installation: -Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (ineptpdf_v01.6_plugin.zip) and click the 'Add' button. Click 'Yes' in the the "Are you sure?" dialog. Click OK in the "Success" dialog. +Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (ineptpdf_v01.8_plugin.zip) and click the 'Add' button. Click 'Yes' in the "Are you sure?" dialog. Click OK in the "Success" dialog. Configuration: @@ -45,4 +45,4 @@ Now copy the output from the terminal window. On Windows, you must use the window menu (little icon at left of window bar) to select all the text and then to copy it. On Macintosh and Linux, just use the normal text select and copy commands. -Paste the information into a comment at my blog, describing your problem. \ No newline at end of file +Paste the information into a comment at my blog, describing your problem. 
diff --git a/Calibre_Plugins/K4MobiDeDRM ReadMe.txt b/Calibre_Plugins/K4MobiDeDRM ReadMe.txt index 022f8ab..d080d49 100644 --- a/Calibre_Plugins/K4MobiDeDRM ReadMe.txt +++ b/Calibre_Plugins/K4MobiDeDRM ReadMe.txt @@ -1,4 +1,4 @@ -K4MobiDeDRM_v04.6_plugin.zip +K4MobiDeDRM_v04.7_plugin.zip Credit given to The Dark Reverser for the original standalone script. Credit also to the many people who have updated and expanded that script since then. @@ -11,7 +11,7 @@ This plugin is meant to remove the DRM from .prc, .mobi, .azw, .azw1, .azw3, .az Installation: -Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (K4MobiDeDRM_v04.6_plugin.zip) and click the 'Add' button. Click 'Yes' in the the "Are you sure?" dialog. Click OK in the "Success" dialog. +Go to calibre's Preferences page. Do **NOT** select "Get plugins to enhance calibre" as this is reserved for "official" calibre plugins, instead select "Change calibre behavior". Under "Advanced" click on the Plugins button. Use the "Load plugin from file" button to select the plugin's zip file (K4MobiDeDRM_v04.7_plugin.zip) and click the 'Add' button. Click 'Yes' in the "Are you sure?" dialog. Click OK in the "Success" dialog. Make sure that you delete any old versions of the plugin. They might interfere with the operation of the new one. @@ -24,7 +24,7 @@ If you have an eInk Kindle enter the 16 character serial number (these all begin If you have Mobipocket books, enter your 8 or 10 digit PID in the Mobipocket PIDs field. If you have more than one PID, separate them with commas. -These configuration steps are not needed if you only want to decode "Kindle for PC" or "Kindle for Mac" books. 
+These configuration steps are not needed if you only want to decode "Kindle for PC" or "Kindle for Mac" books. Linux Systems Only: diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py index ba11adf..75f6d21 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py @@ -13,6 +13,7 @@ from calibre.constants import iswindows, isosx import sys import os import re +import time from zipfile import ZipFile class K4DeDRM(FileTypePlugin): @@ -20,7 +21,7 @@ class K4DeDRM(FileTypePlugin): description = 'Removes DRM from eInk Kindle, Kindle 4 Mac and Kindle 4 PC ebooks, and from Mobipocket ebooks. Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, mdlnx, ApprenticeAlf, etc.' supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on author = 'DiapDealer, SomeUpdates, mdlnx, Apprentice Alf' # The author of this plugin - version = (0, 4, 6) # The version number of this plugin + version = (0, 4, 7) # The version number of this plugin file_types = set(['prc','mobi','azw','azw1','azw3','azw4','tpz']) # The file types that this plugin will be applied to on_import = True # Run this plugin during the import priority = 520 # run this plugin before earlier versions @@ -51,7 +52,7 @@ class K4DeDRM(FileTypePlugin): f.write(data) def run(self, path_to_ebook): - # add the alfcrypto directory to sys.path so alfcrypto.py + # add the alfcrypto directory to sys.path so alfcrypto.py # will be able to locate the custom lib(s) for CDLL import. sys.path.insert(0, self.alfdir) # Had to move these imports here so the custom libs can be @@ -73,8 +74,11 @@ class K4DeDRM(FileTypePlugin): pids = [] serials = [] kInfoFiles = [] + starttime = time.time() + print "K4MobiDeDRM plugin v{0:s}: Starting".format(plug_ver) + self.config() - + # Get supplied list of PIDs to try from plugin customization. 
pidstringlistt = self.pids_string.split(',') for pid in pidstringlistt: @@ -84,22 +88,22 @@ class K4DeDRM(FileTypePlugin): else: if len(pid) > 0: print "'%s' is not a valid Mobipocket PID." % pid - + # For linux, get PIDs by calling the right routines under WINE if sys.platform.startswith('linux'): k4 = False pids.extend(self.WINEgetPIDs(path_to_ebook)) - + # Get supplied list of Kindle serial numbers to try from plugin customization. serialstringlistt = self.serials_string.split(',') for serial in serialstringlistt: - serial = str(serial).strip() + serial = str(serial).replace(" ","") if len(serial) == 16 and serial[0] == 'B': serials.append(serial) else: if len(serial) > 0: print "'%s' is not a valid Kindle serial number." % serial - + # Load any kindle info files (*.info) included Calibre's config directory. try: print 'K4MobiDeDRM v%s: Calibre configuration directory = %s' % (plug_ver, config_dir) @@ -129,10 +133,11 @@ class K4DeDRM(FileTypePlugin): title = mb.getBookTitle() md1, md2 = mb.getPIDMetaInfo() - pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles) + pids.extend(kgenpids.getPidList(md1, md2, k4, serials, kInfoFiles)) + print "K4MobiDeDRM plugin v{2:s}: Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(pids),plug_ver) try: - mb.processBook(pidlst) + mb.processBook(pids) except mobidedrm.DrmException, e: #if you reached here then no luck raise and exception @@ -142,7 +147,7 @@ class K4DeDRM(FileTypePlugin): d.show() d.raise_() d.exec_() - raise Exception("K4MobiDeDRM plugin v%s Error: %s" % (plug_ver, str(e))) + raise Exception("K4MobiDeDRM plugin v{1:s} Error: {2:s} after {0:.1f} seconds".format(time.time()-starttime,plug_ver,str(e))) except topazextract.TpzDRMError, e: #if you reached here then no luck raise and exception if is_ok_to_use_qt(): @@ -151,23 +156,25 @@ class K4DeDRM(FileTypePlugin): d.show() d.raise_() d.exec_() - raise Exception("K4MobiDeDRM plugin v%s Error: %s" % (plug_ver, str(e))) + 
raise Exception("K4MobiDeDRM plugin v{1:s} Error: {2:s} after {0:.1f} seconds".format(time.time()-starttime,plug_ver,str(e))) - print "Success!" + print "K4MobiDeDRM plugin v{1:s}: Successfully decrypted book after {0:.1f} seconds".format(time.time()-starttime,plug_ver) if mobi: if mb.getPrintReplica(): of = self.temporary_file(bookname+'.azw4') - print 'K4MobiDeDRM v%s: Print Replica format detected.' % plug_ver + print 'K4MobiDeDRM plugin v%s: Print Replica format detected.' % plug_ver elif mb.getMobiVersion() >= 8: - print 'K4MobiDeDRM v%s: Stand-alone KF8 format detected.' % plug_ver + print 'K4MobiDeDRM plugin v%s: Stand-alone KF8 format detected.' % plug_ver of = self.temporary_file(bookname+'.azw3') else: of = self.temporary_file(bookname+'.mobi') mb.getMobiFile(of.name) + print "K4MobiDeDRM plugin v{1:s}: Saved decrypted book after {0:.1f} seconds".format(time.time()-starttime,plug_ver) else: of = self.temporary_file(bookname+'.htmlz') mb.getHTMLZip(of.name) mb.cleanup() + print "K4MobiDeDRM plugin v{1:s}: Saved decrypted Topaz HTMLZ after {0:.1f} seconds".format(time.time()-starttime,plug_ver) return of.name def WINEgetPIDs(self, infile): @@ -191,7 +198,7 @@ class K4DeDRM(FileTypePlugin): + ' "' + outfile + '"' env = os.environ - + print "My wine_prefix from tweaks is ", self.wine_prefix if ("WINEPREFIX" in env): @@ -212,7 +219,7 @@ class K4DeDRM(FileTypePlugin): print "WINE subprocess error ", str(e) return [] print "WINE subprocess returned ", result - + WINEpids = [] if os.path.exists(outfile): try: @@ -242,14 +249,14 @@ class K4DeDRM(FileTypePlugin): # from the command line from calibre_plugins.k4mobidedrm.config import ConfigWidget return config.ConfigWidget() - + def config(self): from calibre_plugins.k4mobidedrm.config import prefs - + self.pids_string = prefs['pids'] self.serials_string = prefs['serials'] self.wine_prefix = prefs['WINEPREFIX'] - + def save_settings(self, config_widget): ''' Save the settings specified by the user with 
config_widget. @@ -263,4 +270,4 @@ class K4DeDRM(FileTypePlugin): for candidate in zf.namelist(): if candidate in names: ans[candidate] = zf.read(candidate) - return ans \ No newline at end of file + return ans diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py b/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py index e69de29..5667511 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py @@ -0,0 +1,568 @@ +#! /usr/bin/env python + +""" + Routines for doing AES CBC in one file + + Modified by some_updates to extract + and combine only those parts needed for AES CBC + into one simple to add python file + + Original Version + Copyright (c) 2002 by Paul A. Lambert + Under: + CryptoPy Artisitic License Version 1.0 + See the wonderful pure python package cryptopy-1.2.5 + and read its LICENSE.txt for complete license details. +""" + +class CryptoError(Exception): + """ Base class for crypto exceptions """ + def __init__(self,errorMessage='Error!'): + self.message = errorMessage + def __str__(self): + return self.message + +class InitCryptoError(CryptoError): + """ Crypto errors during algorithm initialization """ +class BadKeySizeError(InitCryptoError): + """ Bad key size error """ +class EncryptError(CryptoError): + """ Error in encryption processing """ +class DecryptError(CryptoError): + """ Error in decryption processing """ +class DecryptNotBlockAlignedError(DecryptError): + """ Error in decryption processing """ + +def xorS(a,b): + """ XOR two strings """ + assert len(a)==len(b) + x = [] + for i in range(len(a)): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +def xor(a,b): + """ XOR two strings """ + x = [] + for i in range(min(len(a),len(b))): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +""" + Base 'BlockCipher' and Pad classes for cipher instances. + BlockCipher supports automatic padding and type conversion. 
The BlockCipher + class was written to make the actual algorithm code more readable and + not for performance. +""" + +class BlockCipher: + """ Block ciphers """ + def __init__(self): + self.reset() + + def reset(self): + self.resetEncrypt() + self.resetDecrypt() + def resetEncrypt(self): + self.encryptBlockCount = 0 + self.bytesToEncrypt = '' + def resetDecrypt(self): + self.decryptBlockCount = 0 + self.bytesToDecrypt = '' + + def encrypt(self, plainText, more = None): + """ Encrypt a string and return a binary string """ + self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt + numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) + cipherText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) + self.encryptBlockCount += 1 + cipherText += ctBlock + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + else: + self.bytesToEncrypt = '' + + if more == None: # no more data expected from caller + finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) + if len(finalBytes) > 0: + ctBlock = self.encryptBlock(finalBytes) + self.encryptBlockCount += 1 + cipherText += ctBlock + self.resetEncrypt() + return cipherText + + def decrypt(self, cipherText, more = None): + """ Decrypt a string and return a string; pass a non-None 'more' while further cipherText will follow, None on the final (or only) call """ + self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt + + numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) + if more == None: # no more calls to decrypt, should have all the data + if numExtraBytes != 0: + raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' + + # hold back some bytes in case last decrypt has zero len + if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : + numBlocks -= 1 + numExtraBytes = self.blockSize + + plainText = '' + for i in 
range(numBlocks): + bStart = i*self.blockSize + ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) + self.decryptBlockCount += 1 + plainText += ptBlock + + if numExtraBytes > 0: # keep only the undecrypted tail (partial or held-back block) for the next call + self.bytesToDecrypt = self.bytesToDecrypt[-numExtraBytes:] + else: + self.bytesToDecrypt = '' + + if more == None: # last decrypt remove padding + plainText = self.padding.removePad(plainText, self.blockSize) + self.resetDecrypt() + return plainText + + +class Pad: + def __init__(self): + pass # eventually could put in calculation of min and max size extension + +class padWithPadLen(Pad): + """ Pad a binary string with the length of the padding """ + + def addPad(self, extraBytes, blockSize): + """ Add padding to a binary string to make it an even multiple + of the block size """ + blocks, numExtraBytes = divmod(len(extraBytes), blockSize) + padLength = blockSize - numExtraBytes + return extraBytes + padLength*chr(padLength) + + def removePad(self, paddedBinaryString, blockSize): + """ Remove padding from a binary string """ + if not(0 6 and i%Nk == 4 : + temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) + w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) + return w + +Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! 
+ 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, + 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) + +#------------------------------------- +def AddRoundKey(algInstance, keyBlock): + """ XOR the algorithm state with a block of key material """ + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] ^= keyBlock[column][row] +#------------------------------------- + +def SubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = Sbox[algInstance.state[column][row]] + +def InvSubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] + +Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) + +InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) + +#------------------------------------- +""" For each block size (Nb), the ShiftRow operation shifts row i + by the amount Ci. Note that row 0 is not shifted. 
+ Nb C1 C2 C3 + ------------------- """ +shiftOffset = { 4 : ( 0, 1, 2, 3), + 5 : ( 0, 1, 2, 3), + 6 : ( 0, 1, 2, 3), + 7 : ( 0, 1, 2, 4), + 8 : ( 0, 1, 3, 4) } +def ShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +def InvShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +#------------------------------------- +def MixColumns(a): + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) + Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +def InvMixColumns(a): + """ Mix the four bytes of every column in a linear way + This is the opposite operation of Mixcolumn """ + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) + Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) + Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) + Sprime[3] = 
mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +#------------------------------------- +def mul(a, b): + """ Multiply two elements of GF(2^m) + needed for MixColumn and InvMixColumn """ + if (a !=0 and b!=0): + return Alogtable[(Logtable[a] + Logtable[b])%255] + else: + return 0 + +Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, + 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, + 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, + 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, + 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, + 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, + 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, + 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, + 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, + 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, + 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, + 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, + 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, + 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, + 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, + 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) + +Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, + 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, + 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, + 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, + 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, + 131, 158, 185, 208, 107, 
189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, + 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, + 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, + 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, + 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, + 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, + 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, + 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, + 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, + 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, + 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) + + + + +""" + AES Encryption Algorithm + The AES algorithm is just Rijndael algorithm restricted to the default + blockSize of 128 bits. +""" + +class AES(Rijndael): + """ The AES algorithm is the Rijndael block cipher restricted to block + sizes of 128 bits and key sizes of 128, 192 or 256 bits + """ + def __init__(self, key = None, padding = padWithPadLen(), keySize=16): + """ Initialize AES, keySize is in bytes """ + if not (keySize == 16 or keySize == 24 or keySize == 32) : + raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' + + Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) + + self.name = 'AES' + + +""" + CBC mode of encryption for block ciphers. + This algorithm mode wraps any BlockCipher to make a + Cipher Block Chaining mode. +""" +from random import Random # should change to crypto.random!!! + + +class CBC(BlockCipher): + """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode + algorithms. The initialization (IV) is automatic if set to None. 
Padding + is also automatic based on the Pad class used to initialize the algorithm + """ + def __init__(self, blockCipherInstance, padding = padWithPadLen()): + """ CBC algorithms are created by initializing with a BlockCipher instance """ + self.baseCipher = blockCipherInstance + self.name = self.baseCipher.name + '_CBC' + self.blockSize = self.baseCipher.blockSize + self.keySize = self.baseCipher.keySize + self.padding = padding + self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! + self.r = Random() # for IV generation, currently uses + # mediocre standard distro version <---------------- + import time + newSeed = time.ctime()+str(self.r) # seed with instance location + self.r.seed(newSeed) # to make unique + self.reset() + + def setKey(self, key): + self.baseCipher.setKey(key) + + # Overload to reset both CBC state and the wrapped baseCipher + def resetEncrypt(self): + BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) + self.baseCipher.resetEncrypt() # reset base cipher encrypt state + + def resetDecrypt(self): + BlockCipher.resetDecrypt(self) # reset CBC state (super class) + self.baseCipher.resetDecrypt() # reset base cipher decrypt state + + def encrypt(self, plainText, iv=None, more=None): + """ CBC encryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! + """ + if self.encryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to encrypt' + + return BlockCipher.encrypt(self,plainText, more=more) + + def decrypt(self, cipherText, iv=None, more=None): + """ CBC decryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! 
+ """ + if self.decryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to decrypt' + + return BlockCipher.decrypt(self, cipherText, more=more) + + def encryptBlock(self, plainTextBlock): + """ CBC block encryption, IV is set with 'encrypt' """ + auto_IV = '' + if self.encryptBlockCount == 0: + if self.iv == None: + # generate IV and use + self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) + self.prior_encr_CT_block = self.iv + auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic + else: # application provided IV + assert(len(self.iv) == self.blockSize ),'IV must be same length as block' + self.prior_encr_CT_block = self.iv + """ encrypt the prior CT XORed with the PT """ + ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) + self.prior_encr_CT_block = ct + return auto_IV+ct + + def decryptBlock(self, encryptedBlock): + """ Decrypt a single block """ + + if self.decryptBlockCount == 0: # first call, process IV + if self.iv == None: # auto decrypt IV? 
+ self.prior_CT_block = encryptedBlock + return '' + else: + assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" + self.prior_CT_block = self.iv + + dct = self.baseCipher.decryptBlock(encryptedBlock) + """ XOR the prior decrypted CT with the prior CT """ + dct_XOR_priorCT = xor( self.prior_CT_block, dct ) + + self.prior_CT_block = encryptedBlock + + return dct_XOR_priorCT + + +""" + AES_CBC Encryption Algorithm +""" + +class AES_CBC(CBC): + """ AES encryption in CBC feedback mode """ + def __init__(self, key=None, padding=padWithPadLen(), keySize=16): + CBC.__init__( self, AES(key, noPadding(), keySize), padding) + self.name = 'AES_CBC' diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll index 1b0ee71..26d740d 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py index 5667511..e25a0c8 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py @@ -1,568 +1,290 @@ #! /usr/bin/env python -""" - Routines for doing AES CBC in one file +import sys, os +import hmac +from struct import pack +import hashlib - Modified by some_updates to extract - and combine only those parts needed for AES CBC - into one simple to add python file - Original Version - Copyright (c) 2002 by Paul A. Lambert - Under: - CryptoPy Artisitic License Version 1.0 - See the wonderful pure python package cryptopy-1.2.5 - and read its LICENSE.txt for complete license details. 
-""" +# interface to needed routines libalfcrypto +def _load_libalfcrypto(): + import ctypes + from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof -class CryptoError(Exception): - """ Base class for crypto exceptions """ - def __init__(self,errorMessage='Error!'): - self.message = errorMessage - def __str__(self): - return self.message - -class InitCryptoError(CryptoError): - """ Crypto errors during algorithm initialization """ -class BadKeySizeError(InitCryptoError): - """ Bad key size error """ -class EncryptError(CryptoError): - """ Error in encryption processing """ -class DecryptError(CryptoError): - """ Error in decryption processing """ -class DecryptNotBlockAlignedError(DecryptError): - """ Error in decryption processing """ - -def xorS(a,b): - """ XOR two strings """ - assert len(a)==len(b) - x = [] - for i in range(len(a)): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) - -def xor(a,b): - """ XOR two strings """ - x = [] - for i in range(min(len(a),len(b))): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) - -""" - Base 'BlockCipher' and Pad classes for cipher instances. - BlockCipher supports automatic padding and type conversion. The BlockCipher - class was written to make the actual algorithm code more readable and - not for performance. 
-""" - -class BlockCipher: - """ Block ciphers """ - def __init__(self): - self.reset() - - def reset(self): - self.resetEncrypt() - self.resetDecrypt() - def resetEncrypt(self): - self.encryptBlockCount = 0 - self.bytesToEncrypt = '' - def resetDecrypt(self): - self.decryptBlockCount = 0 - self.bytesToDecrypt = '' - - def encrypt(self, plainText, more = None): - """ Encrypt a string and return a binary string """ - self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt - numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) - cipherText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) - self.encryptBlockCount += 1 - cipherText += ctBlock - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + pointer_size = ctypes.sizeof(ctypes.c_voidp) + name_of_lib = None + if sys.platform.startswith('darwin'): + name_of_lib = 'libalfcrypto.dylib' + elif sys.platform.startswith('win'): + if pointer_size == 4: + name_of_lib = 'alfcrypto.dll' else: - self.bytesToEncrypt = '' - - if more == None: # no more data expected from caller - finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) - if len(finalBytes) > 0: - ctBlock = self.encryptBlock(finalBytes) - self.encryptBlockCount += 1 - cipherText += ctBlock - self.resetEncrypt() - return cipherText - - def decrypt(self, cipherText, more = None): - """ Decrypt a string and return a string """ - self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt - - numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) - if more == None: # no more calls to decrypt, should have all the data - if numExtraBytes != 0: - raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' - - # hold back some bytes in case last decrypt has zero len - if (more != None) and 
(numExtraBytes == 0) and (numBlocks >0) : - numBlocks -= 1 - numExtraBytes = self.blockSize - - plainText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) - self.decryptBlockCount += 1 - plainText += ptBlock - - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] - else: - self.bytesToEncrypt = '' - - if more == None: # last decrypt remove padding - plainText = self.padding.removePad(plainText, self.blockSize) - self.resetDecrypt() - return plainText - - -class Pad: - def __init__(self): - pass # eventually could put in calculation of min and max size extension - -class padWithPadLen(Pad): - """ Pad a binary string with the length of the padding """ - - def addPad(self, extraBytes, blockSize): - """ Add padding to a binary string to make it an even multiple - of the block size """ - blocks, numExtraBytes = divmod(len(extraBytes), blockSize) - padLength = blockSize - numExtraBytes - return extraBytes + padLength*chr(padLength) - - def removePad(self, paddedBinaryString, blockSize): - """ Remove padding from a binary string """ - if not(0 6 and i%Nk == 4 : - temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) - w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) - return w - -Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! 
- 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, - 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) - -#------------------------------------- -def AddRoundKey(algInstance, keyBlock): - """ XOR the algorithm state with a block of key material """ - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] ^= keyBlock[column][row] -#------------------------------------- - -def SubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = Sbox[algInstance.state[column][row]] - -def InvSubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] - -Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, - 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, - 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, - 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, - 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, - 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, - 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, - 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, - 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, - 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, - 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, - 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, - 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, - 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, - 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, - 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, - 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, - 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, - 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, - 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, - 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, - 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, - 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, - 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, - 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, - 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, - 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, - 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, - 
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, - 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, - 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, - 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) - -InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, - 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, - 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, - 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, - 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, - 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, - 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, - 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, - 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, - 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, - 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, - 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, - 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, - 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, - 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, - 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, - 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, - 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, - 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, - 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, - 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, - 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, - 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, - 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, - 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, - 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, - 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, - 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, - 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, - 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, - 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, - 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) - -#------------------------------------- -""" For each block size (Nb), the ShiftRow operation shifts row i - by the amount Ci. Note that row 0 is not shifted. 
- Nb C1 C2 C3 - ------------------- """ -shiftOffset = { 4 : ( 0, 1, 2, 3), - 5 : ( 0, 1, 2, 3), - 6 : ( 0, 1, 2, 3), - 7 : ( 0, 1, 2, 4), - 8 : ( 0, 1, 3, 4) } -def ShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] -def InvShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] -#------------------------------------- -def MixColumns(a): - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) - Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] - -def InvMixColumns(a): - """ Mix the four bytes of every column in a linear way - This is the opposite operation of Mixcolumn """ - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) - Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) - Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) - Sprime[3] = 
mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] - -#------------------------------------- -def mul(a, b): - """ Multiply two elements of GF(2^m) - needed for MixColumn and InvMixColumn """ - if (a !=0 and b!=0): - return Alogtable[(Logtable[a] + Logtable[b])%255] + name_of_lib = 'alfcrypto64.dll' else: - return 0 - -Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, - 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, - 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, - 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, - 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, - 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, - 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, - 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, - 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, - 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, - 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, - 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, - 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, - 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, - 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, - 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) - -Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, - 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, - 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, - 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, - 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 
136, - 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, - 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, - 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, - 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, - 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, - 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, - 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, - 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, - 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, - 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, - 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) - - - - -""" - AES Encryption Algorithm - The AES algorithm is just Rijndael algorithm restricted to the default - blockSize of 128 bits. -""" - -class AES(Rijndael): - """ The AES algorithm is the Rijndael block cipher restricted to block - sizes of 128 bits and key sizes of 128, 192 or 256 bits - """ - def __init__(self, key = None, padding = padWithPadLen(), keySize=16): - """ Initialize AES, keySize is in bytes """ - if not (keySize == 16 or keySize == 24 or keySize == 32) : - raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' - - Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) - - self.name = 'AES' - - -""" - CBC mode of encryption for block ciphers. - This algorithm mode wraps any BlockCipher to make a - Cipher Block Chaining mode. -""" -from random import Random # should change to crypto.random!!! - - -class CBC(BlockCipher): - """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode - algorithms. The initialization (IV) is automatic if set to None. 
Padding - is also automatic based on the Pad class used to initialize the algorithm - """ - def __init__(self, blockCipherInstance, padding = padWithPadLen()): - """ CBC algorithms are created by initializing with a BlockCipher instance """ - self.baseCipher = blockCipherInstance - self.name = self.baseCipher.name + '_CBC' - self.blockSize = self.baseCipher.blockSize - self.keySize = self.baseCipher.keySize - self.padding = padding - self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! - self.r = Random() # for IV generation, currently uses - # mediocre standard distro version <---------------- - import time - newSeed = time.ctime()+str(self.r) # seed with instance location - self.r.seed(newSeed) # to make unique - self.reset() - - def setKey(self, key): - self.baseCipher.setKey(key) - - # Overload to reset both CBC state and the wrapped baseCipher - def resetEncrypt(self): - BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) - self.baseCipher.resetEncrypt() # reset base cipher encrypt state - - def resetDecrypt(self): - BlockCipher.resetDecrypt(self) # reset CBC state (super class) - self.baseCipher.resetDecrypt() # reset base cipher decrypt state - - def encrypt(self, plainText, iv=None, more=None): - """ CBC encryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! - """ - if self.encryptBlockCount == 0: - self.iv = iv + if pointer_size == 4: + name_of_lib = 'libalfcrypto32.so' else: - assert(iv==None), 'IV used only on first call to encrypt' + name_of_lib = 'libalfcrypto64.so' + + libalfcrypto = sys.path[0] + os.sep + name_of_lib - return BlockCipher.encrypt(self,plainText, more=more) + if not os.path.isfile(libalfcrypto): + raise Exception('libalfcrypto not found') - def decrypt(self, cipherText, iv=None, more=None): - """ CBC decryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! 
- """ - if self.decryptBlockCount == 0: - self.iv = iv - else: - assert(iv==None), 'IV used only on first call to decrypt' + libalfcrypto = CDLL(libalfcrypto) - return BlockCipher.decrypt(self, cipherText, more=more) - - def encryptBlock(self, plainTextBlock): - """ CBC block encryption, IV is set with 'encrypt' """ - auto_IV = '' - if self.encryptBlockCount == 0: - if self.iv == None: - # generate IV and use - self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) - self.prior_encr_CT_block = self.iv - auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic - else: # application provided IV - assert(len(self.iv) == self.blockSize ),'IV must be same length as block' - self.prior_encr_CT_block = self.iv - """ encrypt the prior CT XORed with the PT """ - ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) - self.prior_encr_CT_block = ct - return auto_IV+ct - - def decryptBlock(self, encryptedBlock): - """ Decrypt a single block """ - - if self.decryptBlockCount == 0: # first call, process IV - if self.iv == None: # auto decrypt IV? 
- self.prior_CT_block = encryptedBlock - return '' - else: - assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" - self.prior_CT_block = self.iv - - dct = self.baseCipher.decryptBlock(encryptedBlock) - """ XOR the prior decrypted CT with the prior CT """ - dct_XOR_priorCT = xor( self.prior_CT_block, dct ) - - self.prior_CT_block = encryptedBlock - - return dct_XOR_priorCT + c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) -""" - AES_CBC Encryption Algorithm -""" + def F(restype, name, argtypes): + func = getattr(libalfcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + # aes cbc decryption + # + # struct aes_key_st { + # unsigned long rd_key[4 *(AES_MAXNR + 1)]; + # int rounds; + # }; + # + # typedef struct aes_key_st AES_KEY; + # + # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + # + # + # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + # const unsigned long length, const AES_KEY *key, + # unsigned char *ivec, const int enc); + + AES_MAXNR = 14 + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + + AES_KEY_p = POINTER(AES_KEY) + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + + + # Pukall 1 Cipher + # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src, + # unsigned char *dest, unsigned int len, int decryption); + + PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong]) + + # Topaz Encryption + # typedef struct _TpzCtx { + # unsigned int v[2]; + # } TpzCtx; + # + # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen); + # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len); + + class TPZ_CTX(Structure): + _fields_ = [('v', 
c_long * 2)] + + TPZ_CTX_p = POINTER(TPZ_CTX) + topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong]) + topazCryptoDecrypt = F(None, 'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong]) + + + class AES_CBC(object): + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + + def set_decrypt_key(self, userkey, iv): + self._blocksize = len(userkey) + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise Exception('AES CBC improper key used') + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise Exception('Failed to initialize AES CBC key') + + def decrypt(self, data): + out = create_string_buffer(len(data)) + mutable_iv = create_string_buffer(self._iv, len(self._iv)) + rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0) + if rv == 0: + raise Exception('AES CBC decryption failed') + return out.raw + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + self.key = key + out = create_string_buffer(len(src)) + de = 0 + if decryption: + de = 1 + rv = PC1(key, len(key), src, out, len(src), de) + return out.raw + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + tpz_ctx = self._ctx = TPZ_CTX() + topazCryptoInit(tpz_ctx, key, len(key)) + return tpz_ctx + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + out = create_string_buffer(len(data)) + topazCryptoDecrypt(ctx, data, out, len(data)) + return out.raw + + print "Using Library AlfCrypto DLL/DYLIB/SO" + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_python_alfcrypto(): + + import aescbc + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + sum1 = 0; + sum2 = 0; + keyXorVal = 0; + if len(key)!=16: + 
print "Bad key length!" + return None + wkey = [] + for i in xrange(8): + wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) + dst = "" + for i in xrange(len(src)): + temp1 = 0; + byteXorVal = 0; + for j in xrange(8): + temp1 ^= wkey[j] + sum2 = (sum2+j)*20021 + sum1 + sum1 = (temp1*346)&0xFFFF + sum2 = (sum2+sum1)&0xFFFF + temp1 = (temp1*20021+1)&0xFFFF + byteXorVal ^= temp1 ^ sum2 + curByte = ord(src[i]) + if not decryption: + keyXorVal = curByte * 257; + curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF + if decryption: + keyXorVal = curByte * 257; + for j in xrange(8): + wkey[j] ^= keyXorVal; + dst+=chr(curByte) + return dst + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + ctx1 = 0x0CAFFE19E + for keyChar in key: + keyByte = ord(keyChar) + ctx2 = ctx1 + ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) + self._ctx = [ctx1, ctx2] + return [ctx1,ctx2] + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + ctx1 = ctx[0] + ctx2 = ctx[1] + plainText = "" + for dataChar in data: + dataByte = ord(dataChar) + m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF + ctx2 = ctx1 + ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) + plainText += chr(m) + return plainText + + class AES_CBC(object): + def __init__(self): + self._key = None + self._iv = None + self.aes = None + + def set_decrypt_key(self, userkey, iv): + self._key = userkey + self._iv = iv + self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey)) + + def decrypt(self, data): + iv = self._iv + cleartext = self.aes.decrypt(iv + data) + return cleartext + + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_crypto(): + AES_CBC = Pukall_Cipher = Topaz_Cipher = None + cryptolist = (_load_libalfcrypto, _load_python_alfcrypto) + for loader in cryptolist: + try: + AES_CBC, Pukall_Cipher, Topaz_Cipher = loader() + break + 
except (ImportError, Exception): + pass + return AES_CBC, Pukall_Cipher, Topaz_Cipher + +AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto() + + +class KeyIVGen(object): + # this only exists in openssl so we will use pure python implementation instead + # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + def pbkdf2(self, passwd, salt, iter, keylen): + + def xorstr( a, b ): + if len(a) != len(b): + raise Exception("xorstr(): lengths differ") + return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) + + def prf( h, data ): + hm = h.copy() + hm.update( data ) + return hm.digest() + + def pbkdf2_F( h, salt, itercount, blocknum ): + U = prf( h, salt + pack('>i',blocknum ) ) + T = U + for i in range(2, itercount+1): + U = prf( h, U ) + T = xorstr( T, U ) + return T + + sha = hashlib.sha1 + digest_size = sha().digest_size + # l - number of output blocks to produce + l = keylen / digest_size + if keylen % digest_size != 0: + l += 1 + h = hmac.new( passwd, None, sha ) + T = "" + for i in range(1, l+1): + T += pbkdf2_F( h, salt, iter, i ) + return T[0: keylen] + -class AES_CBC(CBC): - """ AES encryption in CBC feedback mode """ - def __init__(self, key=None, padding=padWithPadLen(), keySize=16): - CBC.__init__( self, AES(key, noPadding(), keySize), padding) - self.name = 'AES_CBC' diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll index 26d740d..7bef68e 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip index e25a0c8..269810c 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip differ diff --git 
a/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py b/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py deleted file mode 100644 index 7bef68e..0000000 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py and /dev/null differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/config.py b/Calibre_Plugins/K4MobiDeDRM_plugin/config.py index 269810c..9825878 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/config.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/config.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py index 9825878..c412d7b 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py @@ -1,59 +1,846 @@ -from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, QLineEdit +#! /usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# For use with Topaz Scripts Version 2.6 -from calibre.utils.config import JSONConfig +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) -# This is where all preferences for this plugin will be stored -# You should always prefix your config file name with plugins/, -# so as to ensure you dont accidentally clobber a calibre config file -prefs = JSONConfig('plugins/K4MobiDeDRM') +import sys +sys.stdout=Unbuffered(sys.stdout) -# Set defaults -prefs.defaults['pids'] = "" -prefs.defaults['serials'] = "" -prefs.defaults['WINEPREFIX'] = None +import csv +import os +import getopt +from struct import pack +from struct import unpack + +class TpzDRMError(Exception): + pass + +# Get a 7 bit encoded number from string. 
The most +# significant byte comes first and has the high bit (8th) set + +def readEncodedNumber(file): + flag = False + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + + if data == 0xFF: + flag = True + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + datax = (datax <<7) + (data & 0x7F) + data = datax + + if flag: + data = -data + return data -class ConfigWidget(QWidget): +# returns a binary string that encodes a number into 7 bits +# most significant byte first which has the high bit set - def __init__(self): - QWidget.__init__(self) - self.l = QVBoxLayout() - self.setLayout(self.l) +def encodeNumber(number): + result = "" + negative = False + flag = 0 - self.serialLabel = QLabel('eInk Kindle Serial numbers (First character B, 16 characters, use commas if more than one)') - self.l.addWidget(self.serialLabel) + if number < 0 : + number = -number + 1 + negative = True - self.serials = QLineEdit(self) - self.serials.setText(prefs['serials']) - self.l.addWidget(self.serials) - self.serialLabel.setBuddy(self.serials) + while True: + byte = number & 0x7F + number = number >> 7 + byte += flag + result += chr(byte) + flag = 0x80 + if number == 0 : + if (byte == 0xFF and negative == False) : + result += chr(0x80) + break - self.pidLabel = QLabel('Mobipocket PIDs (8 or 10 characters, use commas if more than one)') - self.l.addWidget(self.pidLabel) + if negative: + result += chr(0xFF) - self.pids = QLineEdit(self) - self.pids.setText(prefs['pids']) - self.l.addWidget(self.pids) - self.pidLabel.setBuddy(self.serials) + return result[::-1] - self.wpLabel = QLabel('For Linux only: WINEPREFIX (enter absolute path)') - self.l.addWidget(self.wpLabel) - self.wineprefix = QLineEdit(self) - wineprefix = prefs['WINEPREFIX'] - if wineprefix is not None: - self.wineprefix.setText(wineprefix) + +# create / read 
a length prefixed string from the file + +def lengthPrefixString(data): + return encodeNumber(len(data))+data + +def readString(file): + stringLength = readEncodedNumber(file) + if (stringLength == None): + return "" + sv = file.read(stringLength) + if (len(sv) != stringLength): + return "" + return unpack(str(stringLength)+"s",sv)[0] + + +# convert a binary string generated by encodeNumber (7 bit encoded number) +# to the value you would find inside the page*.dat files to be processed + +def convert(i): + result = '' + val = encodeNumber(i) + for j in xrange(len(val)): + c = ord(val[j:j+1]) + result += '%02x' % c + return result + + + +# the complete string table used to store all book text content +# as well as the xml tokens and values that make sense out of it + +class Dictionary(object): + def __init__(self, dictFile): + self.filename = dictFile + self.size = 0 + self.fo = file(dictFile,'rb') + self.stable = [] + self.size = readEncodedNumber(self.fo) + for i in xrange(self.size): + self.stable.append(self.escapestr(readString(self.fo))) + self.pos = 0 + + def escapestr(self, str): + str = str.replace('&','&') + str = str.replace('<','<') + str = str.replace('>','>') + str = str.replace('=','=') + return str + + def lookup(self,val): + if ((val >= 0) and (val < self.size)) : + self.pos = val + return self.stable[self.pos] else: - self.wineprefix.setText('') + print "Error - %d outside of string table limits" % val + raise TpzDRMError('outside of string table limits') + # sys.exit(-1) - self.l.addWidget(self.wineprefix) - self.wpLabel.setBuddy(self.wineprefix) + def getSize(self): + return self.size - def save_settings(self): - prefs['pids'] = str(self.pids.text()).replace(" ","") - prefs['serials'] = str(self.serials.text()).replace(" ","") - winepref=str(self.wineprefix.text()) - if winepref.strip() != '': - prefs['WINEPREFIX'] = winepref + def getPos(self): + return self.pos + + def dumpDict(self): + for i in xrange(self.size): + print "%d %s %s" % (i, 
convert(i), self.stable[i]) + return + +# parses the xml snippets that are represented by each page*.dat file. +# also parses the other0.dat file - the main stylesheet +# and information used to inject the xml snippets into page*.dat files + +class PageParser(object): + def __init__(self, filename, dict, debug, flat_xml): + self.fo = file(filename,'rb') + self.id = os.path.basename(filename).replace('.dat','') + self.dict = dict + self.debug = debug + self.flat_xml = flat_xml + self.tagpath = [] + self.doc = [] + self.snippetList = [] + + + # hash table used to enable the decoding process + # This has all been developed by trial and error so it may still have omissions or + # contain errors + # Format: + # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped) + + token_tags = { + 'x' : (1, 'scalar_number', 0, 0), + 'y' : (1, 'scalar_number', 0, 0), + 'h' : (1, 'scalar_number', 0, 0), + 'w' : (1, 'scalar_number', 0, 0), + 'firstWord' : (1, 'scalar_number', 0, 0), + 'lastWord' : (1, 'scalar_number', 0, 0), + 'rootID' : (1, 'scalar_number', 0, 0), + 'stemID' : (1, 'scalar_number', 0, 0), + 'type' : (1, 'scalar_text', 0, 0), + + 'info' : (0, 'number', 1, 0), + + 'info.word' : (0, 'number', 1, 1), + 'info.word.ocrText' : (1, 'text', 0, 0), + 'info.word.firstGlyph' : (1, 'raw', 0, 0), + 'info.word.lastGlyph' : (1, 'raw', 0, 0), + 'info.word.bl' : (1, 'raw', 0, 0), + 'info.word.link_id' : (1, 'number', 0, 0), + + 'glyph' : (0, 'number', 1, 1), + 'glyph.x' : (1, 'number', 0, 0), + 'glyph.y' : (1, 'number', 0, 0), + 'glyph.glyphID' : (1, 'number', 0, 0), + + 'dehyphen' : (0, 'number', 1, 1), + 'dehyphen.rootID' : (1, 'number', 0, 0), + 'dehyphen.stemID' : (1, 'number', 0, 0), + 'dehyphen.stemPage' : (1, 'number', 0, 0), + 'dehyphen.sh' : (1, 'number', 0, 0), + + 'links' : (0, 'number', 1, 1), + 'links.page' : (1, 'number', 0, 0), + 'links.rel' : (1, 'number', 0, 0), + 'links.row' : (1, 'number', 0, 0), + 'links.title' : 
(1, 'text', 0, 0), + 'links.href' : (1, 'text', 0, 0), + 'links.type' : (1, 'text', 0, 0), + 'links.id' : (1, 'number', 0, 0), + + 'paraCont' : (0, 'number', 1, 1), + 'paraCont.rootID' : (1, 'number', 0, 0), + 'paraCont.stemID' : (1, 'number', 0, 0), + 'paraCont.stemPage' : (1, 'number', 0, 0), + + 'paraStems' : (0, 'number', 1, 1), + 'paraStems.stemID' : (1, 'number', 0, 0), + + 'wordStems' : (0, 'number', 1, 1), + 'wordStems.stemID' : (1, 'number', 0, 0), + + 'empty' : (1, 'snippets', 1, 0), + + 'page' : (1, 'snippets', 1, 0), + 'page.pageid' : (1, 'scalar_text', 0, 0), + 'page.pagelabel' : (1, 'scalar_text', 0, 0), + 'page.type' : (1, 'scalar_text', 0, 0), + 'page.h' : (1, 'scalar_number', 0, 0), + 'page.w' : (1, 'scalar_number', 0, 0), + 'page.startID' : (1, 'scalar_number', 0, 0), + + 'group' : (1, 'snippets', 1, 0), + 'group.type' : (1, 'scalar_text', 0, 0), + 'group._tag' : (1, 'scalar_text', 0, 0), + 'group.orientation': (1, 'scalar_text', 0, 0), + + 'region' : (1, 'snippets', 1, 0), + 'region.type' : (1, 'scalar_text', 0, 0), + 'region.x' : (1, 'scalar_number', 0, 0), + 'region.y' : (1, 'scalar_number', 0, 0), + 'region.h' : (1, 'scalar_number', 0, 0), + 'region.w' : (1, 'scalar_number', 0, 0), + 'region.orientation' : (1, 'scalar_text', 0, 0), + + 'empty_text_region' : (1, 'snippets', 1, 0), + + 'img' : (1, 'snippets', 1, 0), + 'img.x' : (1, 'scalar_number', 0, 0), + 'img.y' : (1, 'scalar_number', 0, 0), + 'img.h' : (1, 'scalar_number', 0, 0), + 'img.w' : (1, 'scalar_number', 0, 0), + 'img.src' : (1, 'scalar_number', 0, 0), + 'img.color_src' : (1, 'scalar_number', 0, 0), + + 'paragraph' : (1, 'snippets', 1, 0), + 'paragraph.class' : (1, 'scalar_text', 0, 0), + 'paragraph.firstWord' : (1, 'scalar_number', 0, 0), + 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), + 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), + 'paragraph.gridSize' : (1, 'scalar_number', 0, 0), + 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0), + 'paragraph.gridTopCenter' 
: (1, 'scalar_number', 0, 0), + 'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0), + 'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0), + + + 'word_semantic' : (1, 'snippets', 1, 1), + 'word_semantic.type' : (1, 'scalar_text', 0, 0), + 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0), + 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0), + + 'word' : (1, 'snippets', 1, 0), + 'word.type' : (1, 'scalar_text', 0, 0), + 'word.class' : (1, 'scalar_text', 0, 0), + 'word.firstGlyph' : (1, 'scalar_number', 0, 0), + 'word.lastGlyph' : (1, 'scalar_number', 0, 0), + + '_span' : (1, 'snippets', 1, 0), + '_span.firstWord' : (1, 'scalar_number', 0, 0), + '_span.lastWord' : (1, 'scalar_number', 0, 0), + '_span.gridSize' : (1, 'scalar_number', 0, 0), + '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0), + '_span.gridTopCenter' : (1, 'scalar_number', 0, 0), + '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0), + '_span.gridEndCenter' : (1, 'scalar_number', 0, 0), + + 'span' : (1, 'snippets', 1, 0), + 'span.firstWord' : (1, 'scalar_number', 0, 0), + 'span.lastWord' : (1, 'scalar_number', 0, 0), + 'span.gridSize' : (1, 'scalar_number', 0, 0), + 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0), + 'span.gridTopCenter' : (1, 'scalar_number', 0, 0), + 'span.gridBeginCenter' : (1, 'scalar_number', 0, 0), + 'span.gridEndCenter' : (1, 'scalar_number', 0, 0), + + 'extratokens' : (1, 'snippets', 1, 0), + 'extratokens.type' : (1, 'scalar_text', 0, 0), + 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0), + 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0), + + 'glyph.h' : (1, 'number', 0, 0), + 'glyph.w' : (1, 'number', 0, 0), + 'glyph.use' : (1, 'number', 0, 0), + 'glyph.vtx' : (1, 'number', 0, 1), + 'glyph.len' : (1, 'number', 0, 1), + 'glyph.dpi' : (1, 'number', 0, 0), + 'vtx' : (0, 'number', 1, 1), + 'vtx.x' : (1, 'number', 0, 0), + 'vtx.y' : (1, 'number', 0, 0), + 'len' : (0, 'number', 1, 1), + 'len.n' : (1, 'number', 0, 0), + + 'book' : (1, 'snippets', 
1, 0), + 'version' : (1, 'snippets', 1, 0), + 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.Schema_id' : (1, 'scalar_text', 0, 0), + 'version.Schema_version' : (1, 'scalar_text', 0, 0), + 'version.Topaz_version' : (1, 'scalar_text', 0, 0), + 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.chapterheaders' : (1, 'scalar_text', 0, 0), + 'version.creation_date' : (1, 'scalar_text', 0, 0), + 'version.header_footer' : (1, 'scalar_text', 0, 0), + 'version.init_from_ocr' : (1, 'scalar_text', 0, 0), + 'version.letter_insertion' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0), + 'version.findlists' : (1, 'scalar_text', 0, 0), + 'version.page_num' : (1, 'scalar_text', 0, 0), + 'version.page_type' : (1, 'scalar_text', 0, 0), + 'version.bad_text' : (1, 'scalar_text', 0, 0), + 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0), + 'version.margins' : (1, 'scalar_text', 0, 0), + 'version.staggered_lines' : (1, 'scalar_text', 0, 0), + 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0), + 'version.toc' : (1, 'scalar_text', 0, 0), + + 'stylesheet' : (1, 'snippets', 1, 0), + 'style' : (1, 'snippets', 1, 0), + 'style._tag' : (1, 'scalar_text', 0, 0), + 'style.type' : (1, 'scalar_text', 0, 0), + 'style._parent_type' : (1, 'scalar_text', 0, 0), + 'style.class' : (1, 'scalar_text', 0, 0), + 'style._after_class' : (1, 'scalar_text', 0, 0), + 'rule' : (1, 'snippets', 1, 0), + 'rule.attr' : (1, 'scalar_text', 0, 0), + 'rule.value' : (1, 'scalar_text', 0, 0), + + 'original' : (0, 'number', 1, 1), + 'original.pnum' : (1, 'number', 0, 0), + 'original.pid' : (1, 'text', 0, 
0), + 'pages' : (0, 'number', 1, 1), + 'pages.ref' : (1, 'number', 0, 0), + 'pages.id' : (1, 'number', 0, 0), + 'startID' : (0, 'number', 1, 1), + 'startID.page' : (1, 'number', 0, 0), + 'startID.id' : (1, 'number', 0, 0), + + } + + + # full tag path record keeping routines + def tag_push(self, token): + self.tagpath.append(token) + def tag_pop(self): + if len(self.tagpath) > 0 : + self.tagpath.pop() + def tagpath_len(self): + return len(self.tagpath) + def get_tagpath(self, i): + cnt = len(self.tagpath) + if i < cnt : result = self.tagpath[i] + for j in xrange(i+1, cnt) : + result += '.' + self.tagpath[j] + return result + + + # list of absolute command byte values values that indicate + # various types of loop meachanisms typically used to generate vectors + + cmd_list = (0x76, 0x76) + + # peek at and return 1 byte that is ahead by i bytes + def peek(self, aheadi): + c = self.fo.read(aheadi) + if (len(c) == 0): + return None + self.fo.seek(-aheadi,1) + c = c[-1:] + return ord(c) + + + # get the next value from the file being processed + def getNext(self): + nbyte = self.peek(1); + if (nbyte == None): + return None + val = readEncodedNumber(self.fo) + return val + + + # format an arg by argtype + def formatArg(self, arg, argtype): + if (argtype == 'text') or (argtype == 'scalar_text') : + result = self.dict.lookup(arg) + elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') : + result = arg + elif (argtype == 'snippets') : + result = arg + else : + print "Error Unknown argtype %s" % argtype + sys.exit(-2) + return result + + + # process the next tag token, recursively handling subtags, + # arguments, and commands + def procToken(self, token): + + known_token = False + self.tag_push(token) + + if self.debug : print 'Processing: ', self.get_tagpath(0) + cnt = self.tagpath_len() + for j in xrange(cnt): + tkn = self.get_tagpath(j) + if tkn in self.token_tags : + num_args = self.token_tags[tkn][0] + argtype = self.token_tags[tkn][1] + subtags 
= self.token_tags[tkn][2] + splcase = self.token_tags[tkn][3] + ntags = -1 + known_token = True + break + + if known_token : + + # handle subtags if present + subtagres = [] + if (splcase == 1): + # this type of tag uses of escape marker 0x74 indicate subtag count + if self.peek(1) == 0x74: + skip = readEncodedNumber(self.fo) + subtags = 1 + num_args = 0 + + if (subtags == 1): + ntags = readEncodedNumber(self.fo) + if self.debug : print 'subtags: ' + token + ' has ' + str(ntags) + for j in xrange(ntags): + val = readEncodedNumber(self.fo) + subtagres.append(self.procToken(self.dict.lookup(val))) + + # arguments can be scalars or vectors of text or numbers + argres = [] + if num_args > 0 : + firstarg = self.peek(1) + if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'): + # single argument is a variable length vector of data + arg = readEncodedNumber(self.fo) + argres = self.decodeCMD(arg,argtype) + else : + # num_arg scalar arguments + for i in xrange(num_args): + argres.append(self.formatArg(readEncodedNumber(self.fo), argtype)) + + # build the return tag + result = [] + tkn = self.get_tagpath(0) + result.append(tkn) + result.append(subtagres) + result.append(argtype) + result.append(argres) + self.tag_pop() + return result + + # all tokens that need to be processed should be in the hash + # table if it may indicate a problem, either new token + # or an out of sync condition else: - prefs['WINEPREFIX'] = None + result = [] + if (self.debug): + print 'Unknown Token:', token + self.tag_pop() + return result + + + # special loop used to process code snippets + # it is NEVER used to format arguments. + # builds the snippetList + def doLoop72(self, argtype): + cnt = readEncodedNumber(self.fo) + if self.debug : + result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n' + result += 'of the document is indicated by snippet number sets at the\n' + result += 'end of each snippet. 
\n' + print result + for i in xrange(cnt): + if self.debug: print 'Snippet:',str(i) + snippet = [] + snippet.append(i) + val = readEncodedNumber(self.fo) + snippet.append(self.procToken(self.dict.lookup(val))) + self.snippetList.append(snippet) + return + + + + # general loop code gracisouly submitted by "skindle" - thank you! + def doLoop76Mode(self, argtype, cnt, mode): + result = [] + adj = 0 + if mode & 1: + adj = readEncodedNumber(self.fo) + mode = mode >> 1 + x = [] + for i in xrange(cnt): + x.append(readEncodedNumber(self.fo) - adj) + for i in xrange(mode): + for j in xrange(1, cnt): + x[j] = x[j] + x[j - 1] + for i in xrange(cnt): + result.append(self.formatArg(x[i],argtype)) + return result + + + # dispatches loop commands bytes with various modes + # The 0x76 style loops are used to build vectors + + # This was all derived by trial and error and + # new loop types may exist that are not handled here + # since they did not appear in the test cases + + def decodeCMD(self, cmd, argtype): + if (cmd == 0x76): + + # loop with cnt, and mode to control loop styles + cnt = readEncodedNumber(self.fo) + mode = readEncodedNumber(self.fo) + + if self.debug : print 'Loop for', cnt, 'with mode', mode, ': ' + return self.doLoop76Mode(argtype, cnt, mode) + + if self.dbug: print "Unknown command", cmd + result = [] + return result + + + + # add full tag path to injected snippets + def updateName(self, tag, prefix): + name = tag[0] + subtagList = tag[1] + argtype = tag[2] + argList = tag[3] + nname = prefix + '.' 
+ name + nsubtaglist = [] + for j in subtagList: + nsubtaglist.append(self.updateName(j,prefix)) + ntag = [] + ntag.append(nname) + ntag.append(nsubtaglist) + ntag.append(argtype) + ntag.append(argList) + return ntag + + + + # perform depth first injection of specified snippets into this one + def injectSnippets(self, snippet): + snipno, tag = snippet + name = tag[0] + subtagList = tag[1] + argtype = tag[2] + argList = tag[3] + nsubtagList = [] + if len(argList) > 0 : + for j in argList: + asnip = self.snippetList[j] + aso, atag = self.injectSnippets(asnip) + atag = self.updateName(atag, name) + nsubtagList.append(atag) + argtype='number' + argList=[] + if len(nsubtagList) > 0 : + subtagList.extend(nsubtagList) + tag = [] + tag.append(name) + tag.append(subtagList) + tag.append(argtype) + tag.append(argList) + snippet = [] + snippet.append(snipno) + snippet.append(tag) + return snippet + + + + # format the tag for output + def formatTag(self, node): + name = node[0] + subtagList = node[1] + argtype = node[2] + argList = node[3] + fullpathname = name.split('.') + nodename = fullpathname.pop() + ilvl = len(fullpathname) + indent = ' ' * (3 * ilvl) + rlst = [] + rlst.append(indent + '<' + nodename + '>') + if len(argList) > 0: + alst = [] + for j in argList: + if (argtype == 'text') or (argtype == 'scalar_text') : + alst.append(j + '|') + else : + alst.append(str(j) + ',') + argres = "".join(alst) + argres = argres[0:-1] + if argtype == 'snippets' : + rlst.append('snippets:' + argres) + else : + rlst.append(argres) + if len(subtagList) > 0 : + rlst.append('\n') + for j in subtagList: + if len(j) > 0 : + rlst.append(self.formatTag(j)) + rlst.append(indent + '\n') + else: + rlst.append('\n') + return "".join(rlst) + + + # flatten tag + def flattenTag(self, node): + name = node[0] + subtagList = node[1] + argtype = node[2] + argList = node[3] + rlst = [] + rlst.append(name) + if (len(argList) > 0): + alst = [] + for j in argList: + if (argtype == 'text') or (argtype == 
'scalar_text') : + alst.append(j + '|') + else : + alst.append(str(j) + '|') + argres = "".join(alst) + argres = argres[0:-1] + if argtype == 'snippets' : + rlst.append('.snippets=' + argres) + else : + rlst.append('=' + argres) + rlst.append('\n') + for j in subtagList: + if len(j) > 0 : + rlst.append(self.flattenTag(j)) + return "".join(rlst) + + + # reduce create xml output + def formatDoc(self, flat_xml): + rlst = [] + for j in self.doc : + if len(j) > 0: + if flat_xml: + rlst.append(self.flattenTag(j)) + else: + rlst.append(self.formatTag(j)) + result = "".join(rlst) + if self.debug : print result + return result + + + + # main loop - parse the page.dat files + # to create structured document and snippets + + # FIXME: value at end of magic appears to be a subtags count + # but for what? For now, inject an 'info" tag as it is in + # every dictionary and seems close to what is meant + # The alternative is to special case the last _ "0x5f" to mean something + + def process(self): + + # peek at the first bytes to see what type of file it is + magic = self.fo.read(9) + if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'): + first_token = 'info' + elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'): + skip = self.fo.read(2) + first_token = 'info' + elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'): + first_token = 'info' + elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'): + skip = self.fo.read(3) + first_token = 'info' + else : + # other0.dat file + first_token = None + self.fo.seek(-9,1) + + + # main loop to read and build the document tree + while True: + + if first_token != None : + # use "inserted" first token 'info' for page and glyph files + tag = self.procToken(first_token) + if len(tag) > 0 : + self.doc.append(tag) + first_token = None + + v = self.getNext() + if (v == None): + break + + if (v == 0x72): + self.doLoop72('number') + elif (v > 0) and (v < self.dict.getSize()) : + tag = self.procToken(self.dict.lookup(v)) + if len(tag) > 0 : + 
self.doc.append(tag) + else: + if self.debug: + print "Main Loop: Unknown value: %x" % v + if (v == 0): + if (self.peek(1) == 0x5f): + skip = self.fo.read(1) + first_token = 'info' + + # now do snippet injection + if len(self.snippetList) > 0 : + if self.debug : print 'Injecting Snippets:' + snippet = self.injectSnippets(self.snippetList[0]) + snipno = snippet[0] + tag_add = snippet[1] + if self.debug : print self.formatTag(tag_add) + if len(tag_add) > 0: + self.doc.append(tag_add) + + # handle generation of xml output + xmlpage = self.formatDoc(self.flat_xml) + + return xmlpage + + +def fromData(dict, fname): + flat_xml = True + debug = False + pp = PageParser(fname, dict, debug, flat_xml) + xmlpage = pp.process() + return xmlpage + +def getXML(dict, fname): + flat_xml = False + debug = False + pp = PageParser(fname, dict, debug, flat_xml) + xmlpage = pp.process() + return xmlpage + +def usage(): + print 'Usage: ' + print ' convert2xml.py dict0000.dat infile.dat ' + print ' ' + print ' Options:' + print ' -h print this usage help message ' + print ' -d turn on debug output to check for potential errors ' + print ' --flat-xml output the flattened xml page description only ' + print ' ' + print ' This program will attempt to convert a page*.dat file or ' + print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ' + print ' ' + print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ' + print ' the *.dat files from a Topaz format e-book.' 
+ +# +# Main +# + +def main(argv): + dictFile = "" + pageFile = "" + debug = False + flat_xml = False + printOutput = False + if len(argv) == 0: + printOutput = True + argv = sys.argv + + try: + opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"]) + + except getopt.GetoptError, err: + + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + sys.exit(2) + + if len(opts) == 0 and len(args) == 0 : + usage() + sys.exit(2) + + for o, a in opts: + if o =="-d": + debug=True + if o =="-h": + usage() + sys.exit(0) + if o =="--flat-xml": + flat_xml = True + + dictFile, pageFile = args[0], args[1] + + # read in the string table dictionary + dict = Dictionary(dictFile) + # dict.dumpDict() + + # create a page parser + pp = PageParser(pageFile, dict, debug, flat_xml) + + xmlpage = pp.process() + + if printOutput: + print xmlpage + return 0 + + return xmlpage + +if __name__ == '__main__': + sys.exit(main('')) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py index 3cdc820..e5647f4 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py index c412d7b..4dfd6c7 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py @@ -1,846 +1,249 @@ #! 
/usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) import sys -sys.stdout=Unbuffered(sys.stdout) - import csv import os import getopt from struct import pack from struct import unpack -class TpzDRMError(Exception): - pass -# Get a 7 bit encoded number from string. The most -# significant byte comes first and has the high bit (8th) set +class PParser(object): + def __init__(self, gd, flatxml, meta_array): + self.gd = gd + self.flatdoc = flatxml.split('\n') + self.docSize = len(self.flatdoc) + self.temp = [] -def readEncodedNumber(file): - flag = False - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) + self.ph = -1 + self.pw = -1 + startpos = self.posinDoc('page.h') or self.posinDoc('book.h') + for p in startpos: + (name, argres) = self.lineinDoc(p) + self.ph = max(self.ph, int(argres)) + startpos = self.posinDoc('page.w') or self.posinDoc('book.w') + for p in startpos: + (name, argres) = self.lineinDoc(p) + self.pw = max(self.pw, int(argres)) - if data == 0xFF: - flag = True - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) + if self.ph <= 0: + self.ph = int(meta_array.get('pageHeight', '11000')) + if self.pw <= 0: + self.pw = int(meta_array.get('pageWidth', '8500')) - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - datax = (datax <<7) + (data & 0x7F) - data = datax + res = [] + startpos = self.posinDoc('info.glyph.x') + for p in startpos: + argres = self.getDataatPos('info.glyph.x', p) + res.extend(argres) + self.gx = res - if flag: - data = -data - return data + res = [] + startpos = self.posinDoc('info.glyph.y') + for p in startpos: + argres = 
self.getDataatPos('info.glyph.y', p) + res.extend(argres) + self.gy = res + + res = [] + startpos = self.posinDoc('info.glyph.glyphID') + for p in startpos: + argres = self.getDataatPos('info.glyph.glyphID', p) + res.extend(argres) + self.gid = res -# returns a binary string that encodes a number into 7 bits -# most significant byte first which has the high bit set + # return tag at line pos in document + def lineinDoc(self, pos) : + if (pos >= 0) and (pos < self.docSize) : + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + return name, argres -def encodeNumber(number): - result = "" - negative = False - flag = 0 - - if number < 0 : - number = -number + 1 - negative = True - - while True: - byte = number & 0x7F - number = number >> 7 - byte += flag - result += chr(byte) - flag = 0x80 - if number == 0 : - if (byte == 0xFF and negative == False) : - result += chr(0x80) - break - - if negative: - result += chr(0xFF) - - return result[::-1] - - - -# create / read a length prefixed string from the file - -def lengthPrefixString(data): - return encodeNumber(len(data))+data - -def readString(file): - stringLength = readEncodedNumber(file) - if (stringLength == None): - return "" - sv = file.read(stringLength) - if (len(sv) != stringLength): - return "" - return unpack(str(stringLength)+"s",sv)[0] - - -# convert a binary string generated by encodeNumber (7 bit encoded number) -# to the value you would find inside the page*.dat files to be processed - -def convert(i): - result = '' - val = encodeNumber(i) - for j in xrange(len(val)): - c = ord(val[j:j+1]) - result += '%02x' % c - return result - - - -# the complete string table used to store all book text content -# as well as the xml tokens and values that make sense out of it - -class Dictionary(object): - def __init__(self, dictFile): - self.filename = dictFile - self.size = 0 - self.fo = file(dictFile,'rb') - self.stable = [] - self.size = 
readEncodedNumber(self.fo) - for i in xrange(self.size): - self.stable.append(self.escapestr(readString(self.fo))) - self.pos = 0 - - def escapestr(self, str): - str = str.replace('&','&') - str = str.replace('<','<') - str = str.replace('>','>') - str = str.replace('=','=') - return str - - def lookup(self,val): - if ((val >= 0) and (val < self.size)) : - self.pos = val - return self.stable[self.pos] + # find tag in doc if within pos to end inclusive + def findinDoc(self, tagpath, pos, end) : + result = None + if end == -1 : + end = self.docSize else: - print "Error - %d outside of string table limits" % val - raise TpzDRMError('outside of string table limits') - # sys.exit(-1) + end = min(self.docSize, end) + foundat = -1 + for j in xrange(pos, end): + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + if name.endswith(tagpath) : + result = argres + foundat = j + break + return foundat, result - def getSize(self): - return self.size + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos - def getPos(self): - return self.pos - - def dumpDict(self): - for i in xrange(self.size): - print "%d %s %s" % (i, convert(i), self.stable[i]) - return - -# parses the xml snippets that are represented by each page*.dat file. 
-# also parses the other0.dat file - the main stylesheet -# and information used to inject the xml snippets into page*.dat files - -class PageParser(object): - def __init__(self, filename, dict, debug, flat_xml): - self.fo = file(filename,'rb') - self.id = os.path.basename(filename).replace('.dat','') - self.dict = dict - self.debug = debug - self.flat_xml = flat_xml - self.tagpath = [] - self.doc = [] - self.snippetList = [] - - - # hash table used to enable the decoding process - # This has all been developed by trial and error so it may still have omissions or - # contain errors - # Format: - # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped) - - token_tags = { - 'x' : (1, 'scalar_number', 0, 0), - 'y' : (1, 'scalar_number', 0, 0), - 'h' : (1, 'scalar_number', 0, 0), - 'w' : (1, 'scalar_number', 0, 0), - 'firstWord' : (1, 'scalar_number', 0, 0), - 'lastWord' : (1, 'scalar_number', 0, 0), - 'rootID' : (1, 'scalar_number', 0, 0), - 'stemID' : (1, 'scalar_number', 0, 0), - 'type' : (1, 'scalar_text', 0, 0), - - 'info' : (0, 'number', 1, 0), - - 'info.word' : (0, 'number', 1, 1), - 'info.word.ocrText' : (1, 'text', 0, 0), - 'info.word.firstGlyph' : (1, 'raw', 0, 0), - 'info.word.lastGlyph' : (1, 'raw', 0, 0), - 'info.word.bl' : (1, 'raw', 0, 0), - 'info.word.link_id' : (1, 'number', 0, 0), - - 'glyph' : (0, 'number', 1, 1), - 'glyph.x' : (1, 'number', 0, 0), - 'glyph.y' : (1, 'number', 0, 0), - 'glyph.glyphID' : (1, 'number', 0, 0), - - 'dehyphen' : (0, 'number', 1, 1), - 'dehyphen.rootID' : (1, 'number', 0, 0), - 'dehyphen.stemID' : (1, 'number', 0, 0), - 'dehyphen.stemPage' : (1, 'number', 0, 0), - 'dehyphen.sh' : (1, 'number', 0, 0), - - 'links' : (0, 'number', 1, 1), - 'links.page' : (1, 'number', 0, 0), - 'links.rel' : (1, 'number', 0, 0), - 'links.row' : (1, 'number', 0, 0), - 'links.title' : (1, 'text', 0, 0), - 'links.href' : (1, 'text', 0, 0), - 'links.type' : (1, 'text', 0, 0), - 'links.id' : (1, 
'number', 0, 0), - - 'paraCont' : (0, 'number', 1, 1), - 'paraCont.rootID' : (1, 'number', 0, 0), - 'paraCont.stemID' : (1, 'number', 0, 0), - 'paraCont.stemPage' : (1, 'number', 0, 0), - - 'paraStems' : (0, 'number', 1, 1), - 'paraStems.stemID' : (1, 'number', 0, 0), - - 'wordStems' : (0, 'number', 1, 1), - 'wordStems.stemID' : (1, 'number', 0, 0), - - 'empty' : (1, 'snippets', 1, 0), - - 'page' : (1, 'snippets', 1, 0), - 'page.pageid' : (1, 'scalar_text', 0, 0), - 'page.pagelabel' : (1, 'scalar_text', 0, 0), - 'page.type' : (1, 'scalar_text', 0, 0), - 'page.h' : (1, 'scalar_number', 0, 0), - 'page.w' : (1, 'scalar_number', 0, 0), - 'page.startID' : (1, 'scalar_number', 0, 0), - - 'group' : (1, 'snippets', 1, 0), - 'group.type' : (1, 'scalar_text', 0, 0), - 'group._tag' : (1, 'scalar_text', 0, 0), - 'group.orientation': (1, 'scalar_text', 0, 0), - - 'region' : (1, 'snippets', 1, 0), - 'region.type' : (1, 'scalar_text', 0, 0), - 'region.x' : (1, 'scalar_number', 0, 0), - 'region.y' : (1, 'scalar_number', 0, 0), - 'region.h' : (1, 'scalar_number', 0, 0), - 'region.w' : (1, 'scalar_number', 0, 0), - 'region.orientation' : (1, 'scalar_text', 0, 0), - - 'empty_text_region' : (1, 'snippets', 1, 0), - - 'img' : (1, 'snippets', 1, 0), - 'img.x' : (1, 'scalar_number', 0, 0), - 'img.y' : (1, 'scalar_number', 0, 0), - 'img.h' : (1, 'scalar_number', 0, 0), - 'img.w' : (1, 'scalar_number', 0, 0), - 'img.src' : (1, 'scalar_number', 0, 0), - 'img.color_src' : (1, 'scalar_number', 0, 0), - - 'paragraph' : (1, 'snippets', 1, 0), - 'paragraph.class' : (1, 'scalar_text', 0, 0), - 'paragraph.firstWord' : (1, 'scalar_number', 0, 0), - 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), - 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), - 'paragraph.gridSize' : (1, 'scalar_number', 0, 0), - 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0), - 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0), - 'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0), - 
'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0), - - - 'word_semantic' : (1, 'snippets', 1, 1), - 'word_semantic.type' : (1, 'scalar_text', 0, 0), - 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0), - 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0), - - 'word' : (1, 'snippets', 1, 0), - 'word.type' : (1, 'scalar_text', 0, 0), - 'word.class' : (1, 'scalar_text', 0, 0), - 'word.firstGlyph' : (1, 'scalar_number', 0, 0), - 'word.lastGlyph' : (1, 'scalar_number', 0, 0), - - '_span' : (1, 'snippets', 1, 0), - '_span.firstWord' : (1, 'scalar_number', 0, 0), - '_span.lastWord' : (1, 'scalar_number', 0, 0), - '_span.gridSize' : (1, 'scalar_number', 0, 0), - '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0), - '_span.gridTopCenter' : (1, 'scalar_number', 0, 0), - '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0), - '_span.gridEndCenter' : (1, 'scalar_number', 0, 0), - - 'span' : (1, 'snippets', 1, 0), - 'span.firstWord' : (1, 'scalar_number', 0, 0), - 'span.lastWord' : (1, 'scalar_number', 0, 0), - 'span.gridSize' : (1, 'scalar_number', 0, 0), - 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0), - 'span.gridTopCenter' : (1, 'scalar_number', 0, 0), - 'span.gridBeginCenter' : (1, 'scalar_number', 0, 0), - 'span.gridEndCenter' : (1, 'scalar_number', 0, 0), - - 'extratokens' : (1, 'snippets', 1, 0), - 'extratokens.type' : (1, 'scalar_text', 0, 0), - 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0), - 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0), - - 'glyph.h' : (1, 'number', 0, 0), - 'glyph.w' : (1, 'number', 0, 0), - 'glyph.use' : (1, 'number', 0, 0), - 'glyph.vtx' : (1, 'number', 0, 1), - 'glyph.len' : (1, 'number', 0, 1), - 'glyph.dpi' : (1, 'number', 0, 0), - 'vtx' : (0, 'number', 1, 1), - 'vtx.x' : (1, 'number', 0, 0), - 'vtx.y' : (1, 'number', 0, 0), - 'len' : (0, 'number', 1, 1), - 'len.n' : (1, 'number', 0, 0), - - 'book' : (1, 'snippets', 1, 0), - 'version' : (1, 'snippets', 1, 0), - 'version.FlowEdit_1_id' : (1, 'scalar_text', 
0, 0), - 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.Schema_id' : (1, 'scalar_text', 0, 0), - 'version.Schema_version' : (1, 'scalar_text', 0, 0), - 'version.Topaz_version' : (1, 'scalar_text', 0, 0), - 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0), - 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0), - 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.chapterheaders' : (1, 'scalar_text', 0, 0), - 'version.creation_date' : (1, 'scalar_text', 0, 0), - 'version.header_footer' : (1, 'scalar_text', 0, 0), - 'version.init_from_ocr' : (1, 'scalar_text', 0, 0), - 'version.letter_insertion' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0), - 'version.findlists' : (1, 'scalar_text', 0, 0), - 'version.page_num' : (1, 'scalar_text', 0, 0), - 'version.page_type' : (1, 'scalar_text', 0, 0), - 'version.bad_text' : (1, 'scalar_text', 0, 0), - 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0), - 'version.margins' : (1, 'scalar_text', 0, 0), - 'version.staggered_lines' : (1, 'scalar_text', 0, 0), - 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0), - 'version.toc' : (1, 'scalar_text', 0, 0), - - 'stylesheet' : (1, 'snippets', 1, 0), - 'style' : (1, 'snippets', 1, 0), - 'style._tag' : (1, 'scalar_text', 0, 0), - 'style.type' : (1, 'scalar_text', 0, 0), - 'style._parent_type' : (1, 'scalar_text', 0, 0), - 'style.class' : (1, 'scalar_text', 0, 0), - 'style._after_class' : (1, 'scalar_text', 0, 0), - 'rule' : (1, 'snippets', 1, 0), - 'rule.attr' : (1, 'scalar_text', 0, 0), - 'rule.value' : (1, 'scalar_text', 0, 0), - - 'original' : (0, 'number', 1, 1), - 'original.pnum' : (1, 'number', 0, 0), - 'original.pid' : (1, 'text', 0, 0), - 'pages' : (0, 'number', 1, 1), - 'pages.ref' : (1, 'number', 0, 0), - 'pages.id' : 
(1, 'number', 0, 0), - 'startID' : (0, 'number', 1, 1), - 'startID.page' : (1, 'number', 0, 0), - 'startID.id' : (1, 'number', 0, 0), - - } - - - # full tag path record keeping routines - def tag_push(self, token): - self.tagpath.append(token) - def tag_pop(self): - if len(self.tagpath) > 0 : - self.tagpath.pop() - def tagpath_len(self): - return len(self.tagpath) - def get_tagpath(self, i): - cnt = len(self.tagpath) - if i < cnt : result = self.tagpath[i] - for j in xrange(i+1, cnt) : - result += '.' + self.tagpath[j] - return result - - - # list of absolute command byte values values that indicate - # various types of loop meachanisms typically used to generate vectors - - cmd_list = (0x76, 0x76) - - # peek at and return 1 byte that is ahead by i bytes - def peek(self, aheadi): - c = self.fo.read(aheadi) - if (len(c) == 0): - return None - self.fo.seek(-aheadi,1) - c = c[-1:] - return ord(c) - - - # get the next value from the file being processed - def getNext(self): - nbyte = self.peek(1); - if (nbyte == None): - return None - val = readEncodedNumber(self.fo) - return val - - - # format an arg by argtype - def formatArg(self, arg, argtype): - if (argtype == 'text') or (argtype == 'scalar_text') : - result = self.dict.lookup(arg) - elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') : - result = arg - elif (argtype == 'snippets') : - result = arg - else : - print "Error Unknown argtype %s" % argtype - sys.exit(-2) - return result - - - # process the next tag token, recursively handling subtags, - # arguments, and commands - def procToken(self, token): - - known_token = False - self.tag_push(token) - - if self.debug : print 'Processing: ', self.get_tagpath(0) - cnt = self.tagpath_len() + def getData(self, path): + result = None + cnt = len(self.flatdoc) for j in xrange(cnt): - tkn = self.get_tagpath(j) - if tkn in self.token_tags : - num_args = self.token_tags[tkn][0] - argtype = self.token_tags[tkn][1] - subtags = 
self.token_tags[tkn][2] - splcase = self.token_tags[tkn][3] - ntags = -1 - known_token = True - break - - if known_token : - - # handle subtags if present - subtagres = [] - if (splcase == 1): - # this type of tag uses of escape marker 0x74 indicate subtag count - if self.peek(1) == 0x74: - skip = readEncodedNumber(self.fo) - subtags = 1 - num_args = 0 - - if (subtags == 1): - ntags = readEncodedNumber(self.fo) - if self.debug : print 'subtags: ' + token + ' has ' + str(ntags) - for j in xrange(ntags): - val = readEncodedNumber(self.fo) - subtagres.append(self.procToken(self.dict.lookup(val))) - - # arguments can be scalars or vectors of text or numbers - argres = [] - if num_args > 0 : - firstarg = self.peek(1) - if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'): - # single argument is a variable length vector of data - arg = readEncodedNumber(self.fo) - argres = self.decodeCMD(arg,argtype) - else : - # num_arg scalar arguments - for i in xrange(num_args): - argres.append(self.formatArg(readEncodedNumber(self.fo), argtype)) - - # build the return tag - result = [] - tkn = self.get_tagpath(0) - result.append(tkn) - result.append(subtagres) - result.append(argtype) - result.append(argres) - self.tag_pop() - return result - - # all tokens that need to be processed should be in the hash - # table if it may indicate a problem, either new token - # or an out of sync condition - else: - result = [] - if (self.debug): - print 'Unknown Token:', token - self.tag_pop() - return result - - - # special loop used to process code snippets - # it is NEVER used to format arguments. - # builds the snippetList - def doLoop72(self, argtype): - cnt = readEncodedNumber(self.fo) - if self.debug : - result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n' - result += 'of the document is indicated by snippet number sets at the\n' - result += 'end of each snippet. 
\n' - print result - for i in xrange(cnt): - if self.debug: print 'Snippet:',str(i) - snippet = [] - snippet.append(i) - val = readEncodedNumber(self.fo) - snippet.append(self.procToken(self.dict.lookup(val))) - self.snippetList.append(snippet) - return - - - - # general loop code gracisouly submitted by "skindle" - thank you! - def doLoop76Mode(self, argtype, cnt, mode): - result = [] - adj = 0 - if mode & 1: - adj = readEncodedNumber(self.fo) - mode = mode >> 1 - x = [] - for i in xrange(cnt): - x.append(readEncodedNumber(self.fo) - adj) - for i in xrange(mode): - for j in xrange(1, cnt): - x[j] = x[j] + x[j - 1] - for i in xrange(cnt): - result.append(self.formatArg(x[i],argtype)) - return result - - - # dispatches loop commands bytes with various modes - # The 0x76 style loops are used to build vectors - - # This was all derived by trial and error and - # new loop types may exist that are not handled here - # since they did not appear in the test cases - - def decodeCMD(self, cmd, argtype): - if (cmd == 0x76): - - # loop with cnt, and mode to control loop styles - cnt = readEncodedNumber(self.fo) - mode = readEncodedNumber(self.fo) - - if self.debug : print 'Loop for', cnt, 'with mode', mode, ': ' - return self.doLoop76Mode(argtype, cnt, mode) - - if self.dbug: print "Unknown command", cmd - result = [] - return result - - - - # add full tag path to injected snippets - def updateName(self, tag, prefix): - name = tag[0] - subtagList = tag[1] - argtype = tag[2] - argList = tag[3] - nname = prefix + '.' 
+ name - nsubtaglist = [] - for j in subtagList: - nsubtaglist.append(self.updateName(j,prefix)) - ntag = [] - ntag.append(nname) - ntag.append(nsubtaglist) - ntag.append(argtype) - ntag.append(argList) - return ntag - - - - # perform depth first injection of specified snippets into this one - def injectSnippets(self, snippet): - snipno, tag = snippet - name = tag[0] - subtagList = tag[1] - argtype = tag[2] - argList = tag[3] - nsubtagList = [] - if len(argList) > 0 : - for j in argList: - asnip = self.snippetList[j] - aso, atag = self.injectSnippets(asnip) - atag = self.updateName(atag, name) - nsubtagList.append(atag) - argtype='number' - argList=[] - if len(nsubtagList) > 0 : - subtagList.extend(nsubtagList) - tag = [] - tag.append(name) - tag.append(subtagList) - tag.append(argtype) - tag.append(argList) - snippet = [] - snippet.append(snipno) - snippet.append(tag) - return snippet - - - - # format the tag for output - def formatTag(self, node): - name = node[0] - subtagList = node[1] - argtype = node[2] - argList = node[3] - fullpathname = name.split('.') - nodename = fullpathname.pop() - ilvl = len(fullpathname) - indent = ' ' * (3 * ilvl) - rlst = [] - rlst.append(indent + '<' + nodename + '>') - if len(argList) > 0: - alst = [] - for j in argList: - if (argtype == 'text') or (argtype == 'scalar_text') : - alst.append(j + '|') - else : - alst.append(str(j) + ',') - argres = "".join(alst) - argres = argres[0:-1] - if argtype == 'snippets' : - rlst.append('snippets:' + argres) - else : - rlst.append(argres) - if len(subtagList) > 0 : - rlst.append('\n') - for j in subtagList: - if len(j) > 0 : - rlst.append(self.formatTag(j)) - rlst.append(indent + '\n') - else: - rlst.append('\n') - return "".join(rlst) - - - # flatten tag - def flattenTag(self, node): - name = node[0] - subtagList = node[1] - argtype = node[2] - argList = node[3] - rlst = [] - rlst.append(name) - if (len(argList) > 0): - alst = [] - for j in argList: - if (argtype == 'text') or (argtype == 
'scalar_text') : - alst.append(j + '|') - else : - alst.append(str(j) + '|') - argres = "".join(alst) - argres = argres[0:-1] - if argtype == 'snippets' : - rlst.append('.snippets=' + argres) - else : - rlst.append('=' + argres) - rlst.append('\n') - for j in subtagList: - if len(j) > 0 : - rlst.append(self.flattenTag(j)) - return "".join(rlst) - - - # reduce create xml output - def formatDoc(self, flat_xml): - rlst = [] - for j in self.doc : - if len(j) > 0: - if flat_xml: - rlst.append(self.flattenTag(j)) - else: - rlst.append(self.formatTag(j)) - result = "".join(rlst) - if self.debug : print result - return result - - - - # main loop - parse the page.dat files - # to create structured document and snippets - - # FIXME: value at end of magic appears to be a subtags count - # but for what? For now, inject an 'info" tag as it is in - # every dictionary and seems close to what is meant - # The alternative is to special case the last _ "0x5f" to mean something - - def process(self): - - # peek at the first bytes to see what type of file it is - magic = self.fo.read(9) - if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'): - first_token = 'info' - elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'): - skip = self.fo.read(2) - first_token = 'info' - elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'): - first_token = 'info' - elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'): - skip = self.fo.read(3) - first_token = 'info' - else : - # other0.dat file - first_token = None - self.fo.seek(-9,1) - - - # main loop to read and build the document tree - while True: - - if first_token != None : - # use "inserted" first token 'info' for page and glyph files - tag = self.procToken(first_token) - if len(tag) > 0 : - self.doc.append(tag) - first_token = None - - v = self.getNext() - if (v == None): - break - - if (v == 0x72): - self.doLoop72('number') - elif (v > 0) and (v < self.dict.getSize()) : - tag = self.procToken(self.dict.lookup(v)) - if len(tag) > 0 : - 
self.doc.append(tag) + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') else: - if self.debug: - print "Main Loop: Unknown value: %x" % v - if (v == 0): - if (self.peek(1) == 0x5f): - skip = self.fo.read(1) - first_token = 'info' + name = item + argres = [] + if (name.endswith(path)): + result = argres + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result - # now do snippet injection - if len(self.snippetList) > 0 : - if self.debug : print 'Injecting Snippets:' - snippet = self.injectSnippets(self.snippetList[0]) - snipno = snippet[0] - tag_add = snippet[1] - if self.debug : print self.formatTag(tag_add) - if len(tag_add) > 0: - self.doc.append(tag_add) + def getDataatPos(self, path, pos): + result = None + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + if (name.endswith(path)): + result = argres + return result - # handle generation of xml output - xmlpage = self.formatDoc(self.flat_xml) + def getDataTemp(self, path): + result = None + cnt = len(self.temp) + for j in xrange(cnt): + item = self.temp[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name.endswith(path)): + result = argres + self.temp.pop(j) + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result - return xmlpage + def getImages(self): + result = [] + self.temp = self.flatdoc + while (self.getDataTemp('img') != None): + h = self.getDataTemp('img.h')[0] + w = self.getDataTemp('img.w')[0] + x = self.getDataTemp('img.x')[0] + y = self.getDataTemp('img.y')[0] + src = self.getDataTemp('img.src')[0] + result.append('\n' % (src, x, y, w, h)) + return result + + def 
getGlyphs(self): + result = [] + if (self.gid != None) and (len(self.gid) > 0): + glyphs = [] + for j in set(self.gid): + glyphs.append(j) + glyphs.sort() + for gid in glyphs: + id='id="gl%d"' % gid + path = self.gd.lookup(id) + if path: + result.append(id + ' ' + path) + return result -def fromData(dict, fname): - flat_xml = True - debug = False - pp = PageParser(fname, dict, debug, flat_xml) - xmlpage = pp.process() - return xmlpage +def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): + mlst = [] + pp = PParser(gdict, flat_xml, meta_array) + mlst.append('\n') + if (raw): + mlst.append('\n') + mlst.append('\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)) + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) + else: + mlst.append('\n') + mlst.append('\n') + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) + mlst.append('\n') + mlst.append('\n') + mlst.append('\n') + mlst.append('
\n') + if previd == None: + mlst.append('\n') + else: + mlst.append('\n') -def getXML(dict, fname): - flat_xml = False - debug = False - pp = PageParser(fname, dict, debug, flat_xml) - xmlpage = pp.process() - return xmlpage - -def usage(): - print 'Usage: ' - print ' convert2xml.py dict0000.dat infile.dat ' - print ' ' - print ' Options:' - print ' -h print this usage help message ' - print ' -d turn on debug output to check for potential errors ' - print ' --flat-xml output the flattened xml page description only ' - print ' ' - print ' This program will attempt to convert a page*.dat file or ' - print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ' - print ' ' - print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ' - print ' the *.dat files from a Topaz format e-book.' - -# -# Main -# - -def main(argv): - dictFile = "" - pageFile = "" - debug = False - flat_xml = False - printOutput = False - if len(argv) == 0: - printOutput = True - argv = sys.argv - - try: - opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"]) - - except getopt.GetoptError, err: - - # print help information and exit: - print str(err) # will print something like "option -a not recognized" - usage() - sys.exit(2) - - if len(opts) == 0 and len(args) == 0 : - usage() - sys.exit(2) - - for o, a in opts: - if o =="-d": - debug=True - if o =="-h": - usage() - sys.exit(0) - if o =="--flat-xml": - flat_xml = True - - dictFile, pageFile = args[0], args[1] - - # read in the string table dictionary - dict = Dictionary(dictFile) - # dict.dumpDict() - - # create a page parser - pp = PageParser(pageFile, dict, debug, flat_xml) - - xmlpage = pp.process() - - if printOutput: - print xmlpage - return 0 - - return xmlpage - -if __name__ == '__main__': - sys.exit(main('')) + mlst.append('' % (pp.pw, pp.ph)) + if (pp.gid != None): + mlst.append('\n') + gdefs = pp.getGlyphs() + for j in xrange(0,len(gdefs)): + mlst.append(gdefs[j]) + mlst.append('\n') + img = 
pp.getImages() + if (img != None): + for j in xrange(0,len(img)): + mlst.append(img[j]) + if (pp.gid != None): + for j in xrange(0,len(pp.gid)): + mlst.append('\n' % (pp.gid[j], pp.gx[j], pp.gy[j])) + if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): + xpos = "%d" % (pp.pw // 3) + ypos = "%d" % (pp.ph // 3) + mlst.append('This page intentionally left blank.\n') + if (raw) : + mlst.append('') + else : + mlst.append('\n') + if nextid == None: + mlst.append('\n') + else : + mlst.append('\n') + mlst.append('
\n') + mlst.append('
zoom in - zoom out
\n') + mlst.append('\n') + mlst.append('\n') + return "".join(mlst) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py index e5647f4..9733887 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py @@ -1,147 +1,148 @@ #! /usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) import sys +sys.stdout=Unbuffered(sys.stdout) + import csv import os -import math import getopt from struct import pack from struct import unpack +class TpzDRMError(Exception): + pass -class DocParser(object): - def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage): - self.id = os.path.basename(fileid).replace('.dat','') - self.svgcount = 0 - self.docList = flatxml.split('\n') - self.docSize = len(self.docList) - self.classList = {} - self.bookDir = bookDir - self.gdict = gdict - tmpList = classlst.split('\n') - for pclass in tmpList: - if pclass != '': - # remove the leading period from the css name - cname = pclass[1:] - self.classList[cname] = True - self.fixedimage = fixedimage - self.ocrtext = [] - self.link_id = [] - self.link_title = [] - self.link_page = [] - self.link_href = [] - self.link_type = [] - self.dehyphen_rootid = [] - self.paracont_stemid = [] - self.parastems_stemid = [] +# local support routines +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +if inCalibre : + from calibre_plugins.k4mobidedrm import convert2xml + from calibre_plugins.k4mobidedrm import flatxml2html + from calibre_plugins.k4mobidedrm import flatxml2svg + from calibre_plugins.k4mobidedrm import stylexml2css +else : + import convert2xml + import flatxml2html + import flatxml2svg + import 
stylexml2css + +# global switch +buildXML = False + +# Get a 7 bit encoded number from a file +def readEncodedNumber(file): + flag = False + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data == 0xFF: + flag = True + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + datax = (datax <<7) + (data & 0x7F) + data = datax + if flag: + data = -data + return data + +# Get a length prefixed string from the file +def lengthPrefixString(data): + return encodeNumber(len(data))+data + +def readString(file): + stringLength = readEncodedNumber(file) + if (stringLength == None): + return None + sv = file.read(stringLength) + if (len(sv) != stringLength): + return "" + return unpack(str(stringLength)+"s",sv)[0] + +def getMetaArray(metaFile): + # parse the meta file + result = {} + fo = file(metaFile,'rb') + size = readEncodedNumber(fo) + for i in xrange(size): + tag = readString(fo) + value = readString(fo) + result[tag] = value + # print tag, value + fo.close() + return result - def getGlyph(self, gid): - result = '' - id='id="gl%d"' % gid - return self.gdict.lookup(id) - - def glyphs_to_image(self, glyphList): - - def extract(path, key): - b = path.find(key) + len(key) - e = path.find(' ',b) - return int(path[b:e]) - - svgDir = os.path.join(self.bookDir,'svg') - - imgDir = os.path.join(self.bookDir,'img') - imgname = self.id + '_%04d.svg' % self.svgcount - imgfile = os.path.join(imgDir,imgname) - - # get glyph information - gxList = self.getData('info.glyph.x',0,-1) - gyList = self.getData('info.glyph.y',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) - - gids = [] - maxws = [] - maxhs = [] - xs = [] - ys = [] - gdefs = [] - - # get path defintions, positions, dimensions for each glyph - # that makes up the image, and find min x and min y to reposition origin - minx = -1 - miny = -1 - for j in 
glyphList: - gid = gidList[j] - gids.append(gid) - - xs.append(gxList[j]) - if minx == -1: minx = gxList[j] - else : minx = min(minx, gxList[j]) - - ys.append(gyList[j]) - if miny == -1: miny = gyList[j] - else : miny = min(miny, gyList[j]) - - path = self.getGlyph(gid) - gdefs.append(path) - - maxws.append(extract(path,'width=')) - maxhs.append(extract(path,'height=')) +# dictionary of all text strings by index value +class Dictionary(object): + def __init__(self, dictFile): + self.filename = dictFile + self.size = 0 + self.fo = file(dictFile,'rb') + self.stable = [] + self.size = readEncodedNumber(self.fo) + for i in xrange(self.size): + self.stable.append(self.escapestr(readString(self.fo))) + self.pos = 0 + def escapestr(self, str): + str = str.replace('&','&') + str = str.replace('<','<') + str = str.replace('>','>') + str = str.replace('=','=') + return str + def lookup(self,val): + if ((val >= 0) and (val < self.size)) : + self.pos = val + return self.stable[self.pos] + else: + print "Error - %d outside of string table limits" % val + raise TpzDRMError('outside or string table limits') + # sys.exit(-1) + def getSize(self): + return self.size + def getPos(self): + return self.pos - # change the origin to minx, miny and calc max height and width - maxw = maxws[0] + xs[0] - minx - maxh = maxhs[0] + ys[0] - miny - for j in xrange(0, len(xs)): - xs[j] = xs[j] - minx - ys[j] = ys[j] - miny - maxw = max( maxw, (maxws[j] + xs[j]) ) - maxh = max( maxh, (maxhs[j] + ys[j]) ) - - # open the image file for output - ifile = open(imgfile,'w') - ifile.write('\n') - ifile.write('\n') - ifile.write('\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh)) - ifile.write('\n') - for j in xrange(0,len(gdefs)): - ifile.write(gdefs[j]) - ifile.write('\n') - for j in xrange(0,len(gids)): - ifile.write('\n' % (gids[j], xs[j], ys[j])) - ifile.write('') - ifile.close() - - return 0 - - - - # return tag at line pos in document - def lineinDoc(self, pos) : - if (pos >= 0) and (pos 
< self.docSize) : - item = self.docList[pos] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - return name, argres - - - # find tag in doc if within pos to end inclusive +class PageDimParser(object): + def __init__(self, flatxml): + self.flatdoc = flatxml.split('\n') + # find tag if within pos to end inclusive def findinDoc(self, tagpath, pos, end) : result = None + docList = self.flatdoc + cnt = len(docList) if end == -1 : - end = self.docSize + end = cnt else: - end = min(self.docSize, end) + end = min(cnt,end) foundat = -1 for j in xrange(pos, end): - item = self.docList[j] + item = docList[j] if item.find('=') >= 0: - (name, argres) = item.split('=',1) + (name, argres) = item.split('=') else : name = item argres = '' @@ -150,644 +151,571 @@ class DocParser(object): foundat = j break return foundat, result - - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - - # returns a vector of integers for the tagpath - def getData(self, tagpath, pos, end): - argres=[] - (foundat, argt) = self.findinDoc(tagpath, pos, end) - if (argt != None) and (len(argt) > 0) : - argList = argt.split('|') - argres = [ int(strval) for strval in argList] - return argres - - - # get the class - def getClass(self, pclass): - nclass = pclass - - # class names are an issue given topaz may start them with numerals (not allowed), - # use a mix of cases (which cause some browsers problems), and actually - # attach numbers after "_reclustered*" to the end to deal classeses that inherit - # from a base class (but then not actually provide all of these _reclustereed - # classes in the stylesheet! 
- - # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass - # that exists in the stylesheet first, and then adding this specific class - # after - - # also some class names have spaces in them so need to convert to dashes - if nclass != None : - nclass = nclass.replace(' ','-') - classres = '' - nclass = nclass.lower() - nclass = 'cl-' + nclass - baseclass = '' - # graphic is the base class for captions - if nclass.find('cl-cap-') >=0 : - classres = 'graphic' + ' ' - else : - # strip to find baseclass - p = nclass.find('_') - if p > 0 : - baseclass = nclass[0:p] - if baseclass in self.classList: - classres += baseclass + ' ' - classres += nclass - nclass = classres - return nclass - - - # develop a sorted description of the starting positions of - # groups and regions on the page, as well as the page type - def PageDescription(self): - - def compare(x, y): - (xtype, xval) = x - (ytype, yval) = y - if xval > yval: - return 1 - if xval == yval: - return 0 - return -1 - - result = [] - (pos, pagetype) = self.findinDoc('page.type',0,-1) - - groupList = self.posinDoc('page.group') - groupregionList = self.posinDoc('page.group.region') - pageregionList = self.posinDoc('page.region') - # integrate into one list - for j in groupList: - result.append(('grpbeg',j)) - for j in groupregionList: - result.append(('gregion',j)) - for j in pageregionList: - result.append(('pregion',j)) - result.sort(compare) - - # insert group end and page end indicators - inGroup = False - j = 0 - while True: - if j == len(result): break - rtype = result[j][0] - rval = result[j][1] - if not inGroup and (rtype == 'grpbeg') : - inGroup = True - j = j + 1 - elif inGroup and (rtype in ('grpbeg', 'pregion')): - result.insert(j,('grpend',rval)) - inGroup = False - else: - j = j + 1 - if inGroup: - result.append(('grpend',-1)) - result.append(('pageend', -1)) - return pagetype, result - - - - # build a description of the paragraph - def getParaDescription(self, start, end, 
regtype): - - result = [] - - # paragraph - (pos, pclass) = self.findinDoc('paragraph.class',start,end) - - pclass = self.getClass(pclass) - - # if paragraph uses extratokens (extra glyphs) then make it fixed - (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end) - - # build up a description of the paragraph in result and return it - # first check for the basic - all words paragraph - (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end) - (pos, slast) = self.findinDoc('paragraph.lastWord',start,end) - if (sfirst != None) and (slast != None) : - first = int(sfirst) - last = int(slast) - - makeImage = (regtype == 'vertical') or (regtype == 'table') - makeImage = makeImage or (extraglyphs != None) - if self.fixedimage: - makeImage = makeImage or (regtype == 'fixed') - - if (pclass != None): - makeImage = makeImage or (pclass.find('.inverted') >= 0) - if self.fixedimage : - makeImage = makeImage or (pclass.find('cl-f-') >= 0) - - # before creating an image make sure glyph info exists - gidList = self.getData('info.glyph.glyphID',0,-1) - - makeImage = makeImage & (len(gidList) > 0) - - if not makeImage : - # standard all word paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - # convert paragraph to svg image - # translate first and last word into first and last glyphs - # and generate inline image and include it - glyphList = [] - firstglyphList = self.getData('word.firstGlyph',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) - firstGlyph = firstglyphList[first] - if last < len(firstglyphList): - lastGlyph = firstglyphList[last] - else : - lastGlyph = len(gidList) - - # handle case of white sapce paragraphs with no actual glyphs in them - # by reverting to text based paragraph - if firstGlyph >= lastGlyph: - # revert to standard text based paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - for glyphnum in 
xrange(firstGlyph, lastGlyph): - glyphList.append(glyphnum) - # include any extratokens if they exist - (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end) - (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end) - if (sfg != None) and (slg != None): - for glyphnum in xrange(int(sfg), int(slg)): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - return pclass, result - - # this type of paragraph may be made up of multiple spans, inline - # word monograms (images), and words with semantic meaning, - # plus glyphs used to form starting letter of first word - - # need to parse this type line by line - line = start + 1 - word_class = '' - - # if end is -1 then we must search to end of document - if end == -1 : - end = self.docSize - - # seems some xml has last* coming before first* so we have to - # handle any order - sp_first = -1 - sp_last = -1 - - gl_first = -1 - gl_last = -1 - - ws_first = -1 - ws_last = -1 - - word_class = '' - - word_semantic_type = '' - - while (line < end) : - - (name, argres) = self.lineinDoc(line) - - if name.endswith('span.firstWord') : - sp_first = int(argres) - - elif name.endswith('span.lastWord') : - sp_last = int(argres) - - elif name.endswith('word.firstGlyph') : - gl_first = int(argres) - - elif name.endswith('word.lastGlyph') : - gl_last = int(argres) - - elif name.endswith('word_semantic.firstWord'): - ws_first = int(argres) - - elif name.endswith('word_semantic.lastWord'): - ws_last = int(argres) - - elif name.endswith('word.class'): - (cname, space) = argres.split('-',1) - if space == '' : space = '0' - if (cname == 'spaceafter') and (int(space) > 0) : - word_class = 'sa' - - elif name.endswith('word.img.src'): - result.append(('img' + word_class, int(argres))) - word_class = '' - - elif name.endswith('region.img.src'): - result.append(('img' + word_class, int(argres))) - - if (sp_first != -1) and (sp_last != -1): - for 
wordnum in xrange(sp_first, sp_last): - result.append(('ocr', wordnum)) - sp_first = -1 - sp_last = -1 - - if (gl_first != -1) and (gl_last != -1): - glyphList = [] - for glyphnum in xrange(gl_first, gl_last): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - gl_first = -1 - gl_last = -1 - - if (ws_first != -1) and (ws_last != -1): - for wordnum in xrange(ws_first, ws_last): - result.append(('ocr', wordnum)) - ws_first = -1 - ws_last = -1 - - line += 1 - - return pclass, result - - - def buildParagraph(self, pclass, pdesc, type, regtype) : - parares = '' - sep ='' - - classres = '' - if pclass : - classres = ' class="' + pclass + '"' - - br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical') - - handle_links = len(self.link_id) > 0 - - if (type == 'full') or (type == 'begin') : - parares += '' - - if (type == 'end'): - parares += ' ' - - lstart = len(parares) - - cnt = len(pdesc) - - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - if (title == "") or (parares.rfind(title) < 0): - title=parares[lstart:] - if linktype == 'external' : - linkhref = self.link_href[link-1] - linkhtml = '' % linkhref - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkhtml = '' % ptarget - else : - # just link to the current page - linkhtml = '' - linkhtml += title + '' - pos = parares.rfind(title) - if pos >= 0: - parares = parares[0:pos] + linkhtml + parares[pos+len(title):] - else : - parares += linkhtml - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - if ((num-1) in self.dehyphen_rootid ) or handle_links: - word = '' - sep = '' - elif 
br_lb : - word = '
\n' - sep = '' - else : - word = '\n' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - elif wtype == 'img' : - sep = '' - parares += '' % num - parares += sep - - elif wtype == 'imgsa' : - sep = ' ' - parares += '' % num - parares += sep - - elif wtype == 'svg' : - sep = '' - parares += '' % num - parares += sep - - if len(sep) > 0 : parares = parares[0:-1] - if (type == 'full') or (type == 'end') : - parares += '

' - return parares - - - def buildTOCEntry(self, pdesc) : - parares = '' - sep ='' - tocentry = '' - handle_links = len(self.link_id) > 0 - - lstart = 0 - - cnt = len(pdesc) - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - title = title.rstrip('. ') - alt_title = parares[lstart:] - alt_title = alt_title.strip() - # now strip off the actual printed page number - alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.') - alt_title = alt_title.rstrip('. ') - # skip over any external links - can't have them in a books toc - if linktype == 'external' : - title = '' - alt_title = '' - linkpage = '' - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkpage = '%04d' % ptarget - else : - # just link to the current page - linkpage = self.id[4:] - if len(alt_title) >= len(title): - title = alt_title - if title != '' and linkpage != '': - tocentry += title + '|' + linkpage + '\n' - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - word = '' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - else : - continue - - return tocentry - - - - - # walk the document tree collecting the information needed - # to build an html page using the ocrText - def process(self): + (pos, sph) = self.findinDoc('page.h',0,-1) + (pos, spw) = self.findinDoc('page.w',0,-1) + if (sph == None): sph = '-1' + if (spw == None): spw = '-1' + return sph, spw - tocinfo = '' - hlst = [] - - # get the ocr text - (pos, argres) = self.findinDoc('info.word.ocrText',0,-1) - if argres : self.ocrtext = argres.split('|') - - # get information to dehyphenate the text - self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1) - - # 
determine if first paragraph is continued from previous page - (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1) - first_para_continued = (self.parastems_stemid != None) - - # determine if last paragraph is continued onto the next page - (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1) - last_para_continued = (self.paracont_stemid != None) - - # collect link ids - self.link_id = self.getData('info.word.link_id',0,-1) - - # collect link destination page numbers - self.link_page = self.getData('info.links.page',0,-1) - - # collect link types (container versus external) - (pos, argres) = self.findinDoc('info.links.type',0,-1) - if argres : self.link_type = argres.split('|') - - # collect link destinations - (pos, argres) = self.findinDoc('info.links.href',0,-1) - if argres : self.link_href = argres.split('|') - - # collect link titles - (pos, argres) = self.findinDoc('info.links.title',0,-1) - if argres : - self.link_title = argres.split('|') - else: - self.link_title.append('') - - # get a descriptions of the starting points of the regions - # and groups on the page - (pagetype, pageDesc) = self.PageDescription() - regcnt = len(pageDesc) - 1 - - anchorSet = False - breakSet = False - inGroup = False - - # process each region on the page and convert what you can to html - - for j in xrange(regcnt): - - (etype, start) = pageDesc[j] - (ntype, end) = pageDesc[j+1] - - - # set anchor for link target on this page - if not anchorSet and not first_para_continued: - hlst.append('\n') - anchorSet = True - - # handle groups of graphics with text captions - if (etype == 'grpbeg'): - (pos, grptype) = self.findinDoc('group.type', start, end) - if grptype != None: - if grptype == 'graphic': - gcstr = ' class="' + grptype + '"' - hlst.append('') - inGroup = True - - elif (etype == 'grpend'): - if inGroup: - hlst.append('\n') - inGroup = False - - else: - (pos, regtype) = self.findinDoc('region.type',start,end) - - if regtype == 
'graphic' : - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - if inGroup: - hlst.append('' % int(simgsrc)) - else: - hlst.append('
' % int(simgsrc)) - - elif regtype == 'chapterheading' : - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not breakSet: - hlst.append('
 
\n') - breakSet = True - tag = 'h1' - if pclass and (len(pclass) >= 7): - if pclass[3:7] == 'ch1-' : tag = 'h1' - if pclass[3:7] == 'ch2-' : tag = 'h2' - if pclass[3:7] == 'ch3-' : tag = 'h3' - hlst.append('<' + tag + ' class="' + pclass + '">') - else: - hlst.append('<' + tag + '>') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - - elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'): - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if pclass and (len(pclass) >= 6) and (ptype == 'full'): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'tocentry') : - ptype = 'full' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - tocinfo += self.buildTOCEntry(pdesc) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'vertical') or (regtype == 'table') : - ptype = 'full' - if inGroup: - ptype = 'middle' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start, end, regtype) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - - elif (regtype == 'synth_fcvr.center'): - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - else : - print ' Making region type', regtype, - (pos, temp) = self.findinDoc('paragraph',start,end) - (pos2, temp) = self.findinDoc('span',start,end) - if pos != -1 or pos2 != -1: - print ' a "text" region' - orig_regtype = regtype - regtype = 'fixed' - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not pclass: - if orig_regtype.endswith('.right') : pclass = 'cl-right' - elif orig_regtype.endswith('.center') : pclass = 'cl-center' - elif orig_regtype.endswith('.left') : pclass = 'cl-left' - elif orig_regtype.endswith('.justify') : pclass = 'cl-justify' - if pclass and (ptype == 'full') and (len(pclass) >= 6): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - else : - print ' a "graphic" region' - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - - htmlpage = "".join(hlst) - if last_para_continued : - if htmlpage[-4:] == '

': - htmlpage = htmlpage[0:-4] - last_para_continued = False - - return htmlpage, tocinfo - - -def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage): +def getPageDim(flatxml): # create a document parser - dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage) - htmlpage, tocinfo = dp.process() - return htmlpage, tocinfo + dp = PageDimParser(flatxml) + (ph, pw) = dp.process() + return ph, pw + +class GParser(object): + def __init__(self, flatxml): + self.flatdoc = flatxml.split('\n') + self.dpi = 1440 + self.gh = self.getData('info.glyph.h') + self.gw = self.getData('info.glyph.w') + self.guse = self.getData('info.glyph.use') + if self.guse : + self.count = len(self.guse) + else : + self.count = 0 + self.gvtx = self.getData('info.glyph.vtx') + self.glen = self.getData('info.glyph.len') + self.gdpi = self.getData('info.glyph.dpi') + self.vx = self.getData('info.vtx.x') + self.vy = self.getData('info.vtx.y') + self.vlen = self.getData('info.len.n') + if self.vlen : + self.glen.append(len(self.vlen)) + elif self.glen: + self.glen.append(0) + if self.vx : + self.gvtx.append(len(self.vx)) + elif self.gvtx : + self.gvtx.append(0) + def getData(self, path): + result = None + cnt = len(self.flatdoc) + for j in xrange(cnt): + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name == path): + result = argres + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result + def getGlyphDim(self, gly): + if self.gdpi[gly] == 0: + return 0, 0 + maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly] + maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly] + return maxh, maxw + def getPath(self, gly): + path = '' + if (gly < 0) or (gly >= self.count): + return path + tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]] + ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]] + p = 0 + for k in xrange(self.glen[gly], 
self.glen[gly+1]): + if (p == 0): + zx = tx[0:self.vlen[k]+1] + zy = ty[0:self.vlen[k]+1] + else: + zx = tx[self.vlen[k-1]+1:self.vlen[k]+1] + zy = ty[self.vlen[k-1]+1:self.vlen[k]+1] + p += 1 + j = 0 + while ( j < len(zx) ): + if (j == 0): + # Start Position. + path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly]) + elif (j <= len(zx)-3): + # Cubic Bezier Curve + path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly]) + j += 2 + elif (j == len(zx)-2): + # Cubic Bezier Curve to Start Position + path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) + j += 1 + elif (j == len(zx)-1): + # Quadratic Bezier Curve to Start Position + path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) + + j += 1 + path += 'z' + return path + + + +# dictionary of all text strings by index value +class GlyphDict(object): + def __init__(self): + self.gdict = {} + def lookup(self, id): + # id='id="gl%d"' % val + if id in self.gdict: + return self.gdict[id] + return None + def addGlyph(self, val, path): + id='id="gl%d"' % val + self.gdict[id] = path + + +def generateBook(bookDir, raw, fixedimage): + # sanity check Topaz file extraction + if not os.path.exists(bookDir) : + print "Can not find directory with unencrypted book" + return 1 + + dictFile = os.path.join(bookDir,'dict0000.dat') + if not os.path.exists(dictFile) : + print "Can not find dict0000.dat file" + return 1 + + pageDir = os.path.join(bookDir,'page') + if not os.path.exists(pageDir) : + print 
"Can not find page directory in unencrypted book" + return 1 + + imgDir = os.path.join(bookDir,'img') + if not os.path.exists(imgDir) : + print "Can not find image directory in unencrypted book" + return 1 + + glyphsDir = os.path.join(bookDir,'glyphs') + if not os.path.exists(glyphsDir) : + print "Can not find glyphs directory in unencrypted book" + return 1 + + metaFile = os.path.join(bookDir,'metadata0000.dat') + if not os.path.exists(metaFile) : + print "Can not find metadata0000.dat in unencrypted book" + return 1 + + svgDir = os.path.join(bookDir,'svg') + if not os.path.exists(svgDir) : + os.makedirs(svgDir) + + if buildXML: + xmlDir = os.path.join(bookDir,'xml') + if not os.path.exists(xmlDir) : + os.makedirs(xmlDir) + + otherFile = os.path.join(bookDir,'other0000.dat') + if not os.path.exists(otherFile) : + print "Can not find other0000.dat in unencrypted book" + return 1 + + print "Updating to color images if available" + spath = os.path.join(bookDir,'color_img') + dpath = os.path.join(bookDir,'img') + filenames = os.listdir(spath) + filenames = sorted(filenames) + for filename in filenames: + imgname = filename.replace('color','img') + sfile = os.path.join(spath,filename) + dfile = os.path.join(dpath,imgname) + imgdata = file(sfile,'rb').read() + file(dfile,'wb').write(imgdata) + + print "Creating cover.jpg" + isCover = False + cpath = os.path.join(bookDir,'img') + cpath = os.path.join(cpath,'img0000.jpg') + if os.path.isfile(cpath): + cover = file(cpath, 'rb').read() + cpath = os.path.join(bookDir,'cover.jpg') + file(cpath, 'wb').write(cover) + isCover = True + + + print 'Processing Dictionary' + dict = Dictionary(dictFile) + + print 'Processing Meta Data and creating OPF' + meta_array = getMetaArray(metaFile) + + # replace special chars in title and authors like & < > + title = meta_array.get('Title','No Title Provided') + title = title.replace('&','&') + title = title.replace('<','<') + title = title.replace('>','>') + meta_array['Title'] = title + 
authors = meta_array.get('Authors','No Authors Provided') + authors = authors.replace('&','&') + authors = authors.replace('<','<') + authors = authors.replace('>','>') + meta_array['Authors'] = authors + + if buildXML: + xname = os.path.join(xmlDir, 'metadata.xml') + mlst = [] + for key in meta_array: + mlst.append('\n') + metastr = "".join(mlst) + mlst = None + file(xname, 'wb').write(metastr) + + print 'Processing StyleSheet' + + # get some scaling info from metadata to use while processing styles + # and first page info + + fontsize = '135' + if 'fontSize' in meta_array: + fontsize = meta_array['fontSize'] + + # also get the size of a normal text page + # get the total number of pages unpacked as a safety check + filenames = os.listdir(pageDir) + numfiles = len(filenames) + + spage = '1' + if 'firstTextPage' in meta_array: + spage = meta_array['firstTextPage'] + pnum = int(spage) + if pnum >= numfiles or pnum < 0: + # metadata is wrong so just select a page near the front + # 10% of the book to get a normal text page + pnum = int(0.10 * numfiles) + # print "first normal text page is", spage + + # get page height and width from first text page for use in stylesheet scaling + pname = 'page%04d.dat' % (pnum + 1) + fname = os.path.join(pageDir,pname) + flat_xml = convert2xml.fromData(dict, fname) + + (ph, pw) = getPageDim(flat_xml) + if (ph == '-1') or (ph == '0') : ph = '11000' + if (pw == '-1') or (pw == '0') : pw = '8500' + meta_array['pageHeight'] = ph + meta_array['pageWidth'] = pw + if 'fontSize' not in meta_array.keys(): + meta_array['fontSize'] = fontsize + + # process other.dat for css info and for map of page files to svg images + # this map is needed because some pages actually are made up of multiple + # pageXXXX.xml files + xname = os.path.join(bookDir, 'style.css') + flat_xml = convert2xml.fromData(dict, otherFile) + + # extract info.original.pid to get original page information + pageIDMap = {} + pageidnums = stylexml2css.getpageIDMap(flat_xml) + if 
len(pageidnums) == 0: + filenames = os.listdir(pageDir) + numfiles = len(filenames) + for k in range(numfiles): + pageidnums.append(k) + # create a map from page ids to list of page file nums to process for that page + for i in range(len(pageidnums)): + id = pageidnums[i] + if id in pageIDMap.keys(): + pageIDMap[id].append(i) + else: + pageIDMap[id] = [i] + + # now get the css info + cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw) + file(xname, 'wb').write(cssstr) + if buildXML: + xname = os.path.join(xmlDir, 'other0000.xml') + file(xname, 'wb').write(convert2xml.getXML(dict, otherFile)) + + print 'Processing Glyphs' + gd = GlyphDict() + filenames = os.listdir(glyphsDir) + filenames = sorted(filenames) + glyfname = os.path.join(svgDir,'glyphs.svg') + glyfile = open(glyfname, 'w') + glyfile.write('\n') + glyfile.write('\n') + glyfile.write('\n') + glyfile.write('Glyphs for %s\n' % meta_array['Title']) + glyfile.write('\n') + counter = 0 + for filename in filenames: + # print ' ', filename + print '.', + fname = os.path.join(glyphsDir,filename) + flat_xml = convert2xml.fromData(dict, fname) + + if buildXML: + xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) + file(xname, 'wb').write(convert2xml.getXML(dict, fname)) + + gp = GParser(flat_xml) + for i in xrange(0, gp.count): + path = gp.getPath(i) + maxh, maxw = gp.getGlyphDim(i) + fullpath = '\n' % (counter * 256 + i, path, maxw, maxh) + glyfile.write(fullpath) + gd.addGlyph(counter * 256 + i, fullpath) + counter += 1 + glyfile.write('\n') + glyfile.write('\n') + glyfile.close() + print " " + + + # start up the html + # also build up tocentries while processing html + htmlFileName = "book.html" + hlst = [] + hlst.append('\n') + hlst.append('\n') + hlst.append('\n') + hlst.append('\n') + hlst.append('\n') + hlst.append('' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '\n') + hlst.append('\n') + hlst.append('\n') + if 'ASIN' in meta_array: + hlst.append('\n') + if 'GUID' 
in meta_array: + hlst.append('\n') + hlst.append('\n') + hlst.append('\n\n') + + print 'Processing Pages' + # Books are at 1440 DPI. This is rendering at twice that size for + # readability when rendering to the screen. + scaledpi = 1440.0 + + filenames = os.listdir(pageDir) + filenames = sorted(filenames) + numfiles = len(filenames) + + xmllst = [] + elst = [] + + for filename in filenames: + # print ' ', filename + print ".", + fname = os.path.join(pageDir,filename) + flat_xml = convert2xml.fromData(dict, fname) + + # keep flat_xml for later svg processing + xmllst.append(flat_xml) + + if buildXML: + xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) + file(xname, 'wb').write(convert2xml.getXML(dict, fname)) + + # first get the html + pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage) + elst.append(tocinfo) + hlst.append(pagehtml) + + # finish up the html string and output it + hlst.append('\n\n') + htmlstr = "".join(hlst) + hlst = None + file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr) + + print " " + print 'Extracting Table of Contents from Amazon OCR' + + # first create a table of contents file for the svg images + tlst = [] + tlst.append('\n') + tlst.append('\n') + tlst.append('') + tlst.append('\n') + tlst.append('' + meta_array['Title'] + '\n') + tlst.append('\n') + tlst.append('\n') + if 'ASIN' in meta_array: + tlst.append('\n') + if 'GUID' in meta_array: + tlst.append('\n') + tlst.append('\n') + tlst.append('\n') + + tlst.append('

Table of Contents

\n') + start = pageidnums[0] + if (raw): + startname = 'page%04d.svg' % start + else: + startname = 'page%04d.xhtml' % start + + tlst.append('

Start of Book

\n') + # build up a table of contents for the svg xhtml output + tocentries = "".join(elst) + elst = None + toclst = tocentries.split('\n') + toclst.pop() + for entry in toclst: + print entry + title, pagenum = entry.split('|') + id = pageidnums[int(pagenum)] + if (raw): + fname = 'page%04d.svg' % id + else: + fname = 'page%04d.xhtml' % id + tlst.append('

' + title + '

\n') + tlst.append('\n') + tlst.append('\n') + tochtml = "".join(tlst) + file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml) + + + # now create index_svg.xhtml that points to all required files + slst = [] + slst.append('\n') + slst.append('\n') + slst.append('') + slst.append('\n') + slst.append('' + meta_array['Title'] + '\n') + slst.append('\n') + slst.append('\n') + if 'ASIN' in meta_array: + slst.append('\n') + if 'GUID' in meta_array: + slst.append('\n') + slst.append('\n') + slst.append('\n') + + print "Building svg images of each book page" + slst.append('

List of Pages

\n') + slst.append('
\n') + idlst = sorted(pageIDMap.keys()) + numids = len(idlst) + cnt = len(idlst) + previd = None + for j in range(cnt): + pageid = idlst[j] + if j < cnt - 1: + nextid = idlst[j+1] + else: + nextid = None + print '.', + pagelst = pageIDMap[pageid] + flst = [] + for page in pagelst: + flst.append(xmllst[page]) + flat_svg = "".join(flst) + flst=None + svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi) + if (raw) : + pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w') + slst.append('Page %d\n' % (pageid, pageid)) + else : + pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w') + slst.append('Page %d\n' % (pageid, pageid)) + previd = pageid + pfile.write(svgxml) + pfile.close() + counter += 1 + slst.append('
\n') + slst.append('

Table of Contents

\n') + slst.append('\n\n') + svgindex = "".join(slst) + slst = None + file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex) + + print " " + + # build the opf file + opfname = os.path.join(bookDir, 'book.opf') + olst = [] + olst.append('\n') + olst.append('\n') + # adding metadata + olst.append(' \n') + if 'GUID' in meta_array: + olst.append(' ' + meta_array['GUID'] + '\n') + if 'ASIN' in meta_array: + olst.append(' ' + meta_array['ASIN'] + '\n') + if 'oASIN' in meta_array: + olst.append(' ' + meta_array['oASIN'] + '\n') + olst.append(' ' + meta_array['Title'] + '\n') + olst.append(' ' + meta_array['Authors'] + '\n') + olst.append(' en\n') + olst.append(' ' + meta_array['UpdateTime'] + '\n') + if isCover: + olst.append(' \n') + olst.append(' \n') + olst.append('\n') + olst.append(' \n') + olst.append(' \n') + # adding image files to manifest + filenames = os.listdir(imgDir) + filenames = sorted(filenames) + for filename in filenames: + imgname, imgext = os.path.splitext(filename) + if imgext == '.jpg': + imgext = 'jpeg' + if imgext == '.svg': + imgext = 'svg+xml' + olst.append(' \n') + if isCover: + olst.append(' \n') + olst.append('\n') + # adding spine + olst.append('\n \n\n') + if isCover: + olst.append(' \n') + olst.append(' \n') + olst.append(' \n') + olst.append('\n') + opfstr = "".join(olst) + olst = None + file(opfname, 'wb').write(opfstr) + + print 'Processing Complete' + + return 0 + +def usage(): + print "genbook.py generates a book from the extract Topaz Files" + print "Usage:" + print " genbook.py [-r] [-h [--fixed-image] " + print " " + print "Options:" + print " -h : help - print this usage message" + print " -r : generate raw svg files (not wrapped in xhtml)" + print " --fixed-image : genearate any Fixed Area as an svg image in the html" + print " " + + +def main(argv): + bookDir = '' + if len(argv) == 0: + argv = sys.argv + + try: + opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"]) + + except getopt.GetoptError, err: + print 
str(err) + usage() + return 1 + + if len(opts) == 0 and len(args) == 0 : + usage() + return 1 + + raw = 0 + fixedimage = True + for o, a in opts: + if o =="-h": + usage() + return 0 + if o =="-r": + raw = 1 + if o =="--fixed-image": + fixedimage = True + + bookDir = args[0] + + rv = generateBook(bookDir, raw, fixedimage) + return rv + + +if __name__ == '__main__': + sys.exit(main('')) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/getk4pcpids.py b/Calibre_Plugins/K4MobiDeDRM_plugin/getk4pcpids.py index 4dfd6c7..cc8bcd4 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/getk4pcpids.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/getk4pcpids.py @@ -1,249 +1,78 @@ -#! /usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +#!/usr/bin/python +# +# This is a python script. You need a Python interpreter to run it. +# For example, ActiveState Python, which exists for windows. +# +# Changelog +# 1.00 - Initial version +# 1.01 - getPidList interface change + +__version__ = '1.01' import sys -import csv + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) +sys.stdout=Unbuffered(sys.stdout) + import os -import getopt -from struct import pack -from struct import unpack +import struct +import binascii +import kgenpids +import topazextract +import mobidedrm +from alfcrypto import Pukall_Cipher +class DrmException(Exception): + pass -class PParser(object): - def __init__(self, gd, flatxml, meta_array): - self.gd = gd - self.flatdoc = flatxml.split('\n') - self.docSize = len(self.flatdoc) - self.temp = [] +def getK4PCpids(path_to_ebook): + # Return Kindle4PC PIDs. 
Assumes that the caller checked that we are not on Linux, which will raise an exception - self.ph = -1 - self.pw = -1 - startpos = self.posinDoc('page.h') or self.posinDoc('book.h') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.ph = max(self.ph, int(argres)) - startpos = self.posinDoc('page.w') or self.posinDoc('book.w') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.pw = max(self.pw, int(argres)) + mobi = True + magic3 = file(path_to_ebook,'rb').read(3) + if magic3 == 'TPZ': + mobi = False - if self.ph <= 0: - self.ph = int(meta_array.get('pageHeight', '11000')) - if self.pw <= 0: - self.pw = int(meta_array.get('pageWidth', '8500')) - - res = [] - startpos = self.posinDoc('info.glyph.x') - for p in startpos: - argres = self.getDataatPos('info.glyph.x', p) - res.extend(argres) - self.gx = res - - res = [] - startpos = self.posinDoc('info.glyph.y') - for p in startpos: - argres = self.getDataatPos('info.glyph.y', p) - res.extend(argres) - self.gy = res - - res = [] - startpos = self.posinDoc('info.glyph.glyphID') - for p in startpos: - argres = self.getDataatPos('info.glyph.glyphID', p) - res.extend(argres) - self.gid = res - - - # return tag at line pos in document - def lineinDoc(self, pos) : - if (pos >= 0) and (pos < self.docSize) : - item = self.flatdoc[pos] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - return name, argres - - # find tag in doc if within pos to end inclusive - def findinDoc(self, tagpath, pos, end) : - result = None - if end == -1 : - end = self.docSize - else: - end = min(self.docSize, end) - foundat = -1 - for j in xrange(pos, end): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - if name.endswith(tagpath) : - result = argres - foundat = j - break - return foundat, result - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - 
pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - def getData(self, path): - result = None - cnt = len(self.flatdoc) - for j in xrange(cnt): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - - def getDataatPos(self, path, pos): - result = None - item = self.flatdoc[pos] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - if (name.endswith(path)): - result = argres - return result - - def getDataTemp(self, path): - result = None - cnt = len(self.temp) - for j in xrange(cnt): - item = self.temp[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - self.temp.pop(j) - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - - def getImages(self): - result = [] - self.temp = self.flatdoc - while (self.getDataTemp('img') != None): - h = self.getDataTemp('img.h')[0] - w = self.getDataTemp('img.w')[0] - x = self.getDataTemp('img.x')[0] - y = self.getDataTemp('img.y')[0] - src = self.getDataTemp('img.src')[0] - result.append('\n' % (src, x, y, w, h)) - return result - - def getGlyphs(self): - result = [] - if (self.gid != None) and (len(self.gid) > 0): - glyphs = [] - for j in set(self.gid): - glyphs.append(j) - glyphs.sort() - for gid in glyphs: - id='id="gl%d"' % gid - path = self.gd.lookup(id) - if path: - result.append(id + ' ' + path) - return result - - 
-def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): - mlst = [] - pp = PParser(gdict, flat_xml, meta_array) - mlst.append('\n') - if (raw): - mlst.append('\n') - mlst.append('\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)) - mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) + if mobi: + mb = mobidedrm.MobiBook(path_to_ebook,False) else: - mlst.append('\n') - mlst.append('\n') - mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) - mlst.append('\n') - mlst.append('\n') - mlst.append('\n') - mlst.append('
\n') - if previd == None: - mlst.append('\n') - else: - mlst.append('\n') + mb = topazextract.TopazBook(path_to_ebook) - mlst.append('' % (pp.pw, pp.ph)) - if (pp.gid != None): - mlst.append('\n') - gdefs = pp.getGlyphs() - for j in xrange(0,len(gdefs)): - mlst.append(gdefs[j]) - mlst.append('\n') - img = pp.getImages() - if (img != None): - for j in xrange(0,len(img)): - mlst.append(img[j]) - if (pp.gid != None): - for j in xrange(0,len(pp.gid)): - mlst.append('\n' % (pp.gid[j], pp.gx[j], pp.gy[j])) - if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): - xpos = "%d" % (pp.pw // 3) - ypos = "%d" % (pp.ph // 3) - mlst.append('This page intentionally left blank.\n') - if (raw) : - mlst.append('') - else : - mlst.append('\n') - if nextid == None: - mlst.append('\n') - else : - mlst.append('\n') - mlst.append('
\n') - mlst.append('\n') - mlst.append('\n') - mlst.append('\n') - return "".join(mlst) + md1, md2 = mb.getPIDMetaInfo() + + return kgenpids.getPidList(md1, md2) + + +def main(argv=sys.argv): + print ('getk4pcpids.py v%(__version__)s. ' + 'Copyright 2012 Apprentice Alf' % globals()) + + if len(argv)<2 or len(argv)>3: + print "Gets the possible book-specific PIDs from K4PC for a particular book" + print "Usage:" + print " %s []" % sys.argv[0] + return 1 + else: + infile = argv[1] + try: + pidlist = getK4PCpids(infile) + except DrmException, e: + print "Error: %s" % e + return 1 + pidstring = ','.join(pidlist) + print "Possible PIDs are: ", pidstring + if len(argv) is 3: + outfile = argv[2] + file(outfile, 'w').write(pidstring) + + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py b/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py deleted file mode 100644 index 9733887..0000000 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py +++ /dev/null @@ -1,721 +0,0 @@ -#! 
/usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -import sys -sys.stdout=Unbuffered(sys.stdout) - -import csv -import os -import getopt -from struct import pack -from struct import unpack - -class TpzDRMError(Exception): - pass - -# local support routines -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False - -if inCalibre : - from calibre_plugins.k4mobidedrm import convert2xml - from calibre_plugins.k4mobidedrm import flatxml2html - from calibre_plugins.k4mobidedrm import flatxml2svg - from calibre_plugins.k4mobidedrm import stylexml2css -else : - import convert2xml - import flatxml2html - import flatxml2svg - import stylexml2css - -# global switch -buildXML = False - -# Get a 7 bit encoded number from a file -def readEncodedNumber(file): - flag = False - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data == 0xFF: - flag = True - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - datax = (datax <<7) + (data & 0x7F) - data = datax - if flag: - data = -data - return data - -# Get a length prefixed string from the file -def lengthPrefixString(data): - return encodeNumber(len(data))+data - -def readString(file): - stringLength = readEncodedNumber(file) - if (stringLength == None): - return None - sv = file.read(stringLength) - if (len(sv) != stringLength): - return "" - return unpack(str(stringLength)+"s",sv)[0] - -def getMetaArray(metaFile): - # parse the meta file - result = {} - fo = file(metaFile,'rb') - size = readEncodedNumber(fo) - for i in xrange(size): - tag = readString(fo) - value = readString(fo) - result[tag] = value - # print 
tag, value - fo.close() - return result - - -# dictionary of all text strings by index value -class Dictionary(object): - def __init__(self, dictFile): - self.filename = dictFile - self.size = 0 - self.fo = file(dictFile,'rb') - self.stable = [] - self.size = readEncodedNumber(self.fo) - for i in xrange(self.size): - self.stable.append(self.escapestr(readString(self.fo))) - self.pos = 0 - def escapestr(self, str): - str = str.replace('&','&') - str = str.replace('<','<') - str = str.replace('>','>') - str = str.replace('=','=') - return str - def lookup(self,val): - if ((val >= 0) and (val < self.size)) : - self.pos = val - return self.stable[self.pos] - else: - print "Error - %d outside of string table limits" % val - raise TpzDRMError('outside or string table limits') - # sys.exit(-1) - def getSize(self): - return self.size - def getPos(self): - return self.pos - - -class PageDimParser(object): - def __init__(self, flatxml): - self.flatdoc = flatxml.split('\n') - # find tag if within pos to end inclusive - def findinDoc(self, tagpath, pos, end) : - result = None - docList = self.flatdoc - cnt = len(docList) - if end == -1 : - end = cnt - else: - end = min(cnt,end) - foundat = -1 - for j in xrange(pos, end): - item = docList[j] - if item.find('=') >= 0: - (name, argres) = item.split('=') - else : - name = item - argres = '' - if name.endswith(tagpath) : - result = argres - foundat = j - break - return foundat, result - def process(self): - (pos, sph) = self.findinDoc('page.h',0,-1) - (pos, spw) = self.findinDoc('page.w',0,-1) - if (sph == None): sph = '-1' - if (spw == None): spw = '-1' - return sph, spw - -def getPageDim(flatxml): - # create a document parser - dp = PageDimParser(flatxml) - (ph, pw) = dp.process() - return ph, pw - -class GParser(object): - def __init__(self, flatxml): - self.flatdoc = flatxml.split('\n') - self.dpi = 1440 - self.gh = self.getData('info.glyph.h') - self.gw = self.getData('info.glyph.w') - self.guse = 
self.getData('info.glyph.use') - if self.guse : - self.count = len(self.guse) - else : - self.count = 0 - self.gvtx = self.getData('info.glyph.vtx') - self.glen = self.getData('info.glyph.len') - self.gdpi = self.getData('info.glyph.dpi') - self.vx = self.getData('info.vtx.x') - self.vy = self.getData('info.vtx.y') - self.vlen = self.getData('info.len.n') - if self.vlen : - self.glen.append(len(self.vlen)) - elif self.glen: - self.glen.append(0) - if self.vx : - self.gvtx.append(len(self.vx)) - elif self.gvtx : - self.gvtx.append(0) - def getData(self, path): - result = None - cnt = len(self.flatdoc) - for j in xrange(cnt): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name == path): - result = argres - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - def getGlyphDim(self, gly): - if self.gdpi[gly] == 0: - return 0, 0 - maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly] - maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly] - return maxh, maxw - def getPath(self, gly): - path = '' - if (gly < 0) or (gly >= self.count): - return path - tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]] - ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]] - p = 0 - for k in xrange(self.glen[gly], self.glen[gly+1]): - if (p == 0): - zx = tx[0:self.vlen[k]+1] - zy = ty[0:self.vlen[k]+1] - else: - zx = tx[self.vlen[k-1]+1:self.vlen[k]+1] - zy = ty[self.vlen[k-1]+1:self.vlen[k]+1] - p += 1 - j = 0 - while ( j < len(zx) ): - if (j == 0): - # Start Position. 
- path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly]) - elif (j <= len(zx)-3): - # Cubic Bezier Curve - path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly]) - j += 2 - elif (j == len(zx)-2): - # Cubic Bezier Curve to Start Position - path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) - j += 1 - elif (j == len(zx)-1): - # Quadratic Bezier Curve to Start Position - path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) - - j += 1 - path += 'z' - return path - - - -# dictionary of all text strings by index value -class GlyphDict(object): - def __init__(self): - self.gdict = {} - def lookup(self, id): - # id='id="gl%d"' % val - if id in self.gdict: - return self.gdict[id] - return None - def addGlyph(self, val, path): - id='id="gl%d"' % val - self.gdict[id] = path - - -def generateBook(bookDir, raw, fixedimage): - # sanity check Topaz file extraction - if not os.path.exists(bookDir) : - print "Can not find directory with unencrypted book" - return 1 - - dictFile = os.path.join(bookDir,'dict0000.dat') - if not os.path.exists(dictFile) : - print "Can not find dict0000.dat file" - return 1 - - pageDir = os.path.join(bookDir,'page') - if not os.path.exists(pageDir) : - print "Can not find page directory in unencrypted book" - return 1 - - imgDir = os.path.join(bookDir,'img') - if not os.path.exists(imgDir) : - print "Can not find image directory in unencrypted book" - return 1 - - glyphsDir = os.path.join(bookDir,'glyphs') - if not 
os.path.exists(glyphsDir) : - print "Can not find glyphs directory in unencrypted book" - return 1 - - metaFile = os.path.join(bookDir,'metadata0000.dat') - if not os.path.exists(metaFile) : - print "Can not find metadata0000.dat in unencrypted book" - return 1 - - svgDir = os.path.join(bookDir,'svg') - if not os.path.exists(svgDir) : - os.makedirs(svgDir) - - if buildXML: - xmlDir = os.path.join(bookDir,'xml') - if not os.path.exists(xmlDir) : - os.makedirs(xmlDir) - - otherFile = os.path.join(bookDir,'other0000.dat') - if not os.path.exists(otherFile) : - print "Can not find other0000.dat in unencrypted book" - return 1 - - print "Updating to color images if available" - spath = os.path.join(bookDir,'color_img') - dpath = os.path.join(bookDir,'img') - filenames = os.listdir(spath) - filenames = sorted(filenames) - for filename in filenames: - imgname = filename.replace('color','img') - sfile = os.path.join(spath,filename) - dfile = os.path.join(dpath,imgname) - imgdata = file(sfile,'rb').read() - file(dfile,'wb').write(imgdata) - - print "Creating cover.jpg" - isCover = False - cpath = os.path.join(bookDir,'img') - cpath = os.path.join(cpath,'img0000.jpg') - if os.path.isfile(cpath): - cover = file(cpath, 'rb').read() - cpath = os.path.join(bookDir,'cover.jpg') - file(cpath, 'wb').write(cover) - isCover = True - - - print 'Processing Dictionary' - dict = Dictionary(dictFile) - - print 'Processing Meta Data and creating OPF' - meta_array = getMetaArray(metaFile) - - # replace special chars in title and authors like & < > - title = meta_array.get('Title','No Title Provided') - title = title.replace('&','&') - title = title.replace('<','<') - title = title.replace('>','>') - meta_array['Title'] = title - authors = meta_array.get('Authors','No Authors Provided') - authors = authors.replace('&','&') - authors = authors.replace('<','<') - authors = authors.replace('>','>') - meta_array['Authors'] = authors - - if buildXML: - xname = os.path.join(xmlDir, 'metadata.xml') 
- mlst = [] - for key in meta_array: - mlst.append('\n') - metastr = "".join(mlst) - mlst = None - file(xname, 'wb').write(metastr) - - print 'Processing StyleSheet' - - # get some scaling info from metadata to use while processing styles - # and first page info - - fontsize = '135' - if 'fontSize' in meta_array: - fontsize = meta_array['fontSize'] - - # also get the size of a normal text page - # get the total number of pages unpacked as a safety check - filenames = os.listdir(pageDir) - numfiles = len(filenames) - - spage = '1' - if 'firstTextPage' in meta_array: - spage = meta_array['firstTextPage'] - pnum = int(spage) - if pnum >= numfiles or pnum < 0: - # metadata is wrong so just select a page near the front - # 10% of the book to get a normal text page - pnum = int(0.10 * numfiles) - # print "first normal text page is", spage - - # get page height and width from first text page for use in stylesheet scaling - pname = 'page%04d.dat' % (pnum + 1) - fname = os.path.join(pageDir,pname) - flat_xml = convert2xml.fromData(dict, fname) - - (ph, pw) = getPageDim(flat_xml) - if (ph == '-1') or (ph == '0') : ph = '11000' - if (pw == '-1') or (pw == '0') : pw = '8500' - meta_array['pageHeight'] = ph - meta_array['pageWidth'] = pw - if 'fontSize' not in meta_array.keys(): - meta_array['fontSize'] = fontsize - - # process other.dat for css info and for map of page files to svg images - # this map is needed because some pages actually are made up of multiple - # pageXXXX.xml files - xname = os.path.join(bookDir, 'style.css') - flat_xml = convert2xml.fromData(dict, otherFile) - - # extract info.original.pid to get original page information - pageIDMap = {} - pageidnums = stylexml2css.getpageIDMap(flat_xml) - if len(pageidnums) == 0: - filenames = os.listdir(pageDir) - numfiles = len(filenames) - for k in range(numfiles): - pageidnums.append(k) - # create a map from page ids to list of page file nums to process for that page - for i in range(len(pageidnums)): - id = 
pageidnums[i] - if id in pageIDMap.keys(): - pageIDMap[id].append(i) - else: - pageIDMap[id] = [i] - - # now get the css info - cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw) - file(xname, 'wb').write(cssstr) - if buildXML: - xname = os.path.join(xmlDir, 'other0000.xml') - file(xname, 'wb').write(convert2xml.getXML(dict, otherFile)) - - print 'Processing Glyphs' - gd = GlyphDict() - filenames = os.listdir(glyphsDir) - filenames = sorted(filenames) - glyfname = os.path.join(svgDir,'glyphs.svg') - glyfile = open(glyfname, 'w') - glyfile.write('\n') - glyfile.write('\n') - glyfile.write('\n') - glyfile.write('Glyphs for %s\n' % meta_array['Title']) - glyfile.write('\n') - counter = 0 - for filename in filenames: - # print ' ', filename - print '.', - fname = os.path.join(glyphsDir,filename) - flat_xml = convert2xml.fromData(dict, fname) - - if buildXML: - xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) - file(xname, 'wb').write(convert2xml.getXML(dict, fname)) - - gp = GParser(flat_xml) - for i in xrange(0, gp.count): - path = gp.getPath(i) - maxh, maxw = gp.getGlyphDim(i) - fullpath = '\n' % (counter * 256 + i, path, maxw, maxh) - glyfile.write(fullpath) - gd.addGlyph(counter * 256 + i, fullpath) - counter += 1 - glyfile.write('\n') - glyfile.write('\n') - glyfile.close() - print " " - - - # start up the html - # also build up tocentries while processing html - htmlFileName = "book.html" - hlst = [] - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '\n') - hlst.append('\n') - hlst.append('\n') - if 'ASIN' in meta_array: - hlst.append('\n') - if 'GUID' in meta_array: - hlst.append('\n') - hlst.append('\n') - hlst.append('\n\n') - - print 'Processing Pages' - # Books are at 1440 DPI. This is rendering at twice that size for - # readability when rendering to the screen. 
- scaledpi = 1440.0 - - filenames = os.listdir(pageDir) - filenames = sorted(filenames) - numfiles = len(filenames) - - xmllst = [] - elst = [] - - for filename in filenames: - # print ' ', filename - print ".", - fname = os.path.join(pageDir,filename) - flat_xml = convert2xml.fromData(dict, fname) - - # keep flat_xml for later svg processing - xmllst.append(flat_xml) - - if buildXML: - xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) - file(xname, 'wb').write(convert2xml.getXML(dict, fname)) - - # first get the html - pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage) - elst.append(tocinfo) - hlst.append(pagehtml) - - # finish up the html string and output it - hlst.append('\n\n') - htmlstr = "".join(hlst) - hlst = None - file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr) - - print " " - print 'Extracting Table of Contents from Amazon OCR' - - # first create a table of contents file for the svg images - tlst = [] - tlst.append('\n') - tlst.append('\n') - tlst.append('') - tlst.append('\n') - tlst.append('' + meta_array['Title'] + '\n') - tlst.append('\n') - tlst.append('\n') - if 'ASIN' in meta_array: - tlst.append('\n') - if 'GUID' in meta_array: - tlst.append('\n') - tlst.append('\n') - tlst.append('\n') - - tlst.append('

Table of Contents

\n') - start = pageidnums[0] - if (raw): - startname = 'page%04d.svg' % start - else: - startname = 'page%04d.xhtml' % start - - tlst.append('

Start of Book

\n') - # build up a table of contents for the svg xhtml output - tocentries = "".join(elst) - elst = None - toclst = tocentries.split('\n') - toclst.pop() - for entry in toclst: - print entry - title, pagenum = entry.split('|') - id = pageidnums[int(pagenum)] - if (raw): - fname = 'page%04d.svg' % id - else: - fname = 'page%04d.xhtml' % id - tlst.append('

' + title + '

\n') - tlst.append('\n') - tlst.append('\n') - tochtml = "".join(tlst) - file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml) - - - # now create index_svg.xhtml that points to all required files - slst = [] - slst.append('\n') - slst.append('\n') - slst.append('') - slst.append('\n') - slst.append('' + meta_array['Title'] + '\n') - slst.append('\n') - slst.append('\n') - if 'ASIN' in meta_array: - slst.append('\n') - if 'GUID' in meta_array: - slst.append('\n') - slst.append('\n') - slst.append('\n') - - print "Building svg images of each book page" - slst.append('

List of Pages

\n') - slst.append('
\n') - idlst = sorted(pageIDMap.keys()) - numids = len(idlst) - cnt = len(idlst) - previd = None - for j in range(cnt): - pageid = idlst[j] - if j < cnt - 1: - nextid = idlst[j+1] - else: - nextid = None - print '.', - pagelst = pageIDMap[pageid] - flst = [] - for page in pagelst: - flst.append(xmllst[page]) - flat_svg = "".join(flst) - flst=None - svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi) - if (raw) : - pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w') - slst.append('Page %d\n' % (pageid, pageid)) - else : - pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w') - slst.append('Page %d\n' % (pageid, pageid)) - previd = pageid - pfile.write(svgxml) - pfile.close() - counter += 1 - slst.append('
\n') - slst.append('

Table of Contents

\n') - slst.append('\n\n') - svgindex = "".join(slst) - slst = None - file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex) - - print " " - - # build the opf file - opfname = os.path.join(bookDir, 'book.opf') - olst = [] - olst.append('\n') - olst.append('\n') - # adding metadata - olst.append(' \n') - if 'GUID' in meta_array: - olst.append(' ' + meta_array['GUID'] + '\n') - if 'ASIN' in meta_array: - olst.append(' ' + meta_array['ASIN'] + '\n') - if 'oASIN' in meta_array: - olst.append(' ' + meta_array['oASIN'] + '\n') - olst.append(' ' + meta_array['Title'] + '\n') - olst.append(' ' + meta_array['Authors'] + '\n') - olst.append(' en\n') - olst.append(' ' + meta_array['UpdateTime'] + '\n') - if isCover: - olst.append(' \n') - olst.append(' \n') - olst.append('\n') - olst.append(' \n') - olst.append(' \n') - # adding image files to manifest - filenames = os.listdir(imgDir) - filenames = sorted(filenames) - for filename in filenames: - imgname, imgext = os.path.splitext(filename) - if imgext == '.jpg': - imgext = 'jpeg' - if imgext == '.svg': - imgext = 'svg+xml' - olst.append(' \n') - if isCover: - olst.append(' \n') - olst.append('\n') - # adding spine - olst.append('\n \n\n') - if isCover: - olst.append(' \n') - olst.append(' \n') - olst.append(' \n') - olst.append('\n') - opfstr = "".join(olst) - olst = None - file(opfname, 'wb').write(opfstr) - - print 'Processing Complete' - - return 0 - -def usage(): - print "genbook.py generates a book from the extract Topaz Files" - print "Usage:" - print " genbook.py [-r] [-h [--fixed-image] " - print " " - print "Options:" - print " -h : help - print this usage message" - print " -r : generate raw svg files (not wrapped in xhtml)" - print " --fixed-image : genearate any Fixed Area as an svg image in the html" - print " " - - -def main(argv): - bookDir = '' - if len(argv) == 0: - argv = sys.argv - - try: - opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"]) - - except getopt.GetoptError, err: - print 
str(err) - usage() - return 1 - - if len(opts) == 0 and len(args) == 0 : - usage() - return 1 - - raw = 0 - fixedimage = True - for o, a in opts: - if o =="-h": - usage() - return 0 - if o =="-r": - raw = 1 - if o =="--fixed-image": - fixedimage = True - - bookDir = args[0] - - rv = generateBook(bookDir, raw, fixedimage) - return rv - - -if __name__ == '__main__': - sys.exit(main('')) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py index 813f8ce..b0fbaa4 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib index d491d7d..01c348c 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so index dd60706..9a5a442 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so index 40d84ad..a08ac28 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/outputfix.py b/Calibre_Plugins/K4MobiDeDRM_plugin/outputfix.py new file mode 100644 index 0000000..906c6e9 --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/outputfix.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# +# Adapted and simplified from the kitchen project +# +# Kitchen Project Copyright (c) 2012 Red Hat, Inc. 
+# +# kitchen is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# kitchen is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with kitchen; if not, see +# +# Authors: +# Toshio Kuratomi +# Seth Vidal +# +# Portions of code taken from yum/i18n.py and +# python-fedora: fedora/textutils.py + +import codecs + +# returns a char string unchanged +# returns a unicode string converted to a char string of the passed encoding +# return the empty string for anything else +def getwriter(encoding): + class _StreamWriter(codecs.StreamWriter): + def __init__(self, stream): + codecs.StreamWriter.__init__(self, stream, 'replace') + + def encode(self, msg, errors='replace'): + if isinstance(msg, basestring): + if isinstance(msg, str): + return (msg, len(msg)) + return (msg.encode(self.encoding, 'replace'), len(msg)) + return ('',0) + + _StreamWriter.encoding = encoding + return _StreamWriter diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py b/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py index 9a5a442..65220a9 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt index a08ac28..e69de29 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt and b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt differ diff --git 
a/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py b/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py index 1ad2bac..98b4147 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py @@ -1,460 +1,27 @@ -#!/usr/bin/python -# -# This is a python script. You need a Python interpreter to run it. -# For example, ActiveState Python, which exists for windows. -# -# Changelog -# 0.01 - Initial version -# 0.02 - Huffdic compressed books were not properly decrypted -# 0.03 - Wasn't checking MOBI header length -# 0.04 - Wasn't sanity checking size of data record -# 0.05 - It seems that the extra data flags take two bytes not four -# 0.06 - And that low bit does mean something after all :-) -# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size -# 0.08 - ...and also not in Mobi header version < 6 -# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4! -# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre -# import filter it works when importing unencrypted files. -# Also now handles encrypted files that don't need a specific PID. -# 0.11 - use autoflushed stdout and proper return values -# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors -# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace -# and extra blank lines, converted CR/LF pairs at ends of each line, -# and other cosmetic fixes. -# 0.14 - Working out when the extra data flags are present has been problematic -# Versions 7 through 9 have tried to tweak the conditions, but have been -# only partially successful. Closer examination of lots of sample -# files reveals that a confusion has arisen because trailing data entries -# are not encrypted, but it turns out that the multibyte entries -# in utf8 file are encrypted. (Although neither kind gets compressed.) 
-# This knowledge leads to a simplification of the test for the -# trailing data byte flags - version 5 and higher AND header size >= 0xE4. -# 0.15 - Now outputs 'heartbeat', and is also quicker for long files. -# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility. -# 0.17 - added modifications to support its use as an imported python module -# both inside calibre and also in other places (ie K4DeDRM tools) -# 0.17a- disabled the standalone plugin feature since a plugin can not import -# a plugin -# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file... -# Removed the disabled Calibre plug-in code -# Permit use of 8-digit PIDs -# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either. -# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file. -# 0.21 - Added support for multiple pids -# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface -# 0.23 - fixed problem with older files with no EXTH section -# 0.24 - add support for type 1 encryption and 'TEXtREAd' books as well -# 0.25 - Fixed support for 'BOOKMOBI' type 1 encryption -# 0.26 - Now enables Text-To-Speech flag and sets clipping limit to 100% -# 0.27 - Correct pid metadata token generation to match that used by skindle (Thank You Bart!) -# 0.28 - slight additional changes to metadata token generation (None -> '') -# 0.29 - It seems that the ideas about when multibyte trailing characters were -# included in the encryption were wrong. They are for DOC compressed -# files, but they are not for HUFF/CDIC compress files! -# 0.30 - Modified interface slightly to work better with new calibre plugin style -# 0.31 - The multibyte encrytion info is true for version 7 files too. 
-# 0.32 - Added support for "Print Replica" Kindle ebooks -# 0.33 - Performance improvements for large files (concatenation) -# 0.34 - Performance improvements in decryption (libalfcrypto) -# 0.35 - add interface to get mobi_version -# 0.36 - fixed problem with TEXtREAd and getBookTitle interface -# 0.37 - Fixed double announcement for stand-alone operation +#!/usr/bin/env python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +import Tkinter +import Tkconstants -__version__ = '0.37' +# basic scrolled text widget +class ScrolledText(Tkinter.Text): + def __init__(self, master=None, **kw): + self.frame = Tkinter.Frame(master) + self.vbar = Tkinter.Scrollbar(self.frame) + self.vbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y) + kw.update({'yscrollcommand': self.vbar.set}) + Tkinter.Text.__init__(self, self.frame, **kw) + self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True) + self.vbar['command'] = self.yview + # Copy geometry methods of self.frame without overriding Text + # methods = hack! + text_meths = vars(Tkinter.Text).keys() + methods = vars(Tkinter.Pack).keys() + vars(Tkinter.Grid).keys() + vars(Tkinter.Place).keys() + methods = set(methods).difference(text_meths) + for m in methods: + if m[0] != '_' and m != 'config' and m != 'configure': + setattr(self, m, getattr(self.frame, m)) -import sys - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) -sys.stdout=Unbuffered(sys.stdout) - -import os -import struct -import binascii -from alfcrypto import Pukall_Cipher - -class DrmException(Exception): - pass - - -# -# MobiBook Utility Routines -# - -# Implementation of Pukall Cipher 1 -def PC1(key, src, decryption=True): - return Pukall_Cipher().PC1(key,src,decryption) -# sum1 = 0; -# sum2 = 0; -# keyXorVal = 0; -# if len(key)!=16: -# print "Bad key length!" 
-# return None -# wkey = [] -# for i in xrange(8): -# wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) -# dst = "" -# for i in xrange(len(src)): -# temp1 = 0; -# byteXorVal = 0; -# for j in xrange(8): -# temp1 ^= wkey[j] -# sum2 = (sum2+j)*20021 + sum1 -# sum1 = (temp1*346)&0xFFFF -# sum2 = (sum2+sum1)&0xFFFF -# temp1 = (temp1*20021+1)&0xFFFF -# byteXorVal ^= temp1 ^ sum2 -# curByte = ord(src[i]) -# if not decryption: -# keyXorVal = curByte * 257; -# curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF -# if decryption: -# keyXorVal = curByte * 257; -# for j in xrange(8): -# wkey[j] ^= keyXorVal; -# dst+=chr(curByte) -# return dst - -def checksumPid(s): - letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789" - crc = (~binascii.crc32(s,-1))&0xFFFFFFFF - crc = crc ^ (crc >> 16) - res = s - l = len(letters) - for i in (0,1): - b = crc & 0xff - pos = (b // l) ^ (b % l) - res += letters[pos%l] - crc >>= 8 - return res - -def getSizeOfTrailingDataEntries(ptr, size, flags): - def getSizeOfTrailingDataEntry(ptr, size): - bitpos, result = 0, 0 - if size <= 0: - return result - while True: - v = ord(ptr[size-1]) - result |= (v & 0x7F) << bitpos - bitpos += 7 - size -= 1 - if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0): - return result - num = 0 - testflags = flags >> 1 - while testflags: - if testflags & 1: - num += getSizeOfTrailingDataEntry(ptr, size - num) - testflags >>= 1 - # Check the low bit to see if there's multibyte data present. - # if multibyte data is included in the encryped data, we'll - # have already cleared this flag. - if flags & 1: - num += (ord(ptr[size - num - 1]) & 0x3) + 1 - return num - - - -class MobiBook: - def loadSection(self, section): - if (section + 1 == self.num_sections): - endoff = len(self.data_file) - else: - endoff = self.sections[section + 1][0] - off = self.sections[section][0] - return self.data_file[off:endoff] - - def __init__(self, infile, announce = True): - if announce: - print ('MobiDeDrm v%(__version__)s. 
' - 'Copyright 2008-2012 The Dark Reverser et al.' % globals()) - - # initial sanity check on file - self.data_file = file(infile, 'rb').read() - self.mobi_data = '' - self.header = self.data_file[0:78] - if self.header[0x3C:0x3C+8] != 'BOOKMOBI' and self.header[0x3C:0x3C+8] != 'TEXtREAd': - raise DrmException("invalid file format") - self.magic = self.header[0x3C:0x3C+8] - self.crypto_type = -1 - - # build up section offset and flag info - self.num_sections, = struct.unpack('>H', self.header[76:78]) - self.sections = [] - for i in xrange(self.num_sections): - offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8]) - flags, val = a1, a2<<16|a3<<8|a4 - self.sections.append( (offset, flags, val) ) - - # parse information from section 0 - self.sect = self.loadSection(0) - self.records, = struct.unpack('>H', self.sect[0x8:0x8+2]) - self.compression, = struct.unpack('>H', self.sect[0x0:0x0+2]) - - if self.magic == 'TEXtREAd': - print "Book has format: ", self.magic - self.extra_data_flags = 0 - self.mobi_length = 0 - self.mobi_codepage = 1252 - self.mobi_version = -1 - self.meta_array = {} - return - self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18]) - self.mobi_codepage, = struct.unpack('>L',self.sect[0x1c:0x20]) - self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C]) - print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length) - self.extra_data_flags = 0 - if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5): - self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4]) - print "Extra Data Flags = %d" % self.extra_data_flags - if (self.compression != 17480): - # multibyte utf8 data is included in the encryption for PalmDoc compression - # so clear that byte so that we leave it to be decrypted. 
- self.extra_data_flags &= 0xFFFE - - # if exth region exists parse it for metadata array - self.meta_array = {} - try: - exth_flag, = struct.unpack('>L', self.sect[0x80:0x84]) - exth = 'NONE' - if exth_flag & 0x40: - exth = self.sect[16 + self.mobi_length:] - if (len(exth) >= 4) and (exth[:4] == 'EXTH'): - nitems, = struct.unpack('>I', exth[8:12]) - pos = 12 - for i in xrange(nitems): - type, size = struct.unpack('>II', exth[pos: pos + 8]) - content = exth[pos + 8: pos + size] - self.meta_array[type] = content - # reset the text to speech flag and clipping limit, if present - if type == 401 and size == 9: - # set clipping limit to 100% - self.patchSection(0, "\144", 16 + self.mobi_length + pos + 8) - elif type == 404 and size == 9: - # make sure text to speech is enabled - self.patchSection(0, "\0", 16 + self.mobi_length + pos + 8) - # print type, size, content, content.encode('hex') - pos += size - except: - self.meta_array = {} - pass - self.print_replica = False - - def getBookTitle(self): - codec_map = { - 1252 : 'windows-1252', - 65001 : 'utf-8', - } - title = '' - codec = 'windows-1252' - if self.magic == 'BOOKMOBI': - if 503 in self.meta_array: - title = self.meta_array[503] - else: - toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c]) - tend = toff + tlen - title = self.sect[toff:tend] - if self.mobi_codepage in codec_map.keys(): - codec = codec_map[self.mobi_codepage] - if title == '': - title = self.header[:32] - title = title.split("\0")[0] - return unicode(title, codec).encode('utf-8') - - def getPIDMetaInfo(self): - rec209 = '' - token = '' - if 209 in self.meta_array: - rec209 = self.meta_array[209] - data = rec209 - # The 209 data comes in five byte groups. 
Interpret the last four bytes - # of each group as a big endian unsigned integer to get a key value - # if that key exists in the meta_array, append its contents to the token - for i in xrange(0,len(data),5): - val, = struct.unpack('>I',data[i+1:i+5]) - sval = self.meta_array.get(val,'') - token += sval - return rec209, token - - def patch(self, off, new): - self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] - - def patchSection(self, section, new, in_off = 0): - if (section + 1 == self.num_sections): - endoff = len(self.data_file) - else: - endoff = self.sections[section + 1][0] - off = self.sections[section][0] - assert off + in_off + len(new) <= endoff - self.patch(off + in_off, new) - - def parseDRM(self, data, count, pidlist): - found_key = None - keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96" - for pid in pidlist: - bigpid = pid.ljust(16,'\0') - temp_key = PC1(keyvec1, bigpid, False) - temp_key_sum = sum(map(ord,temp_key)) & 0xff - found_key = None - for i in xrange(count): - verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30]) - if cksum == temp_key_sum: - cookie = PC1(temp_key, cookie) - ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie) - if verification == ver and (flags & 0x1F) == 1: - found_key = finalkey - break - if found_key != None: - break - if not found_key: - # Then try the default encoding that doesn't require a PID - pid = "00000000" - temp_key = keyvec1 - temp_key_sum = sum(map(ord,temp_key)) & 0xff - for i in xrange(count): - verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30]) - if cksum == temp_key_sum: - cookie = PC1(temp_key, cookie) - ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie) - if verification == ver: - found_key = finalkey - break - return [found_key,pid] - - def getMobiFile(self, outpath): - file(outpath,'wb').write(self.mobi_data) - - def 
getMobiVersion(self): - return self.mobi_version - - def getPrintReplica(self): - return self.print_replica - - def processBook(self, pidlist): - crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2]) - print 'Crypto Type is: ', crypto_type - self.crypto_type = crypto_type - if crypto_type == 0: - print "This book is not encrypted." - # we must still check for Print Replica - self.print_replica = (self.loadSection(1)[0:4] == '%MOP') - self.mobi_data = self.data_file - return - if crypto_type != 2 and crypto_type != 1: - raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type) - if 406 in self.meta_array: - data406 = self.meta_array[406] - val406, = struct.unpack('>Q',data406) - if val406 != 0: - raise DrmException("Cannot decode library or rented ebooks.") - - goodpids = [] - for pid in pidlist: - if len(pid)==10: - if checksumPid(pid[0:-2]) != pid: - print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2]) - goodpids.append(pid[0:-2]) - elif len(pid)==8: - goodpids.append(pid) - - if self.crypto_type == 1: - t1_keyvec = "QDCVEPMU675RUBSZ" - if self.magic == 'TEXtREAd': - bookkey_data = self.sect[0x0E:0x0E+16] - elif self.mobi_version < 0: - bookkey_data = self.sect[0x90:0x90+16] - else: - bookkey_data = self.sect[self.mobi_length+16:self.mobi_length+32] - pid = "00000000" - found_key = PC1(t1_keyvec, bookkey_data) - else : - # calculate the keys - drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16]) - if drm_count == 0: - raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.") - found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids) - if not found_key: - raise DrmException("No key found in " + str(len(goodpids)) + " keys tried. 
Please report this failure for help.") - # kill the drm keys - self.patchSection(0, "\0" * drm_size, drm_ptr) - # kill the drm pointers - self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8) - - if pid=="00000000": - print "File has default encryption, no specific PID." - else: - print "File is encoded with PID "+checksumPid(pid)+"." - - # clear the crypto type - self.patchSection(0, "\0" * 2, 0xC) - - # decrypt sections - print "Decrypting. Please wait . . .", - mobidataList = [] - mobidataList.append(self.data_file[:self.sections[1][0]]) - for i in xrange(1, self.records+1): - data = self.loadSection(i) - extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags) - if i%100 == 0: - print ".", - # print "record %d, extra_size %d" %(i,extra_size) - decoded_data = PC1(found_key, data[0:len(data) - extra_size]) - if i==1: - self.print_replica = (decoded_data[0:4] == '%MOP') - mobidataList.append(decoded_data) - if extra_size > 0: - mobidataList.append(data[-extra_size:]) - if self.num_sections > self.records+1: - mobidataList.append(self.data_file[self.sections[self.records+1][0]:]) - self.mobi_data = "".join(mobidataList) - print "done" - return - -def getUnencryptedBook(infile,pid,announce=True): - if not os.path.isfile(infile): - raise DrmException('Input File Not Found') - book = MobiBook(infile,announce) - book.processBook([pid]) - return book.mobi_data - -def getUnencryptedBookWithList(infile,pidlist,announce=True): - if not os.path.isfile(infile): - raise DrmException('Input File Not Found') - book = MobiBook(infile, announce) - book.processBook(pidlist) - return book.mobi_data - - -def main(argv=sys.argv): - print ('MobiDeDrm v%(__version__)s. ' - 'Copyright 2008-2012 The Dark Reverser et al.' 
% globals()) - if len(argv)<3 or len(argv)>4: - print "Removes protection from Kindle/Mobipocket, Kindle/KF8 and Kindle/Print Replica ebooks" - print "Usage:" - print " %s []" % sys.argv[0] - return 1 - else: - infile = argv[1] - outfile = argv[2] - if len(argv) is 4: - pidlist = argv[3].split(',') - else: - pidlist = {} - try: - stripped_file = getUnencryptedBookWithList(infile, pidlist, False) - file(outfile, 'wb').write(stripped_file) - except DrmException, e: - print "Error: %s" % e - return 1 - return 0 - - -if __name__ == "__main__": - sys.exit(main()) + def __str__(self): + return str(self.frame) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py b/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py index 906c6e9..2347f6a 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py @@ -1,45 +1,266 @@ -# -*- coding: utf-8 -*- -# -# Adapted and simplified from the kitchen project -# -# Kitchen Project Copyright (c) 2012 Red Hat, Inc. -# -# kitchen is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# kitchen is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with kitchen; if not, see -# -# Authors: -# Toshio Kuratomi -# Seth Vidal -# -# Portions of code taken from yum/i18n.py and -# python-fedora: fedora/textutils.py +#! 
/usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# For use with Topaz Scripts Version 2.6 -import codecs +import csv +import sys +import os +import getopt +import re +from struct import pack +from struct import unpack -# returns a char string unchanged -# returns a unicode string converted to a char string of the passed encoding -# return the empty string for anything else -def getwriter(encoding): - class _StreamWriter(codecs.StreamWriter): - def __init__(self, stream): - codecs.StreamWriter.__init__(self, stream, 'replace') - def encode(self, msg, errors='replace'): - if isinstance(msg, basestring): - if isinstance(msg, str): - return (msg, len(msg)) - return (msg.encode(self.encoding, 'replace'), len(msg)) - return ('',0) +class DocParser(object): + def __init__(self, flatxml, fontsize, ph, pw): + self.flatdoc = flatxml.split('\n') + self.fontsize = int(fontsize) + self.ph = int(ph) * 1.0 + self.pw = int(pw) * 1.0 - _StreamWriter.encoding = encoding - return _StreamWriter + stags = { + 'paragraph' : 'p', + 'graphic' : '.graphic' + } + + attr_val_map = { + 'hang' : 'text-indent: ', + 'indent' : 'text-indent: ', + 'line-space' : 'line-height: ', + 'margin-bottom' : 'margin-bottom: ', + 'margin-left' : 'margin-left: ', + 'margin-right' : 'margin-right: ', + 'margin-top' : 'margin-top: ', + 'space-after' : 'padding-bottom: ', + } + + attr_str_map = { + 'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;', + 'align-left' : 'text-align: left;', + 'align-right' : 'text-align: right;', + 'align-justify' : 'text-align: justify;', + 'display-inline' : 'display: inline;', + 'pos-left' : 'text-align: left;', + 'pos-right' : 'text-align: right;', + 'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;', + } + + + # find tag if within pos to end inclusive + def findinDoc(self, tagpath, pos, end) : + result = None + docList = self.flatdoc + cnt = len(docList) + if end == -1 : + end = cnt + else: + end = 
min(cnt,end) + foundat = -1 + for j in xrange(pos, end): + item = docList[j] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + if name.endswith(tagpath) : + result = argres + foundat = j + break + return foundat, result + + + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos + + # returns a vector of integers for the tagpath + def getData(self, tagpath, pos, end, clean=False): + if clean: + digits_only = re.compile(r'''([0-9]+)''') + argres=[] + (foundat, argt) = self.findinDoc(tagpath, pos, end) + if (argt != None) and (len(argt) > 0) : + argList = argt.split('|') + for strval in argList: + if clean: + m = re.search(digits_only, strval) + if m != None: + strval = m.group() + argres.append(int(strval)) + return argres + + def process(self): + + classlst = '' + csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n' + csspage += '.cl-right { text-align: right; }\n' + csspage += '.cl-left { text-align: left; }\n' + csspage += '.cl-justify { text-align: justify; }\n' + + # generate a list of each