tools v1.1

2010-02-14 15:47:48 +00:00 · 2010-02-14 15:47:48 +00:00 · 2819550411
parent f8154c4615
commit 2819550411
2 changed files with 61 additions and 42 deletions
--- a/Adobe_EPUB_Tools/ineptepub.pyw
+++ b/Adobe_EPUB_Tools/ineptepub.pyw
@@ -62,7 +62,7 @@ class ASN1Parser(object):
        def __init__(self, bytes):
            self.bytes = bytes
            self.index = 0
-    
+
        def get(self, length):
            if self.index + length > len(self.bytes):
                raise ASN1Error("Error decoding ASN.1")
@@ -72,22 +72,22 @@ class ASN1Parser(object):
                x |= self.bytes[self.index]
                self.index += 1
            return x
-    
+
        def getFixBytes(self, lengthBytes):
            bytes = self.bytes[self.index : self.index+lengthBytes]
            self.index += lengthBytes
            return bytes
-    
+
        def getVarBytes(self, lengthLength):
            lengthBytes = self.get(lengthLength)
            return self.getFixBytes(lengthBytes)
-    
+
        def getFixList(self, length, lengthList):
            l = [0] * lengthList
            for x in range(lengthList):
                l[x] = self.get(length)
            return l
-    
+
        def getVarList(self, length, lengthLength):
            lengthList = self.get(lengthLength)
            if lengthList % length != 0:
@@ -97,19 +97,19 @@ class ASN1Parser(object):
            for x in range(lengthList):
                l[x] = self.get(length)
            return l
-    
+
        def startLengthCheck(self, lengthLength):
            self.lengthCheck = self.get(lengthLength)
            self.indexCheck = self.index
-    
+
        def setLengthCheck(self, length):
            self.lengthCheck = length
            self.indexCheck = self.index
-    
+
        def stopLengthCheck(self):
            if (self.index - self.indexCheck) != self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")
-    
+
        def atLengthCheck(self):
            if (self.index - self.indexCheck) < self.lengthCheck:
                return False
@@ -162,7 +162,7 @@ class Decryptor(object):
            path = elem.get('URI', None)
            if path is not None:
                encrypted.add(path)
-    
+
    def decompress(self, bytes):
        dc = zlib.decompressobj(-15)
        bytes = dc.decompress(bytes)
@@ -170,7 +170,7 @@ class Decryptor(object):
        if ex:
            bytes = bytes + ex
        return bytes
-    
+
    def decrypt(self, path, data):
        if path in self._encrypted:
            data = self._aes.decrypt(data)[16:]
@@ -336,5 +336,6 @@ def gui_main():
    return 0

 if __name__ == '__main__':
-    # sys.exit(cli_main())
+    if len(sys.argv) > 1:
+        sys.exit(cli_main())
    sys.exit(gui_main())
--- a/Topaz_Tools/lib/flatxml2html.py
+++ b/Topaz_Tools/lib/flatxml2html.py
@@ -346,35 +346,40 @@ class DocParser(object):
        if end == -1 :
            end = self.docSize

+        # seems some xml has last* coming before first* so we have to 
+        # handle any order
+        sp_first = -1
+        sp_last = -1
+
+        gl_first = -1
+        gl_last = -1
+
+        ws_first = -1
+        ws_last = -1
+
+        word_class = ''
+
        while (line < end) :

            (name, argres) = self.lineinDoc(line)

-            # handle both span and _span
            if name.endswith('span.firstWord') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('span.lastWord'):
-                    print 'Error: - incorrect _span ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
-                    result.append(('ocr', wordnum))
-                line += 1
+                sp_first = int(argres)
+
+            elif name.endswith('span.lastWord') :
+                sp_last = int(argres)

            elif name.endswith('word.firstGlyph') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word.lastGlyph'):
-                    print 'Error: - incorrect glyph ordering inside word in paragraph'
-                last = int(argres)
-                glyphList = []
-                for glyphnum in xrange(first, last):
-                    glyphList.append(glyphnum)
-                num = self.svgcount
-                self.glyphs_to_image(glyphList)
-                self.svgcount += 1
-                result.append(('svg', num))
-                line += 1
+                gl_first = int(argres)
+
+            elif name.endswith('word.lastGlyph') :
+                gl_last = int(argres)
+
+            elif name.endswith('word_semantic.firstWord'):
+                ws_first = int(argres)
+
+            elif name.endswith('word_semantic.lastWord'):
+                ws_last = int(argres)

            elif name.endswith('word.class'):
               (cname, space) = argres.split('-',1)
@@ -386,15 +391,28 @@ class DocParser(object):
                result.append(('img' + word_class, int(argres)))
                word_class = ''

-            elif name.endswith('word_semantic.firstWord'):
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word_semantic.lastWord'):
-                    print 'Error: - incorrect word_semantic ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
+            if (sp_first != -1) and (sp_last != -1):
+                for wordnum in xrange(sp_first, sp_last):
                    result.append(('ocr', wordnum))
-                line += 1
+                sp_first = -1
+                sp_last = -1
+
+            if (gl_first != -1) and (gl_last != -1):
+                glyphList = []
+                for glyphnum in xrange(gl_first, gl_last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                gl_first = -1
+                gl_last = -1
+
+            if (ws_first != -1) and (ws_last != -1):
+                for wordnum in xrange(ws_first, ws_last):
+                    result.append(('ocr', wordnum))
+                ws_first = -1
+                ws_last = -1
                              
            line += 1