tools v1.1

This commit is contained in:
Apprentice Alf 2010-02-14 15:47:48 +00:00
parent f8154c4615
commit 2819550411
2 changed files with 61 additions and 42 deletions

View File

@ -62,7 +62,7 @@ class ASN1Parser(object):
def __init__(self, bytes): def __init__(self, bytes):
self.bytes = bytes self.bytes = bytes
self.index = 0 self.index = 0
def get(self, length): def get(self, length):
if self.index + length > len(self.bytes): if self.index + length > len(self.bytes):
raise ASN1Error("Error decoding ASN.1") raise ASN1Error("Error decoding ASN.1")
@ -72,22 +72,22 @@ class ASN1Parser(object):
x |= self.bytes[self.index] x |= self.bytes[self.index]
self.index += 1 self.index += 1
return x return x
def getFixBytes(self, lengthBytes): def getFixBytes(self, lengthBytes):
bytes = self.bytes[self.index : self.index+lengthBytes] bytes = self.bytes[self.index : self.index+lengthBytes]
self.index += lengthBytes self.index += lengthBytes
return bytes return bytes
def getVarBytes(self, lengthLength): def getVarBytes(self, lengthLength):
lengthBytes = self.get(lengthLength) lengthBytes = self.get(lengthLength)
return self.getFixBytes(lengthBytes) return self.getFixBytes(lengthBytes)
def getFixList(self, length, lengthList): def getFixList(self, length, lengthList):
l = [0] * lengthList l = [0] * lengthList
for x in range(lengthList): for x in range(lengthList):
l[x] = self.get(length) l[x] = self.get(length)
return l return l
def getVarList(self, length, lengthLength): def getVarList(self, length, lengthLength):
lengthList = self.get(lengthLength) lengthList = self.get(lengthLength)
if lengthList % length != 0: if lengthList % length != 0:
@ -97,19 +97,19 @@ class ASN1Parser(object):
for x in range(lengthList): for x in range(lengthList):
l[x] = self.get(length) l[x] = self.get(length)
return l return l
def startLengthCheck(self, lengthLength): def startLengthCheck(self, lengthLength):
self.lengthCheck = self.get(lengthLength) self.lengthCheck = self.get(lengthLength)
self.indexCheck = self.index self.indexCheck = self.index
def setLengthCheck(self, length): def setLengthCheck(self, length):
self.lengthCheck = length self.lengthCheck = length
self.indexCheck = self.index self.indexCheck = self.index
def stopLengthCheck(self): def stopLengthCheck(self):
if (self.index - self.indexCheck) != self.lengthCheck: if (self.index - self.indexCheck) != self.lengthCheck:
raise ASN1Error("Error decoding ASN.1") raise ASN1Error("Error decoding ASN.1")
def atLengthCheck(self): def atLengthCheck(self):
if (self.index - self.indexCheck) < self.lengthCheck: if (self.index - self.indexCheck) < self.lengthCheck:
return False return False
@ -162,7 +162,7 @@ class Decryptor(object):
path = elem.get('URI', None) path = elem.get('URI', None)
if path is not None: if path is not None:
encrypted.add(path) encrypted.add(path)
def decompress(self, bytes): def decompress(self, bytes):
dc = zlib.decompressobj(-15) dc = zlib.decompressobj(-15)
bytes = dc.decompress(bytes) bytes = dc.decompress(bytes)
@ -170,7 +170,7 @@ class Decryptor(object):
if ex: if ex:
bytes = bytes + ex bytes = bytes + ex
return bytes return bytes
def decrypt(self, path, data): def decrypt(self, path, data):
if path in self._encrypted: if path in self._encrypted:
data = self._aes.decrypt(data)[16:] data = self._aes.decrypt(data)[16:]
@ -336,5 +336,6 @@ def gui_main():
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
# sys.exit(cli_main()) if len(sys.argv) > 1:
sys.exit(cli_main())
sys.exit(gui_main()) sys.exit(gui_main())

View File

@ -346,35 +346,40 @@ class DocParser(object):
if end == -1 : if end == -1 :
end = self.docSize end = self.docSize
# seems some xml has last* coming before first* so we have to
# handle any order
sp_first = -1
sp_last = -1
gl_first = -1
gl_last = -1
ws_first = -1
ws_last = -1
word_class = ''
while (line < end) : while (line < end) :
(name, argres) = self.lineinDoc(line) (name, argres) = self.lineinDoc(line)
# handle both span and _span
if name.endswith('span.firstWord') : if name.endswith('span.firstWord') :
first = int(argres) sp_first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('span.lastWord'): elif name.endswith('span.lastWord') :
print 'Error: - incorrect _span ordering inside paragraph' sp_last = int(argres)
last = int(argres)
for wordnum in xrange(first, last):
result.append(('ocr', wordnum))
line += 1
elif name.endswith('word.firstGlyph') : elif name.endswith('word.firstGlyph') :
first = int(argres) gl_first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('word.lastGlyph'): elif name.endswith('word.lastGlyph') :
print 'Error: - incorrect glyph ordering inside word in paragraph' gl_last = int(argres)
last = int(argres)
glyphList = [] elif name.endswith('word_semantic.firstWord'):
for glyphnum in xrange(first, last): ws_first = int(argres)
glyphList.append(glyphnum)
num = self.svgcount elif name.endswith('word_semantic.lastWord'):
self.glyphs_to_image(glyphList) ws_last = int(argres)
self.svgcount += 1
result.append(('svg', num))
line += 1
elif name.endswith('word.class'): elif name.endswith('word.class'):
(cname, space) = argres.split('-',1) (cname, space) = argres.split('-',1)
@ -386,15 +391,28 @@ class DocParser(object):
result.append(('img' + word_class, int(argres))) result.append(('img' + word_class, int(argres)))
word_class = '' word_class = ''
elif name.endswith('word_semantic.firstWord'): if (sp_first != -1) and (sp_last != -1):
first = int(argres) for wordnum in xrange(sp_first, sp_last):
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('word_semantic.lastWord'):
print 'Error: - incorrect word_semantic ordering inside paragraph'
last = int(argres)
for wordnum in xrange(first, last):
result.append(('ocr', wordnum)) result.append(('ocr', wordnum))
line += 1 sp_first = -1
sp_last = -1
if (gl_first != -1) and (gl_last != -1):
glyphList = []
for glyphnum in xrange(gl_first, gl_last):
glyphList.append(glyphnum)
num = self.svgcount
self.glyphs_to_image(glyphList)
self.svgcount += 1
result.append(('svg', num))
gl_first = -1
gl_last = -1
if (ws_first != -1) and (ws_last != -1):
for wordnum in xrange(ws_first, ws_last):
result.append(('ocr', wordnum))
ws_first = -1
ws_last = -1
line += 1 line += 1