tools v1.1
This commit is contained in:
parent
f8154c4615
commit
2819550411
|
@ -62,7 +62,7 @@ class ASN1Parser(object):
|
||||||
def __init__(self, bytes):
|
def __init__(self, bytes):
|
||||||
self.bytes = bytes
|
self.bytes = bytes
|
||||||
self.index = 0
|
self.index = 0
|
||||||
|
|
||||||
def get(self, length):
|
def get(self, length):
|
||||||
if self.index + length > len(self.bytes):
|
if self.index + length > len(self.bytes):
|
||||||
raise ASN1Error("Error decoding ASN.1")
|
raise ASN1Error("Error decoding ASN.1")
|
||||||
|
@ -72,22 +72,22 @@ class ASN1Parser(object):
|
||||||
x |= self.bytes[self.index]
|
x |= self.bytes[self.index]
|
||||||
self.index += 1
|
self.index += 1
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def getFixBytes(self, lengthBytes):
|
def getFixBytes(self, lengthBytes):
|
||||||
bytes = self.bytes[self.index : self.index+lengthBytes]
|
bytes = self.bytes[self.index : self.index+lengthBytes]
|
||||||
self.index += lengthBytes
|
self.index += lengthBytes
|
||||||
return bytes
|
return bytes
|
||||||
|
|
||||||
def getVarBytes(self, lengthLength):
|
def getVarBytes(self, lengthLength):
|
||||||
lengthBytes = self.get(lengthLength)
|
lengthBytes = self.get(lengthLength)
|
||||||
return self.getFixBytes(lengthBytes)
|
return self.getFixBytes(lengthBytes)
|
||||||
|
|
||||||
def getFixList(self, length, lengthList):
|
def getFixList(self, length, lengthList):
|
||||||
l = [0] * lengthList
|
l = [0] * lengthList
|
||||||
for x in range(lengthList):
|
for x in range(lengthList):
|
||||||
l[x] = self.get(length)
|
l[x] = self.get(length)
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def getVarList(self, length, lengthLength):
|
def getVarList(self, length, lengthLength):
|
||||||
lengthList = self.get(lengthLength)
|
lengthList = self.get(lengthLength)
|
||||||
if lengthList % length != 0:
|
if lengthList % length != 0:
|
||||||
|
@ -97,19 +97,19 @@ class ASN1Parser(object):
|
||||||
for x in range(lengthList):
|
for x in range(lengthList):
|
||||||
l[x] = self.get(length)
|
l[x] = self.get(length)
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def startLengthCheck(self, lengthLength):
|
def startLengthCheck(self, lengthLength):
|
||||||
self.lengthCheck = self.get(lengthLength)
|
self.lengthCheck = self.get(lengthLength)
|
||||||
self.indexCheck = self.index
|
self.indexCheck = self.index
|
||||||
|
|
||||||
def setLengthCheck(self, length):
|
def setLengthCheck(self, length):
|
||||||
self.lengthCheck = length
|
self.lengthCheck = length
|
||||||
self.indexCheck = self.index
|
self.indexCheck = self.index
|
||||||
|
|
||||||
def stopLengthCheck(self):
|
def stopLengthCheck(self):
|
||||||
if (self.index - self.indexCheck) != self.lengthCheck:
|
if (self.index - self.indexCheck) != self.lengthCheck:
|
||||||
raise ASN1Error("Error decoding ASN.1")
|
raise ASN1Error("Error decoding ASN.1")
|
||||||
|
|
||||||
def atLengthCheck(self):
|
def atLengthCheck(self):
|
||||||
if (self.index - self.indexCheck) < self.lengthCheck:
|
if (self.index - self.indexCheck) < self.lengthCheck:
|
||||||
return False
|
return False
|
||||||
|
@ -162,7 +162,7 @@ class Decryptor(object):
|
||||||
path = elem.get('URI', None)
|
path = elem.get('URI', None)
|
||||||
if path is not None:
|
if path is not None:
|
||||||
encrypted.add(path)
|
encrypted.add(path)
|
||||||
|
|
||||||
def decompress(self, bytes):
|
def decompress(self, bytes):
|
||||||
dc = zlib.decompressobj(-15)
|
dc = zlib.decompressobj(-15)
|
||||||
bytes = dc.decompress(bytes)
|
bytes = dc.decompress(bytes)
|
||||||
|
@ -170,7 +170,7 @@ class Decryptor(object):
|
||||||
if ex:
|
if ex:
|
||||||
bytes = bytes + ex
|
bytes = bytes + ex
|
||||||
return bytes
|
return bytes
|
||||||
|
|
||||||
def decrypt(self, path, data):
|
def decrypt(self, path, data):
|
||||||
if path in self._encrypted:
|
if path in self._encrypted:
|
||||||
data = self._aes.decrypt(data)[16:]
|
data = self._aes.decrypt(data)[16:]
|
||||||
|
@ -336,5 +336,6 @@ def gui_main():
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# sys.exit(cli_main())
|
if len(sys.argv) > 1:
|
||||||
|
sys.exit(cli_main())
|
||||||
sys.exit(gui_main())
|
sys.exit(gui_main())
|
||||||
|
|
|
@ -346,35 +346,40 @@ class DocParser(object):
|
||||||
if end == -1 :
|
if end == -1 :
|
||||||
end = self.docSize
|
end = self.docSize
|
||||||
|
|
||||||
|
# seems some xml has last* coming before first* so we have to
|
||||||
|
# handle any order
|
||||||
|
sp_first = -1
|
||||||
|
sp_last = -1
|
||||||
|
|
||||||
|
gl_first = -1
|
||||||
|
gl_last = -1
|
||||||
|
|
||||||
|
ws_first = -1
|
||||||
|
ws_last = -1
|
||||||
|
|
||||||
|
word_class = ''
|
||||||
|
|
||||||
while (line < end) :
|
while (line < end) :
|
||||||
|
|
||||||
(name, argres) = self.lineinDoc(line)
|
(name, argres) = self.lineinDoc(line)
|
||||||
|
|
||||||
# handle both span and _span
|
|
||||||
if name.endswith('span.firstWord') :
|
if name.endswith('span.firstWord') :
|
||||||
first = int(argres)
|
sp_first = int(argres)
|
||||||
(name, argres) = self.lineinDoc(line+1)
|
|
||||||
if not name.endswith('span.lastWord'):
|
elif name.endswith('span.lastWord') :
|
||||||
print 'Error: - incorrect _span ordering inside paragraph'
|
sp_last = int(argres)
|
||||||
last = int(argres)
|
|
||||||
for wordnum in xrange(first, last):
|
|
||||||
result.append(('ocr', wordnum))
|
|
||||||
line += 1
|
|
||||||
|
|
||||||
elif name.endswith('word.firstGlyph') :
|
elif name.endswith('word.firstGlyph') :
|
||||||
first = int(argres)
|
gl_first = int(argres)
|
||||||
(name, argres) = self.lineinDoc(line+1)
|
|
||||||
if not name.endswith('word.lastGlyph'):
|
elif name.endswith('word.lastGlyph') :
|
||||||
print 'Error: - incorrect glyph ordering inside word in paragraph'
|
gl_last = int(argres)
|
||||||
last = int(argres)
|
|
||||||
glyphList = []
|
elif name.endswith('word_semantic.firstWord'):
|
||||||
for glyphnum in xrange(first, last):
|
ws_first = int(argres)
|
||||||
glyphList.append(glyphnum)
|
|
||||||
num = self.svgcount
|
elif name.endswith('word_semantic.lastWord'):
|
||||||
self.glyphs_to_image(glyphList)
|
ws_last = int(argres)
|
||||||
self.svgcount += 1
|
|
||||||
result.append(('svg', num))
|
|
||||||
line += 1
|
|
||||||
|
|
||||||
elif name.endswith('word.class'):
|
elif name.endswith('word.class'):
|
||||||
(cname, space) = argres.split('-',1)
|
(cname, space) = argres.split('-',1)
|
||||||
|
@ -386,15 +391,28 @@ class DocParser(object):
|
||||||
result.append(('img' + word_class, int(argres)))
|
result.append(('img' + word_class, int(argres)))
|
||||||
word_class = ''
|
word_class = ''
|
||||||
|
|
||||||
elif name.endswith('word_semantic.firstWord'):
|
if (sp_first != -1) and (sp_last != -1):
|
||||||
first = int(argres)
|
for wordnum in xrange(sp_first, sp_last):
|
||||||
(name, argres) = self.lineinDoc(line+1)
|
|
||||||
if not name.endswith('word_semantic.lastWord'):
|
|
||||||
print 'Error: - incorrect word_semantic ordering inside paragraph'
|
|
||||||
last = int(argres)
|
|
||||||
for wordnum in xrange(first, last):
|
|
||||||
result.append(('ocr', wordnum))
|
result.append(('ocr', wordnum))
|
||||||
line += 1
|
sp_first = -1
|
||||||
|
sp_last = -1
|
||||||
|
|
||||||
|
if (gl_first != -1) and (gl_last != -1):
|
||||||
|
glyphList = []
|
||||||
|
for glyphnum in xrange(gl_first, gl_last):
|
||||||
|
glyphList.append(glyphnum)
|
||||||
|
num = self.svgcount
|
||||||
|
self.glyphs_to_image(glyphList)
|
||||||
|
self.svgcount += 1
|
||||||
|
result.append(('svg', num))
|
||||||
|
gl_first = -1
|
||||||
|
gl_last = -1
|
||||||
|
|
||||||
|
if (ws_first != -1) and (ws_last != -1):
|
||||||
|
for wordnum in xrange(ws_first, ws_last):
|
||||||
|
result.append(('ocr', wordnum))
|
||||||
|
ws_first = -1
|
||||||
|
ws_last = -1
|
||||||
|
|
||||||
line += 1
|
line += 1
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue