topazscripts 1.8
parent c93f8e1edd
commit 24f001c61e

@@ -1,3 +1,22 @@
+Changes in version 1.8
+
+- gensvg.py now builds wonderful xhtml pages with embedded svg
+  that can be easily paged through as if reading a book!
+  (tested in Safari for Mac and Win and Firefox)
+  (requires javascript to be enabled)
+
+- genhtml.py now REQUIRES that gensvg.py be run FIRST
+  this allows creation of images on the fly from glyphs
+
+- genhtml.py now automatically makes tables of words into svg
+  based images and will handle glyph based ornate first
+  letters of words
+
+- cmbtc_dump_mac_linux.py has been renamed to be
+  cmbtc_dump_nonK4PC.py to make it clearer
+  when it needs to be used
+
+
 Changes in version 1.7
 - gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
 - gensvg.py has fixed a render order "bug" that allowed some images to cover or hide text. (ClarkNova)

@@ -5,7 +24,6 @@ Changes in version 1.7
 - add missing <title> tag
 - make xhtml compliant doctype and minor changes to write correct xhtml
 - make divs that act as anchors be hidden visually and to take up 0 height and 0 width to prevent any impact on layout
-- added support for new version of the <_span> tag called <span>

 Changes in version 1.6
 - support for books whose paragraphs have no styles
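
A minimal sketch of the resulting 1.8 workflow, assuming TARGETDIR is the directory produced by the dump step (the README changes further below give the full details):

cmbtc_dump.py -d -o TARGETDIR [-p pid] YOURTOPAZBOOKNAMEHERE    (use cmbtc_dump_nonK4PC.py for standalone Kindle / ipod books)
gensvg.py TARGETDIR     (must run first: writes TARGETDIR/svg/ including glyphs.svg and the page*.xhtml views)
genhtml.py TARGETDIR    (reads TARGETDIR/svg/ to build glyph-based images for monograms and tables, then writes book.html)
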
@@ -1,4 +1,5 @@
 #! /usr/bin/python
+# For use in Topaz Scripts version 1.8

 """
@@ -1,4 +1,5 @@
 #! /usr/bin/python
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
 import csv
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
 import csv
@@ -1,21 +1,27 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
 import csv
 import sys
 import os
+import math
 import getopt
 from struct import pack
 from struct import unpack


 class DocParser(object):
-    def __init__(self, flatxml, classlst, fileid):
+    def __init__(self, flatxml, classlst, fileid, bookDir):
         self.id = os.path.basename(fileid).replace('.dat','')
+        self.svgcount = 0
         self.docList = flatxml.split('\n')
         self.docSize = len(self.docList)
         self.classList = {}
+        self.bookDir = bookDir
+        self.glyphPaths = { }
+        self.numPaths = 0
         tmpList = classlst.split('\n')
         for pclass in tmpList:
             if pclass != '':

@@ -30,6 +36,107 @@ class DocParser(object):
         self.paracont_stemid = []
         self.parastems_stemid = []


+    def getGlyph(self, gid):
+        result = ''
+        id='gl%d' % gid
+        return self.glyphPaths[id]
+
+
+    def glyphs_to_image(self, glyphList):
+
+        def extract(path, key):
+            b = path.find(key) + len(key)
+            e = path.find(' ',b)
+            return int(path[b:e])
+
+        def extractID(path, key):
+            b = path.find(key) + len(key)
+            e = path.find('"',b)
+            return path[b:e]
+
+
+        svgDir = os.path.join(self.bookDir,'svg')
+        glyfile = os.path.join(svgDir,'glyphs.svg')
+
+        imgDir = os.path.join(self.bookDir,'img')
+        imgname = self.id + '_%04d.svg' % self.svgcount
+        imgfile = os.path.join(imgDir,imgname)
+
+        # build hash table of glyph paths keyed by glyph id
+        if self.numPaths == 0:
+            gfile = open(glyfile, 'r')
+            while True:
+                path = gfile.readline()
+                if (path == ''): break
+                glyphid = extractID(path,'id="')
+                self.glyphPaths[glyphid] = path
+                self.numPaths += 1
+            gfile.close()
+
+        # get glyph information
+        gxList = self.getData('info.glyph.x',0,-1)
+        gyList = self.getData('info.glyph.y',0,-1)
+        gidList = self.getData('info.glyph.glyphID',0,-1)
+
+        gids = []
+        maxws = []
+        maxhs = []
+        xs = []
+        ys = []
+        gdefs = []
+
+        # get path definitions, positions, dimensions for each glyph
+        # that makes up the image, and find min x and min y to reposition origin
+        minx = -1
+        miny = -1
+        for j in glyphList:
+            gid = gidList[j]
+            gids.append(gid)
+
+            xs.append(gxList[j])
+            if minx == -1: minx = gxList[j]
+            else : minx = min(minx, gxList[j])
+
+            ys.append(gyList[j])
+            if miny == -1: miny = gyList[j]
+            else : miny = min(miny, gyList[j])
+
+            path = self.getGlyph(gid)
+            gdefs.append(path)
+
+            maxws.append(extract(path,'width='))
+            maxhs.append(extract(path,'height='))
+
+
+        # change the origin to minx, miny and calc max height and width
+        maxw = maxws[0] + xs[0] - minx
+        maxh = maxhs[0] + ys[0] - miny
+        for j in xrange(0, len(xs)):
+            xs[j] = xs[j] - minx
+            ys[j] = ys[j] - miny
+            maxw = max( maxw, (maxws[j] + xs[j]) )
+            maxh = max( maxh, (maxhs[j] + ys[j]) )

+        # open the image file for output
+        ifile = open(imgfile,'w')
+        ifile.write('<?xml version="1.0" standalone="no"?>\n')
+        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
+        ifile.write('<defs>\n')
+        for j in xrange(0,len(gdefs)):
+            ifile.write(gdefs[j])
+        ifile.write('</defs>\n')
+        for j in xrange(0,len(gids)):
+            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
+        ifile.write('</svg>')
+        ifile.close()
+
+        return 0
+
+
+
     # return tag at line pos in document
     def lineinDoc(self, pos) :
         if (pos >= 0) and (pos < self.docSize) :

@@ -77,6 +184,17 @@ class DocParser(object):
         return startpos


+    # returns a vector of integers for the tagpath
+    def getData(self, tagpath, pos, end):
+        argres=[]
+        (foundat, argt) = self.findinDoc(tagpath, pos, end)
+        if (argt != None) and (len(argt) > 0) :
+            argList = argt.split('|')
+            argres = [ int(strval) for strval in argList]
+        return argres
+
+
     # build a description of the paragraph
     def getParaDescription(self, start, end):

@@ -120,6 +238,7 @@ class DocParser(object):
         # this type of paragraph may be made up of multiple _spans, inline
         # word monograms (images) and words with semantic meaning
         # and now a new type "span" versus the old "_span"
+        # plus glyphs used to form starting letter of first word

         # need to parse this type line by line
         line = start + 1

@@ -143,6 +262,21 @@ class DocParser(object):
                 result.append(('ocr', wordnum))
                 line += 1

+            elif name.endswith('word.firstGlyph') :
+                first = int(argres)
+                (name, argres) = self.lineinDoc(line+1)
+                if not name.endswith('word.lastGlyph'):
+                    print 'Error: - incorrect glyph ordering inside word in paragraph'
+                last = int(argres)
+                glyphList = []
+                for glyphnum in xrange(first, last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                line += 1
+
             elif name.endswith('word.class'):
                 (cname, space) = argres.split('-',1)
                 if space == '' : space = '0'

@@ -241,6 +375,11 @@ class DocParser(object):
                 parares += '<img src="img/img%04d.jpg" alt="" />' % num
                 parares += sep

+            elif wtype == 'svg' :
+                sep = ''
+                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
+                parares += sep
+
         if len(sep) > 0 : parares = parares[0:-1]
         if (type == 'full') or (type == 'end') :
             parares += '</p>'

@@ -260,10 +399,7 @@ class DocParser(object):
         if argres : self.ocrtext = argres.split('|')

         # get information to dehyphenate the text
-        (pos, argres) = self.findinDoc('info.dehyphen.rootID',0,-1)
-        if argres:
-            argList = argres.split('|')
-            self.dehyphen_rootid = [ int(strval) for strval in argList]
+        self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)

         # determine if first paragraph is continued from previous page
         (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)

@@ -274,16 +410,10 @@ class DocParser(object):
         last_para_continued = (self.paracont_stemid != None)

         # collect link ids
-        (pos, argres) = self.findinDoc('info.word.link_id',0,-1)
-        if argres:
-            argList = argres.split('|')
-            self.link_id = [ int(strval) for strval in argList]
+        self.link_id = self.getData('info.word.link_id',0,-1)

         # collect link destination page numbers
-        (pos, argres) = self.findinDoc('info.links.page',0,-1)
-        if argres :
-            argList = argres.split('|')
-            self.link_page = [ int(strval) for strval in argList]
+        self.link_page = self.getData('info.links.page',0,-1)

         # collect link titles
         (pos, argres) = self.findinDoc('info.links.title',0,-1)

@@ -382,23 +512,45 @@ class DocParser(object):


             elif (regtype == 'table') :
-                ptype = 'full'
-                if first_para_continued :
-                    ptype = 'end'
-                    first_para_continued = False
-                (pclass, pdesc) = self.getParaDescription(start,end)
-                htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-                print "Warnings - Table Conversions are notoriously poor"
-                print "Strongly recommend taking a screen capture image of the "
-                print "table in %s.svg and using it to replace this attempt at a table" % self.id
+                # translate first and last word into first and last glyphs
+                # and generate table as an image and include a link to it
+                glyphList = []
+                (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
+                (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
+                firstglyphList = self.getData('word.firstGlyph',0,-1)
+                gidList = self.getData('info.glyph.glyphID',0,-1)
+                if (sfirst != None) and (slast != None) :
+                    first = int(sfirst)
+                    last = int(slast)
+                    firstGlyph = firstglyphList[first]
+                    if last < len(firstglyphList):
+                        lastGlyph = firstglyphList[last]
+                    else :
+                        lastGlyph = len(gidList)
+                    for glyphnum in xrange(firstGlyph, lastGlyph):
+                        glyphList.append(glyphnum)
+                    num = self.svgcount
+                    self.glyphs_to_image(glyphList)
+                    self.svgcount += 1
+                    htmlpage += '<div class="graphic"><img src="img/' + self.id + '_%04d.svg" alt="" /></div>' % num
+                else :
+                    ptype = 'full'
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start,end)
+                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+                    print " "
+                    print "Warning: - Table Conversions are notoriously poor"
+                    print " Strongly recommend taking a screen capture image of the "
+                    print " table in %s.svg and using it to replace this attempt at a table" % self.id
+                    print " "

             elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
                 (pos, simgsrc) = self.findinDoc('img.src',start,end)
                 if simgsrc:
                     htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)


             else :
                 print 'Warning: region type', regtype
                 (pos, temp) = self.findinDoc('paragraph',start,end)

@@ -437,10 +589,10 @@ class DocParser(object):



-def convert2HTML(flatxml, classlst, fileid):
+def convert2HTML(flatxml, classlst, fileid, bookDir):

     # create a document parser
-    dp = DocParser(flatxml, classlst, fileid)
+    dp = DocParser(flatxml, classlst, fileid, bookDir)

     htmlpage = dp.process()
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 import os, sys, getopt

@@ -65,6 +66,12 @@ def main(argv):
         print "Can not find image directory in unencrypted book"
         sys.exit(-1)

+    svgDir = os.path.join(bookDir,'svg')
+    if not os.path.exists(svgDir) :
+        print "Can not find svg directory in unencrypted book"
+        print "please run gensvg.py before running genhtml.py"
+        sys.exit(-1)
+
     otherFile = os.path.join(bookDir,'other0000.dat')
     if not os.path.exists(otherFile) :
         print "Can not find other0000.dat in unencrypted book"

@@ -75,7 +82,6 @@ def main(argv):
         print "Can not find metadata0000.dat in unencrypted book"
         sys.exit(-1)

-
     htmlFileName = "book.html"
     htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
     htmlstr += '<html>\n'

@@ -133,7 +139,7 @@ def main(argv):
         print ' ', filename
         fname = os.path.join(pageDir,filename)
         flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
-        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname)
+        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir)

     htmlstr += '</body>\n</html>\n'
@@ -1,11 +1,11 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 import os, sys, getopt

 # local routines
 import convert2xml
-import flatxml2html
 import decode_meta


@@ -45,6 +45,13 @@ class GParser(object):
             argres[j] = int(argres[j])
         return result

+
+    def getGlyphDim(self, gly):
+        maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
+        maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
+        return maxh, maxw
+
+
     def getPath(self, gly):
         path = ''
         if (gly < 0) or (gly >= self.count):

@@ -172,8 +179,10 @@ class PParser(object):
 def usage():
     print 'Usage: '
     print ' '
-    print ' gensvg.py unencryptedBookDir'
+    print ' gensvg.py [options] unencryptedBookDir'
     print ' '
+    print ' -x : output browseable XHTML+SVG pages (default)'
+    print ' -r : output raw SVG images'


 def main(argv):

@@ -185,7 +194,7 @@ def main(argv):
         argv = argv.split()

     try:
-        opts, args = getopt.getopt(argv[1:], "h:")
+        opts, args = getopt.getopt(argv[1:], "xrh")

     except getopt.GetoptError, err:
         print str(err)

@@ -196,10 +205,15 @@ def main(argv):
         usage()
         sys.exit(2)

+    raw = 0
     for o, a in opts:
         if o =="-h":
             usage()
             sys.exit(0)
+        if o =="-x":
+            raw = 0
+        if o =="-r":
+            raw = 1

     bookDir = args[0]

@@ -264,7 +278,9 @@ def main(argv):
         gp = GParser(flat_xml)
         for i in xrange(0, gp.count):
             path = gp.getPath(i)
-            glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
+            maxh, maxw = gp.getGlyphDim(i)
+            # glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
+            glyfile.write('<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh ))
         counter += 1
     glyfile.write('</defs>\n')
     glyfile.write('</svg>\n')

@@ -274,7 +290,7 @@ def main(argv):

     # Books are at 1440 DPI. This is rendering at twice that size for
     # readability when rendering to the screen.
-    scaledpi = 720
+    scaledpi = 1440
     filenames = os.listdir(pageDir)
     filenames = sorted(filenames)
     counter = 0

@@ -283,11 +299,45 @@ def main(argv):
         fname = os.path.join(pageDir,filename)
         flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
         pp = PParser(flat_xml)
-        pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+        if (raw) :
+            pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+        else :
+            pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
+
         pfile.write('<?xml version="1.0" standalone="no"?>\n')
-        pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
-        pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
-        pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
+        if (raw):
+            pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+            pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
+            pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
+        else:
+            pfile.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n');
+            pfile.write('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n');
+            pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
+            pfile.write('<script><![CDATA[\n');
+            pfile.write('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n');
+            pfile.write('var dpi=%d;\n' % scaledpi);
+            if (counter) :
+                pfile.write('var prevpage="page%04d.xhtml";\n' % (counter - 1))
+            if (counter < len(filenames)-1) :
+                pfile.write('var nextpage="page%04d.xhtml";\n' % (counter + 1))
+            pfile.write('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
+            pfile.write('function zoomin(){dpi=dpi*(2/3);setsize();}\n')
+            pfile.write('function zoomout(){dpi=dpi*1.5;setsize();}\n')
+            pfile.write('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
+            pfile.write('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
+            pfile.write('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
+            pfile.write('var gt=gd();if(gt>0){dpi=gt;}\n')
+            pfile.write('window.onload=setsize;\n')
+            pfile.write(']]></script>\n')
+            pfile.write('</head>\n')
+            pfile.write('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
+            pfile.write('<div style="white-space:nowrap;">\n')
+            if (counter == 0) :
+                pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+            else:
+                pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
+            pfile.write('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
+
         if (pp.gid != None):
             pfile.write('<defs>\n')
             gdefs = pp.getGlyphs(glyfname)

@@ -303,7 +353,18 @@ def main(argv):
                 pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
         if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
             pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
-        pfile.write('</svg>')
+        if (raw) :
+            pfile.write('</svg>')
+        else :
+            pfile.write('</svg></a>\n')
+            if (counter == len(filenames) - 1) :
+                pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+            else :
+                pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
+            pfile.write('</div>\n')
+            pfile.write('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
+            pfile.write('</body>\n')
+            pfile.write('</html>\n')
         pfile.close()
         counter += 1

@@ -312,4 +373,4 @@ def main(argv):
     return 0

 if __name__ == '__main__':
     sys.exit(main(''))
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 import os, sys, getopt
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
 import csv
@@ -19,25 +19,16 @@ Here are the steps:
 1. Unzip the topazscripts.zip file to get the full set of python scripts.
 The files you should have after unzipping are:

-cmbtc_dump.py - (author: cmbtc) unencrypts and dumps sections into separate files
-decode_meta.py - converts metadata0000.dat to human readable text (for the most part)
+cmbtc_dump.py - (author: cmbtc) unencrypts and dumps sections into separate files for Kindle for PC
+cmbtc_dump_nonK4PC.py - (author - DiapDealer) for use with standalone Kindle and ipod/iphone topaz books
+decode_meta.py - converts metadata0000.dat to make it available
 convert2xml.py - converts page*.dat, other*.dat, and glyphs*.dat files to pseudo xml descriptions
 flatxml2html.py - converts a "flattened" xml description to html using the ocrtext
 stylexml2css.py - converts stylesheet "flattened" xml into css (as best it can)
 getpagedim.py - reads page0000.dat to get the book height and width parameters
 genxml.py - main program to convert everything to xml
 genhtml.py - main program to generate "book.html"
-gensvg.py - (author: clarknova) main program to create an svg graphic of each page
+gensvg.py - (author: clarknova) main program to create an xhtml page with embedded svg graphics


-In addition there is now a new file:
-
-cmbtc_dump_mac_linux.py
-
-If you know the pid of your ipod and/or your standalone Kindle and your book
-was meant for that device, you can use this program to dump the proper sections
-on Mac OSX and Linux (and even Windows if you do not have Kindle4PC installed).
-Thank DiapDealer for creating it!

 Please note, gensvg.py, genhtml.py, and genxml.py import and use

@@ -52,8 +43,20 @@ of its contents as files
 All Thanks go to CMBTC who broke the DRM for Topaz - without it nothing else
 would be possible

+If you purchased the book for Kindle For PC, you must do the following:
+
 cmbtc_dump.py -d -o TARGETDIR [-p pid] YOURTOPAZBOOKNAMEHERE

+However, if you purchased the book for a standalone Kindle or ipod/iphone
+and you know your pid (at least the first 8 characters) then you should
+instead do the following:
+
+cmbtc_dump_nonK4PC.py -d -o TARGETDIR -p 12345678 YOURTOPAZBOOKNAMEHERE
+
+where 12345678 should be replaced by the first 8 characters of your PID
+

 This should create a directory called "TARGETDIR" in your current directory.
 It should have the following files in it:

@@ -64,35 +67,48 @@ page - directory filled with page*.dat files
 glyphs - directory filled with glyphs*.dat files


-3. Convert the files in "TARGETDIR" to their xml descriptions
-which can be found in TARGETDIR/xml/ upon completion.
+3. REQUIRED: Create xhtml page descriptions with embedded svg
+that show the exact representation of each page as an image
+with proper glyphs and positioning.
+
+This step must NOW be done BEFORE attempting conversion to html

-genxml.py TARGETDIR
+gensvg.py TARGETDIR

+When complete, use a web-browser to open the page*.xhtml files
+in TARGETDIR/svg/ to see what the book really looks like.
+
+All thanks go to CLARKNOVA for this program. This program is
+needed to actually see the true image of each page and so that
+the next step can properly create images from glyphs for
+monograms, dropcaps and tables.
+
+
-4. Create book.html which can be found in "TARGETDIR" after
-completion. This html conversion can not fully capture
-all of the layouts actually used in the book and needs to
-be edited to include special font handling such as bold
-or italics that can not be determined from the ocrText
-information or the style information. If you want to
-see things exactly as they were, see step 5 below.
+4. Create "book.html" which can be found in "TARGETDIR" after
+completion.

 genhtml.py TARGETDIR

+***IMPORTANT NOTE*** This html conversion can not fully capture
+all of the layouts and styles actually used in the book
+and the resulting html will need to be edited by hand to
+properly set bold and/or italics, handle font size changes,
+and to fix the sometimes horrific mistakes in the ocrText
+used to create the html.
+
+FYI: Sigil is a wonderful, free cross-
+platform program that can be used to edit the html and
+create an epub if you so desire.

-5. Create an svg description of each page which can
-be found in TARGETDIR/svg/ upon completion.
-
-All thanks go to CLARKNOVA for this program. This program is
-needed to actually see the true image of each page so that hand
-editing of the html created by step 4 can be done.
-
-Or use the resulting svg files to read each page of the book
-exactly as it has been laid out originally.
+5. Optional Step: Convert the files in "TARGETDIR" to their
+xml descriptions which can be found in TARGETDIR/xml/
+upon completion.

-gensvg.py TARGETDIR
+genxml.py TARGETDIR

+These conversions are important for allowing future (and better)
+conversions to come later.
@@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement
 import csv