topazscripts 1.8

2010-01-24 12:19:20 +00:00 · 2010-01-24 12:19:20 +00:00 · 24f001c61e
parent c93f8e1edd
commit 24f001c61e
12 changed files with 332 additions and 72 deletions
--- a/Topaz_Tools/lib/changes.txt
+++ b/Topaz_Tools/lib/changes.txt
@ -1,3 +1,22 @@
+Changes in version 1.8
+
+	- gensvg.py now builds wonderful xhtml pages with embedded svg 
+	    that can be easily paged through as if reading a book!
+	    (tested in Safari for Mac and Win and Firefox)
+	    (requires javascript to be enabled)
+
+	- genhtml.py now REQUIRES that gensvg.py be run FIRST
+	     this allows creation of images on the fly from glyphs
+
+	- genhtml.py now automatically makes tables of words into svg
+	     based images and will handle glyph based ornate first 
+	     letters of words
+
+	- cmbtc_dump_mac_linux.py has been renamed to be
+	     cmbtc_dump_nonK4PC.py to make it clearer
+	     when it needs to be used
+	
+
 Changes in version 1.7
 	- gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
 	- gensvg.py has fixed a render order "bug" that allowed some images to cover or hide text. (ClarkNova)
@ -5,7 +24,6 @@ Changes in version 1.7
 	- add missing <title> tag
 	- make xhtml compliant doctype and minor changes to write correct xhtml
 	- make divs that act as anchors be hidden visually and to take up 0 height and 0 width to prevent any impact on layout
-	- added support for new version of the <_span> tag called <span>

 Changes in version 1.6
 	- support for books whose paragraphs have no styles
--- a/Topaz_Tools/lib/cmbtc_dump.py
+++ b/Topaz_Tools/lib/cmbtc_dump.py
@ -1,4 +1,5 @@
 #! /usr/bin/python
+# For use with Topaz Scripts Version 1.8

 """

--- a/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
+++ b/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
@ -1,4 +1,5 @@
 #! /usr/bin/python
+# For use with Topaz Scripts Version 1.8

 from __future__ import with_statement

--- a/Topaz_Tools/lib/convert2xml.py
+++ b/Topaz_Tools/lib/convert2xml.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 from __future__ import with_statement
 import csv
--- a/Topaz_Tools/lib/decode_meta.py
+++ b/Topaz_Tools/lib/decode_meta.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 from __future__ import with_statement
 import csv
--- a/Topaz_Tools/lib/flatxml2html.py
+++ b/Topaz_Tools/lib/flatxml2html.py
@ -1,21 +1,27 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 from __future__ import with_statement
 import csv
 import sys
 import os
+import math
 import getopt
 from struct import pack
 from struct import unpack


 class DocParser(object):
-    def __init__(self, flatxml, classlst, fileid):
+    def __init__(self, flatxml, classlst, fileid, bookDir):
        self.id = os.path.basename(fileid).replace('.dat','')
+        self.svgcount = 0
        self.docList = flatxml.split('\n')
        self.docSize = len(self.docList)
        self.classList = {}
+        self.bookDir = bookDir
+        self.glyphPaths = { }
+        self.numPaths = 0
        tmpList = classlst.split('\n')
        for pclass in tmpList:
            if pclass != '':
@ -30,6 +36,107 @@ class DocParser(object):
        self.paracont_stemid = []
        self.parastems_stemid = []

+
+    def getGlyph(self, gid):
+        result = ''
+        id='gl%d' % gid
+        return self.glyphPaths[id]
+
+
+    def glyphs_to_image(self, glyphList):
+
+        def extract(path, key):
+            b = path.find(key) + len(key)
+            e = path.find(' ',b)
+            return int(path[b:e])
+
+        def extractID(path, key):
+            b = path.find(key) + len(key)
+            e = path.find('"',b)
+            return path[b:e]
+            
+
+        svgDir = os.path.join(self.bookDir,'svg')
+        glyfile = os.path.join(svgDir,'glyphs.svg')
+
+        imgDir = os.path.join(self.bookDir,'img')
+        imgname = self.id + '_%04d.svg' % self.svgcount
+        imgfile = os.path.join(imgDir,imgname)
+
+        # build hash table of glyph paths keyed by glyph id
+        if self.numPaths == 0:
+            gfile = open(glyfile, 'r')
+            while True:
+                path = gfile.readline()
+                if (path == ''): break
+                glyphid = extractID(path,'id="')
+                self.glyphPaths[glyphid] = path
+                self.numPaths += 1
+            gfile.close()
+
+
+        # get glyph information
+        gxList = self.getData('info.glyph.x',0,-1)
+        gyList = self.getData('info.glyph.y',0,-1)
+        gidList = self.getData('info.glyph.glyphID',0,-1)
+
+        gids = []
+        maxws = []
+        maxhs = []
+        xs = []
+        ys = []
+        gdefs = []
+
+        # get path definitions, positions, dimensions for each glyph 
+        # that makes up the image, and find min x and min y to reposition origin
+        minx = -1
+        miny = -1
+        for j in glyphList:
+            gid = gidList[j]
+            gids.append(gid)
+
+            xs.append(gxList[j])
+            if minx == -1: minx = gxList[j]
+            else : minx = min(minx, gxList[j])
+ 
+            ys.append(gyList[j])
+            if miny == -1: miny = gyList[j]
+            else : miny = min(miny, gyList[j])
+
+            path = self.getGlyph(gid)
+            gdefs.append(path)
+
+            maxws.append(extract(path,'width='))
+            maxhs.append(extract(path,'height='))
+
+
+        # change the origin to minx, miny and calc max height and width
+        maxw = maxws[0] + xs[0] - minx
+        maxh = maxhs[0] + ys[0] - miny
+        for j in xrange(0, len(xs)):
+            xs[j] = xs[j] - minx
+            ys[j] = ys[j] - miny
+            maxw = max( maxw, (maxws[j] + xs[j]) )
+            maxh = max( maxh, (maxhs[j] + ys[j]) )
+
+        # open the image file for output
+        ifile = open(imgfile,'w')
+        ifile.write('<?xml version="1.0" standalone="no"?>\n')
+        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
+        ifile.write('<defs>\n')
+        for j in xrange(0,len(gdefs)):
+            ifile.write(gdefs[j])
+        ifile.write('</defs>\n')
+        for j in xrange(0,len(gids)):
+            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
+        ifile.write('</svg>')
+        ifile.close()
+
+        return 0
+
+
+
    # return tag at line pos in document
    def lineinDoc(self, pos) :
        if (pos >= 0) and (pos < self.docSize) :
@ -77,6 +184,17 @@ class DocParser(object):
        return startpos


+    # returns a vector of integers for the tagpath
+    def getData(self, tagpath, pos, end):
+        argres=[]
+        (foundat, argt) = self.findinDoc(tagpath, pos, end)
+        if (argt != None) and (len(argt) > 0) :
+            argList = argt.split('|')
+            argres = [ int(strval) for strval in argList]
+        return argres
+
+
+
    # build a description of the paragraph
    def getParaDescription(self, start, end):

@ -120,6 +238,7 @@ class DocParser(object):
        # this type of paragrph may be made up of multiple _spans, inline 
        # word monograms (images) and words with semantic meaning
        # and now a new type "span" versus the old "_span"
+        # plus glyphs used to form starting letter of first word
        
        # need to parse this type line by line
        line = start + 1
@ -143,6 +262,21 @@ class DocParser(object):
                    result.append(('ocr', wordnum))
                line += 1

+            elif name.endswith('word.firstGlyph') :
+                first = int(argres)
+                (name, argres) = self.lineinDoc(line+1)
+                if not name.endswith('word.lastGlyph'):
+                    print 'Error: - incorrect glyph ordering inside word in paragraph'
+                last = int(argres)
+                glyphList = []
+                for glyphnum in xrange(first, last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                line += 1
+
            elif name.endswith('word.class'):
               (cname, space) = argres.split('-',1)
               if space == '' : space = '0'
@ -241,6 +375,11 @@ class DocParser(object):
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

+            elif wtype == 'svg' :
+                sep = ''
+                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num 
+                parares += sep
+
        if len(sep) > 0 : parares = parares[0:-1]
        if (type == 'full') or (type == 'end') :
            parares += '</p>'
@ -260,10 +399,7 @@ class DocParser(object):
        if argres :  self.ocrtext = argres.split('|')

        # get information to dehyphenate the text
-        (pos, argres) = self.findinDoc('info.dehyphen.rootID',0,-1)
-        if argres: 
-            argList = argres.split('|')
-            self.dehyphen_rootid = [ int(strval) for strval in argList]
+        self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)

        # determine if first paragraph is continued from previous page
        (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
@ -274,16 +410,10 @@ class DocParser(object):
        last_para_continued = (self.paracont_stemid != None)

        # collect link ids
-        (pos, argres) = self.findinDoc('info.word.link_id',0,-1)
-        if argres:
-            argList = argres.split('|')
-            self.link_id = [ int(strval) for strval in argList]
+        self.link_id = self.getData('info.word.link_id',0,-1)

        # collect link destination page numbers
-        (pos, argres) = self.findinDoc('info.links.page',0,-1)
-        if argres :
-            argList = argres.split('|')
-            self.link_page = [ int(strval) for strval in argList]
+        self.link_page = self.getData('info.links.page',0,-1)

        # collect link titles
        (pos, argres) = self.findinDoc('info.links.title',0,-1)
@ -382,23 +512,45 @@ class DocParser(object):


            elif (regtype == 'table') :
+                # translate first and last word into first and last glyphs
+                # and generate table as an image and include a link to it
+                glyphList = []
+                (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
+                (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
+                firstglyphList = self.getData('word.firstGlyph',0,-1)
+                gidList = self.getData('info.glyph.glyphID',0,-1)
+                if (sfirst != None) and (slast != None) :
+                    first = int(sfirst)
+                    last = int(slast)
+                    firstGlyph = firstglyphList[first]
+                    if last < len(firstglyphList):
+                        lastGlyph = firstglyphList[last]
+                    else :
+                        lastGlyph = len(gidList)
+                    for glyphnum in xrange(firstGlyph, lastGlyph):
+                        glyphList.append(glyphnum)
+                    num = self.svgcount
+                    self.glyphs_to_image(glyphList)
+                    self.svgcount += 1
+                    htmlpage += '<div class="graphic"><img src="img/' + self.id + '_%04d.svg" alt="" /></div>' % num
+                else :
                    ptype = 'full'
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                        (pclass, pdesc) = self.getParaDescription(start,end)
                        htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-                print "Warnings - Table Conversions are notoriously poor"
-                print "Strongly recommend taking a screen capture image of the "
-                print "table in %s.svg and using it to replace this attempt at a table" % self.id
-
+                        print " "
+                        print "Warning: - Table Conversions are notoriously poor"
+                        print "    Strongly recommend taking a screen capture image of the "
+                        print "    table in %s.svg and using it to replace this attempt at a table" % self.id
+                        print " "

            elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
                (pos, simgsrc) = self.findinDoc('img.src',start,end)
                if simgsrc:
                    htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

-
            else :
                print 'Warning: region type', regtype
                (pos, temp) = self.findinDoc('paragraph',start,end)
@ -437,10 +589,10 @@ class DocParser(object):



-def convert2HTML(flatxml, classlst, fileid):
+def convert2HTML(flatxml, classlst, fileid, bookDir):

    # create a document parser
-    dp = DocParser(flatxml, classlst, fileid)
+    dp = DocParser(flatxml, classlst, fileid, bookDir)

    htmlpage = dp.process()

--- a/Topaz_Tools/lib/genhtml.py
+++ b/Topaz_Tools/lib/genhtml.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 import os, sys, getopt

@ -65,6 +66,12 @@ def main(argv):
        print "Can not find image directory in unencrypted book"
        sys.exit(-1)

+    svgDir = os.path.join(bookDir,'svg')
+    if not os.path.exists(svgDir) :
+        print "Can not find svg directory in unencrypted book"
+        print "please run gensvg.py before running genhtml.py"
+        sys.exit(-1)
+
    otherFile = os.path.join(bookDir,'other0000.dat')
    if not os.path.exists(otherFile) :
        print "Can not find other0000.dat in unencrypted book"
@ -75,7 +82,6 @@ def main(argv):
        print "Can not find metadata0000.dat in unencrypted book"
        sys.exit(-1)

-
    htmlFileName = "book.html"
    htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
    htmlstr += '<html>\n'
@ -133,7 +139,7 @@ def main(argv):
        print '     ', filename
        fname = os.path.join(pageDir,filename)
        flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname) 
-        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname)
+        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir)

    htmlstr += '</body>\n</html>\n'

--- a/Topaz_Tools/lib/gensvg.py
+++ b/Topaz_Tools/lib/gensvg.py
@ -1,11 +1,11 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 import os, sys, getopt

 # local routines
 import convert2xml
-import flatxml2html
 import decode_meta


@ -45,6 +45,13 @@ class GParser(object):
             argres[j] = int(argres[j])
     return result

+
+ def getGlyphDim(self, gly):
+     maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
+     maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
+     return maxh, maxw
+     
+
 def getPath(self, gly):
     path = ''
     if (gly < 0) or (gly >= self.count):
@ -172,8 +179,10 @@ class PParser(object):
 def usage():
 print 'Usage: '
 print ' '
- print '   gensvg.py unencryptedBookDir'
+ print '   gensvg.py [options] unencryptedBookDir'
 print '  '
+ print '   -x : output browseable XHTML+SVG pages (default)'
+ print '   -r : output raw SVG images'


 def main(argv):
@ -185,7 +194,7 @@ def main(argv):
     argv = argv.split()

 try:
-     opts, args = getopt.getopt(argv[1:], "h:")
+     opts, args = getopt.getopt(argv[1:], "xrh")

 except getopt.GetoptError, err:
     print str(err)
@ -196,10 +205,15 @@ def main(argv):
     usage()
     sys.exit(2) 

+ raw = 0
 for o, a in opts:
     if o =="-h":
         usage()
         sys.exit(0)
+     if o =="-x":
+         raw = 0
+     if o =="-r":
+         raw = 1

 bookDir = args[0]

@ -264,7 +278,9 @@ def main(argv):
     gp = GParser(flat_xml)
     for i in xrange(0, gp.count):
         path = gp.getPath(i)
-         glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
+         maxh, maxw = gp.getGlyphDim(i)
+         # glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
+         glyfile.write('<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh ))
     counter += 1
 glyfile.write('</defs>\n')
 glyfile.write('</svg>\n')
@ -274,7 +290,7 @@ def main(argv):

 # Books are at 1440 DPI.  This is rendering at twice that size for
 # readability when rendering to the screen.  
- scaledpi = 720
+ scaledpi = 1440
 filenames = os.listdir(pageDir)
 filenames = sorted(filenames)
 counter = 0
@ -283,11 +299,45 @@ def main(argv):
     fname = os.path.join(pageDir,filename)
     flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname) 
     pp = PParser(flat_xml)
+     if (raw) :
         pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+     else :
+         pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
+
     pfile.write('<?xml version="1.0" standalone="no"?>\n')
+     if (raw):
         pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
         pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
         pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
+     else:
+         pfile.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n');
+         pfile.write('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n');
+         pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
+         pfile.write('<script><![CDATA[\n');
+         pfile.write('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n');
+         pfile.write('var dpi=%d;\n' % scaledpi);
+         if (counter) :
+            pfile.write('var prevpage="page%04d.xhtml";\n' % (counter - 1))
+         if (counter < len(filenames)-1) :
+            pfile.write('var nextpage="page%04d.xhtml";\n' % (counter + 1))
+         pfile.write('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
+         pfile.write('function zoomin(){dpi=dpi*(2/3);setsize();}\n')
+         pfile.write('function zoomout(){dpi=dpi*1.5;setsize();}\n')
+         pfile.write('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
+         pfile.write('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
+         pfile.write('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
+         pfile.write('var gt=gd();if(gt>0){dpi=gt;}\n')
+         pfile.write('window.onload=setsize;\n')
+         pfile.write(']]></script>\n')
+         pfile.write('</head>\n')
+         pfile.write('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
+         pfile.write('<div style="white-space:nowrap;">\n')
+         if (counter == 0) :
+             pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+         else:
+             pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
+         pfile.write('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
+
     if (pp.gid != None): 
         pfile.write('<defs>\n')
         gdefs = pp.getGlyphs(glyfname)
@ -303,7 +353,18 @@ def main(argv):
             pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
     if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
         pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content.  (gensvg.py)</text>\n');
+     if (raw) :
         pfile.write('</svg>')
+     else :
+         pfile.write('</svg></a>\n')
+         if (counter == len(filenames) - 1) :
+             pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+         else :
+             pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
+         pfile.write('</div>\n')
+         pfile.write('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
+         pfile.write('</body>\n')
+         pfile.write('</html>\n')
     pfile.close()
     counter += 1

--- a/Topaz_Tools/lib/genxml.py
+++ b/Topaz_Tools/lib/genxml.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 import os, sys, getopt

--- a/Topaz_Tools/lib/getpagedim.py
+++ b/Topaz_Tools/lib/getpagedim.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 from __future__ import with_statement
 import csv
--- a/Topaz_Tools/lib/readme.txt
+++ b/Topaz_Tools/lib/readme.txt
@ -19,25 +19,16 @@ Here are the steps:
 1. Unzip the topazscripts.zip file to get the full set of python scripts.
 The files you should have after unzipping are:

-cmbtc_dump.py - (author: cmbtc) unencrypts and dumps sections into separate files
-decode_meta.py - converts metadata0000.dat to human readable text (for the most part)
+cmbtc_dump.py - (author: cmbtc) unencrypts and dumps sections into separate files for Kindle for PC
+cmbtc_dump_nonK4PC.py - (author: DiapDealer) for use with standalone Kindle and ipod/iphone topaz books
+decode_meta.py - converts metadata0000.dat to make its contents available to the other scripts
 convert2xml.py - converts page*.dat, other*.dat, and glyphs*.dat files to pseudo xml descriptions
 flatxml2html.py - converts a "flattened" xml description to html using the ocrtext
 stylexml2css.py - converts stylesheet "flattened" xml into css (as best it can)
 getpagedim.py - reads page0000.dat to get the book height and width parameters
 genxml.py - main program to convert everything to xml
 genhtml.py - main program to generate "book.html"
-gensvg.py - (author: clarknova) main program to create an svg grpahic of each page
-
-
-In addition there is now a new file:
-
-cmbtc_dump_mac_linux.py  
-
-If you know the pid of your ipod and/or your standalone Kindle and your book
-was meant for that device, you can use this program to dump the proper sections
-on Mac OSX and Linux (and even Windows if you do not have Kindle4PC installed).
-Thank DiapDealer for creating it!
+gensvg.py - (author: clarknova) main program to create an xhtml page with embedded svg graphics


 Please note, gensvg.py, genhtml.py, and genxml.py import and use
@ -52,8 +43,20 @@ of its contents as files
 All Thanks go to CMBTC who broke the DRM for Topaz - without it nothing else 
 would be possible

+If you purchased the book for Kindle For PC, you must do the following:
+
   cmbtc_dump.py -d -o TARGETDIR [-p pid] YOURTOPAZBOOKNAMEHERE

+
+However, if you purchased the book for a standalone Kindle or ipod/iphone 
+and you know your pid (at least the first 8 characters) then you should 
+instead do the following:
+
+   cmbtc_dump_nonK4PC.py -d -o TARGETDIR -p 12345678 YOURTOPAZBOOKNAMEHERE
+
+where 12345678 should be replaced by the first 8 characters of your PID
+
+
 This should create a directory called "TARGETDIR" in your current directory.  
 It should have the following files in it:

@ -64,35 +67,48 @@ page - directory filled with page*.dat files
 glyphs - directory filled with glyphs*.dat files


+3. REQUIRED: Create xhtml page descriptions with embedded svg
+that show the exact representation of each page as an image
+with proper glyphs and positioning.

-3. Convert the files in "TARGETDIR" to their xml descriptions
-which can be found in TARGETDIR/xml/ upon completion.
+This step must NOW be done BEFORE attempting conversion to html

-   genxml.py TARGETDIR
+   gensvg.py TARGETDIR
+
+When complete, use a web-browser to open the page*.xhtml files
+in TARGETDIR/svg/ to see what the book really looks like.
+
+All thanks go to CLARKNOVA for this program.  This program is 
+needed to actually see the true image of each page and so that
+the next step can properly create images from glyphs for 
+monograms, dropcaps and tables.


-
-4. Create book.html which can be found in "TARGETDIR" after 
-completion.  This html conversion can not fully capture 
-all of the layouts actually used in the book and needs to 
-be edited to include special font handling such as bold 
-or italics that can not be determined from the ocrText
-information or the style information.  If you want to 
-see things exactly as they were, see step 5 below.
+4. Create "book.html" which can be found in "TARGETDIR" after 
+completion.  

   genhtml.py TARGETDIR


+***IMPORTANT NOTE***  This html conversion can not fully capture 
+all of the layouts and styles actually used in the book
+and the resulting html will need to be edited by hand to 
+properly set bold and/or italics, handle font size changes,
+and to fix the sometimes horrific mistakes in the ocrText
+used to create the html.  

-5. Create an svg description of each page which can
-be found in TARGETDIR/svg/ upon completion.
+FYI: Sigil is a wonderful, free cross-
+platform program that can be used to edit the html and 
+create an epub if you so desire.

-All thanks go to CLARKNOVA for this program.  This program is 
-needed to actually see the true image of each page so that hand
-editing of the html created by step 4 can be done.  

-Or use the resulting svg files to read each page of the book
-exactly as it has been laid out originally.
+5. Optional Step:  Convert the files in "TARGETDIR" to their 
+xml descriptions which can be found in TARGETDIR/xml/ 
+upon completion.

-   gensvg.py TARGETDIR
+   genxml.py TARGETDIR
+
+
+These conversions are important for allowing future (and better)
+conversions to come later.

--- a/Topaz_Tools/lib/stylexml2css.py
+++ b/Topaz_Tools/lib/stylexml2css.py
@ -1,5 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 1.8                                                                                                  

 from __future__ import with_statement
 import csv