topazscripts 1.7
parent 58e9c973ab
commit 66933f6972
@@ -1,3 +1,12 @@
Changes in version 1.7
- gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
- gensvg.py has fixed a render-order "bug" that allowed some images to cover or hide text (ClarkNova)
- changed the generated html to use an external stylesheet via a link to "style.css"
- added the missing <title> tag
- made the doctype xhtml compliant, with minor changes to write correct xhtml
- made divs that act as anchors hidden visually, with 0 height and 0 width, to prevent any impact on layout
- added support for the new version of the <_span> tag, called <span>

Changes in version 1.6
- support for books whose paragraphs have no styles
- support for running cmbtc_dump on Linux and Mac OS X, provided you know the PID of your iPod or standalone Kindle
@@ -249,11 +249,17 @@ class PageParser(object):
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
'word.lastGlyph' : (1, 'scalar_number', 0, 0),

'_span' : (1, 'snippets', 1, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'_span.lastWord' : (1, 'scalar_number', 0, 0),

'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),

'extratokens' : (1, 'snippets', 1, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
@@ -119,6 +119,7 @@ class DocParser(object):

# this type of paragraph may be made up of multiple _spans, inline
# word monograms (images) and words with semantic meaning
# and now a new type "span" versus the old "_span"

# need to parse this type line by line
line = start + 1
@@ -132,10 +133,10 @@ class DocParser(object):

(name, argres) = self.lineinDoc(line)

if name.endswith('_span.firstWord') :
if name.endswith('span.firstWord') :
first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('_span.lastWord'):
if not name.endswith('span.lastWord'):
print 'Error: - incorrect _span ordering inside paragraph'
last = int(argres)
for wordnum in xrange(first, last):
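A note on the check above: because '_span.firstWord' itself ends with 'span.firstWord', relaxing the test to name.endswith('span.firstWord') accepts both the old _span tags and the new span tags with a single comparison. A small standalone illustration (plain Python, names taken from the lines above):

    # Both the old and the new tag name pass the relaxed endswith() test.
    for name in ('_span.firstWord', 'span.firstWord'):
        print('%s -> %s' % (name, name.endswith('span.firstWord')))   # True for both

    # An unrelated tag name still fails it.
    print('word.firstGlyph'.endswith('span.firstWord'))               # False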
@@ -175,7 +176,7 @@ class DocParser(object):
if pclass :
classres = ' class="' + pclass + '"'

br_lb = (regtype == 'fixed') or (regtype == 'chapterheading')
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')

handle_links = len(self.link_id) > 0
@@ -317,7 +318,7 @@ class DocParser(object):

# set anchor for link target on this page
if not anchorSet and not first_para_continued:
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' + self.id + '" title="pagetype_' + pagetype + '"></div>\n'
anchorSet = True

if regtype == 'graphic' :
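For context, the div written here exists only as a link target that other parts of the generated book reference by its id, which is why it can be hidden and given zero size without affecting anything visible. A rough standalone sketch of that relationship (the page id and pagetype values are invented for illustration):

    # Invented values, purely for illustration.
    page_id = 'id_0012'
    pagetype = 'text'

    # Old form: an empty but visible div that could still nudge the layout.
    old_anchor = '<div id="' + page_id + '" class="page_' + pagetype + '"> </div>\n'

    # New form: hidden and zero-sized, so the anchor never shifts surrounding text.
    new_anchor = ('<div style="visibility: hidden; height: 0; width: 0;" id="' + page_id
                  + '" title="pagetype_' + pagetype + '"></div>\n')

    # A link elsewhere in the generated html jumps to the anchor by id.
    link = '<a href="#' + page_id + '">go to this page</a>'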
@@ -343,7 +344,7 @@ class DocParser(object):
htmlpage += '</' + tag + '>'


elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
@@ -371,6 +372,27 @@ class DocParser(object):
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)


elif (regtype == 'vertical') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)


elif (regtype == 'table') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
print "Warnings - Table Conversions are notoriously poor"
print "Strongly recommend taking a screen capture image of the "
print "table in %s.svg and using it to replace this attempt at a table" % self.id


elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
@@ -378,10 +400,10 @@ class DocParser(object):


else :
print 'Warning: Unknown region type', regtype
print 'Warning: region type', regtype
(pos, temp) = self.findinDoc('paragraph',start,end)
if temp:
print 'Treating this like a "text" region'
if pos != -1:
print ' is a "text" region'
regtype = 'fixed'
ptype = 'full'
# check to see if this is a continuation from the previous page
@@ -400,7 +422,7 @@ class DocParser(object):
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
print 'Treating this like a "graphic" region'
print ' is a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
@@ -77,7 +77,8 @@ def main(argv):


htmlFileName = "book.html"
htmlstr = '<html>\n'
htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
htmlstr += '<html>\n'

filenames = os.listdir(pageDir)
filenames = sorted(filenames)
@@ -85,6 +86,7 @@ def main(argv):
print 'Processing ... '

htmlstr += '<head>\n'
htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'

# process metadata and retrieve fontSize info
print ' ', 'metadata0000.dat'
@@ -93,6 +95,8 @@ def main(argv):
metastr = decode_meta.getMetaData(fname)
file(xname, 'wb').write(metastr)
meta_array = decode_meta.getMetaArray(fname)

htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
@@ -120,11 +124,9 @@ def main(argv):
fname = os.path.join(bookDir,'other0000.dat')
xname = os.path.join(bookDir, 'style.css')
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
htmlstr += '<style>\n'
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
file(xname, 'wb').write(cssstr)
htmlstr += cssstr
htmlstr += '</style>\n'
htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
htmlstr += '</head>\n<body>\n'

for filename in filenames:
@@ -49,8 +49,8 @@ class GParser(object):
path = ''
if (gly < 0) or (gly >= self.count):
return path
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
p = 0
for k in xrange(self.glen[gly], self.glen[gly+1]):
if (p == 0):
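This pair of slice changes is what makes the glyphs render exactly: Python slices already exclude their end index, so the extra -1 was silently dropping the last vertex of every glyph outline. A tiny standalone illustration with made-up vertex data:

    # Suppose gvtx marks where each glyph's vertices start in a flat list:
    # glyph 0 owns vertices 0..3, glyph 1 owns vertices 4..6.
    vx   = [10, 20, 30, 40, 50, 60, 70]
    gvtx = [0, 4, 7]

    gly = 0
    print(vx[gvtx[gly]:gvtx[gly+1]-1])   # old slice: [10, 20, 30]      (last vertex lost)
    print(vx[gvtx[gly]:gvtx[gly+1]])     # new slice: [10, 20, 30, 40]  (complete outline)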
@@ -272,6 +272,8 @@ def main(argv):

print 'Processing Pages ... '

# Books are at 1440 DPI. This is rendering at twice that size for
# readability when rendering to the screen.
scaledpi = 720
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
@@ -292,12 +294,15 @@ def main(argv):
for j in xrange(0,len(gdefs)):
pfile.write(gdefs[j])
pfile.write('</defs>\n')
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
pfile.write(img[j])
if (pp.gid != None):
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
pfile.write('</svg>')
pfile.close()
counter += 1
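The reordering above is the render-order fix from the changelog: SVG paints elements in document order, so whatever is written later ends up on top. Writing the page images before the glyph <use> elements keeps them from covering text. A minimal standalone sketch of the principle (the image path is invented):

    # Later elements paint over earlier ones, so the image goes first
    # and the glyph/text content written afterwards stays visible.
    parts = []
    parts.append('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="100">\n')
    parts.append('<image x="0" y="0" width="200" height="100" xlink:href="img/img0001.jpg" />\n')
    parts.append('<text x="10" y="50" font-size="20">glyphs drawn on top</text>\n')
    parts.append('</svg>\n')
    print(''.join(parts))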
@@ -6,7 +6,7 @@ Contributors:
DiapDealer - for extensive testing and feedback, and standalone linux/macosx version of cmbtc_dump
stewball - for extensive testing and feedback

and others for posting, feedback and testing
and many others for posting, feedback and testing


This is experimental and it will probably not work for you but...