2010-01-17 05:10:35 -07:00
|
|
|
#! /usr/bin/python
|
|
|
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
2010-03-02 05:46:56 -07:00
|
|
|
# For use with Topaz Scripts Version 2.6
|
2010-01-17 05:10:35 -07:00
|
|
|
|
|
|
|
import csv
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import getopt
|
2012-03-06 11:24:28 -07:00
|
|
|
import re
|
2010-01-17 05:10:35 -07:00
|
|
|
from struct import pack
|
|
|
|
from struct import unpack
|
|
|
|
|
|
|
|
|
|
|
|
class DocParser(object):
    """Convert the stylesheet section of a flattened XML dump into CSS.

    The document is handled as a list of lines of the form
    ``dotted.tag.path=value`` (or just ``dotted.tag.path`` when there is no
    value).  Multi-valued entries separate their values with ``|``.

    fontsize, ph (page height) and pw (page width) are used as the
    denominators when numeric style values are turned into CSS percentages.
    """

    # style tags we know how to convert -> CSS selector prefix
    stags = {
        'paragraph' : 'p',
        'graphic'   : '.graphic'
    }

    # value based attributes -> CSS property prefix (a percentage is appended)
    attr_val_map = {
        'hang'          : 'text-indent: ',
        'indent'        : 'text-indent: ',
        'line-space'    : 'line-height: ',
        'margin-bottom' : 'margin-bottom: ',
        'margin-left'   : 'margin-left: ',
        'margin-right'  : 'margin-right: ',
        'margin-top'    : 'margin-top: ',
        'space-after'   : 'padding-bottom: ',
    }

    # text based attributes, keyed as '<attr>-<value>' -> complete CSS text
    attr_str_map = {
        'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
        'align-left' : 'text-align: left;',
        'align-right' : 'text-align: right;',
        'align-justify' : 'text-align: justify;',
        'display-inline' : 'display: inline;',
        'pos-left' : 'text-align: left;',
        'pos-right' : 'text-align: right;',
        'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
    }

    # characters 4..6 of the generated '.cl-xxx' class name -> heading element
    # that receives a copy of the rule
    # NOTE(review): 'h3_' uses an underscore where 'h1-'/'h2-' use a dash;
    # kept exactly as found - confirm whether that is intentional.
    _heading_map = {
        'ch1' : 'h1',
        'ch2' : 'h2',
        'ch3' : 'h3',
        'h1-' : 'h4',
        'h2-' : 'h5',
        'h3_' : 'h6',
    }

    # first run of decimal digits in a string (used by getData(clean=True))
    _DIGITS = re.compile(r'''([0-9]+)''')

    def __init__(self, flatxml, fontsize, ph, pw):
        """Split flatxml into lines and remember the scaling parameters.

        fontsize is stored as an int; ph and pw are converted with int()
        and stored as floats so later divisions are floating point.
        """
        self.flatdoc = flatxml.split('\n')
        self.fontsize = int(fontsize)
        self.ph = int(ph) * 1.0
        self.pw = int(pw) * 1.0

    def findinDoc(self, tagpath, pos, end) :
        """Find the first line whose tag name ends with tagpath.

        Lines pos .. end-1 are searched (end itself is NOT included);
        end == -1 means "to the end of the document" and any larger end is
        clipped to the document length.

        Returns (index, value): value is the text after the first '=' or
        '' when the line has no '='.  Returns (-1, None) when not found.
        """
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1 :
            end = cnt
        else :
            end = min(cnt, end)
        # plain counter loop: lazy on Python 2 and valid on Python 3
        j = pos
        while j < end :
            item = docList[j]
            if '=' in item :
                (name, argres) = item.split('=', 1)
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                return j, argres
            j += 1
        return -1, None

    def posinDoc(self, tagpath):
        """Return the list of line indexes of every line matching tagpath."""
        startpos = []
        pos = 0
        while True :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res is None :
                break
            startpos.append(foundpos)
            pos = foundpos + 1
        return startpos

    def getData(self, tagpath, pos, end, clean=False):
        """Return the '|' separated values of the first tagpath match as ints.

        The search range follows findinDoc(pos, end).  With clean=True each
        value is first reduced to its first run of digits (when it has one).
        Returns [] when the tag is missing or has an empty value.

        Raises ValueError when a value cannot be converted by int().
        """
        argres = []
        (foundat, argt) = self.findinDoc(tagpath, pos, end)
        if argt :
            for strval in argt.split('|') :
                if clean :
                    m = self._DIGITS.search(strval)
                    if m is not None :
                        strval = m.group()
                argres.append(int(strval))
        return argres

    @staticmethod
    def _cssClassName(raw):
        """Turn a raw style class into '.cl-<lowercase-dashed>' ('' for None)."""
        if raw is None :
            return ''
        return '.cl-' + raw.replace(' ', '-').lower()

    @staticmethod
    def _toNumber(val):
        """Return val as a float, or None when it is missing or not numeric."""
        try :
            return float(val)
        except (TypeError, ValueError) :
            return None

    def _scaleFor(self, attr):
        """Return the denominator used to turn attr's value into a fraction."""
        if attr in ('margin-bottom', 'margin-top', 'space-after') :
            return self.ph
        if attr == 'line-space' :
            return self.fontsize * 2.0
        # 'margin-right', 'indent', 'margin-left', 'hang'
        return self.pw

    def _collectRules(self, start, end):
        """Gather the convertible rules of the style in lines start .. end-1.

        Returns (cssargs, keep).  cssargs maps an attribute key to a
        (css_prefix, value) pair where value is '' for text based attributes
        and a fraction (1.0 == 100%) for value based ones.  keep is True
        only when at least one value based attribute was stored.
        """
        cssargs = {}
        # reset for every style: previously this flag was never initialised,
        # so it was either undefined (NameError) or left over from an
        # earlier style
        keep = False
        while True :
            (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
            (pos2, val) = self.findinDoc('style.rule.value', start, end)
            if attr is None :
                break

            if attr in ('display', 'pos', 'align') :
                # text based attributes (ignored when the value is missing)
                if val is not None :
                    key = attr + '-' + val
                    if key in self.attr_str_map :
                        cssargs[key] = (self.attr_str_map[key], '')
            elif attr in self.attr_val_map :
                # value based attributes; unparsable values are skipped and
                # a hang of zero is not worth emitting
                num = self._toNumber(val)
                if num is not None and not (attr == 'hang' and num == 0) :
                    pv = num / self._scaleFor(attr)
                    cssargs[attr] = (self.attr_val_map[attr], pv)
                    keep = True

            # continue after whichever of the two lines came last
            start = max(pos1, pos2) + 1
        return cssargs, keep

    def _adjustRules(self, cssargs):
        """Clamp line spacing and rewrite hanging indents, in place."""
        # make sure line-space does not go below 100% or above 300% since
        # it can be wacky in some styles
        if 'line-space' in cssargs :
            (seg, val) = cssargs['line-space']
            if val < 1.0 : val = 1.0
            if val > 3.0 : val = 3.0
            cssargs['line-space'] = (seg, val)

        # css style hanging indent: negative text-indent plus a left margin
        # widened by the same amount; a plain 'indent' would fight with it
        if 'hang' in cssargs :
            (hseg, hval) = cssargs['hang']
            cssargs['hang'] = (hseg, -hval)
            mseg = 'margin-left: '
            mval = hval
            if 'margin-left' in cssargs :
                (mseg, mval) = cssargs['margin-left']
                if mval < 0 : mval = 0
                mval = hval + mval
            cssargs['margin-left'] = (mseg, mval)
            cssargs.pop('indent', None)

    @staticmethod
    def _formatRule(sclass, cssargs):
        """Render '<sclass> { prop: x%; ... }' from the collected rules."""
        parts = []
        for key in cssargs :
            (mseg, mval) = cssargs[key]
            if mval == '' :
                parts.append(mseg + ' ')
            else :
                parts.append(mseg + '%.1f%%; ' % (mval * 100.0))
        return sclass + ' { ' + ''.join(parts) + '}'

    def process(self):
        """Convert every known style of the stylesheet to CSS.

        Returns (csspage, classlst): csspage is the CSS text (it always
        starts with the four fixed .cl-center/.cl-right/.cl-left/.cl-justify
        rules) and classlst holds the emitted '.cl-...' class names, one per
        line.

        A style is emitted only when its tag is in stags, it has no
        '_after_class' specifier and at least one value based attribute
        could be converted.
        """
        classlst = []
        csspage = [
            '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n',
            '.cl-right { text-align: right; }\n',
            '.cl-left { text-align: left; }\n',
            '.cl-justify { text-align: justify; }\n',
        ]

        # each <style> runs from its own start to the start of the next one;
        # the last one runs to the end of the document (-1)
        styleList = self.posinDoc('book.stylesheet.style')
        for (start, end) in zip(styleList, styleList[1:] + [-1]) :

            (pos, tag) = self.findinDoc('style._tag', start, end)
            if tag is None :
                (pos, tag) = self.findinDoc('style.type', start, end)

            # Is this something we know how to convert to css
            if tag not in self.stags :
                continue

            (pos, rawclass) = self.findinDoc('style.class', start, end)
            sclass = self._cssClassName(rawclass)

            # check for any "after class" specifiers
            (pos, rawafter) = self.findinDoc('style._after_class', start, end)
            aftclass = self._cssClassName(rawafter)

            (cssargs, keep) = self._collectRules(start, end)

            # disable all of the after class tags until I figure out how to handle them
            if aftclass != '' :
                keep = False
            if not keep :
                continue

            self._adjustRules(cssargs)
            cssline = self._formatRule(sclass, cssargs)

            if sclass != '' :
                classlst.append(sclass + '\n')
                # handle special case of paragraph class used inside chapter
                # heading and non-chapter headings
                heading = self._heading_map.get(sclass[4:7])
                if heading is not None :
                    csspage.append(heading + cssline + '\n')

            if cssline != ' { }' :
                csspage.append(self.stags[tag] + cssline + '\n')

        return ''.join(csspage), ''.join(classlst)
|
2010-01-17 05:10:35 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
2010-01-20 05:13:31 -07:00
|
|
|
def convert2CSS(flatxml, fontsize, ph, pw):
    """Report the scaling parameters, then convert flatxml's stylesheet.

    Returns whatever DocParser.process() returns for a parser built from
    the four arguments.
    """
    settings = (
        ('Using font size:', fontsize),
        ('Using page height:', ph),
        ('Using page width:', pw),
    )
    for (label, value) in settings:
        # one pre-formatted string so the output is the same text as the
        # original comma separated print
        print('  %s %s' % (label, value))

    # create a document parser and let it do the conversion
    parser = DocParser(flatxml, fontsize, ph, pw)
    return parser.process()
|
2011-10-28 00:24:15 -06:00
|
|
|
|
|
|
|
|
|
|
|
def getpageIDMap(flatxml):
    """Return the integer values stored under 'info.original.pid' in flatxml.

    Each '|' separated value is reduced to its first run of digits before
    conversion (getData is called with clean=True).
    """
    # font size and page dimensions are passed as 0: only getData is called here
    parser = DocParser(flatxml, 0, 0, 0)
    return parser.getData('info.original.pid', 0, -1, True)
|