1
0
Fork 0
mirror of https://github.com/kastdeur/dotfiles.git synced 2025-01-03 14:33:34 +01:00
dotfiles/vim/vim-latex/doc/db2vim/db2vim

761 lines
23 KiB
Python
Executable file

#!/usr/bin/python
r"""
db2vim [options] file.xml
SHORT OPTIONS
-d Prints some debugging information on stderr.
-s If given, the db2vim operates in a 'stict' conversion mode, i.e, any
element which does not have a handler defined for them it be
completeley ignored including all its children. Otherwise, db2vim will
recurse into an unknown tag and process any of its children it
recognizes. Since db2vim always recognizes text nodes, not using this
option has the effect that all text will be printed out, even if
somewhat incorrectly.
LONG OPTIONS
--prefix=<prefix>
This is a string like "ls_" which will be prepended to the section
numbers. Default to 'ls_' if unsupplied.
"""
import xml.dom.minidom
import getopt
import string
import re
import sys
# Okay. so I import *. Shoot me.
from textutils import *
from domutils import *
# define a bunch of constants for formatting.
TEXT_WIDTH = 80
BLOCK_QUOTE = 4
COL_SPACE = 2
# a bunch of globals used in creating the Table of contents.
#
# TOC_HASH['section 1.1 label'] = 'ls_1_1'
#
# LEVEL_HASH['section 1.1 label'] = 1
# (top level article has level 0)
#
# TITLE_HASH['section 1.1 label'] = 'Title of section 1.1'
#
# FILENAME = the name of the file being processed with the last extension
# changed to .txt
#
# TOC_PREFIX = 'ls_' (the prefix used to create the section labels).
TOC_HASH = {}
LEVEL_HASH = {}
TITLE_HASH = {}
FILENAME = ''
TOC_PREFIX = ''
ANCHOR_HASH = {}
URL_HASH = {}
# STDERR for printing debugging info.
DEBUG = 0
STDERR = sys.stderr
STRICT = 0
NUM_ANCHORS = {0:1}
################################################################################
# Miscellaneous utility functions
################################################################################
# encodeTo52(num) {{{
def encodeTo52(num):
ret = ''
if num < 26:
return unichr(ord('a') + num)
elif num < 52:
return unichr(ord('A') + num - 26)
else:
return encodeTo52(int(num/52)) + encodeTo52(num % 52)
# }}}
# makeTocHash(rootElement) {{{
def makeTocHash(rootElement, width, prefix='', level=0):
retText = ""
sectionsTable = []
lastLabelUsed = 0
for section in rootElement.getChildrenByTagName('section'):
title = section.getChildrenByTagName('title')[0]
titleText = handleElement(title, width)
lastLabelUsed += 1
thisLabel = TOC_PREFIX + prefix + str(lastLabelUsed)
sectionid = section.getAttribute('id')
if not sectionid:
section.setAttribute('id', thisLabel)
sectionid = thisLabel
NUM_ANCHORS[0] += 1
ANCHOR_HASH[sectionid] = TOC_PREFIX + 'a_' + encodeTo52(NUM_ANCHORS[0] + 52)
TOC_HASH[sectionid] = thisLabel
LEVEL_HASH[sectionid] = level
TITLE_HASH[sectionid] = titleText
if section.getChildrenByTagName('section'):
childText = makeTocHash(section, width - 5,
prefix = prefix+str(lastLabelUsed) + '_',
level = level + 1)
# }}}
# makeAnchorHash(rootElement) {{{
def makeAnchorHash(rootElement):
anchors = rootElement.getElementsByTagName('anchor') + rootElement.getElementsByTagName('note')
numAnchors = 0
for anchor in anchors:
if not anchor.getAttribute('id'):
continue
NUM_ANCHORS[0] += 1
if ANCHOR_HASH.has_key(anchor.getAttribute('id')) or TOC_HASH.has_key(anchor.getAttribute('id')):
print >> STDERR, "Warning: anchor [%s] multiply defined" % anchor.getAttribute('id')
ANCHOR_HASH[anchor.getAttribute('id')] = TOC_PREFIX + 'a_' + encodeTo52(NUM_ANCHORS[0] + 52)
# }}}
# makeURLHash(rootElement) {{{
def makeURLHash(rootElement):
urls = rootElement.getElementsByTagName('ulink')
numURLs = 0
for url in urls:
if not url.getAttribute('url') or URL_HASH.has_key(url.getAttribute('url')):
continue
numURLs += 1
URL_HASH[url.getAttribute('url')] = TOC_PREFIX + 'u_' + str(numURLs)
# }}}
# makeTOC(node, width, prefix='', level=0, maxleve=1): {{{
def makeTOC(node, width, maxlevel=1):
retText = ""
sectionsTable = []
lastLabelUsed = 0
for section in node.getChildrenByTagName('section'):
sectionid = section.getAttribute('id')
thisLabel = TOC_HASH.get(sectionid, '')
titleText = TITLE_HASH.get(sectionid, '')
level = LEVEL_HASH.get(sectionid, 10)
if level <= maxlevel:
retText += '|' + thisLabel + '| ' + titleText + '\n'
if level < maxlevel and section.getChildrenByTagName('section'):
childText = makeTOC(section, width-5)
retText += VertCatString(" ", 4, childText) + '\n'
retText = re.sub(r'\s+$', r'\n', retText)
return retText
# }}}
################################################################################
# Generalized function for handling dom elements.
################################################################################
# IsInlineTag(self): {{{
def IsInlineTag(self):
if self.nodeType == self.TEXT_NODE:
return 1
elif inlineTags.get(self.tagName, 0):
return 1
else:
return 0
# }}}
# getChildrenByTagName(self, name): {{{
# Description: extension to the xml.dom.minidom.Element class.
# returns all direct descendants of this Element.
def getChildrenByTagName(self, name):
nodeList = []
child = self.firstChild
while not child is None:
if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
nodeList.append(child)
child = child.nextSibling
return nodeList
xml.dom.minidom.Element.getChildrenByTagName = getChildrenByTagName
# }}}
# handleElement(rootElement, width=TEXT_WIDTH): {{{
def handleElement(rootElement, width=TEXT_WIDTH):
"""
handleElement(rootElement, width=TEXT_WIDTH):
Generalized function to handle an Element node in a DOM tree.
"""
retText = ""
child = rootElement.firstChild
while not child is None:
printerr('node type = %d' % child.nodeType)
if child.nodeType == child.ELEMENT_NODE:
printerr('processing [%s]' % child.tagName)
isinline = IsInlineTag(child)
# if the child is an Element and if a handler exists, then call it.
if not isinline \
and child.nodeType == child.ELEMENT_NODE \
and handlerMaps.has_key(child.tagName):
# offset the child text by the current indentation value
printerr('making recursive call to known child.')
retText += handlerMaps[child.tagName](child, width)
child = child.nextSibling
elif not isinline \
and child.nodeType == child.PROCESSING_INSTRUCTION_NODE \
and child.target == 'vimhelp':
if handlerMaps.has_key(child.data):
retText += handlerMaps[child.data](child, width)
child = child.nextSibling
# if its a text node or an inline element node, collect consecutive
# text nodes into a single paragraph and indent it.
elif isinline:
text = ""
while not child is None and IsInlineTag(child):
if child.nodeType == child.TEXT_NODE:
text += child.data
elif child.nodeType == child.ELEMENT_NODE:
if handlerMaps.has_key(child.tagName):
text += handlerMaps[child.tagName](child, width)
else:
text += GetText(child.childNodes)
child = child.nextSibling
retText += IndentParagraphs(text, width)
# If we cannot understand _anything_ about the element, then just
# handle its children hoping we have something to gather from
# there.
elif not STRICT:
printerr('making recursive call for unkown child')
retText += handleElement(child, width)
child = child.nextSibling
else:
child = child.nextSibling
return retText
# }}}
################################################################################
# Functions for handling various xml tags
################################################################################
# handleArticleInfo(articleinfo, width): {{{
def handleArticleInfo(articleinfo, width):
makeTocHash(articleinfo.parentNode, width)
makeAnchorHash(articleinfo.parentNode)
makeURLHash(articleinfo.parentNode)
title = articleinfo.getChildrenByTagName('title')
if title is None:
print("Article should have a title!")
sys.exit(1)
name = GetText(title[0].childNodes)
authors = articleinfo.getChildrenByTagName('author')
authorText = ''
for author in authors:
firstname = ''
surname = ''
if author.getElementsByTagName('firstname'):
firstname = GetTextFromElementNode(author, 'firstname')[0]
if author.getChildrenByTagName('surname'):
surname = GetTextFromElementNode(author, 'surname')[0]
if author.getElementsByTagName('email'):
email = GetTextFromElementNode(author, 'email')[0]
authorText = authorText + firstname + ' ' + surname + ' <' + email + '>\n'
abstractText = ''
abstract = articleinfo.getChildrenByTagName('abstract')
if abstract is not None:
abstractText = '\n\n' + CenterText('Abstract\n========', width)
abstractText += handleElement(abstract[0], width) + '\n'
retText = CenterText(name + '\n*' + FILENAME + '*\n' + authorText, width)
retText += abstractText
toc = makeTOC(articleinfo.parentNode, width)
foldwarn = r'''
================================================================================
Viewing this file
This file can be viewed with all the sections and subsections folded to ease
navigation. By default, vim does not fold help documents. To create the folds,
press za now. The folds are created via a foldexpr which can be seen in the
last section of this file.
See |usr_28.txt| for an introduction to folding and |fold-commands| for key
sequences and commands to work with folds.
'''
return retText + '\n' + RightJustify('*' + FILENAME + '-toc*', width) + '\n' + toc + foldwarn
# }}}
# handleOption(option, width): {{{
def handleOption(option, width):
retText = ""
names = GetTextFromElementNode(option, "name")
for name in names:
retText += string.rjust("*"+name+"*", width) + "\n"
nameTexts = ""
maxNameLen = -1
for name in names:
maxNameLen = max(maxNameLen, len(name + " "))
nameTexts += name + " \n"
desc = option.getChildrenByTagName("desc")[0]
descText = handleElement(desc, width=width-maxNameLen)
retText += VertCatString(nameTexts + " ", None, descText)
return retText + "\n"
# }}}
# handleOptionDefault(default, width): {{{
def handleOptionDefault(default, width):
type = string.join(GetTextFromElementNode(default, "type"), "\n")
extra = string.join(GetTextFromElementNode(default, "extra"), "\n")
return type + "\t(" + extra + ")"
# }}}
# handleTableRoot(root, width): {{{
def handleTableRoot(root, width):
tgroup = root.getChildrenByTagName('tgroup')[0]
if tgroup is None:
return ''
rows = []
numHeadRows = 0
if tgroup.getChildrenByTagName('thead'):
thead = tgroup.getChildrenByTagName('thead')[0]
rows = thead.getChildrenByTagName('row')
numHeadRows = len(rows)
tbody = tgroup.getChildrenByTagName('tbody')[0]
rows += tbody.getChildrenByTagName('row')
widths, text = calculateColumnWidthsDoublePass(rows, width)
headText = text[0:numHeadRows]
bodyText = text[numHeadRows:]
headTable = FormatTable(headText, ROW_SPACE = 1, COL_SPACE =
COL_SPACE, justify = 0, widths = widths)
if headTable:
headTable = re.sub(r'\n|$', '\g<0>~', headTable)
bodyTable = FormatTable(bodyText, ROW_SPACE = 1, COL_SPACE =
COL_SPACE, justify = 0, widths = widths)
return headTable + '\n'+ re.sub(r'\n+$', '', bodyTable) + '\n\n'
# calculateColumnWidths(rows, width): {{{
def calculateColumnWidths(rows, alloc_widths):
widths = {}
text = []
for row in rows:
cols = row.getChildrenByTagName("entry")
if len(alloc_widths) == 1:
alloc_widths *= len(cols)
colwidths = []
rowtext = []
for col, width in zip(cols, alloc_widths):
coltext = handleElement(col, width)
rowtext.append(coltext)
# This is the 'width' of the current cell including the
# whitespace padding.
colwidths.append(max(map(len, coltext.split("\n"))) \
+ COL_SPACE)
text.append(rowtext)
# update the widths of the columns by finding the maximum
# width of all cells in this column.
for i in range(len(colwidths)):
widths[i] = max(colwidths[i], widths.get(i, -1))
return widths, text
# }}}
# calculateColumnWidthsDoublePass(rows, width): {{{
def calculateColumnWidthsDoublePass(rows, width):
maxwidths, text = calculateColumnWidths(rows, [width])
if reduce(lambda x, y: x+y, maxwidths.values()) <= width:
return maxwidths, text
# now find out how many columns exceed the maximum permitted width.
# nlarge: number of columns which are too wide.
# remainingWidth: width which these large columns can share.
nlarge = 0
remainingWidth = width
for colwidth in maxwidths.values():
if colwidth > width/len(maxwidths):
nlarge += 1
else:
remainingWidth += -colwidth
# newmaxwidth: width which each of the large columns is allowed.
newmaxwidth = remainingWidth/max(nlarge, 1)
newcolwidths = []
for colwidth in maxwidths.values():
newcolwidths += [min(colwidth, newmaxwidth)]
# make another run and this time ask each cell to restrict itself to
# newmaxwidth as calculated above.
newmaxwidth, newtext = calculateColumnWidths(rows, newcolwidths)
return newmaxwidth, newtext
# }}}
# }}}
# handleCode(code, width): {{{
def handleCode(code, width):
retText = GetText(code.childNodes)
return " &codebegin;\n" + VertCatString(" ", 4, retText) + "&codeend;"
# }}}
# handleList(list, width, marker=0): {{{
def handleList(list, width, marker=0):
if list.tagName == 'simplelist':
child = 'member'
decoration = ''
elif list.tagName == 'orderedlist':
child = 'listitem'
else:
child = 'member'
decoration = '- '
retText = ""
items = list.getChildrenByTagName(child)
i = 1
for item in items:
if list.tagName == 'orderedlist':
decoration = str(i) + '. '
i = i + 1
itemText = handleElement(item, width - len(decoration))
itemText = VertCatString(decoration, None, itemText)
retText += '\n' + re.sub(r'\s+$', '', itemText) + "\n"
return retText
# }}}
# handleNote(note, width): {{{
def handleNote(note, width):
title = None
if note.getChildrenByTagName('title'):
title = note.getChildrenByTagName('title')[0]
name = GetText(title.childNodes)
note.removeChild(title)
noteid = ''
if note.getAttribute('id'):
noteTagText = '*' + note.getAttribute('id') + '* '
noteTagText += '*' + ANCHOR_HASH[note.getAttribute('id')] + '*'
noteTagText = IndentParagraphs(noteTagText, width/2)
noteid = RightJustify(noteTagText, width) + '\n'
noteText = handleElement(note, width-len("NOTE: "))
if title is not None:
noteText = name + '\n' +('-' * len(name)) + '\n' + noteText
noteText = noteid + VertCatString("NOTE: ", None, noteText)
return noteText + "\n"
# }}}
# handleParagraph(paragraph, width): {{{
def handleParagraph(paragraph, width):
partext = handleElement(paragraph, width)
partext = re.sub(r'\n+$', '', partext)
partext = re.sub(r'^\n+', '', partext)
return partext + "\n\n"
# }}}
# handleFormalParagraph(paragraph, width): {{{
def handleFormalParagraph(formalparagraph, width):
title = None
if formalparagraph.getChildrenByTagName('title'):
title = formalparagraph.getChildrenByTagName('title')[0]
name = GetText(title.childNodes)
formalparagraph.removeChild(title)
partext = handleElement(formalparagraph, width)
partext = re.sub(r'\n+$', '', partext)
partext = re.sub(r'^\n+', '', partext)
if title is not None:
partext = name + '\n' + ('-' * len(name)) + '\n' + partext
return partext + "\n\n"
# }}}
# handleBlockQuote(block, width): {{{
def handleBlockQuote(block, width):
text = handleElement(block, width - BLOCK_QUOTE)
text = VertCatString(" "*BLOCK_QUOTE, \
BLOCK_QUOTE, text)
return text + "\n"
# }}}
# handleLink(link, width): {{{
def handleLink(link, width):
linkend = link.getAttribute('linkend')
if not ANCHOR_HASH.has_key(linkend):
print >> STDERR, "Warning: Link ID [%s] not found in TOC" % linkend
text = handleElement(link, width)
anchorpt = ANCHOR_HASH.get(linkend)
if not anchorpt:
anchorpt = ''
return text + ' [|' + anchorpt + '|]'
# }}}
# handleAnchor(anchor, width): {{{
def handleAnchor(anchor, width):
anchorText = '*'+anchor.getAttribute('id')+'* '
anchorText += '*'+ANCHOR_HASH[anchor.getAttribute('id')]+'*'
return RightJustify(anchorText, width) \
+ "\n"
# }}}
# handleSection(section, width): {{{
def handleSection(section, width):
title = section.getChildrenByTagName('title')[0]
name = handleElement(title, width)
sectionid = section.getAttribute('id')
tagsformatted = ''
if TOC_HASH.has_key(sectionid):
tagsformatted = '*%s* ' % TOC_HASH[sectionid]
if ANCHOR_HASH.has_key(sectionid):
tagsformatted += '*%s* ' % ANCHOR_HASH[sectionid]
if sectionid and TOC_HASH.has_key(sectionid) and sectionid != TOC_HASH[sectionid]:
tagsformatted += '*%s*' % sectionid
# try to indent to a width of 20
tagsformatted = RightJustify(IndentParagraphs(tagsformatted, 30), 0)
tagswidth = TextWidth(tagsformatted)
# width(name) + nspaces + width(tags) = 80
if len(tagsformatted) > 2:
header = VertCatString(name, 80-tagswidth, tagsformatted)
else:
header = name
section.removeChild(title)
text = handleElement(section, width)
thislevel = LEVEL_HASH.get(sectionid, -1)
if thislevel == 0:
delim = '='
newlines = '\n\n'
elif thislevel == 1:
delim = '-'
newlines = '\n'
else:
delim = ''
newlines = '\n'
thisTOC = ''
if thislevel <= 1:
thisTOC = makeTOC(section, width, maxlevel=1)
return "\n" + (delim * TEXT_WIDTH) + \
"\n" + header + newlines + thisTOC + newlines + re.sub(r'\n+$', '', text) + "\n"
# }}}
# handleUlink(ulink, width) {{{
def handleUlink(ulink, width):
url = ulink.getAttribute('url')
text = handleElement(ulink)
# URL_HASH is created at the very beginning
if url:
return text + ' |%s|' % URL_HASH[url]
else:
print >> STDERR, "Warning: url attribute empty for [%s]" % text
return text
# }}}
# handleIndexTerm(indexterm, width) {{{
def handleIndexTerm(indexterm, width) :
return ''
# }}}
# handleEmphasis(emphasis, width) {{{
def handleEmphasis(emphasis, width):
return '_' + GetText(emphasis.childNodes) + '_'
# }}}
################################################################################
# A dictionary for mapping xml tags to functions.
################################################################################
# {{{
handlerMaps = {
'articleinfo': handleArticleInfo,
'table': handleTableRoot,
'informaltable': handleTableRoot,
'code': handleCode,
'programlisting': handleCode,
'list': handleList,
'simplelist': handleList,
'orderedlist': handleList,
'para': handleParagraph,
'formalpara': handleFormalParagraph,
'note': handleNote,
'link': handleLink,
'anchor': handleAnchor,
'section': handleSection,
'blockquote': handleBlockQuote,
'ulink': handleUlink,
'emphasis': handleEmphasis,
'indexterm': handleIndexTerm
}
inlineTags = {'tag':1, 'literal':1, 'link':1,
'ulink':1, 'citetitle':1, 'indexterm':1,
'emphasis':1, 'filename':1 }
# }}}
# helper functions for usage() and printerr() {{{
def usage():
print __doc__
def printerr(statement):
if DEBUG:
print >> STDERR, statement
# }}}
# replaceComment(matchobj) {{{
def replaceComment(matchobj):
initspace = matchobj.group(1)
firstsent = matchobj.group(2)
code = matchobj.group(3)
if len(initspace) > 0:
if initspace[0] == '<':
lastspace = initspace
else:
lastspace = '<' + initspace[:-1]
else:
lastspace = initspace
return '\n' + initspace + firstsent + ' >\n' + code + '\n' + lastspace
# }}}
# main function {{{
if __name__ == "__main__":
option = {}
try:
opts, args = getopt.getopt(sys.argv[1:], 'ds', ['prefix=', 'help'])
for oa, ov in opts:
option[oa] = ov
except getopt.GetoptError:
print >> STDERR, "Usage error: db2vim --help for usage"
sys.exit(1)
if option.has_key('--help'):
usage();
sys.exit(0);
TOC_PREFIX = option.get('--prefix', 'ls_')
DEBUG = option.has_key('-d')
if len(args) != 1:
print >> STDERR, "Usage error: db2vim --help for usage"
sys.exit(1)
fileName = args[0]
FILENAME = re.sub(r'\.\w+$', r'.txt', fileName)
try:
fp = open(fileName)
except:
print "Error opening xml file"
dom = xml.dom.minidom.parse(fp)
modeline = r'''
================================================================================
About this file
This file was created automatically from its XML variant using db2vim. db2vim is
a python script which understands a very limited subset of the Docbook XML 4.2
DTD and outputs a plain text file in vim help format.
db2vim can be obtained via anonymous CVS from sourceforge.net. Use
cvs -d:pserver:anonymous@cvs.vim-latex.sf.net:/cvsroot/vim-latex co db2vim
Or you can visit the web-interface to sourceforge CVS at:
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/vim-latex/db2vim/
The following modelines should nicely fold up this help manual.
vim:ft=help:fdm=expr:nowrap
vim:foldexpr=getline(v\:lnum-1)=~'-\\{80}'?'>2'\:getline(v\:lnum-1)=~'=\\{80}'?'>1'\:getline(v\:lnum)=~'=\\{80}'?'0'\:getline(v\:lnum)=~'-\\{80}'?'1'\:'='
vim:foldtext=substitute(v\:folddashes.substitute(getline(v\:foldstart),'\\s*\\*.*',"",""),'^--','\ \ \ \ \ \ ','')
================================================================================'''
STRICT = option.has_key('-s')
pattern = re.compile(r'\n([< ]*)([^\n]+)&codebegin;\n(.*?)&codeend;', re.DOTALL)
processedDoc = handleElement(dom.documentElement)
while re.search('&codebegin;', processedDoc):
processedDoc = re.sub(pattern, replaceComment, processedDoc)
urlsection = r"""
================================================================================
URLs used in this file
"""
labels = zip(URL_HASH.values(), URL_HASH.keys())
labels.sort()
for label, url in labels:
urlsection += '*%s* : %s\n' % (label, url)
processedDoc = processedDoc + urlsection + modeline
print processedDoc.encode('iso-8859-1')
# }}}
# vim:et:sts=4:fdm=marker