#!/usr/bin/env python
"""Usage: xhtmlforward [FILE]
Reformats XHTML, as generated by XML tools like XSLT processors, to make it
work in browsers that do not handle XML properly. Reads from FILE or standard
input and writes to standard output. More specifically:
1. Inserts a space before the closing slash in any empty-element shorthand tag
declared EMPTY in the specs, so that:
    <br/>
is rewritten to:
    <br />
2. Rewrites any other empty-element shorthand tag to a start-tag immediately
followed by an end-tag, so that:
    <div/>
is rewritten to:
    <div></div>
"""
__version__ = "0.91"
__author__ = "Thijs van der Vossen "
__copyright__ = "Copyright 2003, Fingertips "
__license__ = "Python"
__history__ = """
0.91 - 19/10/2003 - Thijs - fixed buffer bug
0.9 - 8/8/2003 - Thijs - first beta release
"""
# Non-deprecated HTML elements that are declared EMPTY in the specs.
# The filter keeps the "<name />" shorthand for these and expands every other
# self-closed element into a start-tag followed by an end-tag.
empty = [
    "area",
    "base",
    "br",
    "col",
    "frame",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "param",
]
def reformat(data):
    """Quick convenience function for reformatting xhtml in one go.

    Runs the complete string *data* through a fresh ``filter`` instance and
    returns the rewritten text.

    Because the whole document is supplied at once, nothing more can arrive
    to complete a tag that is still open at the end of *data*.  The filter's
    read() holds such a tag back, so whatever is left in the filter's buffer
    is appended verbatim instead of being silently dropped.
    """
    xhtmlfilter = filter()
    xhtmlfilter.write(data)
    converted = xhtmlfilter.read()
    # Anything still buffered belongs to an unterminated trailing tag in
    # malformed input; return it untouched so no input is lost.
    leftover = "".join(xhtmlfilter.buffer)
    return converted + leftover
class filter:
    """Filter for reformatting XHTML to make it more backwards-compatible.

    Feed XHTML text in with write(), in pieces of any size, and collect the
    rewritten text with read().  Two rewrites are applied to empty-element
    shorthand tags (tags that end in "/>"):

    * elements listed in the module-level ``empty`` list get a space before
      the closing slash, so ``<br/>`` becomes ``<br />``;
    * every other element gets an explicit end-tag, so ``<div/>`` becomes
      ``<div></div>``.

    All other characters are passed through unchanged.  The scanner only
    looks at "<", ">", "/" and whitespace: comments, CDATA sections and
    processing instructions get no special treatment, so a "/>" inside them
    is handled like the end of any other tag.

    Attributes:
        buffer:  list of output fragments not yet handed out by read().
        tag:     0 outside a tag; inside a tag, one more than the number of
                 characters of that tag buffered so far ("<" included).
        tagname: name of the tag being scanned once it is known, else "".
    """

    def __init__(self):
        self.buffer = []
        self.tag = 0
        self.tagname = ""
        # True once the name part of the current tag has been scanned, even
        # if that name turned out to be empty (as in an end-tag "</p>").
        self._named = False

    def write(self, data):
        """Writes xhtml input to the filter.

        *data* may stop in the middle of a tag; the scan state is kept on the
        instance and picked up again by the next call.
        """
        for char in data:
            if char == "<":
                # A tag starts here: restart the counter and forget any name
                # left over from an unterminated earlier "<".
                self.tag = 1
                self.tagname = ""
                self._named = False
            elif (self.tag and not self._named
                  and (char.isspace() or char == "/")):
                # The first whitespace or slash ends the tag name.  The
                # buffer currently ends with "<" plus the name, i.e. with
                # self.tag - 1 characters, so the name is the last
                # self.tag - 2 of them.  For an end-tag the slash follows
                # "<" directly and the name is empty.
                namelen = self.tag - 2
                if namelen > 0:
                    self.tagname = "".join(self.buffer[-namelen:])
                self._named = True
            elif self.tag and char == ">":
                # Only a named tag written in "/>" shorthand is rewritten.
                if self.tagname and self.buffer[-1] == "/":
                    if self.tagname in empty:
                        # "<br/>" -> "<br />"; leave it alone when there is
                        # already whitespace before the slash.
                        if not self.buffer[-2].isspace():
                            self.buffer[-1] = " "
                            self.buffer.append("/")
                    else:
                        # "<div/>" -> "<div></div>": turn the slash into the
                        # ">" of the start-tag and open the end-tag; the ">"
                        # appended below closes it.
                        self.buffer[-1] = ">"
                        self.buffer.append("</" + self.tagname)
                self.tag = 0
                self.tagname = ""
                self._named = False
            if self.tag:
                self.tag += 1
            self.buffer.append(char)

    def read(self):
        """Reads reformatted xhtml output from the filter.

        Returns everything written so far that has not been returned yet.
        While a tag is still open its characters are held back, because they
        may still be rewritten once the closing ">" arrives; they are
        returned by a later call.
        """
        if self.tag:
            # self.tag - 1 characters of the open tag sit at the end of the
            # buffer; hand out only what comes before them.
            held = self.tag - 1
            cut = max(len(self.buffer) - held, 0)
            r = "".join(self.buffer[:cut])
            self.buffer = self.buffer[cut:]
        else:
            r = "".join(self.buffer)
            self.buffer = []
        return r
if __name__ == "__main__":
    # Command-line entry point: xhtmlforward [-h | --help] [FILE]
    # Reads FILE (or standard input when no FILE is given), runs it through
    # the filter in 4096-character chunks and writes the result to standard
    # output.  The syntax below is valid on both Python 2.6+ and Python 3.
    import sys
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    for o, _ in opts:
        if o in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)

    # The first positional argument, when present, names the input file.
    if args:
        infile = open(args[0])
    else:
        infile = sys.stdin

    xhtmlfilter = filter()
    try:
        while 1:
            chunk = infile.read(4096)
            if not chunk:
                break
            xhtmlfilter.write(chunk)
            sys.stdout.write(xhtmlfilter.read())
        # At end of input the filter may still be holding back an
        # unterminated tag (malformed input); emit it verbatim rather than
        # losing it.
        sys.stdout.write("".join(xhtmlfilter.buffer))
    finally:
        # Close the file we opened ourselves, but leave stdin alone.
        if infile is not sys.stdin:
            infile.close()