# A not very elegant demonstration of the use of SAX to traverse an XML document 
# and to record the start and stop position of each element

# Note that this program can traverse very large documents.  It needs space to hold 
# a stack of names and positions (proportional to the depth of the XML tree) and 
# the longest character string.

import xml.sax

# Create the SAX reader that will drive the content-handler callbacks.
parser = xml.sax.make_parser()

# This works by overriding the methods that the SAX parser calls as it
# traverses the document and encounters start tags, end tags and character strings
class ItemHandler(xml.sax.handler.ContentHandler):
    """SAX handler that numbers nodes and prints each element's span.

    Every start tag, non-blank text node and end tag consumes one serial
    number from a single counter.  For each element the handler prints
    ``Element <start> <end> <name>`` where ``<start>`` is the number taken
    at the start tag and ``<end>`` the number taken at the end tag.  For
    each non-blank run of text it prints ``Text node <number> <text>``,
    with the text stripped of surrounding whitespace.

    Output is produced with single-argument ``print(...)`` calls and
    %-formatting so the printed lines are the same on Python 2 and 3.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        # Serial number that the next tag or text node will receive.
        self.nodecount = 0
        # Character data collected since the last tag.
        self.buffer = ""
        # (serial number, name) of every element that is still open.
        self.stack = []

    def _flush_text(self):
        """Emit the pending text as a text node (if non-blank) and reset it."""
        text = self.buffer.strip()
        # Always reset, so whitespace between tags cannot pile up.
        self.buffer = ""
        if text:
            print("Text node %s %s" % (self.nodecount, text))
            self.nodecount += 1

    def startElement(self, nme, att):
        """Record the start position of element *nme*; *att* is unused."""
        # Text seen before this tag belongs to the parent, so it must be
        # numbered before the child element takes its start number.
        self._flush_text()
        self.stack.append((self.nodecount, nme))
        self.nodecount += 1

    def endElement(self, nme):
        """Print the start and end serial numbers of element *nme*."""
        start, opened = self.stack.pop()
        if opened != nme:
            print("non-well-formed document")
        self._flush_text()
        print("Element %s %s %s" % (start, self.nodecount, nme))
        self.nodecount += 1

    def characters(self, data):
        """Append a chunk of character data to the pending text."""
        self.buffer += data

# Attach a fresh handler to the reader, then stream the input file through
# it; the handler prints its report as the parse progresses.
parser.setContentHandler(ItemHandler())
parser.parse("emps.xml")
     

