# A not very elegant demonstration of the use of SAX to traverse an XML document
# and to record the start and stop position of each element.
#
# Note that this program can traverse very large documents. It needs space to
# hold a stack of names and positions (proportional to the depth of the XML
# tree) and the longest character string.
#
# The code is written so that it runs unchanged on both Python 2 and Python 3:
# every print call takes a single pre-formatted string.

import xml.sax
import xml.sax.handler


# This works by overriding methods that are called by the SAX parser as it
# traverses the document and encounters start tags, end tags and character
# strings.
class ItemHandler(xml.sax.handler.ContentHandler):
    """Number every element and non-blank text node and print the numbering.

    Each start tag, each non-blank text node and each end tag consumes one
    serial number.  For every element the handler prints
    ``Element <start> <stop> <name>`` and for every non-blank text node
    ``Text node <number> <text>``.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.nodecount = 0  # The serial number of the next tag or text node
        self.buffer = []    # Chunks of character data not yet reported
        self.stack = []     # (start number, name) of every open element

    def _flush_text(self):
        """Report the pending character data as a text node, if non-blank.

        The buffer is always emptied; whitespace-only text is dropped without
        consuming a serial number.
        """
        text = "".join(self.buffer).strip()
        self.buffer = []
        if text:
            print("Text node %d %s" % (self.nodecount, text))
            self.nodecount += 1

    def startElement(self, nme, att):
        """Record the start position of element ``nme`` (``att`` is unused)."""
        # Text seen before a child's start tag belongs to the parent, so it
        # has to be reported now rather than merged into the child's text.
        self._flush_text()
        self.stack.append((self.nodecount, nme))
        self.nodecount += 1

    def endElement(self, nme):
        """Report any pending text, then the start/stop numbers of ``nme``."""
        tcount, tnme = self.stack.pop()
        if tnme != nme:
            print("non-well-formed document")
        self._flush_text()
        print("Element %d %d %s" % (tcount, self.nodecount, nme))
        self.nodecount += 1

    def characters(self, data):
        """Collect one chunk of character data.

        Chunks are kept in a list and joined once when the text is reported,
        which avoids re-copying the accumulated text on every call.
        """
        self.buffer.append(data)


parser = xml.sax.make_parser()
parser.setContentHandler(ItemHandler())

# Only parse the sample document when run as a script, so that importing this
# module (for reuse or testing) does not require 'emps.xml' to exist.
if __name__ == "__main__":
    parser.parse('emps.xml')