| |
- SGMLParser
-
- TestSGMLParser
class SGMLParser |
|
# SGML parser base class -- find tags and call handler functions.
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
# The dtd is defined by deriving a class which defines methods
# with special names to handle tags: start_foo and end_foo to handle
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
# (Tags are converted to lower case for this purpose.) The data
# between tags is passed to the parser by calling self.handle_data()
# with some data as argument (the data may be split up in arbitrary
# chunks). Entity references are passed by calling
# self.handle_entityref() with the entity reference as argument.
|
| |
- __init__(self, verbose=0)
- # Interface -- initialize and reset this instance
- close(self)
- # Interface -- handle the remaining data
- feed(self, data)
- # Interface -- feed some data to the parser. Call this as
- # often as you want, with as little or as much text as you
- # want (may include '\n'). (This just saves the text, all the
- # processing is done by goahead().)
- finish_endtag(self, tag)
- # Internal -- finish processing of end tag
- finish_shorttag(self, tag, data)
- # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
- finish_starttag(self, tag, attrs)
- # Internal -- finish processing of start tag
- # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
- goahead(self, end)
- # Internal -- handle data as far as reasonable. May leave state
- # and data to be processed by a subsequent call. If 'end' is
- # true, force handling all data as if followed by EOF marker.
- handle_charref(self, name)
- # Example -- handle character reference, no need to override
- handle_comment(self, data)
- # Example -- handle comment, could be overridden
- handle_data(self, data)
- # Example -- handle data, should be overridden
- handle_endtag(self, tag, method)
- # Overridable -- handle end tag
- handle_entityref(self, name)
- # Example -- handle entity reference, no need to override
- handle_pi(self, data)
- # Example -- handle processing instruction, could be overridden
- handle_starttag(self, tag, method, attrs)
- # Overridable -- handle start tag
- parse_comment(self, i)
- # Internal -- parse comment, return length or -1 if not terminated
- parse_endtag(self, i)
- # Internal -- parse endtag
- parse_pi(self, i)
- # Internal -- parse processing instruction, return length or -1 if not terminated
- parse_starttag(self, i)
- # Internal -- handle starttag, return length or -1 if not terminated
- report_unbalanced(self, tag)
- # Example -- report an unbalanced </...> tag.
- reset(self)
- # Interface -- reset this instance. Loses all unprocessed data
- setliteral(self, *args)
- # For derived classes only -- enter literal mode (CDATA)
- setnomoretags(self)
- # For derived classes only -- enter literal mode (CDATA) till EOF
- unknown_charref(self, ref)
- # To be overridden -- handlers for unknown objects
- unknown_endtag(self, tag)
- # To be overridden -- handlers for unknown objects
- unknown_entityref(self, ref)
- # To be overridden -- handlers for unknown objects
- unknown_starttag(self, tag, attrs)
- # To be overridden -- handlers for unknown objects
|
|