public class BoilerpipeHTMLContentHandler
extends java.lang.Object
implements org.xml.sax.ContentHandler
A SAX ContentHandler, used by BoilerpipeSAXInput. Can
be used by different parser implementations, e.g. NekoHTML and TagSoup.

Constructor and Description |
---|
BoilerpipeHTMLContentHandler()
Constructs a BoilerpipeHTMLContentHandler using the DefaultTagActionMap. |
BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a BoilerpipeHTMLContentHandler using the given TagActionMap. |
Modifier and Type | Method and Description |
---|---|
void |
addLabelAction(LabelAction la) |
protected void |
addTextBlock(TextBlock tb) |
void |
addWhitespaceIfNecessary() |
void |
characters(char[] ch,
int start,
int length) |
void |
endDocument() |
void |
endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName) |
void |
endPrefixMapping(java.lang.String prefix) |
void |
flushBlock() |
java.lang.String |
getTitle() |
void |
ignorableWhitespace(char[] ch,
int start,
int length) |
void |
processingInstruction(java.lang.String target,
java.lang.String data) |
void |
recycle()
Recycles this instance.
|
void |
setDocumentLocator(org.xml.sax.Locator locator) |
void |
setTitle(java.lang.String s) |
void |
skippedEntity(java.lang.String name) |
void |
startDocument() |
void |
startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts) |
void |
startPrefixMapping(java.lang.String prefix,
java.lang.String uri) |
TextDocument |
toTextDocument()
Returns a TextDocument containing the extracted TextBlocks. |
public BoilerpipeHTMLContentHandler()
Constructs a BoilerpipeHTMLContentHandler using the DefaultTagActionMap.

public BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a BoilerpipeHTMLContentHandler using the given TagActionMap.
Parameters:
tagActions - The TagActionMap to use, e.g. DefaultTagActionMap.

public void recycle()
Recycles this instance.
public void endDocument() throws org.xml.sax.SAXException
Specified by:
endDocument in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void endPrefixMapping(java.lang.String prefix) throws org.xml.sax.SAXException
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void processingInstruction(java.lang.String target, java.lang.String data) throws org.xml.sax.SAXException
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void setDocumentLocator(org.xml.sax.Locator locator)
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
public void skippedEntity(java.lang.String name) throws org.xml.sax.SAXException
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void startDocument() throws org.xml.sax.SAXException
Specified by:
startDocument in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws org.xml.sax.SAXException
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
Specified by:
startElement in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
Specified by:
endElement in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void characters(char[] ch, int start, int length) throws org.xml.sax.SAXException
Specified by:
characters in interface org.xml.sax.ContentHandler
Throws:
org.xml.sax.SAXException
public void flushBlock()
protected void addTextBlock(TextBlock tb)
public java.lang.String getTitle()
public void setTitle(java.lang.String s)
public TextDocument toTextDocument()
Returns a TextDocument containing the extracted TextBlocks. NOTE: Only call this after parsing.
Returns:
TextDocument
public void addWhitespaceIfNecessary()
public void addLabelAction(LabelAction la) throws java.lang.IllegalStateException
Throws:
java.lang.IllegalStateException