protected class WebcrawlerConnector.ProcessActivityHTMLHandler extends WebcrawlerConnector.ProcessActivityLinkHandler implements IHTMLHandler
Fields inherited from class WebcrawlerConnector.ProcessActivityLinkHandler: activities, contextDescription, documentIdentifier, filter, linkType

| Constructor and Description |
|---|
| WebcrawlerConnector.ProcessActivityHTMLHandler(String documentIdentifier, IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter) — Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| void | finishUp() — Done with the document. |
| void | noteAHREF(String rawURL) — Note discovered href |
| void | noteFormEnd() — Note the end of a form |
| void | noteFormInput(Map inputAttributes) — Note an input tag |
| void | noteFormStart(Map formAttributes) — Note the start of a form |
| void | noteFRAMESRC(String rawURL) — Note discovered FRAME SRC |
| void | noteIMGSRC(String rawURL) — Note discovered IMG SRC |
| void | noteLINKHREF(String rawURL) — Note discovered href |
| void | noteMetaTag(Map metaAttributes) — Note a meta tag |
| void | noteTextCharacter(char textCharacter) — Note a character of text. |
| boolean | shouldIndex() — Decide whether we should index. |
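Methods inherited from class WebcrawlerConnector.ProcessActivityLinkHandler: noteDiscoveredLink
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from an implemented interface (interface name not preserved in this extract): noteDiscoveredLink

public WebcrawlerConnector.ProcessActivityHTMLHandler(String documentIdentifier, IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter)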
public boolean shouldIndex()
public void noteTextCharacter(char textCharacter)
throws ManifoldCFException
Specified by: noteTextCharacter in interface IHTMLHandler
Throws: ManifoldCFException

public void noteMetaTag(Map metaAttributes) throws ManifoldCFException
Specified by: noteMetaTag in interface IMetaTagHandler
Parameters: metaAttributes - are the attributes that belong to the tag.
Throws: ManifoldCFException

public void noteFormStart(Map formAttributes) throws ManifoldCFException
Specified by: noteFormStart in interface IHTMLHandler
Throws: ManifoldCFException

public void noteFormInput(Map inputAttributes) throws ManifoldCFException
Specified by: noteFormInput in interface IHTMLHandler
Throws: ManifoldCFException

public void noteFormEnd()
throws ManifoldCFException
Specified by: noteFormEnd in interface IHTMLHandler
Throws: ManifoldCFException

public void noteAHREF(String rawURL) throws ManifoldCFException
Specified by: noteAHREF in interface IHTMLHandler
Throws: ManifoldCFException

public void noteLINKHREF(String rawURL) throws ManifoldCFException
Specified by: noteLINKHREF in interface IHTMLHandler
Throws: ManifoldCFException

public void noteIMGSRC(String rawURL) throws ManifoldCFException
Specified by: noteIMGSRC in interface IHTMLHandler
Throws: ManifoldCFException

public void noteFRAMESRC(String rawURL) throws ManifoldCFException
Specified by: noteFRAMESRC in interface IHTMLHandler
Throws: ManifoldCFException

public void finishUp()
throws ManifoldCFException
Description copied from interface: IHTMLHandler
Specified by: finishUp in interface IHTMLHandler
Throws: ManifoldCFException