|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
com.raritantechnologies.concept.classifier.BasicDocumentMatcher
com.raritantechnologies.concept.classifier.TermDocumentMatcher
Document matcher that determines if a particular term is present in the document.
XML Configuration Template:
<DocumentMatcher class="com.raritantechnologies.concept.classifier.TermDocumentMatcher"
term="[ the term to match ]"
caseSensitive="[ true|false(default) ]"
allCapsAcronymFilter="[ true|false(default) ]"
stemming="[ true|false(default) ]" />
| Constructor Summary | |
TermDocumentMatcher()
|
|
TermDocumentMatcher(java.lang.String term)
|
|
TermDocumentMatcher(java.lang.String term,
boolean caseSensitive)
|
|
TermDocumentMatcher(java.lang.String term,
boolean caseSensitive,
boolean useStemming)
|
|
TermDocumentMatcher(java.lang.String term,
boolean caseSensitive,
java.lang.Double maximumDocumentFrequency)
|
|
| Method Summary | |
protected void |
collectPhraseSet(java.util.HashSet phraseSet)
|
protected void |
collectTermSet(java.util.HashSet termSet)
|
void |
extractTerms(IndexedDocument fromDocument,
java.util.HashMap termsMap)
Extracts the matching terms contained in the document. |
void |
extractTerms(IndexedDocument fromDocument,
java.util.Set termsSet)
|
DocumentMatchBean |
getMatchCriteria(IndexedDocument document,
java.util.Map termsMap)
Returns a DocumentMatchBean containing the match criteria (the category or categories that specify the 'reason' or context of the match). |
void |
initialize(org.w3c.dom.Element elem)
Initializes the object from an XML tag or element. |
boolean |
isStopWord(IndexedDocument document)
Adds stop word support. |
boolean |
matches(IndexedDocument document)
Returns true if the matcher matches the IndexedDocument, false otherwise. |
java.lang.String |
render()
Renders a human-readable version of the matcher's logic. |
void |
setCaseSensitive(boolean caseSensitive)
|
void |
setSubstringMatch(boolean substringMatch)
|
void |
setTerm(java.lang.String term)
|
void |
setUseStemming(boolean useStemming)
|
| Methods inherited from class com.raritantechnologies.concept.classifier.BasicDocumentMatcher |
addAttribute, addTerms, addTermsAsAttributes, extractTerms, getAttribute, getAttributeNames, getMatchCriteria, getName, getPhraseSet, getTermSet, setName |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Methods inherited from interface com.raritantechnologies.concept.classifier.IDocumentMatcher |
addAttribute, addTermsAsAttributes, getAttribute, getAttributeNames, getMatchCriteria, getName, getPhraseSet, getTermSet, setName |
| Methods inherited from interface com.raritantechnologies.utils.tagging.ITermExtractor |
extractTerms |
| Constructor Detail |
public TermDocumentMatcher()
public TermDocumentMatcher(java.lang.String term)
public TermDocumentMatcher(java.lang.String term,
boolean caseSensitive)
public TermDocumentMatcher(java.lang.String term,
boolean caseSensitive,
boolean useStemming)
public TermDocumentMatcher(java.lang.String term,
boolean caseSensitive,
java.lang.Double maximumDocumentFrequency)
| Method Detail |
public DocumentMatchBean getMatchCriteria(IndexedDocument document,
java.util.Map termsMap)
IDocumentMatcher
getMatchCriteria in interface IDocumentMatcher
getMatchCriteria in class BasicDocumentMatcher
public boolean matches(IndexedDocument document)
IDocumentMatcher
matches in interface IDocumentMatcher
matches in class BasicDocumentMatcher
public boolean isStopWord(IndexedDocument document)
IDocumentMatcherTermDocumentMatcher.
isStopWord in interface IDocumentMatcher
isStopWord in class BasicDocumentMatcher
public void extractTerms(IndexedDocument fromDocument,
java.util.HashMap termsMap)
IDocumentMatcher
extractTerms in interface IDocumentMatcher
extractTerms in class BasicDocumentMatcher
public void extractTerms(IndexedDocument fromDocument,
java.util.Set termsSet)
extractTerms in interface IDocumentMatcher
public void initialize(org.w3c.dom.Element elem)
IConfigurable
initialize in interface IConfigurable
public void setTerm(java.lang.String term)
public void setCaseSensitive(boolean caseSensitive)
public void setSubstringMatch(boolean substringMatch)
protected void collectTermSet(java.util.HashSet termSet)
collectTermSet in class BasicDocumentMatcher
protected void collectPhraseSet(java.util.HashSet phraseSet)
collectPhraseSet in class BasicDocumentMatcher
public java.lang.String render()
IDocumentMatcher
render in interface IDocumentMatcher
public void setUseStemming(boolean useStemming)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||