|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  com.raritantechnologies.concept.classifier.BasicDocumentMatcher
    com.raritantechnologies.concept.classifier.AccrueDocumentMatcher
Computes the weighted average of scores of a set of contained document matchers.
The combined score is determined by the average score of the child DocumentMatchers multiplied by a weighting factor that favors matches spread across the set of child matchers.
XML Configuration Template:
<DocumentMatcher class="com.raritantechnologies.concept.classifier.AccrueDocumentMatcher"
minMatchers="[ minimum number of matchers needed to reach threshold (default=2) ]"
maxFailures="[ maximum number of matchers that can fail before Accrue matcher fails (default=2) ]"
multipleMatchWeight="[ weighting factor for matches across matchers ]" >
<!-- two or more child matchers -->
<DocumentMatcher class="[ class of com.raritantechnologies.concept.classifier.IDocumentMatcher ]" >
</DocumentMatcher>
<!-- etc. . . -->
</DocumentMatcher>
| Constructor Summary | |
AccrueDocumentMatcher()
|
|
AccrueDocumentMatcher(java.util.ArrayList childMatchers)
|
|
AccrueDocumentMatcher(java.util.ArrayList childMatchers,
int minMatchers,
double multipleMatchWeight)
|
|
AccrueDocumentMatcher(java.util.ArrayList childMatchers,
int minMatchers,
int maxFailures,
double multipleMatchWeight)
|
|
| Method Summary | |
protected void |
collectPhraseSet(java.util.HashSet phraseSet)
|
protected void |
collectTermSet(java.util.HashSet termSet)
|
void |
extractTerms(IndexedDocument fromDocument,
java.util.HashMap termsMap)
Extracts the matching terms contained in the document. |
void |
extractTerms(IndexedDocument fromDocument,
java.util.Set termsSet)
|
DocumentMatchBean |
getMatchCriteria(IndexedDocument document,
java.util.Map termsMap)
Computes an average of child scores = sum of scores / number of matchers. |
void |
initialize(org.w3c.dom.Element elem)
Initializes the object from an XML tag or element. |
boolean |
isStopWord(IndexedDocument document)
Adds stop word support. |
boolean |
matches(IndexedDocument document)
Returns true if the matcher matches the IndexedDocument, false otherwise. |
java.lang.String |
render()
Renders a human-readable version of the matcher's logic. |
void |
setMaxFailures(int maxFailures)
Sets the maximum number of match failures that can occur. |
void |
setMinMatchers(int minMatchers)
Sets the minimum number of child matchers that need to match. |
void |
setMultipleMatchWeight(double multipleMatchWeight)
Sets the weighting factor for multiple-matcher matches. |
| Methods inherited from class com.raritantechnologies.concept.classifier.BasicDocumentMatcher |
addAttribute, addTerms, addTermsAsAttributes, extractTerms, getAttribute, getAttributeNames, getMatchCriteria, getName, getPhraseSet, getTermSet, setName |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Methods inherited from interface com.raritantechnologies.concept.classifier.IDocumentMatcher |
addAttribute, addTermsAsAttributes, getAttribute, getAttributeNames, getMatchCriteria, getName, getPhraseSet, getTermSet, setName |
| Methods inherited from interface com.raritantechnologies.utils.tagging.ITermExtractor |
extractTerms |
| Constructor Detail |
public AccrueDocumentMatcher()
public AccrueDocumentMatcher(java.util.ArrayList childMatchers)
public AccrueDocumentMatcher(java.util.ArrayList childMatchers,
int minMatchers,
double multipleMatchWeight)
public AccrueDocumentMatcher(java.util.ArrayList childMatchers,
int minMatchers,
int maxFailures,
double multipleMatchWeight)
| Method Detail |
public void setMinMatchers(int minMatchers)
public void setMaxFailures(int maxFailures)
public void setMultipleMatchWeight(double multipleMatchWeight)
public DocumentMatchBean getMatchCriteria(IndexedDocument document,
java.util.Map termsMap)
getMatchCriteria in interface IDocumentMatcher
getMatchCriteria in class BasicDocumentMatcher
public boolean isStopWord(IndexedDocument document)
IDocumentMatcher
TermDocumentMatcher.
isStopWord in interface IDocumentMatcher
isStopWord in class BasicDocumentMatcher
public boolean matches(IndexedDocument document)
IDocumentMatcher
matches in interface IDocumentMatchermatches in class BasicDocumentMatcher
public void extractTerms(IndexedDocument fromDocument,
java.util.HashMap termsMap)
IDocumentMatcher
extractTerms in interface IDocumentMatcherextractTerms in class BasicDocumentMatcher
public void extractTerms(IndexedDocument fromDocument,
java.util.Set termsSet)
extractTerms in interface IDocumentMatcher
public void initialize(org.w3c.dom.Element elem)
IConfigurable
initialize in interface IConfigurable
protected void collectTermSet(java.util.HashSet termSet)
collectTermSet in class BasicDocumentMatcher
protected void collectPhraseSet(java.util.HashSet phraseSet)
collectPhraseSet in class BasicDocumentMatcher
public java.lang.String render()
IDocumentMatcher
render in interface IDocumentMatcher
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||