|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
com.raritantechnologies.searchApp.SearchSource
com.raritantechnologies.federated.html.HTMLSearchSource
Describes an HTTP SearchSource that can search web site search engines. Manages the login,
search, and result paging processes. Delegates search and retrieval activities to
an HTMLScraper object. Manages the dynamic mapping of input
fields into a "Login Process" and a "Search Process" using login and search templates.
<SourceType name="[unique search source name]"
type="HTMLSearchSource"
displayName="[ displayable name]"
sourceFactoryClass="com.raritantechnologies.federated.html.HTMLSearchSourceFactory"
queryProcessor="com.raritantechnologies.federated.html.HTMLQueryProcessor"
resultIsXML="true|false(default)"
cacheCookieKey="[ key to cache cookies from site ]"
cookieResultField="[ result field to put cookie String in ]"
IDField="[ field to use as unique ID ]"
URLField="[ field with fullText URL ]"
titleField="[ field with document Title ]" >
<!-- Describes mapping of query input parameters to the HTML SearchProcess and of the -->
<!-- abstract or normalized field names to the field name at the HTML source. -->
<Fields>
<!-- The value of the ID field in the query will be inserted into the SearchProcess -->
<!-- element at the xPath location specified in the xPath parameter. The sourceName -->
<!-- field defines the name of the field at the HTML source. -->
<Field ID="[ abstract field name ]"
xPath="[ xPath within the SearchProcess: e.g. '/SearchProcess/Step/params/param[@formName='q']/@value' ]"
sourceName="[ field name at source (e.g. 'q')]"/>
<!-- Describes complex formatting needed for a field value. -->
<!-- Describes complex formatting needed for a field value. -->
<!-- FormatField uses com.raritantechnologies.utils.format classes -->
<FormatField value="{(KY)_KY_}"
xPath="/SearchProcess/Step/params/param[@formName='term']/@value" />
<!-- FilterField uses com.raritantechnologies.utils.filter classes -->
<FilterField class="com.raritantechnologies.utils.filter.IStringFilter"
xPath="/SearchProcess/Step/params/param[@formName='term']/@value" />
<Field ID="[ boolean ID ]" xPath="[ path to search form value ]" >
<!-- FieldLookup allows user selected value to be looked up for use in FormatField -->
<!-- used by Block: {(TI)(BOOL(TI TTL TI_OP ())) _TTL_[Title]}
see com.raritantechnologies.utils.format.BlockFormatter javadocs for details -->
<FieldMap ID="TIBOOL" name="TI_OP">
<Choice abstractVal="AND" sourceVal="AND" />
<Choice abstractVal="OR" sourceVal="OR" />
</FieldMap>
</Field>
</Fields>
<SecurityModel>
<search>[ Public | Restricted ]</search>
</SecurityModel>
<!-- Restricted sites require a LoginProcess which defines how login parameters are to be handled -->
<LoginMap>
<UserName xPath="[ xPath to userName in LoginProcess ]" />
<Password xPath="[ xPath to password in LoginProcess ]" />
</LoginMap>
<LoginProcess>
<Step type="[getURL|getURLSocket|postURL|postURLSocket]" URL="[ login form URL ]" >
<params>
<param formName="[ name of parameter in html form ]" value="[ form value ]" alwaysOutput="[true|false(default) - use for blank values ]" />
</params>
</Step>
</LoginProcess>
<!-- For NTLM Authentication - use this format: -->
<LoginProcess UserName="[user name]"
Password="[password]"
PasswordEnc="[DES encrypted DB password]" />
<!-- The SearchProcess describes the search form that will be sent to the search site: -->
<!-- It can consist of one or more "Steps" depending on the site -->
<SearchProcess
outputStep="[ step number (from 1) that generates output - if no value: the last step will be used ]" >
<Step type="[getURL|getURLSocket|postURL|postURLSocket]" URL="[the URL that the form should be sent to]" >
<params>
<param formName="[ name of parameter in html form ]" value="[ form value ]" alwaysOutput="[true|false(default) - use for blank values ]" />
<param formName="[ name of form parameter ]" value="" alwaysOutput="[true|false(default) - use for blank values ]" />
<!-- etc. . . -->
</params>
</Step>
</SearchProcess>
<!-- The PageProcess describes how paging commands (get or post) will be sent to the search site: -->
<PageProcess mapFrom="[ xPath within result to get paging data ]" method="[tagMap| ]" >
<TotalDocs mapFrom="[ xPath within result to get total docs e.g. '/Records/Page/TotalDocs' ]" />
<Step type="[getURL|getURLSocket|postURL|postURLSocket]" URL="[the URL that the form should be sent to]" >
<params>
<param formName="[ name of parameter in html form ]" value="[ form value ]" alwaysOutput="[true|false(default) - use for blank values ]" />
<!-- computed parameter -->
<param formName="[ name of parameter in html form ]" value="" computeFrom="[ compute formula with PAGE_NUM as placeholder for page Number with 1 representing the first page ]" />
<param formName="[ name of parameter ]" computeFrom="[ some string with pattern {COMPUTE_FROM:[ formula ]} ]" />
</params>
</Step>
</PageProcess>
<ScraperConfigFile>[ path to the HTMLScraper configuration File ]</ScraperConfigFile>
<OutputTransformer>[ path to the XSL file that translates the raw XML to result XML ]</OutputTransformer>
<!-- Optional FieldFormatters section -->
<FieldFormatters>
<Formatter formatterClass="[ class of com.raritantechnologies.searchApp.IFieldFormatter ]" >
</Formatter>
<!-- etc. . . -->
</FieldFormatters>
<ResultSetAttributes>
<Attribute name="[ attribute name ]" xPath="[ xPath of value within HTMLScraper XML output ]" />
<Attribute name="[ another ]" xPath="[ its xPath ]" />
<!-- etc... -->
</ResultSetAttributes>
</SourceType>
| Field Summary |
| Fields inherited from class com.raritantechnologies.searchApp.SearchSource |
ID_FIELD, IS_FEDERATED, NUMBER_OF_FIELDS, SECURE, SOURCE_NAME, SOURCE_TYPE, TITLE_FIELD, URL_FIELD |
| Constructor Summary | |
HTMLSearchSource()
|
|
| Method Summary | |
void |
addFilterField(HTMLFilterField filterField)
|
void |
addFormatField(HTMLFormatField formatField)
|
java.lang.String |
getCacheCookieKey()
|
java.lang.String |
getCookieResultField()
|
HTMLScraper |
getHTMLScraper(ILoginInfo sourceLogin,
OrderedMap cookies)
Initializes an HTMLScraper using the loginInfo and a map to store cookies. |
org.w3c.dom.Document |
getHTMLScraperProcess()
|
boolean |
getIsXOD()
|
HTMLScraper |
getPageHTMLScraper(OrderedMap cookies)
|
org.w3c.dom.Document |
getPageProcess(java.util.Map pageProcessData,
int pageNum,
OrderedMap queryParams)
Returns the SearchProcess used to get subsequent pages of data. |
org.w3c.dom.Document |
getPageProcessTemplate()
|
java.lang.String |
getProxyHost()
|
java.lang.String |
getProxyPassword()
|
java.lang.String |
getProxyPort()
|
java.lang.String |
getProxyUserName()
|
IQueryProcessor |
getQueryProcessor()
Returns the type of QueryProcessor that can access this SearchSource. |
java.lang.String |
getScrapedRecordTagName()
|
java.lang.String |
getScrapedRootTagName()
|
org.w3c.dom.Document |
getSearchProcess(java.util.Map queryParams,
ISearchFieldMap searchFieldMap,
java.lang.Integer pageSize,
java.lang.Integer startRec)
Returns the Scraper process needed to direct the HTMLScraper. |
org.w3c.dom.Document |
getSearchProcess(java.util.Map sessionData,
java.util.Map queryParams,
ISearchFieldMap searchFieldMap,
java.lang.Integer pageSize,
java.lang.Integer startRec)
Returns the Scraper process needed to direct the HTMLScraper. |
void |
setCacheCookieKey(java.lang.String cacheCookieKey)
|
void |
setCookieResultField(java.lang.String cookieResultField)
|
void |
setHTMLScraperProcess(org.w3c.dom.Document htmlScraperConfig)
Sets the scraper configuration. |
void |
setInitCookies(java.lang.String initCookies)
|
void |
setIsXOD(boolean isXOD)
Determines if result is XML (XOD paradigm: XML on Demand). |
void |
setLoginProcessMapping(java.lang.String unamePath,
java.lang.String pwordPath)
Sets the xPath mappings for "UserName" and "Password". |
void |
setLoginProcessTemplate(org.w3c.dom.Document loginProcessTemplate)
Sets the LoginProcess Template. |
void |
setPageProcessTemplate(org.w3c.dom.Document pageProcessTemplate)
|
void |
setPageSizeMapping(java.lang.String pageSizeXPath)
Sets the XPath within the SearchProcess for the PageSize variable. Set by HTMLSearchSourceFactory. |
void |
setProxyHost(java.lang.String proxyHost)
|
void |
setProxyPassword(java.lang.String proxyPassword)
|
void |
setProxyPort(java.lang.String proxyPort)
|
void |
setProxyUserName(java.lang.String proxyUserName)
|
void |
setResultSetAttributeMap(java.util.Map resultSetAttributeMap)
|
void |
setResultSetAttributes(org.w3c.dom.Document resultDoc,
IResultSet resultSet)
|
void |
setSearchProcessTemplate(org.w3c.dom.Document searchProcessTemplate)
Sets the SearchProcess template. |
void |
setSourcePassword(java.lang.String sourcePassword)
|
void |
setSourceUserName(java.lang.String sourceUserName)
|
void |
setStartRecMapping(java.lang.String startRecXPath)
Sets the XPath within the SearchProcess for the StartRec variable. Set by HTMLSearchSourceFactory. |
boolean |
shouldNotInitCookies()
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
public HTMLSearchSource()
| Method Detail |
public IQueryProcessor getQueryProcessor()
SearchSource
getQueryProcessor in class SearchSource
public HTMLScraper getHTMLScraper(ILoginInfo sourceLogin,
OrderedMap cookies)
public HTMLScraper getPageHTMLScraper(OrderedMap cookies)
public org.w3c.dom.Document getSearchProcess(java.util.Map queryParams,
ISearchFieldMap searchFieldMap,
java.lang.Integer pageSize,
java.lang.Integer startRec)
public org.w3c.dom.Document getSearchProcess(java.util.Map sessionData,
java.util.Map queryParams,
ISearchFieldMap searchFieldMap,
java.lang.Integer pageSize,
java.lang.Integer startRec)
public void setHTMLScraperProcess(org.w3c.dom.Document htmlScraperConfig)
public org.w3c.dom.Document getHTMLScraperProcess()
public java.lang.String getScrapedRecordTagName()
public java.lang.String getScrapedRootTagName()
public void setInitCookies(java.lang.String initCookies)
public boolean shouldNotInitCookies()
public void setSearchProcessTemplate(org.w3c.dom.Document searchProcessTemplate)
public void setPageProcessTemplate(org.w3c.dom.Document pageProcessTemplate)
public org.w3c.dom.Document getPageProcessTemplate()
public org.w3c.dom.Document getPageProcess(java.util.Map pageProcessData,
int pageNum,
OrderedMap queryParams)
public void setLoginProcessTemplate(org.w3c.dom.Document loginProcessTemplate)
public void setLoginProcessMapping(java.lang.String unamePath,
java.lang.String pwordPath)
public void setPageSizeMapping(java.lang.String pageSizeXPath)
public void setStartRecMapping(java.lang.String startRecXPath)
public void addFormatField(HTMLFormatField formatField)
public void addFilterField(HTMLFilterField filterField)
public void setResultSetAttributeMap(java.util.Map resultSetAttributeMap)
public void setResultSetAttributes(org.w3c.dom.Document resultDoc,
IResultSet resultSet)
public void setIsXOD(boolean isXOD)
public boolean getIsXOD()
public void setProxyHost(java.lang.String proxyHost)
public java.lang.String getProxyHost()
public void setProxyPort(java.lang.String proxyPort)
public java.lang.String getProxyPort()
public void setProxyUserName(java.lang.String proxyUserName)
public java.lang.String getProxyUserName()
public void setProxyPassword(java.lang.String proxyPassword)
public java.lang.String getProxyPassword()
public void setCacheCookieKey(java.lang.String cacheCookieKey)
public java.lang.String getCacheCookieKey()
public void setCookieResultField(java.lang.String cookieResultField)
public java.lang.String getCookieResultField()
public void setSourceUserName(java.lang.String sourceUserName)
public void setSourcePassword(java.lang.String sourcePassword)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||