| Main | Javadoc | Example |
|
|
The URLPageImportRenderer is used to import pages from web URLs. This can be done to frame the page's information within an application portal and to add other information or processing to the content, such as auto-tagging, entity extraction, annotation, etc.
The URLPageImportRenderer is used in the Quotes.com demo within a DynamicPageImportRenderer, which directs the application flow so that a user can get a stock quote whether or not they know the ticker symbol of the company they are interested in.
XML Configuration for this example:
<!-- ============================================================================== -->
<!-- URLPageImportRenderer executes a search on the lookup ticker symbol by -->
<!-- applying the 'company' parameter in the containing page's http request to -->
<!-- the "lookup" parameter of the www.quote.com search url: -->
<!-- ============================================================================== -->
<PageContextRenderer class="com.raritantechnologies.searchApp.taglibrary.URLPageImportRenderer"
                     pageFrom="dynamicURL"
                     urlBase="http://www.quote.com/qc/lookup/search_results.aspx">
  <!-- Request parameters appended to urlBase: the incoming 'company' value -->
  <!-- is passed on as 'lookup', alongside a fixed 'context' value.         -->
  <Param inputParam="company" outputParam="lookup" />
  <FixedParam outputParam="context" outputValue="www_stocks" />
  <PageFilter class="com.raritantechnologies.utils.filter.SequentialStringFilter">
    <!-- =========================================================== -->
    <!-- Use a SelectingStringFilter to check if we need to go       -->
    <!-- after the single result or let the results page stand.      -->
    <!-- =========================================================== -->
    <StringFilter class="com.raritantechnologies.utils.filter.SelectingStringFilter">
      <!-- Detect a single result "Showing results 1 through 1 of 1" -->
      <Comparator class="com.raritantechnologies.utils.comparators.StringContainsComparator"
                  contains="1 of 1." />
      <!-- ========================================================= -->
      <!-- If we have only one result: let's just go GET it (even    -->
      <!-- computers are smart enough to make this decision )!!!     -->
      <!-- ========================================================= -->
      <StringFilter class="com.raritantechnologies.utils.filter.SequentialStringFilter">
        <!-- HTMLScraperFilter to get link to Quote page URL (set for httpMethod = none ) -->
        <StringFilter class="com.raritantechnologies.HTML.HTMLScraperFilter"
                      xmlScraperConfig="BASE_PATH/WEB-INF/conf/StockQuotes/QuotesDotComScraper.xml"
                      httpMethod="none" />
        <!-- SAXCallbackStringFilter to extract quote URL from XML -->
        <StringFilter class="com.raritantechnologies.xml.filter.SAXCallbackStringFilter"
                      callbackClass="com.raritantechnologies.xml.sax.filter.callbacks.CDataCallbackOperation"
                      multiple="false">
          <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator">
            <!-- This matches the Tag created by the HTMLScraperFilter -->
            <TagName>QuoteURL</TagName>
          </Comparator>
        </StringFilter>
        <!-- Now that we have a URL to the final quotes page: get it. -->
        <!-- The extracted path is first prefixed with the site host. -->
        <StringFilter class="com.raritantechnologies.utils.filter.ConcatenateFilter"
                      prependString="http://www.quote.com" />
        <!-- URLContentFilter to get page from href -->
        <StringFilter class="com.raritantechnologies.utils.filter.URLContentFilter"
                      useURLSocket="false"
                      requestMethod="get" />
      </StringFilter>
    </StringFilter> <!-- SelectingStringFilter -->
    <!-- =============================================================== -->
    <!-- Got here either after a deep scrape to get Quote page OR a      -->
    <!-- search results page if no, or too many initial results. In      -->
    <!-- either case, we need to do some cleanup on the hrefs since we   -->
    <!-- are framing the result page. This filter prefixes root-relative -->
    <!-- href values with the www.quote.com base URL so they become      -->
    <!-- absolute URLs.                                                  -->
    <!--                                                                 -->
    <!-- NOTE: the double quote inside each pattern must be written as   -->
    <!-- the quot entity; a raw quote would end the attribute value      -->
    <!-- early and make this file unparseable.                           -->
    <!-- =============================================================== -->
    <StringFilter class="com.raritantechnologies.utils.filter.ReplaceSubstringFilter"
                  inPattern="href=&quot;/"
                  outPattern="href=&quot;http://www.quote.com/"
                  replace="ALL" />
  </PageFilter>
</PageContextRenderer>