MainJavadocExample
URLPageImportRenderer Demo

The URLPageImportRenderer is used to import pages from web URLs. This can be done to frame the page's information within an application portal and to add other information / processing to the content such as auto-tagging, entity extraction, annotation, etc.

The URLPageImportRenderer is used in the Quotes.com demo within a DynamicPageImportRenderer which directs the application flow so that a user can get a stock quote whether or not they know the ticker symbol of the company they are interested in.

XML Configuration for this example:
  <!-- ============================================================================== -->
  <!--  URLPageImportRenderer executes a search on the lookup ticker symbol by        -->
  <!--  applying the 'company' parameter in the containing page's http request to     -->
  <!--  the "lookup" parameter of the www.quote.com search url:                       -->
  <!-- ============================================================================== -->
  <PageContextRenderer class="com.raritantechnologies.searchApp.taglibrary.URLPageImportRenderer"
                       pageFrom="dynamicURL"
                       urlBase="http://www.quote.com/qc/lookup/search_results.aspx" >

    <!-- Request parameter 'company' is passed to the search url as 'lookup' -->
    <Param inputParam="company" outputParam="lookup" />
    <!-- Fixed parameter added to the search url: context=www_stocks -->
    <FixedParam outputParam="context" outputValue="www_stocks" />

    <PageFilter class="com.raritantechnologies.utils.filter.SequentialStringFilter" >

      <!-- =========================================================== -->
      <!-- Use a SelectingStringFilter to check if we need to go       -->
      <!-- after the single result or let the results page stand.      -->
      <!-- =========================================================== -->
      <StringFilter class="com.raritantechnologies.utils.filter.SelectingStringFilter" >

        <!-- Detect a single result "Showing results 1 through 1 of 1" -->
        <Comparator class="com.raritantechnologies.utils.comparators.StringContainsComparator"
                    contains="1 of 1." />

        <!-- ========================================================= -->
        <!-- If we have only one result: let's just go GET it (even    -->
        <!-- computers are smart enough to make this decision )!!!     -->
        <!-- ========================================================= -->
        <StringFilter class="com.raritantechnologies.utils.filter.SequentialStringFilter" >

          <!-- HTMLScraperFilter to get link to Quote page URL (set for httpMethod = none ) -->
          <StringFilter class="com.raritantechnologies.HTML.HTMLScraperFilter"
                        xmlScraperConfig="BASE_PATH/WEB-INF/conf/StockQuotes/QuotesDotComScraper.xml"
                        httpMethod="none" />

          <!-- SAXCallbackStringFilter to extract quote URL from XML -->
          <StringFilter class="com.raritantechnologies.xml.filter.SAXCallbackStringFilter"
                        callbackClass="com.raritantechnologies.xml.sax.filter.callbacks.CDataCallbackOperation"
                        multiple="false" >

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <!-- This matches the Tag created by the HTMLScraperFilter -->
              <TagName>QuoteURL</TagName>
            </Comparator>

          </StringFilter>

          <!-- The extracted quote URL is site-relative: prepend the host to make it absolute -->
          <StringFilter class="com.raritantechnologies.utils.filter.ConcatenateFilter"
                        prependString="http://www.quote.com" />

          <!-- Now that we have a URL to the final quotes page: get it -->
          <!-- URLContentFilter to get page from href -->
          <StringFilter class="com.raritantechnologies.utils.filter.URLContentFilter"
                        useURLSocket="false"
                        requestMethod="get" />

        </StringFilter>

      </StringFilter> <!-- SelectingStringFilter -->

      <!-- ===============================================================  -->
      <!-- Got here either after a deep scrape to get Quote page OR a       -->
      <!-- search results page if no, or too many initial results. In       -->
      <!-- either case, we need to do some cleanup on the hrefs since we    -->
      <!-- are framing the result page.  This filter rewrites every         -->
      <!-- root-relative href (href="/...) to the absolute quote.com url.   -->
      <!--                                                                  -->
      <!-- The double quote inside each pattern is written as &quot; so     -->
      <!-- the attribute values stay well-formed XML; after parsing, the    -->
      <!-- filter receives the literal text href="/ as its inPattern.       -->
      <!--                                                                  -->
      <!-- NOTE(review): only href is rewritten here; src attributes        -->
      <!-- (e.g. images) are not matched by this pattern. Confirm whether   -->
      <!-- a second ReplaceSubstringFilter for src="/ is needed.            -->
      <!-- ===============================================================  -->
      <StringFilter class="com.raritantechnologies.utils.filter.ReplaceSubstringFilter"
                    inPattern="href=&quot;/" outPattern="href=&quot;http://www.quote.com/"
                    replace="ALL" />

    </PageFilter>

  </PageContextRenderer>