This demo provides an enhancement to the
BrowseLinkElementRenderer demo in which Shakespearean terms could be browsed and
then searched using the Shakespeare online site.
The limitation of the above demo is that it only provides a link to the search site. After clicking on this link, the user
has to manually navigate to the specific term reference. This demo improves on it by automatically displaying excerpts
from the dialogue in which the term is used. The excerpts are generated on the fly by searching absoluteshakespeare.com,
deep-scraping the pages it returns, and excerpting them.
This demo uses a number of framework elements:
XML Configuration for this Demo:
<!-- ======================================================================== -->
<!-- Use DynamicResultRenderer to add the dynamic extraction of Shakespeare   -->
<!-- snippets when the page is requested from the HTMLScraperFilter page.     -->
<!-- The renderer applies only when displayType=advanced has been added to    -->
<!-- the action parameter of the request.                                     -->
<!-- ======================================================================== -->
<ResultRenderer
rendererClass="com.raritantechnologies.searchApp.taglibrary.DynamicResultRenderer" >
<!-- Only show this renderer if the &displayType=advanced is in the request -->
<UseRenderer>
<RequestParams>
  <!-- Request parameter that is checked before this renderer is used. -->
  <Param name="displayType">
    <ValidValues>
      <!-- The only value listed as valid for displayType. -->
      <Value>advanced</Value>
    </ValidValues>
  </Param>
</RequestParams>
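<!-- ============================================================================ -->
<!-- FormattingResultRenderer - Applies a formatting operation and then a         -->
<!-- rendering operation.                                                         -->
<!-- NOTE(review): the element below names its class with "class", whereas every  -->
<!-- other ResultRenderer in this configuration uses "rendererClass"; confirm     -->
<!-- which attribute the framework expects.                                       -->
<!-- ============================================================================ -->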
<ResultRenderer class="com.raritantechnologies.searchApp.taglibrary.FormattingResultRenderer" >
<!-- ========================================================================== -->
<!-- Since results are cached, make sure that we only do the formatting -->
<!-- operation if we need to. Uses a ResultHasFieldMatcher to check if the -->
<!-- "quotationLinkXML" field has already been created for this term. If so, -->
<!-- don't do it again. -->
<!-- ========================================================================== -->
<Formatter
formatterClass="com.raritantechnologies.searchApp.formatters.DynamicFormatter" >
<UseFormatter>
<!-- Guard for the formatter: tests the result for the quotationLinkXML field. -->
<Matcher class="com.raritantechnologies.searchApp.resultComparators.ResultHasFieldMatcher"
         isAnd="true">
  <Fields>
    <!-- matchValue="false": presumably the match succeeds only while the     -->
    <!-- field has not been created yet, so cached results are not formatted  -->
    <!-- a second time (confirm against ResultHasFieldMatcher).               -->
    <Field ID="quotationLinkXML" matchValue="false"/>
  </Fields>
</Matcher>
<!-- ======================================================================= -->
<!-- This set of formatters creates a set of nested results, one for each -->
<!-- play link containing the term. The term is first used to do a search -->
<!-- against the absoluteShakespeare.com site and to scrape the results -->
<!-- page to get links to play sections containing the search term. -->
<!-- The individual links are then retrieved and excerpted. -->
<!-- ======================================================================= -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.SequentialFieldFormatter" >
<!-- ====================================================================== -->
<!-- 1) Turn the Term field into a search of the absoluteshakespeare.com    -->
<!--    site and scrape the results page: StringFilterFormatter runs the    -->
<!--    HTMLScraperFilter, which transforms the search link into a set of   -->
<!--    links to the plays (as XML), stored in quotationLinkXML.            -->
<!-- ====================================================================== -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.StringFilterFormatter"
           fieldID="Term"
           outputID="quotationLinkXML">
  <StringFilter class="com.raritantechnologies.HTML.HTMLScraperFilter"
                xmlScraperConfig="BASE_PATH/WEB-INF/conf/Shakespeare/ShakespeareResultScraper.xml"
                httpMethod="get"
                dataXPath="/SearchProcess/Step/params/param[@formName='q']/@value">
    <SearchProcess>
      <Step type="getURLSocket"
            URL="http://absoluteshakespeare.com/cgi-bin/perlfect/search/search.pl">
        <params>
          <!-- dataXPath above addresses the value attribute of this "q"  -->
          <!-- param; presumably the Term text is inserted there when the -->
          <!-- search is issued (confirm against HTMLScraperFilter).      -->
          <param formName="q" value=""/>
          <param formName="lang" value="en"/>
          <param formName="p" value="1"/>
        </params>
      </Step>
    </SearchProcess>
  </StringFilter>
</Formatter>
<!-- ====================================================================== -->
<!-- 2) XMLResultProcessorFormatter extracts fields from the scraped XML,   -->
<!--    specifically the link to the play and any other information. It     -->
<!--    creates nested "playLinks" results holding the play title and a     -->
<!--    URL to a page containing the play section.                          -->
<!-- ====================================================================== -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.XMLResultProcessorFormatter"
           xmlFieldID="quotationLinkXML"
           nestedResultName="playLinks"
           resultTag="Record"
           multiple="true">
  <XMLProcessor>
    <RecordTag>Record</RecordTag>

    <!-- quotationURL: selected by the TagComparator on the Link tag. -->
    <Field ID="quotationURL" callbackType="CData">
      <ParamName>quotationURL</ParamName>
      <Comparator class="TagComparator">
        <TagName>Link</TagName>
      </Comparator>
    </Field>

    <!-- playTitle: selected by the TagComparator on the Title tag. -->
    <Field ID="playTitle" callbackType="CData">
      <ParamName>playTitle</ParamName>
      <Comparator class="TagComparator">
        <TagName>Title</TagName>
      </Comparator>
      <!-- StringFilter: snip off "at Absolute Shakespeare"           -->
      <!-- NOTE(review): no such StringFilter is configured here yet. -->
    </Field>
  </XMLProcessor>
</Formatter>
<!-- ====================================================================== -->
<!-- 3) Format the nested result set created by the step above: use         -->
<!--    URLContentFilter to get each play page whose link was scraped from  -->
<!--    the search results page, then use HTMLFilter to extract the HTML    -->
<!--    for the excerpt.                                                    -->
<!-- ====================================================================== -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.NestedResultFormatter"
nestedField="playLinks" copyParentFields="Term" >
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.SequentialFieldFormatter" >
<!-- ============================================================ -->
<!-- 3.1) Read the play section using the quotationURL and add it -->
<!--      to the nested result as the "playSection" field.        -->
<!-- ============================================================ -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.StringFilterFormatter"
           fieldID="quotationURL"
           outputID="playSection">
  <StringFilter class="com.raritantechnologies.utils.filter.SequentialStringFilter">

    <!-- ============================================================== -->
    <!-- URLContentFilter: transforms the URL into its content          -->
    <!-- ============================================================== -->
    <StringFilter class="com.raritantechnologies.utils.filter.URLContentFilter"
                  useURLSocket="false"
                  requestMethod="get"/>

    <!-- =================================================================== -->
    <!-- HTMLFilter: extracts the portion of the HTML document between the   -->
    <!-- #BeginEditable "content" HTML comment and the #EndEditable HTML     -->
    <!-- comment.                                                            -->
    <!-- =================================================================== -->
    <StringFilter class="com.raritantechnologies.HTML.filter.HTMLFilter">
      <EventProcessor class="com.raritantechnologies.HTML.filter.ExtractHTMLSectionFilter">
        <!-- Start marker: combines two contains-checks, "#BeginEditable" -->
        <!-- and "content", under an AndComparator.                       -->
        <StartComparator class="com.raritantechnologies.utils.comparators.AndComparator">
          <Comparator class="com.raritantechnologies.utils.comparators.StringContainsComparator"
                      contains="#BeginEditable"/>
          <Comparator class="com.raritantechnologies.utils.comparators.StringContainsComparator"
                      contains="content"/>
        </StartComparator>
        <!-- End marker: a single contains-check for "#EndEditable". -->
        <EndComparator class="com.raritantechnologies.utils.comparators.StringContainsComparator"
                       contains="#EndEditable"/>
      </EventProcessor>
    </StringFilter>
  </StringFilter>
</Formatter>
<!-- ====================================================================== -->
<!-- 3.2) Use the ExcerptFormatter to extract the lines of text surrounding -->
<!--      the searched-for term. Reads the keyword from "Term" and the data -->
<!--      from "playSection"; the excerpt field is "termExcerpt".           -->
<!-- ====================================================================== -->
<Formatter formatterClass="com.raritantechnologies.searchApp.formatters.ExcerptFormatter"
           keywordField="Term"
           dataField="playSection"
           excerptField="termExcerpt"/>
</Formatter> <!-- SequentialFieldFormatter -->
</Formatter> <!-- NestedResultFormatter -->
</Formatter> <!-- SequentialFieldFormatter -->
</UseFormatter>
</Formatter> <!-- DynamicFormatter -->
<!-- ======================================================================== -->
<!-- Now render the accumulated data using the MultipleValueResultRenderer    -->
<!-- ======================================================================== -->
<ResultRenderer rendererClass="com.raritantechnologies.searchApp.taglibrary.MultipleValueResultRenderer"
name="quoteExcerpts"
multiValField="playLinks"
nColumns="1" >
<ResultRenderer rendererClass="com.raritantechnologies.quickstart.taglibrary.ResultRowRenderer" >
<!-- ======================================================= -->
<!-- Render the link to the full quote:                      -->
<!-- FieldHyperlinkRenderer takes the href from quotationURL -->
<!-- and the label from playTitle.                           -->
<!-- ======================================================= -->
<ResultRenderer rendererClass="com.raritantechnologies.quickstart.taglibrary.FieldHyperlinkRenderer">
  <HrefField ID="quotationURL"/>
  <LabelField ID="playTitle"/>
</ResultRenderer>
<!-- ======================================================= -->
<!-- Render the Excerpt                                      -->
<!-- SimpleFieldRenderer using termExcerpt                   -->
<!-- ======================================================= -->
<!-- Highlighting result renderer highlights the term.                    -->
<!-- hlBegin and hlEnd are written with &lt; and &gt; because a literal   -->
<!-- "<" is not allowed inside an XML attribute value; after parsing, the -->
<!-- attribute values are still <b> and </b>.                             -->
<ResultRenderer rendererClass="com.raritantechnologies.quickstart.taglibrary.HighlightingResultRenderer"
                hlBegin="&lt;b&gt;"
                hlEnd="&lt;/b&gt;"
                queryParam="Term">
  <ResultRenderer rendererClass="com.raritantechnologies.quickstart.taglibrary.SimpleFieldRenderer">
    <Field ID="termExcerpt"/>
  </ResultRenderer>
</ResultRenderer>
</ResultRenderer> <!-- ResultRowRenderer -->
</ResultRenderer> <!-- MultipleValueRenderer -->
</ResultRenderer> <!-- FormattingResultRenderer -->
</UseRenderer>
</ResultRenderer> <!-- DynamicResultRenderer -->