MainJavadocExample
LuceneOutputProcessor Demo

Uses a FileXMLDataSource and an XMLResultProcessor to build a Lucene collection from a set of XML data files.

This collection is used by the Lucene Search Source demo.


XML Configuration for this example:

<!--
  Example build configuration with two parts:
    1. CollectionGateway: reads XML files from a directory (XMLSource) and
       pulls named fields out of each record (XMLResultProcessor).
    2. GatewayOutputProcessor: maps those field IDs onto Lucene field names
       and writes the index under indexDir.
  NOTE(review): the behaviour of each class named below is defined by the
  framework, not by this file; comments describe only what the configuration
  shows and mark assumptions as such.
-->
<LuceneCollectionBuild>

  <!-- resultTag here has the same value ("ResultSet") as RecordTag in the
       XMLResultProcessor below. -->
  <CollectionGateway class="com.raritantechnologies.xml.dataCollection.SimpleXMLGateway"
                        resultTag="ResultSet" >

    <!-- Input files.
         NOTE(review): filePath is an absolute, machine-specific Windows path;
         change it to the local directory holding the XML data files.
         recurseSubdirectories="false": presumably only files directly in
         filePath are read (confirm).
         fileNameTag="FileName": presumably the source file name is exposed
         under a FileName tag, which the FileName field below matches on
         (confirm). -->
    <XMLSource class="com.raritantechnologies.xml.dataCollection.FileXMLDataSource" 
                filePath="C:/Java/Projects/VerityInABox/TestPrograms/CollectionBuild/TestHTMLScraperGatewayHCI/VerityArticles/xmlFiles"
                recurseSubdirectories="false"
                fileNameTag="FileName" />

    <XMLResultProcessor>
       <!-- Field extraction rules. Each simple Field below uses the same value
            for its ID, its ParamName and the TagName of its Comparator.
            NOTE(review): these simple fields give the comparator class as the
            short name "TagComparator", while the FT field further down uses
            the fully qualified name; presumably the short form is resolved
            against a default package (confirm). -->
       <RecordTag>ResultSet</RecordTag>

        <Field ID="FileName" callbackType="CData" >
          <ParamName>FileName</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>FileName</TagName>
          </Comparator>
        </Field>

        <Field ID="TI" callbackType="CData" >
          <ParamName>TI</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>TI</TagName>
          </Comparator>
        </Field>

        <Field ID="AB" callbackType="CData" >
          <ParamName>AB</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>AB</TagName>
          </Comparator>
        </Field>

        <Field ID="AU" callbackType="CData" >
          <ParamName>AU</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>AU</TagName>
          </Comparator>
        </Field>

        <Field ID="IS" callbackType="CData" >
          <ParamName>IS</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>IS</TagName>
          </Comparator>
        </Field>

        <Field ID="PUB" callbackType="CData" >
          <ParamName>PUB</ParamName> 
          <Comparator class="TagComparator" >
            <TagName>PUB</TagName>
          </Comparator>
        </Field>

        <!-- FT is the only field marked multiple="true". Its outer
             ElementListComparator has andLogic="false" and wraps eight
             TagComparators, for the tags FT and FT2 through FT8.
             Presumably any one of those tags matches (OR logic) and the
             matched values are joined with the delimiter (confirm).
             NOTE(review): delimiter="\n\n" is a literal backslash-n sequence
             as far as XML is concerned; presumably the processor unescapes it
             into two newlines (confirm). -->
        <Field ID="FT" callbackType="CData" multiple="true" delimiter="\n\n" >
          <ParamName>FT</ParamName> 
          <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.ElementListComparator" 
                      andLogic="false" >
            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT2</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT3</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT4</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT5</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT6</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT7</TagName>
            </Comparator>

            <Comparator class="com.raritantechnologies.xml.sax.filter.comparators.TagComparator" >
              <TagName>FT8</TagName>
            </Comparator>
          </Comparator>
        </Field>

    </XMLResultProcessor>

  </CollectionGateway>
 
  <!-- Index output. Each Field maps a field ID declared in the
       XMLResultProcessor above to a Lucene field name; all seven IDs
       (TI, AU, AB, FT, IS, PUB, FileName) have a mapping here.
       NOTE(review): indexDir is an absolute, machine-specific Windows path;
       change it to the local index location.
       NOTE(review): three type values appear (text, indexed, unindexed);
       how each is stored, indexed or tokenized is decided by
       LuceneOutputProcessor and is not shown in this file (confirm). -->
  <GatewayOutputProcessor class="com.raritantechnologies.federated.lucene.LuceneOutputProcessor"
                      indexDir="C:/Program Files/Apache Group/Tomcat 4.1/webapps/FrameworkDocumentation/data/HealthcareInformatics/luceneColl" >

    <Field ID="TI"  luceneField="title"     type="text" />
    <Field ID="AU"  luceneField="author"    type="text" />
    <Field ID="AB"  luceneField="abstract"  type="text" />
    <Field ID="FT"  luceneField="fullText"  type="indexed" />

    <Field ID="IS"  luceneField="issue"     type="text" />
    <Field ID="PUB" luceneField="publication"     type="text" />


    <Field ID="FileName"  luceneField="FileName"     type="unindexed" />

  </GatewayOutputProcessor>
 

</LuceneCollectionBuild>