emory-libraries/dlp-selfdeposit

View on GitHub
solr/conf/schema.xml

Summary

Maintainability
Test Coverage
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!--
 This is the Solr schema file. This file should be named "schema.xml" and
 should be in the conf directory under the solr home
 (i.e. ./solr/conf/schema.xml by default)
 or located where the classloader for the Solr webapp can find it.

 This example schema is the recommended starting point for users.
 It should be kept correct and concise, usable out-of-the-box.

 For more information on how to customize this file, please see
 http://wiki.apache.org/solr/SchemaXml

 PERFORMANCE NOTE: this schema includes many optional features and should not
 be used for benchmarking.  To improve performance one could
  - set stored="false" for all fields possible (esp large fields) when you
    only need to search on the field but don't need to return the original
    value.
  - set indexed="false" if you don't need to search on the field, but only
    return the field as a result of searching on other indexed fields.
  - remove all unneeded copyField statements
  - for best index size and searching performance, set indexed="false"
    for all general text fields, use copyField to copy them to the
    catchall "text" field, and use that for searching.
  - For maximum indexing performance, use the StreamingUpdateSolrServer
    java client.
  - Remember to run the JVM in server mode, and use a higher logging level
    that avoids logging every request
-->

<schema name="Hydra Demo Index" version="1.5">
  <!-- attribute "name" is the name of this schema and is only used for display purposes.
       Applications should change this to reflect the nature of the search collection.
       version="1.5" is Solr's version number for the schema syntax and semantics.  It should
       not normally be changed by applications.
       1.0: multiValued attribute did not exist, all fields are multiValued by nature
       1.1: multiValued attribute introduced, false by default
       1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
       1.3: removed optional field compress feature
       1.4: default auto-phrase (QueryParser feature) to off
       1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
       TODO: 1.6 makes useDocValuesAsStored default to true; see
       https://github.com/samvera/active_fedora/issues/1346
     -->

  <types>
    <!-- Primitive types: plain strings, booleans, and a random-sort type.
         sortMissingLast places documents that lack a value at the end of a sort. -->
    <fieldType name="string"  class="solr.StrField"        sortMissingLast="true"/>
    <fieldType name="boolean" class="solr.BoolField"       sortMissingLast="true"/>
    <fieldType name="rand"    class="solr.RandomSortField" omitNorms="true"/>

    <!-- Default numeric field types: all Point-based, with docValues enabled. -->
    <fieldType name="int"     class="solr.IntPointField"    docValues="true"/>
    <fieldType name="float"   class="solr.FloatPointField"  docValues="true"/>
    <fieldType name="long"    class="solr.LongPointField"   docValues="true"/>
    <fieldType name="double"  class="solr.DoublePointField" docValues="true"/>

    <!-- "t"-prefixed numeric types, referenced by the range-query dynamic fields.
         They are declared with the same class and attributes as the types above. -->
    <fieldType name="tint"    class="solr.IntPointField"    docValues="true"/>
    <fieldType name="tfloat"  class="solr.FloatPointField"  docValues="true"/>
    <fieldType name="tlong"   class="solr.LongPointField"   docValues="true"/>
    <fieldType name="tdouble" class="solr.DoublePointField" docValues="true"/>

    <!-- Date values take the form 1995-12-31T23:59:59Z.
         Fractional seconds are optional, e.g. 1995-12-31T23:59:59.999Z
      -->
    <fieldType name="date"  class="solr.DatePointField" docValues="true"/>
    <!-- "tdate" is declared identically to "date"; it is the type behind the
         date range query / date faceting dynamic fields. -->
    <fieldType name="tdate" class="solr.DatePointField" docValues="true"/>
    <!-- DateRangeField-based type for date range queries. -->
    <fieldType name="dateRange"
               class="solr.DateRangeField"
               omitNorms="true"
               omitTermFreqAndPositions="true"/>

    <!-- This point type indexes the coordinates as separate fields (subFields)
      If subFieldType is defined, it references a type, and a dynamic field
      definition is created matching *___<typename>.  Alternately, if
      subFieldSuffix is defined, that is used to create the subFields.
      Example: if subFieldType="double", then the coordinates would be
        indexed in fields myloc_0___double,myloc_1___double.
      Example: if subFieldSuffix="_d" then the coordinates would be indexed
        in fields myloc_0_d,myloc_1_d
      The subFields are an implementation detail of the fieldType, and end
      users normally should not need to know about them.
     -->
    <!-- Two-dimensional point whose coordinates go to sub-fields named with the "_d" suffix.
         NOTE(review): no dynamicField matching *_d appears in this schema; confirm one
         exists before declaring any field with this type. -->
    <fieldType name="point"
               class="solr.PointType"
               dimension="2"
               subFieldSuffix="_d"/>

    <!-- A geospatial field type new to Solr 4.  It supports multiValued and polygon shapes.
      For more information about this and other Spatial fields new to Solr 4, see:
      http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
    -->
    <fieldType name="location_rpt"
               class="solr.SpatialRecursivePrefixTreeFieldType"
               geo="true"
               distErrPct="0.025"
               maxDistErr="0.000009"
               distanceUnits="degrees"/>

    <!-- General-purpose Unicode text: ICU tokenization and folding, no stemming.
         positionIncrementGap="100" puts a position gap between the values of a
         multiValued field so that phrase queries cannot match across two separate
         values; the other tokenized text types in this schema use the same gap.
         Documents indexed before this attribute was added keep their old
         positions until they are reindexed. -->
    <fieldType name="text" class="solr.TextField" omitNorms="false" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.ICUTokenizerFactory"/>
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
        <filter class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Whitespace-delimited text: tokens are split on whitespace only, for exact matching of words. -->
    <fieldType name="text_ws"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter    class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Single-token analyzed text, for sorting: the whole value is kept as one
         token, then ICU-folded and trimmed. Punctuation is significant.
         The element name is spelled "fieldType" to match every other type
         declaration in this schema (XML names are case-sensitive). -->
    <fieldType name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- English text: ICU tokenization and folding, possessive stripping, light stemming. -->
    <fieldType name="text_en"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.ICUTokenizerFactory"/>
        <!-- ICU folding: NFKC, case folding, diacritics removed -->
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <!-- The minimal English stemmer is used because it is less aggressive than the
             Porter stemmer. The Porter alternative is kept here for reference:
             <filter class="solr.PorterStemFilterFactory"/>
        -->
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
        <filter class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Path type: a query for a path matches documents at that path or in descendent paths.
         Index side expands the stored path into its "/"-delimited hierarchy;
         query side keeps the queried path as a single token. -->
    <fieldType name="descendent_path" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <!-- Path type: a query for a path matches documents at that path or in ancestor paths.
         Index side keeps the stored path as a single token;
         query side expands the queried path into its "/"-delimited hierarchy. -->
    <fieldType name="ancestor_path" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
      </analyzer>
    </fieldType>

    <!-- Suggestion text: the whole value is one token, lower-cased, with duplicate tokens removed. -->
    <fieldType name="textSuggest"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter    class="solr.LowerCaseFilterFactory"/>
        <filter    class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>


  <fields>
    <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
    or Solr won't start. _version_ and update log are required for SolrCloud
    -->
    <field name="_version_" type="long" indexed="true" stored="true"/>

    <!-- Document identifier (named by uniqueKey) and an indexing timestamp that defaults to NOW. -->
    <field name="id"        type="string" indexed="true" stored="true" multiValued="false" required="true"/>
    <field name="timestamp" type="date"   indexed="true" stored="true" multiValued="false" default="NOW"/>

    <!-- Single-valued doubles; presumably latitude and longitude. -->
    <field name="lat" type="tdouble" indexed="true" stored="true" multiValued="false"/>
    <field name="lng" type="tdouble" indexed="true" stored="true" multiValued="false"/>

    <!-- NOTE:  not all possible Solr field types are represented in the dynamic fields -->

    <!-- text (_t...)
         Suffix letters after the type prefix: s = stored, i = indexed,
         m = multiValued, v = term vectors with positions and offsets. -->
    <dynamicField name="*_ti"    type="text" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_tim"   type="text" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_ts"    type="text" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_tsm"   type="text" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_tsi"   type="text" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_tsim"  type="text" indexed="true"  stored="true"  multiValued="true"/>
    <dynamicField name="*_tiv"   type="text" indexed="true"  stored="false" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_timv"  type="text" indexed="true"  stored="false" multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tsiv"  type="text" indexed="true"  stored="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tsimv" type="text" indexed="true"  stored="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>

    <!-- English text (_te...): same suffix combinations as the plain text fields, analyzed with text_en. -->
    <dynamicField name="*_tei"    type="text_en" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_teim"   type="text_en" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_tes"    type="text_en" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_tesm"   type="text_en" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_tesi"   type="text_en" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_tesim"  type="text_en" indexed="true"  stored="true"  multiValued="true"/>
    <dynamicField name="*_teiv"   type="text_en" indexed="true"  stored="false" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_teimv"  type="text_en" indexed="true"  stored="false" multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tesiv"  type="text_en" indexed="true"  stored="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tesimv" type="text_en" indexed="true"  stored="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>

    <!-- string (_s...); *_ssort uses the alphaSort type and is indexed only. -->
    <dynamicField name="*_si"    type="string"    indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_sim"   type="string"    indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_ss"    type="string"    indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_ssm"   type="string"    indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_ssi"   type="string"    indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_ssim"  type="string"    indexed="true"  stored="true"  multiValued="true"/>
    <dynamicField name="*_ssort" type="alphaSort" indexed="true"  stored="false" multiValued="false"/>

    <!-- integer (_i...) -->
    <dynamicField name="*_ii"    type="int"  indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_iim"   type="int"  indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_is"    type="int"  indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_ism"   type="int"  indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_isi"   type="int"  indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_isim"  type="int"  indexed="true"  stored="true"  multiValued="true"/>

    <!-- integer via the "tint" type (_it...), intended for range queries -->
    <dynamicField name="*_iti"   type="tint" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_itim"  type="tint" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_its"   type="tint" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_itsm"  type="tint" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_itsi"  type="tint" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_itsim" type="tint" indexed="true"  stored="true"  multiValued="true"/>

    <!-- date (_dt...)
         Values take the form 1995-12-31T23:59:59Z; fractional seconds are
         optional, e.g. 1995-12-31T23:59:59.999Z -->
    <dynamicField name="*_dti"    type="date"  indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_dtim"   type="date"  indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_dts"    type="date"  indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_dtsm"   type="date"  indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_dtsi"   type="date"  indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_dtsim"  type="date"  indexed="true"  stored="true"  multiValued="true"/>

    <!-- date via the "tdate" type (_dtt...), intended for range queries -->
    <dynamicField name="*_dtti"   type="tdate" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_dttim"  type="tdate" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_dtts"   type="tdate" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_dttsm"  type="tdate" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_dttsi"  type="tdate" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_dttsim" type="tdate" indexed="true"  stored="true"  multiValued="true"/>


    <!-- date range (_dr...) (for faster AND better range queries)
         Every variant is indexed: *_drs and *_drsm are indexed anyway because
         DateRangeField errors otherwise. -->
    <dynamicField name="*_dri"   type="dateRange" indexed="true" stored="false" multiValued="false"/>
    <dynamicField name="*_drim"  type="dateRange" indexed="true" stored="false" multiValued="true"/>
    <dynamicField name="*_drsi"  type="dateRange" indexed="true" stored="true"  multiValued="false"/>
    <dynamicField name="*_drsim" type="dateRange" indexed="true" stored="true"  multiValued="true"/>
    <dynamicField name="*_drs"   type="dateRange" indexed="true" stored="true"  multiValued="false"/>
    <dynamicField name="*_drsm"  type="dateRange" indexed="true" stored="true"  multiValued="true"/>

    <!-- long (_l...) -->
    <dynamicField name="*_li"    type="long"  indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_lim"   type="long"  indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_ls"    type="long"  indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_lsm"   type="long"  indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_lsi"   type="long"  indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_lsim"  type="long"  indexed="true"  stored="true"  multiValued="true"/>

    <!-- long via the "tlong" type (_lt...), intended for range queries -->
    <dynamicField name="*_lti"   type="tlong" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_ltim"  type="tlong" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_lts"   type="tlong" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_ltsm"  type="tlong" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_ltsi"  type="tlong" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_ltsim" type="tlong" indexed="true"  stored="true"  multiValued="true"/>

    <!-- double (_db...) -->
    <dynamicField name="*_dbi"    type="double"  indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_dbim"   type="double"  indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_dbs"    type="double"  indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_dbsm"   type="double"  indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_dbsi"   type="double"  indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_dbsim"  type="double"  indexed="true"  stored="true"  multiValued="true"/>

    <!-- double via the "tdouble" type (_dbt...), intended for range queries -->
    <dynamicField name="*_dbti"   type="tdouble" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_dbtim"  type="tdouble" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_dbts"   type="tdouble" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_dbtsm"  type="tdouble" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_dbtsi"  type="tdouble" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_dbtsim" type="tdouble" indexed="true"  stored="true"  multiValued="true"/>

    <!-- float (_f...) -->
    <dynamicField name="*_fi"    type="float"  indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_fim"   type="float"  indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_fs"    type="float"  indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_fsm"   type="float"  indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_fsi"   type="float"  indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_fsim"  type="float"  indexed="true"  stored="true"  multiValued="true"/>

    <!-- float via the "tfloat" type (_ft...), intended for range queries -->
    <dynamicField name="*_fti"   type="tfloat" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_ftim"  type="tfloat" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_fts"   type="tfloat" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_ftsm"  type="tfloat" indexed="false" stored="true"  multiValued="true"/>
    <dynamicField name="*_ftsi"  type="tfloat" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_ftsim" type="tfloat" indexed="true"  stored="true"  multiValued="true"/>

    <!-- boolean (_b...): single-valued variants only -->
    <dynamicField name="*_bi"  type="boolean" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_bs"  type="boolean" indexed="false" stored="true"  multiValued="false"/>
    <dynamicField name="*_bsi" type="boolean" indexed="true"  stored="true"  multiValued="false"/>

    <!-- Sub-field pattern for the lat and lon components of a "location" FieldType.
         NOTE(review): no fieldType named "location" is declared in this schema;
         confirm whether this pattern is still needed. -->
    <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>

    <!-- Any field whose name ends in "suggest": analyzed as textSuggest, indexed, not stored. -->
    <dynamicField name="*suggest"
                  type="textSuggest"
                  indexed="true"
                  stored="false"
                  multiValued="true"/>

    <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
    <field name="all_text_timv"
           type="text"
           indexed="true"
           stored="false"
           multiValued="true"
           termVectors="true"
           termPositions="true"
           termOffsets="true"/>


  </fields>

 <!-- Field to use to determine and enforce document uniqueness.
      Unless this field is marked with required="false", it will be a required field
   -->
 <uniqueKey>id</uniqueKey> <!-- names the "id" field declared in the fields section -->

  <!-- copyField commands copy one field to another at the time a document
        is added to the index.  It's used either to index the same field differently,
        or to add multiple fields to the same field for easier/faster searching.  -->
   <!-- Copy Fields -->

   <!-- Several source fields can be copied into one destination field, either by
    listing each source explicitly or, as below, by using the dynamic field
    (wildcard) syntax in the source.
    copyField also supports a maxChars setting that limits how much is copied.  -->

   <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
   <!-- For suggestions: every *_tesim and *_ssim value is copied into "suggest".
        NOTE(review): "suggest" has no explicit <field> declaration here; it
        presumably resolves through the "*suggest" dynamicField. Confirm. -->
   <copyField source="*_tesim" dest="suggest"/>
   <copyField source="*_ssim"  dest="suggest"/>

 <!-- Similarity is the scoring routine for each document vs. a query.
      A custom similarity may be specified here, but the default is fine
      for most applications.  -->
 <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
 <!-- ... OR ...
      Specify a SimilarityFactory class name implementation
      allowing parameters to be used.
 -->
 <!--
 <similarity class="com.example.solr.CustomSimilarityFactory">
   <str name="paramkey">param value</str>
 </similarity>
 -->

</schema>