sul-dlss/argo

View on GitHub
solr_conf/conf/schema.xml

Summary

Maintainability
Test Coverage
<?xml version="1.0" encoding="UTF-8"?>
<schema name="Argo Prod" version="1.5">
  <!-- NOTE: various comments and unused configuration possibilities have been purged
     from this file.  Please refer to http://wiki.apache.org/solr/SchemaXml,
     as well as the default schema file included with Solr -->

  <uniqueKey>id</uniqueKey>

  <fields>
    <!-- Document identifier; named by <uniqueKey> above, hence required and single-valued. -->
    <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
    <!-- DEPRECATED: "long" is a Trie based field type (solr.TrieLongField in <types>);
         use solr.LongPointField class instead.  No LongPointField type is declared in this file. -->
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
    <!-- DEPRECATED: "date" is a Trie based field type (solr.TrieDateField in <types>);
         use solr.DatePointField class instead (declared in <types> as "datePoint").
         The value defaults to NOW when a document does not supply one. -->
    <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>

    <!-- NOTE:  not all possible Solr field types are represented in the dynamic fields -->

    <!-- string (_s...) -->
    <!-- Suffix key, matching the attributes on each line below:
         i = indexed, s = stored, m = multiValued, dv = docValues -->
    <!-- docValues true makes for faster facets, but more storage -->
    <dynamicField name="*_si"     type="string"    stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_sim"    type="string"    stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_ss"     type="string"    stored="true"  indexed="false" multiValued="false"/>
    <dynamicField name="*_ssm"    type="string"    stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_ssi"    type="string"    stored="true"  indexed="true"  multiValued="false"/>
    <dynamicField name="*_ssidv"  type="string"    stored="true"  indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_ssim"   type="string"    stored="true"  indexed="true"  multiValued="true"/>
    <dynamicField name="*_ssimdv" type="string"    stored="true"  indexed="true"  multiValued="true"  docValues="true"/>

    <!-- IntPointField integer (_ip...) (very efficient searches for specific values, or ranges of values) -->
    <!-- docValues true makes for faster facets, but more storage -->
    <!-- Suffix key: i = indexed, s = stored, m = multiValued, dv = docValues.
         Note that *_ipi and *_ips enable docValues although their names carry no "dv",
         while *_ipsi does not enable it. -->
    <dynamicField name="*_ipi"    type="intPoint" stored="false" indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_ipim"   type="intPoint" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_ips"    type="intPoint" stored="true"  indexed="false" multiValued="false" docValues="true"/>
    <dynamicField name="*_ipsm"   type="intPoint" stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_ipsi"   type="intPoint" stored="true"  indexed="true"  multiValued="false"/>
    <dynamicField name="*_ipsidv" type="intPoint" stored="true"  indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_ipsim"  type="intPoint" stored="true"  indexed="true"  multiValued="true"/>
    <!-- DEPRECATED: type "int" (_i...) is a TrieIntField; use the IntPointField patterns (_ip...) instead -->
    <dynamicField name="*_ii"   type="int" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_iim"  type="int" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_is"   type="int" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_ism"  type="int" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_isi"  type="int" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_isim" type="int" stored="true" indexed="true"  multiValued="true"/>
    <!-- DEPRECATED: trie integer (_it...) (for faster range queries); type "tint" is a TrieIntField with precisionStep 8 -->
    <dynamicField name="*_iti"   type="tint" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_itim"  type="tint" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_its"   type="tint" stored="true"  indexed="false" multiValued="false"/>
    <dynamicField name="*_itsm"  type="tint" stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_itsi"  type="tint" stored="true"  indexed="true"  multiValued="false"/>
    <dynamicField name="*_itsim" type="tint" stored="true"  indexed="true"  multiValued="true"/>

    <!-- DatePointField (_dtp...) (very efficient searches for specific values, or ranges of values) -->
    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
    <!-- docValues true makes for faster facets, but more storage -->
    <!-- Suffix key: i = indexed, s = stored, m = multiValued, dv = docValues.
         Note that *_dtpi, *_dtps and *_dtpsi enable docValues although their names carry no "dv",
         so *_dtpsi and *_dtpsidv are declared with identical attributes. -->
    <dynamicField name="*_dtpi"     type="datePoint" stored="false" indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_dtpim"    type="datePoint" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dtps"     type="datePoint" stored="true"  indexed="false" multiValued="false" docValues="true"/>
    <dynamicField name="*_dtpsm"    type="datePoint" stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_dtpsi"    type="datePoint" stored="true"  indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_dtpsidv"  type="datePoint" stored="true"  indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_dtpsim"   type="datePoint" stored="true"  indexed="true"  multiValued="true"/>
    <dynamicField name="*_dtpsimdv" type="datePoint" stored="true"  indexed="true"  multiValued="true"  docValues="true"/>
    <!-- DEPRECATED: type "date" (_dt...) is a TrieDateField; use the DatePointField patterns (_dtp...) instead -->
    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
    <dynamicField name="*_dti"   type="date" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dtim"  type="date" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dts"   type="date" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_dtsm"  type="date" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_dtsi"  type="date" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dtsim" type="date" stored="true" indexed="true"  multiValued="true"/>
    <!-- DEPRECATED: trie date (_dtt...) (for faster range queries); type "tdate" is a TrieDateField with precisionStep 6 -->
    <dynamicField name="*_dtti"   type="tdate" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dttim"  type="tdate" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dtts"   type="tdate" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_dttsm"  type="tdate" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_dttsi"  type="tdate" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true"  multiValued="true"/>

    <!-- DoublePointField (_dbp...) (very efficient searches for specific values, or ranges of values) -->
    <!-- Suffix key: i = indexed, s = stored, m = multiValued.
         Every single-valued pattern here (*_dbpi, *_dbps, *_dbpsi) enables docValues; the multiValued ones do not. -->
    <dynamicField name="*_dbpi"   type="doublePoint" stored="false" indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_dbpim"  type="doublePoint" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dbps"   type="doublePoint" stored="true"  indexed="false" multiValued="false" docValues="true"/>
    <dynamicField name="*_dbpsm"  type="doublePoint" stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_dbpsi"  type="doublePoint" stored="true"  indexed="true"  multiValued="false" docValues="true"/>
    <dynamicField name="*_dbpsim" type="doublePoint" stored="true"  indexed="true"  multiValued="true"/>
    <!-- DEPRECATED: type "double" (_db...) is a TrieDoubleField; use the DoublePointField patterns (_dbp...) instead -->
    <dynamicField name="*_dbi"   type="double" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dbim"  type="double" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dbs"   type="double" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_dbsm"  type="double" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_dbsi"  type="double" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dbsim" type="double" stored="true" indexed="true"  multiValued="true"/>
    <!-- DEPRECATED: trie double (_dbt...) (for faster range queries); type "tdouble" is a TrieDoubleField with precisionStep 8 -->
    <dynamicField name="*_dbti"   type="tdouble" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dbtim"  type="tdouble" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_dbts"   type="tdouble" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_dbtsm"  type="tdouble" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_dbtsi"  type="tdouble" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true"  multiValued="true"/>

    <!-- boolean (_b...) -->
    <!-- Single-valued only: i = indexed, s = stored; no multiValued (m) variants are declared. -->
    <dynamicField name="*_bi"  type="boolean" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_bs"  type="boolean" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true"  multiValued="false"/>

    <!-- text English (_ten...) -->
    <!-- Suffix key: i = indexed, s = stored, m = multiValued,
         v = termVectors with termPositions and termOffsets (only declared on indexed patterns). -->
    <dynamicField name="*_teni"    type="textEnglish" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_tenim"   type="textEnglish" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_tens"    type="textEnglish" stored="true"  indexed="false" multiValued="false"/>
    <dynamicField name="*_tensm"   type="textEnglish" stored="true"  indexed="false" multiValued="true"/>
    <dynamicField name="*_tensi"   type="textEnglish" stored="true"  indexed="true"  multiValued="false"/>
    <dynamicField name="*_tensim"  type="textEnglish" stored="true"  indexed="true"  multiValued="true"/>
    <dynamicField name="*_teniv"   type="textEnglish" stored="false" indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tenimv"  type="textEnglish" stored="false" indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tensiv"  type="textEnglish" stored="true"  indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tensimv" type="textEnglish" stored="true"  indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <!-- DEPRECATED: type "text" (_t...); use textEnglish (*_ten...) or textUnstemmed (*_text_unstemmed_...) instead -->
    <dynamicField name="*_ti"    type="text" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_tim"   type="text" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_ts"    type="text" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_tsm"   type="text" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_tsi"   type="text" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_tsim"  type="text" stored="true" indexed="true"  multiValued="true"/>
    <dynamicField name="*_tiv"   type="text" stored="false" indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_timv"  type="text" stored="false" indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tsiv"  type="text" stored="true" indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tsimv" type="text" stored="true" indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <!-- DEPRECATED: text_en (_te...) is a deprecated type; use textEnglish (*_ten...) instead -->
    <dynamicField name="*_tei"    type="text_en" stored="false" indexed="true"  multiValued="false"/>
    <dynamicField name="*_teim"   type="text_en" stored="false" indexed="true"  multiValued="true"/>
    <dynamicField name="*_tes"    type="text_en" stored="true" indexed="false" multiValued="false"/>
    <dynamicField name="*_tesm"   type="text_en" stored="true" indexed="false" multiValued="true"/>
    <dynamicField name="*_tesi"   type="text_en" stored="true" indexed="true"  multiValued="false"/>
    <dynamicField name="*_tesim"  type="text_en" stored="true" indexed="true"  multiValued="true"/>
    <dynamicField name="*_teiv"   type="text_en" stored="false" indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_teimv"  type="text_en" stored="false" indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tesiv"  type="text_en" stored="true" indexed="true"  multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
    <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true"  multiValued="true"  termVectors="true" termPositions="true" termOffsets="true"/>

    <!-- Text tokenized without stemming -->
    <!-- Suffix key: i = indexed, s = stored, m = multiValued.
         NOTE(review): names matching *_text_unstemmed_si / *_text_unstemmed_sim also end in
         _si / _sim, which are string patterns declared earlier; this presumably relies on Solr
         preferring the longer pattern. TODO confirm. -->
    <dynamicField name="*_text_unstemmed_i"   type="textUnstemmed" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_text_unstemmed_im"  type="textUnstemmed" indexed="true"  stored="false" multiValued="true"/>
    <dynamicField name="*_text_unstemmed_si"  type="textUnstemmed" indexed="true"  stored="true"  multiValued="false"/>
    <dynamicField name="*_text_unstemmed_sim" type="textUnstemmed" indexed="true"  stored="true"  multiValued="true"/>
    <!-- DEPRECATED:  textNoStem is a deprecated type; use the textUnstemmed patterns (*_text_unstemmed_...) instead -->
    <dynamicField name="*_text_nostem_i"  type="textNoStem" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_text_nostem_im" type="textNoStem" indexed="true"  stored="false" multiValued="true"/>

    <!-- Text tokenized without stemming but left and right anchored, for "exactish" matches -->
    <!-- Index-only: no stored (s) variants are declared for anchored text. -->
    <dynamicField name="*_text_anchored_i"  type="textAnchored" indexed="true"  stored="false" multiValued="false"/>
    <dynamicField name="*_text_anchored_im" type="textAnchored" indexed="true"  stored="false" multiValued="true"/>

    <!-- copyFields must have explicitly declared destination fields -->
    <!-- The four destinations below are index-only (stored="false") and multiValued.
         Note the naming difference: the main_title destinations use a "_text_" infix
         (main_title_text_unstemmed_im) while the full_title / additional_titles ones do not. -->
    <field name="main_title_text_anchored_im" type="textAnchored" indexed="true" stored="false" multiValued="true"/>
    <field name="main_title_text_unstemmed_im" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
    <field name="full_title_unstemmed_im" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
    <field name="additional_titles_unstemmed_im" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>

    <!-- main_title_tenim feeds both an anchored and an unstemmed copy; full_title_tenim and
         additional_titles_tenim each feed an unstemmed copy.  Every rule sets maxChars="5000". -->
    <copyField source="main_title_tenim" dest="main_title_text_anchored_im" maxChars="5000" />
    <copyField source="main_title_tenim" dest="main_title_text_unstemmed_im" maxChars="5000" />
    <copyField source="full_title_tenim" dest="full_title_unstemmed_im" maxChars="5000" />
    <copyField source="additional_titles_tenim" dest="additional_titles_unstemmed_im" maxChars="5000" />
  </fields>

  <types>
    <!-- Types with no analyzer declared; both set sortMissingLast="true". -->
    <fieldType name="string"  class="solr.StrField" sortMissingLast="true" />
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

    <!-- Point-based types used by the _ip..., _dtp... and _dbp... dynamic fields. -->
    <fieldType name="intPoint" class="solr.IntPointField" />
    <fieldType name="datePoint" class="solr.DatePointField" />
    <fieldType name="doublePoint" class="solr.DoublePointField" />

    <!-- DEPRECATED: All Trie* numeric and date field types have been deprecated in favor of *Point field types. -->

    <!-- Default numeric field types.  -->
    <!-- These use precisionStep="0"; the t-prefixed variants further down use 8 (6 for tdate).
         "long" is used by the _version_ field; no field or dynamicField in this schema
         references "float". -->
    <fieldType name="int"     class="solr.TrieIntField"    precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="float"   class="solr.TrieFloatField"  precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="long"    class="solr.TrieLongField"   precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="double"  class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

    <!-- trie numeric field types for faster range queries -->
    <!-- No field or dynamicField in this schema references "tfloat". -->
    <fieldType name="tint"    class="solr.TrieIntField"    precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tfloat"  class="solr.TrieFloatField"  precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
      -->
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
    <!-- A Trie based date field for faster date range queries and date faceting. -->
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

    <!-- Text stemmed for English -->
    <fieldType name="textEnglish" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <!-- Whitespace-tokenized, ICU-folded, word-delimited and Snowball (English) stemmed text.
           The index and query chains are the same except that only the index chain flattens
           the token graph emitted by WordDelimiterGraphFilterFactory. -->
      <analyzer type="index">
        <!-- remove stand-alone punctuation, which gets dropped by the WDGFF but leaves an empty hole in the position and breaks phrase searches -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC_Casefold, diacritics removed -->
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
        <!-- index side only: flatten the graph produced by the word delimiter filter -->
        <filter class="solr.FlattenGraphFilterFactory"/>
        <!-- NOTE(review): this runs before the stemmer, so tokens that only become identical
             after stemming are not de-duplicated; confirm the ordering is intended -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
      </analyzer>
      <analyzer type="query">
        <!-- same stand-alone punctuation clean-up as the index chain -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
      </analyzer>
    </fieldType>

    <!-- Text tokenized and case folded but not stemmed -->
    <fieldType name="textUnstemmed" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <!-- The index and query chains are the same except that only the index chain flattens
           the token graph.  No stemmer is applied and stemEnglishPossessive is off. -->
      <analyzer type="index">
        <!-- for whitespace separated chars that will be processed as their own token stream at query time, e.g. in 'felines : warm and fuzzy' -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC_Casefold, diacritics removed -->
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="0" />
        <!-- index side only: flatten the graph produced by the word delimiter filter -->
        <filter class="solr.FlattenGraphFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <!-- same stand-alone punctuation clean-up as the index chain -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="0" />
      </analyzer>
    </fieldType>

    <!-- Left and right anchored tokenized text, without stemming, for "exactish" matches -->
    <fieldType name="textAnchored" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <!-- Both chains wrap the trimmed field value in the literal markers "aaaaaa" ... "zzzzzz"
           before tokenizing.  The index and query chains are the same except that only the
           index chain flattens the token graph. -->
      <analyzer type="index">
        <!-- for whitespace separated chars that will be processed as their own token stream at query time, e.g. in 'felines : warm and fuzzy' -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <!-- anchor beginning and ending of field value, removing trailing chars -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[\S&amp;&amp;[^\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]])[\s\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]*$" replacement="aaaaaa$1zzzzzz"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC_Casefold, diacritics removed -->
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
        <!-- index side only: flatten the graph produced by the word delimiter filter -->
        <filter class="solr.FlattenGraphFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <!-- same punctuation clean-up and anchoring as the index chain -->
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
        <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[\S&amp;&amp;[^\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]])[\s\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]*$" replacement="aaaaaa$1zzzzzz"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
      </analyzer>
    </fieldType>

    <!-- DEPRECATED:  use textUnstemmed or textEnglish. tokenized, case folded -->
    <!-- Backs the deprecated *_t... dynamic fields.  A single analyzer (no type attribute) is
         declared rather than separate index/query chains.  omitNorms="false" is set explicitly,
         and no positionIncrementGap is set, unlike the other text types in this file. -->
    <fieldType name="text" class="solr.TextField" omitNorms="false">
      <analyzer>
        <tokenizer class="solr.ICUTokenizerFactory"/>
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
        <filter class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- DEPRECATED: use textUnstemmed instead, as WordDelimiterFilterFactory is deprecated.  Analyzed text, no stemming or synonyms -->
    <fieldType name="textNoStem" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <!-- Backs the deprecated *_text_nostem_... dynamic fields.  A single analyzer (no type
           attribute) is declared rather than separate index/query chains. -->
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
        <!-- non-graph word delimiter filter; same option values as textUnstemmed -->
        <filter class="solr.WordDelimiterFilterFactory"
          splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
          splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
          catenateAll="0" preserveOriginal="0" stemEnglishPossessive="0"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- DEPRECATED:  use textEnglish, as more aggressive stemming desired.  A text field with defaults appropriate for English -->
    <!-- Backs the deprecated *_te... dynamic fields.  A single analyzer (no type attribute) is
         declared rather than separate index/query chains. -->
    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.ICUTokenizerFactory"/>
        <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
        <!-- possessive handling runs before the stemmer -->
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
        <filter class="solr.TrimFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>
</schema>