LafayetteCollegeLibraries/spot

View on GitHub
docker/solr/config/solrconfig.xml

Summary

Maintainability
Test Coverage
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!--
 This is a stripped down config file used for a simple example...
 It is *not* a good example to work from.
-->
<config>

  <!-- Lucene compatibility level that Solr components adhere to.
       Changing it can affect both how text is indexed and how it is
       queried, so a full re-index is highly recommended afterwards.
  -->
  <luceneMatchVersion>8.11.2</luceneMatchVersion>

  <!-- Additional jars, resolved relative to solr.install.dir
       (default: ../../../..): analysis-extras and its Lucene libs, plus
       the extraction contrib and solr-cell jars. The /update/extract
       handler in this file references
       org.apache.solr.handler.extraction.ExtractingRequestHandler. -->
  <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib"/>
  <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs"/>
  <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar"/>
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar"/>

  <!-- Directory implementation; overridable through the
       solr.directoryFactory system property. -->
  <directoryFactory
    name="DirectoryFactory"
    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

  <codecFactory class="solr.SchemaCodecFactory"/>

  <!-- NOTE(review): ClassicIndexSchemaFactory presumably means the schema is a
       hand-maintained schema.xml rather than a managed schema; confirm
       against the rest of this config directory. -->
  <schemaFactory class="ClassicIndexSchemaFactory"/>

  <!-- Index data location, taken from the solr.blacklight-core.data.dir
       property; the fallback after the colon is empty. -->
  <dataDir>${solr.blacklight-core.data.dir:}</dataDir>

  <!-- Request dispatching. handleSelect is enabled; several handlers in this
       file are registered without a leading slash (search, permissions,
       standard, document).
       NOTE(review): presumably those are reached via /select with a qt
       parameter; confirm against the client configuration.
       Remote streaming is disabled and multipart uploads are limited to
       2048000 KB. -->
  <requestDispatcher handleSelect="true">
    <requestParsers
      enableRemoteStreaming="false"
      multipartUploadLimitInKB="2048000"/>
  </requestDispatcher>

  <!-- field analysis endpoint, loaded lazily -->
  <requestHandler
    name="/analysis/field"
    class="solr.FieldAnalysisRequestHandler"
    startup="lazy"/>

  <!-- settings for the admin interface: the default query matches everything -->
  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- Update handling: transaction log, hard commits and soft commits.
       Every ${property:default} value below can be overridden by setting
       the named system property. -->
  <updateHandler class="solr.DirectUpdateHandler2">
    <!-- transaction log directory; the default is empty when solr.ulog.dir is unset -->
    <updateLog>
      <str name="dir">${solr.ulog.dir:}</str>
    </updateLog>

    <!-- hard commit: maxTime defaults to 15000
         (NOTE(review): milliseconds according to the Solr docs; confirm).
         openSearcher is false for these automatic commits. -->
    <autoCommit>
      <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
      <openSearcher>false</openSearcher>
    </autoCommit>

    <!-- soft commit: maxTime defaults to -1
         (NOTE(review): presumably this disables automatic soft commits; confirm) -->
    <autoSoftCommit>
      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
    </autoSoftCommit>
  </updateHandler>

  <!-- SearchHandler

       http://wiki.apache.org/solr/SearchHandler

       For processing search queries, the primary request handler
       provided with Solr is "SearchHandler". It delegates to a sequence
       of SearchComponents (see below) and supports distributed
       queries across multiple shards.
    -->
  <requestHandler name="search" class="solr.SearchHandler" default="true">
    <!-- defaults: values used for any parameter the request does not supply itself -->
    <lst name="defaults">
      <!--
        `defType` defines which query parser to use. options include:
          - lucene
            - https://lucene.apache.org/solr/guide/7_5/the-standard-query-parser.html
          - dismax
            - https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html
          - edismax
            - https://lucene.apache.org/solr/guide/7_5/the-extended-dismax-query-parser.html
      -->
      <str name="defType">edismax</str>

      <!--
        in the response, echo back the request parameters that were explicitly sent
        see: https://wiki.apache.org/solr/CoreQueryParameters#echoParams
      -->
      <str name="echoParams">explicit</str>

      <!--
        if no `q` parameter is provided, this is the fallback search (match everything)
        see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#q-alt-parameter
      -->
      <str name="q.alt">*:*</str>

      <!--
        `mm` (minimum should match): using the defaults from Hyrax for now
        see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#mm-minimum-should-match-parameter
      -->
      <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>

      <!--
        `qs` (query phrase slop): per the linked docs, the slop allowed on
        phrases the user explicitly includes in the query string

        see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#qs-query-phrase-slop-parameter
      -->
      <int name="qs">1</int>

      <!--
        `ps` (phrase slop): per the linked docs, the slop applied to the
        phrase queries built from the `pf` fields

        see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#ps-phrase-slop-parameter
      -->
      <int name="ps">2</int>

      <!--
        `tie` (tie breaker): using the defaults from Hyrax

        see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#the-tie-tie-breaker-parameter
      -->
      <float name="tie">0.01</float>

      <!-- default qf (query fields) + pf (phrase fields) properties -->
      <str name="qf">
        id
        title_tesim
        creator_tesim
        contributor_tesim
        subject_tesim
      </str>
      <str name="pf">
        extracted_text_tsimv
      </str>
      <!--
        what fields should be returned in our response? (all stored fields plus the score)

        see: https://wiki.apache.org/solr/CommonQueryParameters#fl-1
      -->
      <str name="fl">
        *,
        score
      </str>

      <!--
        enable faceting; facet.mincount=1 limits the response to facet
        values that have at least one matching document
      -->
      <str name="facet">true</str>
      <str name="facet.mincount">1</str>

      <!--
        spellcheck defaults: use the `default` dictionary, return up to 5
        suggestions with extended results, no collation. the spellcheck
        component itself is attached through `last-components` below.
      -->
      <str name="spellcheck">true</str>
      <str name="spellcheck.dictionary">default</str>
      <str name="spellcheck.onlyMorePopular">true</str>
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.collate">false</str>
      <str name="spellcheck.count">5</str>
    </lst> <!-- /end defaults -->

    <!-- components listed here run after the handler's standard components -->
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <!-- Permissions lookup: returns a single document (rows=1) restricted to
       its access-control fields. Callers pass id=<value>; the query is built
       from that parameter through $id. Faceting is switched off and all
       parameters are echoed in the response. -->
  <requestHandler name="permissions" class="solr.SearchHandler" >
    <lst name="defaults">
      <str name="facet">off</str>
      <str name="echoParams">all</str>
      <str name="rows">1</str>
      <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
      <!-- comma-separated field list: every entry except the last ends with a comma -->
      <str name="fl">
        id,
        access_ssim,
        discover_access_group_ssim,discover_access_person_ssim,
        read_access_group_ssim,read_access_person_ssim,
        edit_access_group_ssim,edit_access_person_ssim,
        depositor_ti,
        embargo_release_date_dtsi,
        inheritable_access_ssim,
        inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim,
        inheritable_read_access_group_ssim,inheritable_read_access_person_ssim,
        inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim,
        inheritable_embargo_release_date_dtsi
      </str>
    </lst>
  </requestHandler>

  <!-- plain handler using the `lucene` query parser; echoes only the
       parameters that were explicitly sent with the request -->
  <requestHandler name="standard" class="solr.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="defType">lucene</str>
    </lst>
  </requestHandler>

  <!-- Single-document lookup: callers pass id=666 instead of q=id:666.
       Returns at most one row with every field, and echoes all parameters. -->
  <requestHandler name="document" class="solr.SearchHandler" >
    <lst name="defaults">
      <str name="echoParams">all</str>
      <str name="fl">*</str>
      <str name="rows">1</str>
      <!-- the query is built from the request's `id` parameter via $id -->
      <str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
    </lst>
  </requestHandler>

  <!-- Terms component and the /terms endpoint that exposes it -->
  <searchComponent name="termsComponent" class="solr.TermsComponent" />

  <requestHandler name="/terms" class="solr.SearchHandler">
    <!-- `terms` is switched on by default for this handler -->
    <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>
    <!-- `components` lists only termsComponent for this handler -->
    <arr name="components">
      <str>termsComponent</str>
    </arr>
  </requestHandler>

  <!-- Spell Check

       The spell check component can return a list of alternative spelling
       suggestions.

       http://wiki.apache.org/solr/SpellCheckComponent
    -->
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <!-- field type used to analyze the query for spellchecking
         (NOTE(review): `textSpell` is expected to be defined in the schema,
         which is not visible from this file) -->
    <str name="queryAnalyzerFieldType">textSpell</str>

    <!-- Multiple "Spell Checkers" can be declared and used by this
         component. Four are active here: default, author, subject and
         title. The `search` handler selects `default`.
      -->

    <!-- a spellchecker built from a field of the main index, and
         written to disk
      -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">spell</str>
      <str name="spellcheckIndexDir">./spell</str>
      <str name="buildOnOptimize">true</str>
    </lst>
    <!-- the per-field spellcheckers below each use their own source field
         and index directory, with accuracy set to 0.7 -->
    <lst name="spellchecker">
      <str name="name">author</str>
      <str name="field">author_spell</str>
      <str name="spellcheckIndexDir">./spell_author</str>
      <str name="accuracy">0.7</str>
      <str name="buildOnOptimize">true</str>
    </lst>
    <lst name="spellchecker">
      <str name="name">subject</str>
      <str name="field">subject_spell</str>
      <str name="spellcheckIndexDir">./spell_subject</str>
      <str name="accuracy">0.7</str>
      <str name="buildOnOptimize">true</str>
    </lst>
    <lst name="spellchecker">
      <str name="name">title</str>
      <str name="field">title_spell</str>
      <str name="spellcheckIndexDir">./spell_title</str>
      <str name="accuracy">0.7</str>
      <str name="buildOnOptimize">true</str>
    </lst>

    <!-- a spellchecker that uses a different distance measure (disabled example) -->
    <!--
       <lst name="spellchecker">
         <str name="name">jarowinkler</str>
         <str name="field">spell</str>
         <str name="distanceMeasure">
           org.apache.lucene.search.spell.JaroWinklerDistance
         </str>
         <str name="spellcheckIndexDir">spellcheckerJaro</str>
       </lst>
     -->

    <!-- a spellchecker that uses an alternate comparator (disabled example)

         comparatorClass can be one of:
          1. score (default)
          2. freq (Frequency first, then score)
          3. A fully qualified class name
      -->
    <!--
       <lst name="spellchecker">
         <str name="name">freq</str>
         <str name="field">lowerfilt</str>
         <str name="spellcheckIndexDir">spellcheckerFreq</str>
         <str name="comparatorClass">freq</str>
         <str name="buildOnCommit">true</str>
       </lst>
      -->

    <!-- A spellchecker that reads the list of words from a file (disabled example) -->
    <!--
       <lst name="spellchecker">
         <str name="classname">solr.FileBasedSpellChecker</str>
         <str name="name">file</str>
         <str name="sourceLocation">spellings.txt</str>
         <str name="characterEncoding">UTF-8</str>
         <str name="spellcheckIndexDir">spellcheckerFile</str>
       </lst>
      -->
  </searchComponent>

  <!-- Suggest component

       Seven suggesters that share one setup: AnalyzingInfixLookupFactory
       lookup, HighFrequencyDictionaryFactory dictionary, the `textSuggest`
       analyzer field type, highlighting off and buildOnCommit off. They
       differ only in name, source field and indexPath (each suggester
       writes to its own index path).

       NOTE(review): with buildOnCommit set to false the suggesters are
       presumably built explicitly by the application; confirm how and
       when builds are triggered.
    -->
  <searchComponent name="suggest" class="solr.SuggestComponent">
    <lst name="suggester">
      <str name="name">bibliographic_citation</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_bibliographic_citation</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">bibliographic_citation_sim</str>
    </lst>
    <lst name="suggester">
      <str name="name">keyword</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_keyword</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">keyword_sim</str>
    </lst>
    <!-- note: unlike the other suggesters, this one reads from an
         `_ssim` field (name_suggest_ssim) rather than a `_sim` field -->
    <lst name="suggester">
      <str name="name">name</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_name</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">name_suggest_ssim</str>
    </lst>
    <lst name="suggester">
      <str name="name">organization</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_organization</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">organization_sim</str>
    </lst>
    <lst name="suggester">
      <str name="name">physical_medium</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_physical_medium</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">physical_medium_sim</str>
    </lst>
    <lst name="suggester">
      <str name="name">publisher</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_publisher</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">publisher_sim</str>
    </lst>
    <lst name="suggester">
      <str name="name">source</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
      <str name="highlight">false</str>
      <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
      <str name="indexPath">suggestion_index_source</str>
      <str name="suggestAnalyzerFieldType">textSuggest</str>
      <str name="buildOnCommit">false</str>
      <str name="field">source_sim</str>
    </lst>
  </searchComponent>

  <!-- /suggest endpoint (lazily loaded): runs only the `suggest` component.
       By default it returns up to 5 suggestions from the `keyword`
       suggester; a request can name one of the other suggesters with its
       own suggest.dictionary parameter. -->
  <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="suggest">true</str>
      <str name="suggest.count">5</str>
      <str name="suggest.dictionary">keyword</str>
    </lst>
    <arr name="components">
      <str>suggest</str>
    </arr>
  </requestHandler>

  <!-- /update/extract endpoint backed by the ExtractingRequestHandler.
       NOTE(review): the handler class presumably comes from the
       extraction / solr-cell jars loaded by the <lib> directives at the
       top of this file; confirm before removing either. -->
  <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler">
    <lst name="defaults">
      <!-- map the extracted Last-Modified value onto the last_modified field -->
      <str name="fmap.Last-Modified">last_modified</str>
      <!-- NOTE(review): presumably prefixes fields unknown to the schema with
           `ignored_`; confirm the schema defines a matching ignored_* field -->
      <str name="uprefix">ignored_</str>
    </lst>
    <!--Optional.  Specify a path to a tika configuration file. See the Tika docs for details.-->
    <!-- <str name="tika.config">/my/path/to/tika.config</str> -->
    <!-- Optional. Specify one or more date formats to parse. See DateUtil.DEFAULT_DATE_FORMATS
         for default date formats. In the example below &#45; stands in for a
         hyphen, because a double hyphen is not allowed inside an XML comment. -->
    <!-- <lst name="date.formats"> -->
    <!--   <str>yyyy&#45;MM&#45;dd</str> -->
    <!-- </lst> -->
  </requestHandler>
</config>