docker/solr/config/solrconfig.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is a stripped down config file used for a simple example...
It is *not* a good example to work from.
-->
<config>
<!-- Controls what version of Lucene various components of Solr
adhere to. Generally, you want to use the latest version to
get all bug fixes and improvements. It is highly recommended
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
<luceneMatchVersion>8.11.2</luceneMatchVersion>
<!-- Extra jars, resolved relative to the Solr install tree. The
     solr.install.dir property falls back to ../../../.. when it is not set.
     The first two directories carry no regex filter; the last two are
     narrowed to *.jar files and to the solr-cell jar respectively.
     NOTE(review): these presumably back the analysis-extras field types and
     the /update/extract handler; confirm against the schema. -->
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
<lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
<lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
<!-- Index storage implementation. Overridable through the
     solr.directoryFactory property; solr.NRTCachingDirectoryFactory is used
     when the property is not set. -->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}" />
<codecFactory class="solr.SchemaCodecFactory"/>
<!-- NOTE(review): ClassicIndexSchemaFactory should mean the schema is read
     from a hand-edited schema.xml rather than a managed schema; confirm that
     a schema.xml ships next to this file. -->
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!-- Data directory is taken from solr.blacklight-core.data.dir; the fallback
     value is empty. -->
<dataDir>${solr.blacklight-core.data.dir:}</dataDir>
<!-- Request dispatching: handleSelect is switched on, remote streaming is
     switched off, and multipart uploads are capped at 2048000 KB.
     NOTE(review): handleSelect="true" is presumably what lets the handlers
     registered without a leading slash (search, permissions, standard,
     document) be reached through /select with a qt parameter; confirm. -->
<requestDispatcher handleSelect="true">
  <requestParsers multipartUploadLimitInKB="2048000"
                  enableRemoteStreaming="false"/>
</requestDispatcher>

<!-- Field analysis endpoint; startup="lazy" defers loading the handler. -->
<requestHandler name="/analysis/field"
                class="solr.FieldAnalysisRequestHandler"
                startup="lazy"/>
<!-- Settings for the admin interface: its default query is the match-all
     query. -->
<admin>
  <defaultQuery>*:*</defaultQuery>
</admin>
<!-- Update handling: a transaction log plus time-based automatic commits. -->
<updateHandler class="solr.DirectUpdateHandler2">

  <!-- Transaction log directory, read from solr.ulog.dir (empty fallback). -->
  <updateLog>
    <str name="dir">${solr.ulog.dir:}</str>
  </updateLog>

  <!-- Hard commit: interval comes from solr.autoCommit.maxTime and falls
       back to 15000. openSearcher is false, so these commits do not open a
       new searcher. -->
  <autoCommit>
    <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
    <openSearcher>false</openSearcher>
  </autoCommit>

  <!-- Soft commit: interval comes from solr.autoSoftCommit.maxTime and falls
       back to -1. NOTE(review): a negative value should leave automatic soft
       commits disabled; confirm against the Solr reference guide. -->
  <autoSoftCommit>
    <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
  </autoSoftCommit>

</updateHandler>
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
For processing Search Queries, the primary Request Handler
provided with Solr is "SearchHandler". It delegates to a sequence
of SearchComponents (see below) and supports distributed
queries across multiple shards.
-->
<requestHandler name="search" class="solr.SearchHandler" default="true">
  <!--
    Default search handler. Each value under "defaults" is used only when the
    incoming request does not supply that parameter itself.
  -->
  <lst name="defaults">
    <!--
      `defType` defines which query parser to use. options include:
      - lucene
        - https://lucene.apache.org/solr/guide/7_5/the-standard-query-parser.html
      - dismax
        - https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html
      - edismax
        - https://lucene.apache.org/solr/guide/7_5/the-extended-dismax-query-parser.html
    -->
    <str name="defType">edismax</str>

    <!--
      in the response, echo back only the request parameters that were
      explicitly supplied
      see: https://wiki.apache.org/solr/CoreQueryParameters#echoParams
    -->
    <str name="echoParams">explicit</str>

    <!--
      if no `q` parameter is provided, this is the fallback search
      (match every document)
      see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#q-alt-parameter
    -->
    <str name="q.alt">*:*</str>

    <!--
      minimum-should-match, using the defaults from Hyrax for now. The value
      reads as "2<-1 5<-2 6<90%":
        - 1 or 2 optional clauses: all of them must match
        - 3 to 5 clauses: all but one must match
        - 6 clauses: all but two must match
        - more than 6 clauses: 90% must match
      The "<" separators have to be written as the &lt; entity here, because a
      raw "<" inside element text makes the file ill-formed XML.
      see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#mm-minimum-should-match-parameter
    -->
    <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>

    <!--
      the 'query phrase slop' parameter: how many positions the terms of a
      phrase that the user typed explicitly (in quotes) may be apart and still
      match against the `qf` fields
      see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#qs-query-phrase-slop-parameter
    -->
    <int name="qs">1</int>

    <!--
      the 'phrase slop' parameter: the same kind of tolerance, applied to the
      implicit phrase queries built from the `pf` fields for boosting
      see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#ps-phrase-slop-parameter
    -->
    <int name="ps">2</int>

    <!--
      tie breaker: how much the lower-scoring fields contribute next to the
      best-scoring field when a term matches in several `qf` fields.
      Value taken from the Hyrax defaults.
      see: https://lucene.apache.org/solr/guide/7_5/the-dismax-query-parser.html#the-tie-tie-breaker-parameter
    -->
    <float name="tie">0.01</float>

    <!-- default qf (query fields) and pf (phrase boost fields) properties -->
    <str name="qf">
      id
      title_tesim
      creator_tesim
      contributor_tesim
      subject_tesim
    </str>
    <str name="pf">
      extracted_text_tsimv
    </str>

    <!--
      what fields should be returned in our response?
      (every stored field plus the relevance score)
      see: https://wiki.apache.org/solr/CommonQueryParameters#fl-1
    -->
    <str name="fl">
      *,
      score
    </str>

    <!--
      enable faceting, and only return facet values that occur in at least
      one matching document
    -->
    <str name="facet">true</str>
    <str name="facet.mincount">1</str>

    <!--
      spellchecking is on by default and uses the spellchecker named
      "default"; the spellcheck component itself is attached through
      last-components below
    -->
    <str name="spellcheck">true</str>
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.collate">false</str>
    <str name="spellcheck.count">5</str>
  </lst> <!-- /end defaults -->

  <!-- run the spellcheck component after the standard search components -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
<!--
  Access-control lookup for a single document. Pass the document id as the
  `id` request parameter (id=666 instead of q=id:666); the default `q` below
  dereferences it through $id. Faceting is off, one row is returned, and only
  the permission-related fields listed in `fl` come back.
-->
<requestHandler name="permissions" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="facet">off</str>
    <str name="echoParams">all</str>
    <str name="rows">1</str>
    <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
    <!-- every entry is comma-terminated so that adding or reordering a line
         cannot silently join two field names -->
    <str name="fl">
      id,
      access_ssim,
      discover_access_group_ssim,discover_access_person_ssim,
      read_access_group_ssim,read_access_person_ssim,
      edit_access_group_ssim,edit_access_person_ssim,
      depositor_ti,
      embargo_release_date_dtsi,
      inheritable_access_ssim,
      inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim,
      inheritable_read_access_group_ssim,inheritable_read_access_person_ssim,
      inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim,
      inheritable_embargo_release_date_dtsi
    </str>
  </lst>
</requestHandler>
<!-- Handler for raw Lucene-syntax queries: it selects the lucene query parser
     and echoes only the explicitly supplied parameters. -->
<requestHandler class="solr.SearchHandler" name="standard">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <str name="defType">lucene</str>
  </lst>
</requestHandler>
<!-- Single-document lookup. Callers send id=666 rather than q=id:666; the
     default q picks the value up through $id. -->
<requestHandler class="solr.SearchHandler" name="document">
  <lst name="defaults">
    <str name="echoParams">all</str>
    <!-- return every field of the one matching row -->
    <str name="fl">*</str>
    <str name="rows">1</str>
    <!-- term query on the id field, fed from the id request parameter -->
    <str name="q">{!term f=id v=$id}</str>
  </lst>
</requestHandler>
<!-- Terms component, plus the /terms endpoint that exposes it. -->
<searchComponent class="solr.TermsComponent" name="termsComponent"/>

<requestHandler class="solr.SearchHandler" name="/terms">
  <!-- the terms parameter is on by default for this endpoint -->
  <lst name="defaults">
    <bool name="terms">true</bool>
  </lst>
  <!-- the component list for this handler holds only termsComponent -->
  <arr name="components">
    <str>termsComponent</str>
  </arr>
</requestHandler>
<!-- Spell Check
     The spell check component can return a list of alternative spelling
     suggestions. Several named spellcheckers are declared below; a request
     picks one with spellcheck.dictionary.
     http://wiki.apache.org/solr/SpellCheckComponent
-->
<searchComponent class="solr.SpellCheckComponent" name="spellcheck">

  <!-- field type used to analyze the query for spellchecking.
       NOTE(review): textSpell must be defined in the schema, which is not
       part of this file; confirm. -->
  <str name="queryAnalyzerFieldType">textSpell</str>

  <!-- "default": built from the `spell` field of the main index, written to
       disk under ./spell, and rebuilt on optimize -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">spell</str>
    <str name="spellcheckIndexDir">./spell</str>
    <str name="buildOnOptimize">true</str>
  </lst>

  <!-- "author", "subject" and "title": same arrangement, each over its own
       *_spell field and index directory, with accuracy set to 0.7 -->
  <lst name="spellchecker">
    <str name="name">author</str>
    <str name="field">author_spell</str>
    <str name="spellcheckIndexDir">./spell_author</str>
    <str name="accuracy">0.7</str>
    <str name="buildOnOptimize">true</str>
  </lst>

  <lst name="spellchecker">
    <str name="name">subject</str>
    <str name="field">subject_spell</str>
    <str name="spellcheckIndexDir">./spell_subject</str>
    <str name="accuracy">0.7</str>
    <str name="buildOnOptimize">true</str>
  </lst>

  <lst name="spellchecker">
    <str name="name">title</str>
    <str name="field">title_spell</str>
    <str name="spellcheckIndexDir">./spell_title</str>
    <str name="accuracy">0.7</str>
    <str name="buildOnOptimize">true</str>
  </lst>

  <!-- Disabled example: a spellchecker that uses a different distance
       measure -->
  <!--
  <lst name="spellchecker">
    <str name="name">jarowinkler</str>
    <str name="field">spell</str>
    <str name="distanceMeasure">
      org.apache.lucene.search.spell.JaroWinklerDistance
    </str>
    <str name="spellcheckIndexDir">spellcheckerJaro</str>
  </lst>
  -->

  <!-- Disabled example: a spellchecker that uses an alternate comparator.
       comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name
  -->
  <!--
  <lst name="spellchecker">
    <str name="name">freq</str>
    <str name="field">lowerfilt</str>
    <str name="spellcheckIndexDir">spellcheckerFreq</str>
    <str name="comparatorClass">freq</str>
    <str name="buildOnCommit">true</str>
  </lst>
  -->

  <!-- Disabled example: a spellchecker that reads the list of words from a
       file -->
  <!--
  <lst name="spellchecker">
    <str name="classname">solr.FileBasedSpellChecker</str>
    <str name="name">file</str>
    <str name="sourceLocation">spellings.txt</str>
    <str name="characterEncoding">UTF-8</str>
    <str name="spellcheckIndexDir">spellcheckerFile</str>
  </lst>
  -->

</searchComponent>
<!--
  Suggest component. It declares seven suggesters, one per source field:
  bibliographic_citation, keyword, name, organization, physical_medium,
  publisher and source. All of them share the same settings and differ only
  in name, indexPath and field:
    - lookupImpl AnalyzingInfixLookupFactory
    - dictionaryImpl HighFrequencyDictionaryFactory
    - suggestAnalyzerFieldType textSuggest
    - highlight false
    - buildOnCommit false
  NOTE(review): with buildOnCommit off, the suggestion indexes presumably
  have to be built explicitly (for example with suggest.build=true); confirm
  how the application triggers that.
-->
<searchComponent class="solr.SuggestComponent" name="suggest">

  <lst name="suggester">
    <str name="name">bibliographic_citation</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_bibliographic_citation</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">bibliographic_citation_sim</str>
  </lst>

  <lst name="suggester">
    <str name="name">keyword</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_keyword</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">keyword_sim</str>
  </lst>

  <lst name="suggester">
    <str name="name">name</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_name</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">name_suggest_ssim</str>
  </lst>

  <lst name="suggester">
    <str name="name">organization</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_organization</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">organization_sim</str>
  </lst>

  <lst name="suggester">
    <str name="name">physical_medium</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_physical_medium</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">physical_medium_sim</str>
  </lst>

  <lst name="suggester">
    <str name="name">publisher</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_publisher</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">publisher_sim</str>
  </lst>

  <lst name="suggester">
    <str name="name">source</str>
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="highlight">false</str>
    <str name="dictionaryImpl">HighFrequencyDictionaryFactory</str>
    <str name="indexPath">suggestion_index_source</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="buildOnCommit">false</str>
    <str name="field">source_sim</str>
  </lst>

</searchComponent>
<!-- /suggest endpoint, loaded lazily. Suggestions are on by default, five
     are returned per request, and the "keyword" suggester is used unless the
     request names another suggest.dictionary. -->
<requestHandler name="/suggest"
                class="solr.SearchHandler"
                startup="lazy">
  <lst name="defaults">
    <str name="suggest">true</str>
    <str name="suggest.count">5</str>
    <str name="suggest.dictionary">keyword</str>
  </lst>
  <!-- the component list for this handler holds only the suggest component -->
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>
<!-- /update/extract endpoint, served by ExtractingRequestHandler. -->
<requestHandler class="org.apache.solr.handler.extraction.ExtractingRequestHandler"
                name="/update/extract">
  <lst name="defaults">
    <!-- map the extracted Last-Modified value onto the last_modified field -->
    <str name="fmap.Last-Modified">last_modified</str>
    <!-- NOTE(review): uprefix should prepend ignored_ to extracted fields
         that the schema does not define; confirm the schema has a matching
         ignored_* dynamic field. -->
    <str name="uprefix">ignored_</str>
  </lst>

  <!-- Optional: path to a Tika configuration file. See the Tika docs for
       details. -->
  <!-- <str name="tika.config">/my/path/to/tika.config</str> -->

  <!-- Optional: one or more date formats to parse. See
       DateUtil.DEFAULT_DATE_FORMATS for the default date formats. -->
  <!-- <lst name="date.formats"> -->
  <!--   <str>yyyy-MM-dd</str> -->
  <!-- </lst> -->
</requestHandler>
</config>