<?xml version="1.0" encoding="UTF-8" ?>
<!-- solr/config/schema.xml -->
<schema name="Stanford Exhibits based on SearchWorks" version="1.5">
<!-- Documents are uniquely identified by the "id" field (declared below with required="true") -->
<uniqueKey>id</uniqueKey>
<fields>
  <!-- Needed by some Solr 4.0 functionality, e.g. the transaction log and partial document updates -->
  <field name="_version_" type="long" indexed="true" stored="true" />
  <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" />
  <!-- id is the uniqueKey of the schema -->
  <field name="id" type="string_punct_stop" indexed="true" stored="true" required="true" />
  <field name="created" type="date" indexed="true" stored="true" default="NOW/SECOND" />
  <field name="last_updated" type="date" indexed="true" stored="true" default="NOW/SECOND" />

  <!-- Entire MARC bib record (stored only) -->
  <field name="marcxml" type="string" indexed="false" stored="true" />

  <!-- all_search: catch-all field for metadata text; stored for hit highlighting -->
  <field name="all_search" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true" />
  <field name="all_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="vern_all_search" type="text" indexed="true" stored="true" multiValued="true" />

  <!-- Format: facet and display -->
  <field name="format" type="string" indexed="true" stored="true" multiValued="true" />

  <!-- Language: facet and display -->
  <field name="language" type="string" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Standard number fields ===== -->
  <!-- Let end users search the index by ISBN and ISSN; these hold more values than the display isbn/issn fields. -->
  <!-- isbn_search is type text to accommodate hyphens that could be entered by end users -->
  <field name="isbn_search" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true" />
  <!-- issn_search is type text so a hyphen may be present or absent -->
  <field name="issn_search" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true" />
  <!-- Display fields that allow external lookups (e.g. Google book search, xISBN, xISSN ...) -->
  <field name="isbn_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="issn_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="lccn" type="string" indexed="false" stored="true" />
  <field name="oclc" type="string_punct_stop" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Title search fields ===== -->
  <field name="title_245a_exact_search" type="text_anchored" indexed="true" stored="true" />
  <field name="title_245a_search" type="text" indexed="true" stored="true" />
  <field name="vern_title_245a_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_245a_unstem_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_245_search" type="text" indexed="true" stored="true" />
  <field name="vern_title_245_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_245_unstem_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_uniform_search" type="text" indexed="true" stored="true" />
  <field name="vern_title_uniform_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_uniform_unstem_search" type="textNoStem" indexed="true" stored="true" />
  <field name="title_variant_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_title_variant_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="title_variant_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="title_related_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_title_related_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="title_related_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Title display fields ===== -->
  <field name="title_display" type="string" indexed="false" stored="true" />
  <field name="vern_title_display" type="string" indexed="false" stored="true" />
  <field name="title_245a_display" type="string" indexed="false" stored="true" />
  <field name="vern_title_245a_display" type="string" indexed="false" stored="true" />
  <field name="title_245c_display" type="string" indexed="false" stored="true" />
  <field name="vern_title_245c_display" type="string" indexed="false" stored="true" />
  <field name="title_uniform_display" type="string" indexed="false" stored="true" />
  <field name="vern_title_uniform_display" type="string" indexed="false" stored="true" />
  <field name="title_variant_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="title_sort" type="alphaSort" indexed="true" stored="true" />

  <!-- ===== Series search fields ===== -->
  <field name="series_exact_search" type="text_anchored" indexed="true" stored="true" multiValued="true" />
  <field name="series_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_series_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="series_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Author-title search fields ===== -->
  <field name="author_title_search" type="text" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Author search fields ===== -->
  <field name="author_1xx_search" type="text" indexed="true" stored="true" />
  <field name="vern_author_1xx_search" type="textNoStem" indexed="true" stored="true" />
  <field name="author_1xx_unstem_search" type="textNoStem" indexed="true" stored="true" />
  <field name="author_7xx_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_author_7xx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="author_7xx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="author_8xx_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_author_8xx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="author_8xx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Author facet fields ===== -->
  <field name="author_person_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="author_other_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="author_sort" type="alphaSort" indexed="true" stored="true" />

  <!-- ===== Author display fields ===== -->
  <field name="author_person_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="vern_author_person_display" type="string" indexed="false" stored="true" />
  <field name="author_person_full_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="vern_author_person_full_display" type="string" indexed="false" stored="true" />
  <field name="author_corp_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="vern_author_corp_display" type="string" indexed="false" stored="true" />
  <field name="author_meeting_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="vern_author_meeting_display" type="string" indexed="false" stored="true" />

  <!-- ===== Subject search fields ===== -->
  <field name="topic_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_topic_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="topic_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="topic_subx_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_topic_subx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="topic_subx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="geographic_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_geographic_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="geographic_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="geographic_subz_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_geographic_subz_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="geographic_subz_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_other_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_subject_other_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_other_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_other_subvy_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_subject_other_subvy_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_other_subvy_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_all_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_subject_all_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="subject_all_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="db_az_subject" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="db_az_subject_search" type="text" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Subject facet fields ===== -->
  <field name="topic_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="geographic_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="era_facet" type="string" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Subject display fields ===== -->
  <field name="topic_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="subject_other_display" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- ===== Publishing fields ===== -->
  <field name="pub_search" type="text" indexed="true" stored="true" omitNorms="true" multiValued="true" />
  <field name="vern_pub_search" type="textNoStem" indexed="true" stored="true" omitNorms="true" multiValued="true" />
  <field name="pub_country" type="text" indexed="true" stored="true" omitNorms="true" />
  <!-- TODO: should pub_date_search be a date or a text field? -->
  <field name="pub_date_search" type="text" indexed="true" stored="true" omitNorms="true" />
  <!-- pub_date_sort is now deprecated; replaced by pub_year_isi -->
  <field name="pub_date_sort" type="alphaSort" indexed="true" stored="true" />

  <!-- ===== Pub date facet fields ===== -->
  <!-- pub_year_tisim is for the date slider; pub_year_w_approx_isi/pub_year_no_approx_isi are other facets -->
  <field name="pub_year_tisim" type="tint" indexed="true" stored="true" multiValued="true" />
  <field name="pub_display" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="imprint_display" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- ===== URL fields ===== -->
  <field name="url_fulltext" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="url_suppl" type="string" indexed="false" stored="true" multiValued="true" />
  <!-- sfx urls should rarely occur more than once in a marc bib record -->
  <field name="url_sfx" type="string" indexed="false" stored="true" multiValued="true" />
  <field name="url_restricted" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- ===== Physical fields ===== -->
  <field name="physical" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_physical" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Table of contents ===== -->
  <field name="toc_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_toc_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="toc_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Context ===== -->
  <field name="context_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_context_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="context_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Summary ===== -->
  <field name="summary_search" type="text" indexed="true" stored="true" multiValued="true" />
  <field name="vern_summary_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="summary_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="summary_display" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- ===== Award ===== -->
  <field name="award_search" type="text" indexed="true" stored="true" multiValued="true" />

  <!-- ===== Item info fields (derived from 999) ===== -->
  <!-- Call number fields -->
  <field name="callnum_search" type="callnum_ws" indexed="true" stored="true" multiValued="true" />
  <!-- For nearby-on-shelf: term lookups to get the next X alpha-sorted terms -->
  <field name="shelfkey" type="alphaSort" indexed="true" stored="true" multiValued="true" />
  <field name="reverse_shelfkey" type="alphaSort" indexed="true" stored="true" multiValued="true" />
  <field name="barcode_search" type="string_punct_stop" indexed="true" stored="true" multiValued="true" />
  <field name="preferred_barcode" type="string" indexed="false" stored="true" />
  <field name="access_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="building_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <!-- item_display layout: barcode -|- lib -|- location -|- lopped_callnum -|- shelfkey -|- reverse_shelfkey -|- full_callnum -|- callnum_show_sort -->
  <field name="item_display" type="string" indexed="false" stored="true" multiValued="true" />
  <!-- mhld_display layout: lib -|- location -|- note -|- holdings summary -|- last received -->
  <field name="mhld_display" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- ===== Course reserve fields ===== -->
  <field name="crez_course_id_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="crez_course_name_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <field name="crez_instructor_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <!-- The instructor and course facet field names are exposed to end users in SW -->
  <field name="instructor" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="course" type="string" indexed="true" stored="true" multiValued="true" />
  <!-- crez_course_info is a facet and display field at this time (2012-03-21) -->
  <field name="crez_course_info" type="string" indexed="true" stored="true" multiValued="true" />
  <!-- The next two facets are not used in SW at this time -->
  <field name="crez_dept_facet" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="crez_desk_facet" type="string" indexed="true" stored="true" multiValued="true" />

  <!-- *************** additional fields for DOR objects ****************** -->
  <field name="druid" type="string_punct_stop" indexed="true" stored="true" />
  <field name="modsxml" type="string" indexed="false" stored="true" />
  <!-- collection (facet and display): "sirsi" or, for DOR items, the id of their parent coll -->
  <field name="collection" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" />
  <!-- collection_with_title: easy way to indicate an item's parent coll title in the UI (may be deprecated in future) -->
  <field name="collection_with_title" type="string" indexed="true" stored="true" multiValued="true" />
  <!-- collection_search: allow searching within collections for aggr. colls -->
  <field name="collection_search" type="string_punct_stop" indexed="true" stored="true" multiValued="true" omitNorms="true" />
  <!-- display_type: used by UI code, e.g. 'file' or 'image' -->
  <field name="display_type" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" />
  <!-- collection_type: used to determine when something is a digital collection -->
  <field name="collection_type" type="string" indexed="true" stored="true" multiValued="true" />
  <!-- file_id: ids of files (including images) in the digital stacks -->
  <field name="file_id" type="string" indexed="false" stored="true" multiValued="true" />

  <!-- *************** dynamic field types ****************** -->
  <!-- The four suffix patterns below are intentionally disabled:
  <dynamicField name="*_unstem_search" type="textNoStem" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_search" type="text" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_facet" type="string" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_display" type="string" stored="true" indexed="false" multiValued="true"/>
  -->
  <dynamicField name="*_ss" type="string" stored="true" indexed="false" omitNorms="true" />
  <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true" omitNorms="true" />
  <dynamicField name="*_ssi" type="string" stored="true" indexed="true" omitNorms="true" />
  <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true" omitNorms="true" />
  <dynamicField name="*_tsi" type="text" stored="true" indexed="true" omitNorms="true" />
  <dynamicField name="*_isi" type="tint" stored="true" indexed="true" omitNorms="true" />
  <dynamicField name="*_isim" type="tint" stored="true" indexed="true" multiValued="true" omitNorms="true" />
  <dynamicField name="*_sort" type="alphaSort" stored="true" indexed="true" />
  <dynamicField name="*_xml" type="string" stored="true" indexed="false" omitNorms="true" />
  <dynamicField name="cjk_*" type="text_cjk" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_hsim" type="string_hierarch" stored="true" indexed="true" multiValued="true" />

  <!-- ===== Exhibits fields ===== -->
  <field name="full_text_search" type="text" indexed="true" stored="true" multiValued="true" storeOffsetsWithPositions="true" termVectors="true" />
  <field name="full_text_search_en" type="text_en" indexed="true" stored="true" multiValued="true" storeOffsetsWithPositions="true" termVectors="true" />
  <field name="full_text_search_pt" type="text_pt" indexed="true" stored="true" multiValued="true" storeOffsetsWithPositions="true" termVectors="true" />
  <field name="full_text_search_id" type="text_id" indexed="true" stored="true" multiValued="true" storeOffsetsWithPositions="true" termVectors="true" />
  <field name="full_text_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
  <!-- NOTE: *_tesim fields are copied into all_search, which is meant for metadata, not full text. Use _tesimv for full text -->
  <dynamicField name="*_tesim" type="text" stored="true" indexed="true" multiValued="true" omitNorms="true" />
  <!-- NOTE: *_tesimv fields are copied into full_text_search, which is meant for full text, not metadata. Use _tesim for metadata -->
  <dynamicField name="*_tesimv" type="text" stored="true" indexed="true" multiValued="true" storeOffsetsWithPositions="true" termVectors="true" />
  <!-- NOTE(review): *_bsi is declared multiValued although the neighbouring *_ssi/*_isi (vs *_ssim/*_isim) pairs reserve a trailing "m" for multi-valued; confirm this is intentional -->
  <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="true" omitNorms="true" />
  <dynamicField name="*_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true" />
  <dynamicField name="*_pt" type="location" stored="true" indexed="true" />
  <dynamicField name="*_bbox" type="bbox" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_srpt" type="location_rpt" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_geohash" type="geohash" stored="true" indexed="true" multiValued="true" />
  <dynamicField name="*_ts" type="text" indexed="false" stored="true" multiValued="false" />
  <dynamicField name="random*" type="random" />
</fields>
<!-- copy fields: each copyField feeds the value supplied for "source" into "dest" as well -->
<!-- pub_year_w_approx_isi has no explicit field declaration; it is covered by the *_isi dynamicField -->
<copyField source="collection" dest="collection_search" />
<copyField source="pub_year_w_approx_isi" dest="pub_date_search" />
<copyField source="db_az_subject" dest="db_az_subject_search" />
<!-- unstemmed and anchored search fields: title -->
<!-- title_245a_search feeds both the anchored (text_anchored) and the unstemmed (textNoStem) variants -->
<copyField source="title_245a_search" dest="title_245a_exact_search" />
<copyField source="title_245a_search" dest="title_245a_unstem_search" />
<copyField source="title_245_search" dest="title_245_unstem_search" />
<copyField source="title_uniform_search" dest="title_uniform_unstem_search" />
<copyField source="title_variant_search" dest="title_variant_unstem_search" />
<copyField source="title_related_search" dest="title_related_unstem_search" />
<!-- unstemmed search fields: author -->
<copyField source="author_1xx_search" dest="author_1xx_unstem_search" />
<copyField source="author_7xx_search" dest="author_7xx_unstem_search" />
<copyField source="author_8xx_search" dest="author_8xx_unstem_search" />
<!-- unstemmed search fields: subject -->
<copyField source="topic_search" dest="topic_unstem_search" />
<copyField source="topic_subx_search" dest="topic_subx_unstem_search" />
<copyField source="geographic_search" dest="geographic_unstem_search" />
<copyField source="geographic_subz_search" dest="geographic_subz_unstem_search" />
<copyField source="subject_other_search" dest="subject_other_unstem_search" />
<copyField source="subject_other_subvy_search" dest="subject_other_subvy_unstem_search" />
<copyField source="subject_all_search" dest="subject_all_unstem_search" />
<!-- unstemmed search fields: toc/context/summary -->
<copyField source="toc_search" dest="toc_unstem_search" />
<copyField source="context_search" dest="context_unstem_search" />
<copyField source="summary_search" dest="summary_unstem_search" />
<!-- other unstemmed search fields -->
<!-- NOTE(review): series_exact_search is declared but no copyField in this section populates it; confirm the indexer supplies it directly -->
<copyField source="series_search" dest="series_unstem_search" />
<!-- course reserve fields: search values also populate the user-facing instructor/course facets -->
<copyField source="crez_instructor_search" dest="instructor" />
<copyField source="crez_course_id_search" dest="course" />
<!-- image fields: search values are also copied into the stored-only *_display fields -->
<copyField source="topic_search" dest="topic_display" />
<copyField source="subject_other_search" dest="subject_other_display" />
<copyField source="title_variant_search" dest="title_variant_display" />
<copyField source="summary_search" dest="summary_display" />
<copyField source="pub_search" dest="pub_display" />
<!-- CJK fields: destinations match the cjk_* dynamicField (type text_cjk) -->
<!-- both bigrams and unigrams in same field -->
<copyField source="vern_title_245a_search" dest="cjk_title_245a_search" />
<copyField source="vern_title_245_search" dest="cjk_title_245_search" />
<copyField source="vern_title_uniform_search" dest="cjk_title_uniform_search" />
<copyField source="vern_title_variant_search" dest="cjk_title_variant_search" />
<copyField source="vern_title_related_search" dest="cjk_title_related_search" />
<copyField source="vern_series_search" dest="cjk_series_search" />
<copyField source="vern_author_1xx_search" dest="cjk_author_1xx_search" />
<copyField source="vern_author_7xx_search" dest="cjk_author_7xx_search" />
<copyField source="vern_author_8xx_search" dest="cjk_author_8xx_search" />
<copyField source="vern_topic_search" dest="cjk_topic_search" />
<copyField source="vern_topic_subx_search" dest="cjk_topic_subx_search" />
<copyField source="vern_geographic_search" dest="cjk_geographic_search" />
<copyField source="vern_geographic_subz_search" dest="cjk_geographic_subz_search" />
<copyField source="vern_subject_other_search" dest="cjk_subject_other_search" />
<copyField source="vern_subject_other_subvy_search" dest="cjk_subject_other_subvy_search" />
<copyField source="vern_subject_all_search" dest="cjk_subject_all_search" />
<copyField source="vern_pub_search" dest="cjk_pub_search" />
<copyField source="vern_physical" dest="cjk_physical_search" />
<!-- toc and summary are the only fields whose non-vernacular source is also copied into the cjk_* field -->
<copyField source="toc_search" dest="cjk_toc_search" /> <!-- we find CJK text in regular 505s -->
<copyField source="vern_toc_search" dest="cjk_toc_search" />
<copyField source="vern_context_search" dest="cjk_context_search" />
<copyField source="summary_search" dest="cjk_summary_search" /> <!-- we find CJK text in regular 520s -->
<copyField source="vern_summary_search" dest="cjk_summary_search" />
<copyField source="vern_all_search" dest="cjk_all_search" />
<!-- exhibits fields -->
<!-- Copies feeding the *_ng dynamic fields; at most 3000 characters are copied -->
<copyField source="id" dest="id_ng" maxChars="3000" />
<copyField source="title_display" dest="full_title_ng" maxChars="3000" />

<!-- NOTE: all_search is meant for metadata; it gathers every *_tesim, *_tsi, *_search and *_tags_ssim source -->
<!-- NOTE(review): the *_search glob also matches the names all_search and all_unstem_search themselves; confirm the indexer never supplies those two fields directly -->
<copyField source="*_tesim" dest="all_search" />
<copyField source="*_tsi" dest="all_search" />
<copyField source="*_search" dest="all_search" />
<copyField source="*_tags_ssim" dest="all_search" />

<!-- The same four source patterns also feed the unstemmed catch-all -->
<copyField source="*_tesim" dest="all_unstem_search" />
<copyField source="*_tsi" dest="all_unstem_search" />
<copyField source="*_search" dest="all_unstem_search" />
<copyField source="*_tags_ssim" dest="all_unstem_search" />

<!-- NOTE: full_text_search is meant for full text; *_tesimv is fanned out to the general, unstemmed and per-language (en, pt, id) fields -->
<copyField source="*_tesimv" dest="full_text_search" />
<copyField source="*_tesimv" dest="full_text_unstem_search" />
<copyField source="*_tesimv" dest="full_text_search_en" />
<copyField source="*_tesimv" dest="full_text_search_pt" />
<copyField source="*_tesimv" dest="full_text_search_id" />
<types>
<!-- Primitive (non-analyzed) types. Element name is spelled fieldType throughout; the original mixed fieldtype and fieldType. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />
<fieldType name="binary" class="solr.BinaryField" />
<!-- Plain numeric and date types (precisionStep="0") -->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />
<!-- t fields are for accelerating range queries (non-zero precisionStep) -->
<fieldType name="tint" class="solr.TrieIntField" precisionStep="4" positionIncrementGap="0" />
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" />
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" />
<!-- Backs the random* dynamicField -->
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<!-- Analyzed text, general case (stemmed) -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <!-- Index side: synonyms and hyphenated-word handling are applied here only -->
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.HyphenatedWordsFilterFactory" />
    <filter class="solr.SnowballPorterFilterFactory" />
    <filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1" />
    <!-- The graph produced by the word-delimiter filter is flattened at index time -->
    <filter class="solr.FlattenGraphFilterFactory" />
  </analyzer>
  <!-- Query side: same folding, stemming and word-delimiter steps, without synonyms, hyphenated-word handling or flattening -->
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.SnowballPorterFilterFactory" />
    <filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1" />
  </analyzer>
</fieldType>
<!-- Analyzed text, English (Snowball stemmer with language="English") -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <!-- Index side: synonyms and hyphenated-word handling are applied here only -->
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
    <filter class="solr.SnowballPorterFilterFactory" language="English" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.HyphenatedWordsFilterFactory" />
    <filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1" />
    <!-- The graph produced by the word-delimiter filter is flattened at index time -->
    <filter class="solr.FlattenGraphFilterFactory" />
  </analyzer>
  <!-- Query side: folding, stemming and word-delimiter steps only -->
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.SnowballPorterFilterFactory" language="English" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1" />
  </analyzer>
</fieldType>
<!-- Analyzed Text, Portuguese -->
<!-- Changing this analysis chain requires reindexing every field of type text_pt (e.g. full_text_search_pt). -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- Lowercase BEFORE stemming: the stemmer previously saw tokens in their original case, so capitalised
         or upper-case words were stemmed differently from their lowercase forms. The Solr Language Analysis
         guide places LowerCaseFilterFactory ahead of this stemmer. -->
    <filter class="solr.LowerCaseFilterFactory" />
    <!-- The stemmer runs before ICU folding, i.e. while diacritics are still present -->
    <filter class="solr.PortugueseLightStemFilterFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.HyphenatedWordsFilterFactory" />
    <!-- Kept from the original chain; no graph-producing filter precedes it in this analyzer -->
    <filter class="solr.FlattenGraphFilterFactory" />
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- Must mirror the index-side lowercasing so query terms stem to the same form -->
    <filter class="solr.LowerCaseFilterFactory" />
    <filter class="solr.PortugueseLightStemFilterFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.TrimFilterFactory" />
  </analyzer>
</fieldType>
<!-- Analyzed Text, Indonesian -->
<!-- Changing this analysis chain requires reindexing every field of type text_id (e.g. full_text_search_id). -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- Lowercase BEFORE stemming: the stemmer previously saw tokens in their original case, so capitalised
         or upper-case words were stemmed differently from their lowercase forms. The Solr Language Analysis
         guide places LowerCaseFilterFactory ahead of this stemmer. -->
    <filter class="solr.LowerCaseFilterFactory" />
    <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.TrimFilterFactory" />
    <filter class="solr.HyphenatedWordsFilterFactory" />
    <!-- Kept from the original chain; no graph-producing filter precedes it in this analyzer -->
    <filter class="solr.FlattenGraphFilterFactory" />
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- Must mirror the index-side lowercasing so query terms stem to the same form -->
    <filter class="solr.LowerCaseFilterFactory" />
    <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.TrimFilterFactory" />
  </analyzer>
</fieldType>
<!-- Analyzed text without stemming; one analyzer serves both index and query time -->
<fieldType name="textNoStem" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <analyzer>
    <tokenizer class="solr.ICUTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
  </analyzer>
</fieldType>
<!-- Left and Right Anchored Analyzed Text, no Stemming -->
<fieldType name="text_anchored" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
  <analyzer>
    <!-- Put beginning and ending anchors on the field value: leading whitespace and trailing
         whitespace/punctuation are dropped, then the value is prefixed with aaaaaa and suffixed with zzzzzz. -->
    <!-- Watch out for query-time whitespace-separated chars that will be processed as their own token stream,
         e.g. in 'felines : warm and fuzzy' -->
    <!-- The regex is XML-escaped (&amp;, &lt;, &gt;): a literal ampersand or less-than sign is not allowed
         inside an attribute value and made the file unparseable. The parsed pattern is unchanged. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^\s*(.*[\S&amp;&amp;[^\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]])[\s\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]*$"
                replacement="aaaaaa$1zzzzzz" />
    <tokenizer class="solr.WhitespaceTokenizerFactory" />
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
    <!-- Anchor-aware synonym lists: both anchors, left anchor only, right anchor only -->
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="true" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="true" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="true" />
    <filter class="solr.WordDelimiterFilterFactory"
            splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
            splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
            catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
  </analyzer>
</fieldType>
<!-- text_cjk: analysis chain for CJK (Chinese, Japanese, Korean) text.
     Only one analyzer is declared, so the same chain applies at index and query time.
     Phrase queries are not auto-generated for this type (autoGeneratePhraseQueries="false"). -->
<fieldtype name="text_cjk" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="false">
<analyzer>
<!-- remove spaces among hangul and han chars if there is at least one hangul char -->
<!-- The two patterns below use explicit Unicode block classes where the shorthand shown in the
     adjacent comments uses \p{Hangul} and \p{Han}. Both replace the match with group 1 only,
     which drops the whitespace run that follows it. -->
<!-- a korean char guaranteed at the start of the pattern: pattern="(\p{Hangul}\p{Han}*)\s+(?=[\p{Hangul}\p{Han}])" -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}][\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}]*)\s+(?=[\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}])" replacement="$1"/>
<!-- a korean char guaranteed at the end of the pattern: pattern="([\p{Hangul}\p{Han}])\s+(?=[\p{Han}\s]*\p{Hangul})" -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}])\s+(?=[\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}\s]*[\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}])" replacement="$1"/>
<tokenizer class="solr.ICUTokenizerFactory" />
<filter class="solr.CJKWidthFilterFactory"/>
<!-- custom Stanford folding filter, currently disabled: -->
<!--<filter class="edu.stanford.lucene.analysis.CJKFoldingFilterFactory"/>-->
<!-- ICU transforms, applied in this order: Traditional to Simplified, then Katakana to Hiragana -->
<filter class="solr.ICUTransformFilterFactory" id="Traditional-Simplified"/>
<filter class="solr.ICUTransformFilterFactory" id="Katakana-Hiragana"/>
<filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
<!-- bigrams are enabled for han, hiragana, katakana and hangul; outputUnigrams="true" is set as well -->
<filter class="solr.CJKBigramFilterFactory" han="true" hiragana="true" katakana="true" hangul="true" outputUnigrams="true" />
</analyzer>
</fieldtype>
<!-- alphaSort: the whole value is kept as one token and used for sorting.
     Punctuation is significant. -->
<fieldtype name="alphaSort"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <!-- KeywordTokenizer: no splitting, the entire input becomes a single token -->
    <tokenizer class="solr.KeywordTokenizerFactory" />
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.TrimFilterFactory" />
  </analyzer>
</fieldtype>
<!-- string_punct_stop: single-token field whose query analyzer drops punctuation terms,
     so dismax does not look for punctuation terms in these fields.
     On the client side the Lucene query parser splits on whitespace *before* field analysis
     for dismax, so punctuation terms (& : ;) are treated as stopwords; this allows results
     from other fields when these chars are surrounded by spaces in the query.
     Values are not lowercased. -->
<fieldType name="string_punct_stop"
           class="solr.TextField"
           omitNorms="true">
  <!-- index side: keep the value as one token, Unicode-normalized only -->
  <analyzer type="index">
    <tokenizer class="solr.KeywordTokenizerFactory" />
    <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc" mode="compose" />
  </analyzer>
  <!-- query side: same as index, plus removal of punctuation stopwords
       (works around the Lucene query parser issue described above) -->
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory" />
    <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc" mode="compose" />
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords_punctuation.txt" />
  </analyzer>
</fieldType>
<!-- callnum_ws: field type designed for LC call number searching.
     Index and query analyzers normalize the call number with a series of regex charFilters,
     then split on whitespace and lowercase. -->
<fieldType name="callnum_ws"
           class="solr.TextField"
           omitNorms="true"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <!-- Step 1 (LC): drop leading spaces and the space between class letters and digits;
         normalize whatever precedes the first cutter to " ." -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^ *([A-Za-z]{1,3}) ?(\d{1,4}(\.\d+)?) ?\.?([A-Za-z]\d+)"
                replacement="$1$2 .$4"/>
    <!-- Step 2 (LC): separate the first cutter letter from its digits with a space,
         so a match on the first cutter letter alone is possible -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)? \.([A-Za-z]))(\d+)"
                replacement="$1 $4"/>
    <!-- Step 3: prefix the string with yyyy so that searches can be left anchored -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^\s*(\S{1})"
                replacement="yyyy$1"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory" />
    <filter class="solr.LowerCaseFilterFactory" />
  </analyzer>
  <!-- The query string may be only a partial call number, which is why the query-side
       patterns are kept separate instead of being combined into one -->
  <analyzer type="query">
    <!-- Step 1 (LC): drop leading spaces and any spaces between class letters and digits -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^ *([A-Za-z]{1,3}) *(\d{1,4})"
                replacement="$1$2" />
    <!-- Step 2 (LC): normalize to " ." before the first cutter or the first letter of the cutter
         (it could be preceded by " ." "." " " or nothing) -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)?) *\.?([A-Za-z](\d+)?)"
                replacement="$1 .$3" />
    <!-- Step 3 (LC): separate the first cutter letter from what follows with a space,
         so a match on the first cutter letter alone is possible -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)? \.([A-Za-z]))([^ ])"
                replacement="$1 $4"/>
    <!-- Step 4: prefix the string with yyyy so that searches can be left anchored -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="^\s*(\S{1})"
                replacement="yyyy$1"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory" />
    <filter class="solr.LowerCaseFilterFactory" />
  </analyzer>
</fieldType>
<!-- string_hierarch: for hierarchical facets.
     Path levels are separated by the delimiter configured on the index tokenizer ("|").
     A query for Books|NonFic will match documents indexed with values like
     Books|NonFic, Books|NonFic|Law, Books|NonFic|Science|Physics, etc.
     But it will not match documents indexed with values like Books, or Books|Fic...
-->
<fieldType name="string_hierarch"
           class="solr.TextField"
           positionIncrementGap="100">
  <!-- index side: the value is tokenized as a path hierarchy -->
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="|" />
  </analyzer>
  <!-- query side: the query value is kept whole as a single token -->
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory" />
  </analyzer>
</fieldType>
<!-- exhibits fieldTypes below -->
<!-- text_en_ng: English text with edge n-grams.
     The index analyzer ends with an edge n-gram filter; the query analyzer runs the same
     chain without it. -->
<fieldType name="text_en_ng"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- the minimal English stemmer is chosen because it is less aggressive than
         PorterStemFilterFactory -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <!-- index time only: edge n-grams with sizes from 3 to 15 -->
    <filter class="solr.EdgeNGramFilterFactory"
            minGramSize="3"
            maxGramSize="15" />
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
<!-- point: a two-dimensional point type whose coordinates are indexed as separate
     fields (subFields).
     Two ways exist to name the subFields:
       * subFieldType: references a type, and a dynamic field definition is created
         matching *___<typename>. Example: with subFieldType="double" the coordinates
         would be indexed in fields myloc_0___double,myloc_1___double.
       * subFieldSuffix: the suffix is used to create the subFields. Example: with
         subFieldSuffix="_d" the coordinates would be indexed in fields myloc_0_d,myloc_1_d
     This type uses subFieldSuffix="_d".
     The subFields are an implementation detail of the fieldType, and end users
     normally should not need to know about them.
-->
<fieldType name="point"
           class="solr.PointType"
           dimension="2"
           subFieldSuffix="_d"/>
<!-- geohash: a Geohash is a compact representation of a latitude longitude pair
     in a single field.
     See http://wiki.apache.org/solr/SpatialSearch
-->
<fieldtype name="geohash"
           class="solr.GeoHashField"/>
<!-- location: a specialized field for geospatial search.
     If indexed, fields of this type must NOT be multivalued. -->
<fieldType name="location"
           class="solr.LatLonType"
           subFieldSuffix="_coordinate"/>
<!-- location_rpt: an alternative geospatial field type new to Solr 4.
     It supports multiValued and polygon shapes.
     For more information about this and other Spatial fields new to Solr 4, see:
     http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
-->
<fieldType name="location_rpt"
           class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true"
           distErrPct="0.025"
           maxDistErr="0.001"
           distanceUnits="kilometers"/>
<!-- bbox: bounding box field type; its numberType attribute names the _bbox_coord field type -->
<fieldType name="bbox"
           class="solr.BBoxField"
           geo="true"
           distanceUnits="kilometers"
           numberType="_bbox_coord"
           storeSubFields="false"/>
<!-- _bbox_coord: double type referenced by the bbox field type's numberType attribute;
     docValues enabled, not stored -->
<fieldType name="_bbox_coord"
           class="solr.TrieDoubleField"
           precisionStep="8"
           docValues="true"
           stored="false"/>
</types>
</schema>