psu-libraries/psulib_blacklight

View on GitHub
app/controllers/sitemap_controller.rb

Summary

Maintainability
A
0 mins
Test Coverage
B
87%
# frozen_string_literal: true

# Serves a sitemap index (+index+) and the individual sitemaps (+show+). Each sitemap is identified by a
# hexadecimal prefix (the "Sitemap ID") and lists the Solr documents whose +hashed_id_si+ value starts with
# that prefix.
class SitemapController < ApplicationController
  # Target number of documents per sitemap. 4,000 is a good size because it is large enough to fit into the
  # 4096 leaves option (3 character placeholders) scheme, is sufficiently less than the 50,000 maximum per
  # sitemap, and can be rendered by Rails performantly. Larger numbers can tax Rails.
  APPROXIMATE_CHUNK_SIZE = 4000

  # Radix of the sitemap ID prefixes (hexadecimal digits).
  PREFIX_RADIX = 16

  # Exposes the list of sitemap ID prefixes to the view as +@access_list+.
  def index
    @access_list = access_list
  end

  # Queries Solr for the +id+ and +timestamp+ of every document whose +hashed_id_si+ starts with the requested
  # Sitemap ID, and exposes the raw response to the view as +@solr_response+.
  def show
    @solr_response = Blacklight.default_index.connection.select(
      params:
            {
              # The prefix travels as its own request parameter and is dereferenced by Solr ($sitemap_prefix)
              # instead of being interpolated into the local-params string, so whitespace or braces in the
              # user-supplied value cannot inject additional local params or alter the query.
              q: '{!prefix f=hashed_id_si v=$sitemap_prefix}',
              sitemap_prefix: access_params.to_s,
              defType: 'lucene',
              fl: 'id,timestamp',
              facet: false,
              rows: 3_000_000 # a sufficiently large number to prevent Solr from ever attempting paging
            }
    )
  end

  private

    # Returns the required +:id+ request parameter.
    #
    # @raise [ActionController::ParameterMissing] when +:id+ is absent or blank
    def access_params
      params.require(:id) # :id collides with Solr id but they are different. This is the Sitemap ID.
    end

    # Returns the total number of documents in the index (Solr's +numFound+ for a match-all query requesting
    # zero rows). The value is cached for one day under the +index_max_docs+ key.
    def max_documents
      Rails.cache.fetch('index_max_docs', expires_in: 1.day) do
        Blacklight.default_index.connection.select(params: { q: '*:*', rows: 0 })['response']['numFound']
      end
    end

    # Builds the list of sitemap ID prefixes: every zero-padded hexadecimal string of a fixed width, where the
    # width is the smallest number of hex digits giving at least one prefix per chunk of roughly
    # APPROXIMATE_CHUNK_SIZE documents.
    #
    # At least one digit is always used. A zero-width prefix cannot be expressed as a sitemap ID, and a lone
    # "0" entry would only match documents whose hash starts with 0, leaving the rest out of every sitemap.
    #
    # @return [Array<String>] e.g. ["0", "1", ..., "f"] or ["00", "01", ..., "ff"]
    def access_list
      total = max_documents
      # Clamp to at least 1 so an empty index does not divide by zero.
      chunk_size = total.clamp(1, APPROXIMATE_CHUNK_SIZE)
      chunk_count = total / chunk_size

      # Integer arithmetic instead of a floating-point logarithm: exact at powers of 16.
      digits = 1
      digits += 1 while (PREFIX_RADIX**digits) < chunk_count

      (0...(PREFIX_RADIX**digits)).map { |v| v.to_s(PREFIX_RADIX).rjust(digits, '0') }
    end
end