fairplaysk/datacamp

View on GitHub
lib/site_map_generator.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# -*- encoding : utf-8 -*-

# Builds plain-text sitemap files (one URL per line) under public/sitemaps
# and a sitemap index (public/sitemap.xml) that lists them.
#
# Typical flow, as suggested by the comments on the two entry points:
#   1. SiteMapGenerator.generate_all_files  - wipes old files and schedules
#      the per-locale generators.
#   2. SiteMapGenerator.create_site_map     - once the text files exist,
#      writes the index that references every one of them.
class SiteMapGenerator

  # locale => URL path prefix (an empty prefix means the site root).
  LOCALES = { sk: '', en: 'en' }.freeze
  ROOT_URL = 'http://datanest.fair-play.sk'.freeze
  # Maximum number of URLs written into a single sitemap file.
  LIMIT_URLS = 50000
  # Page size used to compute how many "?page=N" URLs a dataset has.
  PER_PAGE = 10

  # First step: remove previously generated files and schedule one
  # Generator per locale.
  #
  # `delay` is not defined in this file - presumably the delayed_job proxy,
  # so the work runs asynchronously; confirm against the project setup.
  def self.generate_all_files
    # rm_rf silently ignores a missing directory, so no existence check is needed.
    FileUtils.rm_rf(sitemaps_dir)
    LOCALES.each_pair do |locale, locale_path|
      Generator.new(locale, locale_path).delay.generate
    end
  end

  # Last step: write public/sitemap.xml, a sitemap index with one <sitemap>
  # entry per *.txt file found in public/sitemaps (sorted by file name).
  def self.create_site_map
    file_names = find_site_maps_files.map { |file_path| File.basename(file_path, ".txt") }.sort

    # Block form guarantees the handle is closed even if a write raises.
    File.open(site_map_file, "wb:UTF-8") do |f|
      f.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"
      file_names.each do |file_name|
        f.write "\t<sitemap>\n\t\t<loc>#{ROOT_URL}/sitemaps/#{file_name}.txt</loc>\n\t</sitemap>\n"
      end
      f.write "</sitemapindex>\n"
    end
  end

  # NOTE: the helpers below were preceded by a bare `private`, which has no
  # effect on `def self.` methods. They have therefore always been publicly
  # callable and are deliberately left that way for backward compatibility.

  # Paths of all generated sitemap text files.
  def self.find_site_maps_files
    Dir.glob(Rails.root.join('public', 'sitemaps', '*.txt').to_s)
  end

  # Location of the sitemap index file.
  def self.site_map_file
    Rails.root.join 'public', 'sitemap.xml'
  end

  # Directory holding the generated sitemap text files.
  def self.sitemaps_dir
    Rails.root.join 'public', 'sitemaps'
  end

  # Generates all sitemap text files for a single locale.
  class Generator

    attr_reader :locale, :locale_path

    # locale      - identifier used in generated file names (e.g. :sk).
    # locale_path - URL prefix for the locale; blank means no prefix.
    def initialize(locale, locale_path)
      @locale = locale
      @locale_path = locale_path
    end

    # Schedules the three independent generation steps via `delay`.
    def generate
      delay.generate_static_pages
      delay.generate_searches
      delay.generate_datasets
    end

    # static pages --------

    # Writes the "static" sitemap file: all pages plus the news URLs.
    def generate_static_pages
      init_site_map_file('static') do |f|
        generate_pages(f)
        generate_news(f)
      end
    end

    # Appends one URL per Page record to the open file +f+.
    def generate_pages(f)
      Page.all.each do |page|
        add_url f, page_path(page)
      end
    end

    # Appends the news index URL and one URL per published News record.
    def generate_news(f)
      add_url f, new_path
      News.published.each do |news_item|
        add_url f, new_path(news_item)
      end
    end

    # searches ---------

    # Writes one URL per Search record into "searches-N" files holding at
    # most LIMIT_URLS URLs each. No file is created when there are no
    # searches, and no trailing empty file is left behind when the number of
    # searches is an exact multiple of LIMIT_URLS.
    def generate_searches
      # Enumerator.new wraps find_each so it works regardless of whether
      # find_each itself can return an enumerator.
      search_paths = Enumerator.new do |yielder|
        Search.find_each { |search| yielder << search_path(search) }
      end
      write_paths_in_chunks(search_paths) { |file_index| "searches-#{file_index}" }
    end

    # datasets ---------

    # Schedules generation for every active dataset description.
    def generate_datasets
      DatasetDescription.where(is_active: true).each do |dataset_description|
        delay.generate_dataset(dataset_description, dataset_description.dataset_model)
      end
    end

    # Writes the dataset's own URL followed by its "?page=N" URLs
    # (N = 2..page count), split into files of at most LIMIT_URLS URLs, and
    # then schedules generation of the per-record URLs.
    def generate_dataset(dataset_description, dataset_model)
      count_records = dataset_model.active.count
      count_pages = (count_records + PER_PAGE - 1) / PER_PAGE # ceiling division

      page_paths = Enumerator.new do |yielder|
        # Page 1 is represented by the bare dataset URL.
        yielder << dataset_path(dataset_description)
        (2..count_pages).each { |page| yielder << dataset_path(dataset_description, page) }
      end
      write_paths_in_chunks(page_paths) do |file_index|
        dataset_site_map_file(dataset_description, file_index)
      end

      delay.generate_records(dataset_description, dataset_model)
    end

    # Schedules one job per LIMIT_URLS-sized slice of the dataset's active
    # records. Nothing is scheduled when there are no records.
    def generate_records(dataset_description, dataset_model)
      count_records = dataset_model.active.count
      count_files = (count_records + LIMIT_URLS - 1) / LIMIT_URLS # ceiling division
      (1..count_files).each do |index|
        delay.create_dataset_records_file(dataset_description, dataset_model, index)
      end
    end

    # Writes the record URLs of the +index+-th (1-based) slice of active
    # records, ordered by _record_id, into its own sitemap file.
    def create_dataset_records_file(dataset_description, dataset_model, index)
      offset = LIMIT_URLS * (index - 1)
      records = dataset_model.active.offset(offset).limit(LIMIT_URLS).order(:_record_id)
      init_site_map_file(dataset_records_site_map_file(dataset_description, index)) do |f|
        records.each do |record|
          add_url f, record_path(dataset_description, record)
        end
      end
    end

    private

    # Streams +paths+ into consecutive sitemap files of at most LIMIT_URLS
    # URLs each. The given block receives the 1-based file index and returns
    # the file's base name. A file is only created once there is at least
    # one URL to put into it. Each chunk of paths is held in memory while
    # its file is written.
    def write_paths_in_chunks(paths)
      paths.each_slice(LIMIT_URLS).with_index(1) do |chunk, file_index|
        init_site_map_file(yield(file_index)) do |f|
          chunk.each { |path| add_url f, path }
        end
      end
    end

    # Path of a single news item, or of the news index when none is given.
    def new_path(news_item = nil)
      news_item ? "news/#{news_item.id}" : "news"
    end

    # Path of the dataset list, of one dataset, or of one page of a dataset.
    def dataset_path(dataset_description = nil, page = nil)
      return 'datasets' unless dataset_description
      return "datasets/#{dataset_description.id}" unless page

      "datasets/#{dataset_description.id}?page=#{page}"
    end

    # Base name of the +index+-th file holding a dataset's page URLs.
    def dataset_site_map_file(dataset_description, index)
      "dataset-#{dataset_description.id}-#{index}"
    end

    # Base name of the +index+-th file holding a dataset's record URLs.
    def dataset_records_site_map_file(dataset_description, index)
      "dataset-#{dataset_description.id}-records-#{index}"
    end

    def record_path(dataset_description, record)
      "datasets/#{dataset_description.id}/records/#{record.id}"
    end

    def page_path(page)
      "pages/#{page.page_name}"
    end

    def search_path(search)
      "searches/#{search.id}"
    end

    # Appends the absolute URL for +path+ as one line of +f+.
    def add_url(f, path)
      f.write url_for(path)
      f.write "\n"
    end

    # Opens (truncating) the sitemap text file with base name +file+,
    # creating the target directory if needed.
    #
    # With a block, yields the file and closes it when the block exits, even
    # on error. Without a block, returns the open file and the caller is
    # responsible for closing it.
    def init_site_map_file(file, &block)
      urls_file = site_map_urls_file(file)
      FileUtils.mkdir_p(File.dirname(urls_file))
      File.open(urls_file, 'wb:UTF-8', &block)
    end

    # Absolute URL for +path+, including the locale prefix when present.
    def url_for(path)
      l = locale_path.blank? ? '' : "#{locale_path}/"
      "#{ROOT_URL}/#{l}#{path}"
    end

    # Full path of the sitemap text file for this locale and +file_name+.
    def site_map_urls_file(file_name)
      file_path = "sitemap-#{locale}-#{file_name}.txt"
      Rails.root.join 'public', 'sitemaps', file_path
    end
  end

end