SpeciesFileGroup/taxonworks

View on GitHub
app/models/source.rb

Summary

Maintainability
A
0 mins
Test Coverage
# A Source is the metadata that identifies the origin of some information/data.
#
# The primary purpose of Source metadata is to allow the user to find the source, that's all.
#
# See https://en.wikipedia.org/wiki/BibTeX for a definition of attributes, in nearly all cases they are 1:1 with the TW model.  We use this https://github.com/inukshuk/bibtex-ruby awesomeness.  See https://github.com/inukshuk/bibtex-ruby/tree/master/lib/bibtex/entry, in particular rdf_converter.rb for the types of field managed.
#
#
# @!attribute serial_id
#   @return [Integer]
#     The TaxonWorks Serial
#
# @!attribute address
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute annote
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute author
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute booktitle
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute chapter
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute crossref
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute edition
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute editor
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute howpublished
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute institution
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute journal
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute key
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute month
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#      stored as a three letter value, see ::VALID_BIBTEX_MONTHS
#
# @!attribute note
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute number
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute organization
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute pages
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute publisher
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute school
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute series
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute title
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute year
#   @return [Integer]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute type
#   @return [String]
#     An exception to the 1:1 modelling.  We retain for Rails STI usage. Either Source::Verbatim or Source::Bibtex.
#     The former can only consist of a single field (the full citation as a string).
#     The latter is a Bibtex model.  See "bibtex_type" for the bibtex attribute "type".
#
# @!attribute bibtex_type
#   @return [String]
#     alias for "type" in the bibtex framework  see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute volume
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute doi
#   @return [String]
#    When provided also cloned to an Identifier::Global. See https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute abstract
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute copyright
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute language
#   @return [String]
#    see https://en.wikipedia.org/wiki/BibTeX#Field_types
#
# @!attribute stated_year
#   @return [String]
#    See source/bibtex.rb
#    TODO: Why is this character but year is int?
#
# @!attribute day
#   @return [Integer]
#     the calendar day (1-31)
#
# @!attribute isbn
#   @return [String]
#   @todo
#
# @!attribute issn
#   @return [String]
#   @todo
#
# @!attribute verbatim
#   @return [String]
#     the full citation, used only for type = Source::Verbatim
#
# @!attribute verbatim_contents
#   @return [String]
#   @todo
#
# @!attribute verbatim_keywords
#   @return [String]
#   @todo
#
# @!attribute language_id
#   @return [Integer]
#     The TaxonWorks normalization of language to Language.
#
# @!attribute translator
#   @return [String]
#   @todo
#
# @!attribute year_suffix
#   @return [String]
#     Arbitrary user-provided suffix to the year.  Use is highly discouraged.
#
# @!attribute url
#   @return [String]
#   @todo
#
# @!attribute cached
#   @return [String]
#    calculated full citation, searched again in "full text"
#
# @!attribute cached_author_string
#   @return [String]
#     calculated author string
#
# @!attribute cached_nomenclature_date
#   @return [DateTime]
#     Date sensu nomenclature algorithm in TaxonWorks (see Utilities::Dates)
#
class Source < ApplicationRecord
  include Housekeeping::Users
  include Housekeeping::Timestamps
  include Shared::AlternateValues
  include Shared::DataAttributes
  include Shared::Documentation
  include Shared::Identifiers
  include Shared::Notes
  include Shared::SharedAcrossProjects
  include Shared::Tags
  include Shared::HasPapertrail
  include SoftValidation
  include Shared::IsData
  # !! Must not have Shared::Depictions

  ignore_whitespace_on(:verbatim_contents)

  # Attribute names; presumably the fields Shared::AlternateValues accepts
  # alternate values for - TODO confirm against that concern.
  ALTERNATE_VALUES_FOR = [
    :address, :annote, :booktitle, :edition, :editor, :institution, :journal, :note, :organization,
    :publisher, :school, :title, :doi, :abstract, :language, :translator, :author, :url].freeze

  # @return [Boolean, nil]
  #   When true, cached values are not built
  #   NOTE(review): the accessor name suggests this flag disables the
  #   year suffix validation rather than cache building, and it is not read
  #   anywhere in this class - confirm the intended meaning in the subclasses.
  attr_accessor :no_year_suffix_validation

  # Keep this order for citations/topics
  # A source that still has citations cannot be destroyed (restrict_with_error).
  has_many :citations, inverse_of: :source, dependent: :restrict_with_error
  # The subset of citations flagged is_original.
  has_many :origin_citations, -> {where(citations: {is_original: true})}, class_name: 'Citation', dependent: :restrict_with_error, inverse_of: :source
  has_many :citation_topics, through: :citations, inverse_of: :source
  has_many :topics, through: :citation_topics, inverse_of: :sources

  # Join records linking this source to projects; destroyed with the source.
  has_many :project_sources, inverse_of: :source, dependent: :destroy
  has_many :projects, inverse_of: :sources, through: :project_sources

  # set_cached is empty in this class; subclasses provide the implementation.
  after_save :set_cached

  # `type` is the Rails STI column, see the attribute documentation above.
  validates_presence_of :type
  validates :type, inclusion: {in: ['Source::Bibtex', 'Source::Human', 'Source::Verbatim']} # TODO: not needed

  # Nested project_source attributes are ignored when reject_project_sources returns a truthy value.
  accepts_nested_attributes_for :project_sources, reject_if: :reject_project_sources

  # Soft validations: each names the checking method first, then an optional
  # `fix:` method. Only :sv_html_tags is declared without a fix.
  soft_validate(
    :sv_cached_names,
    set: :cached_names,
    fix: :sv_fix_cached_names,
    name: 'Cached names',
    description: 'Check if cached values need to be updated' )

  soft_validate(
    :sv_stated_year,
    set: :stated_year,
    fix: :sv_fix_stated_year,
    name: 'Stated year',
    description: "'Stated year' is not needed if identical to 'year'" )

  soft_validate(
    :sv_html_tags,
    set: :html_tags,
    name: 'html tags',
    description: 'Check if html has both open and close tags' )

    # Redirect type here
  # Parse a BibTeX file and build one Source::Bibtex (via
  # Source::Bibtex.new_from_bibtex) per entry, for previewing an import.
  #
  # @param file [#read] an IO-like object whose content is BibTeX text
  # @return [Array]
  #   a two element Array: `[sources, nil]` on success, `[[], message]` when
  #   BibTeX::ParseError is raised. Any other error propagates to the caller.
  #   TODO: return a more informative response?
  def self.batch_preview(file)
    text = file.read.force_encoding('UTF-8')
    entries = BibTeX::Bibliography.parse(text, filter: :latex)
    previews = entries.map { |entry| Source::Bibtex.new_from_bibtex(entry) }
    [previews, nil]
  rescue BibTeX::ParseError => e
    [[], e.message]
  end

  # Short "authors, year" label for this source.
  #
  # @return [String]
  #   cached_author_string and year joined with ', ' (nil parts are skipped,
  #   no year suffix), or 'not yet calculated' while the record is new
  def author_year
    if new_record?
      'not yet calculated'
    else
      parts = [cached_author_string, year].compact
      parts.join(', ')
    end
  end

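  # Create Source::Bibtex records from the entries of a BibTeX file.
  #
  # @param file [#read] an IO-like object whose content is BibTeX text
  # @return [Hash, false]
  #   `{records: Array, count: Integer}`, where count is the number of records
  #   saved, or false when any error is raised during the import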
  def self.batch_create(file)
    records = []
    saved = 0

    # One transaction wraps the whole import. Any error raised inside it
    # (parse errors included) is rescued below and reported as `false`.
    Source.transaction do
      text = file.read.force_encoding('UTF-8')
      BibTeX::Bibliography.parse(text, filter: :latex).each do |entry|
        source = Source::Bibtex.new_from_bibtex(entry)
        # Invalid records are not saved, but are still handed back to the caller.
        saved += 1 if source.valid? && source.save
        records << source
      end
    end

    {records: records, count: saved}
  rescue
    false
  end

  # Ids of the sources a user cited, within a project, during the last week.
  #
  # @param user_id [Integer] matched against citations.updated_by_id
  # @param project_id [Integer] matched against citations.project_id
  # @param used_on [String] a model name, matched against citations.citation_object_type
  # @return [Array]
  #   unique Source ids, ordered by citations.updated_at descending;
  #   no limit is applied here, callers trim the list themselves
  def self.used_recently(user_id, project_id, used_on = 'TaxonName')
    citation_conditions = {
      updated_by_id: user_id,
      project_id:,
      citation_object_type: used_on,
      updated_at: (1.week.ago..)
    }

    Source.select('sources.id')
      .joins(:citations)
      .where(citations: citation_conditions)
      .order('citations.updated_at DESC')
      .pluck(:id)
      .uniq
  end

  # Build the lists of sources offered to a user when selecting a source.
  #
  # @param user_id [Integer]
  # @param project_id [Integer]
  # @param target [String] a citable model name, passed on to used_recently
  # @return [Hash] sources optimized for user selection, with the keys
  #   :quick, :pinboard and :recent, each holding an Array of sources
  def self.select_optimized(user_id, project_id, target = 'TaxonName')
    recent_ids = used_recently(user_id, project_id, target)

    pinned = Source.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a

    # Sources created by this user and updated within the last two hours (at most 5).
    created_lately = Source.where(created_by_id: user_id, updated_at: 2.hours.ago..Time.now)
      .order('created_at DESC')
      .limit(5)
      .order(:cached)
      .to_a

    inserted = Source.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a

    unless recent_ids.empty?
      # Top up both lists with sources whose citations were updated recently.
      created_lately = (created_lately + Source.where('"sources"."id" IN (?)', recent_ids.first(6)).to_a).uniq
      inserted = (inserted + Source.where('"sources"."id" IN (?)', recent_ids.first(4)).to_a).uniq
    end

    {quick: inserted, pinboard: pinned, recent: created_lately}
  end

  # @return [Array]
  #   the citation_object of every citation of this source, one entry per citation
  def cited_objects
    citations.map(&:citation_object)
  end

  # @return [Boolean]
  #   true when the STI `type` of this record is 'Source::Bibtex'
  def is_bibtex?
    'Source::Bibtex' == type
  end

  # @param project_id [Integer] the id of the Project to look for
  # @return [Boolean]
  #   true when `projects` contains a project with that id
  def is_in_project?(project_id)
    matching = projects.where(id: project_id)
    matching.any?
  end

  # The month is passed through Utilities::Dates.month_index first, which
  # allows values from bibtex like 'may' to be handled.
  #
  # @return [Date, nil]
  #   the result of Utilities::Dates.nomenclature_date for day, month and year,
  #   converted with to_date; nil when that method returns nil
  def nomenclature_date
    month_number = Utilities::Dates.month_index(month)
    calculated = Utilities::Dates.nomenclature_date(day, month_number, year)
    calculated&.to_date
  end

  # Create and save a copy of this source.
  #
  # The copy is marked with a "[CLONE of <id>] " prefix (on `verbatim` for
  # Source::Verbatim, on `title` for Source::Bibtex), every role is re-created
  # for the copy with the same person, type and position, and year_suffix is
  # cleared.
  #
  # NOTE: this overrides Object#clone, and the result of `save` is not checked.
  #
  # @return [Source] the copy, whether or not it could be saved
  def clone
    copy = dup
    prefix = "[CLONE of #{id}] "

    if type == 'Source::Verbatim'
      copy.verbatim = prefix + verbatim.to_s
    elsif type == 'Source::Bibtex'
      copy.title = prefix + title.to_s
    end

    roles.reload.each do |role|
      copy.roles << Role.new(person: role.person, type: role.type, position: role.position)
    end

    copy.year_suffix = nil
    copy.save
    copy
  end

  protected

  # Hook registered with `after_save :set_cached`.
  # Intentionally empty here; defined in subclasses.
  # @return [nil]
  def set_cached
    nil
  end

  # Intentionally empty here; defined in subclasses.
  # @return [nil]
  def get_cached
    nil
  end

  # @param attributed [Hash] nested attributes for one project_source; only 'project_id' is read
  # @return [Boolean]
  def reject_project_sources(attributed)
    project_id = attributed['project_id']

    # Nothing to link without a project.
    return true if project_id.blank?

    # Reject when this source is already linked to the project. The query
    # result is returned directly so the method always answers true or false
    # (it used to fall through and return nil when nothing was rejected).
    ProjectSource.where(project_id: project_id, source_id: id).any?
  end

  # Soft validation placeholder; see the validation in subclasses.
  # @return [true]
  def sv_cached_names
    true
  end

  # Fix for sv_cached_names: run set_cached inside a transaction.
  #
  # @return [Boolean]
  #   true when set_cached ran without raising, false when an error was raised
  def sv_fix_cached_names
    Source.transaction { set_cached }
    true
  rescue
    false
  end

  # Soft validation: flag a 'stated_year' that merely repeats 'year'.
  #
  # Nothing is reported when stated_year is empty. Previously a source with
  # neither year nor stated_year set was flagged too (both render as ''),
  # although there was no stated year to delete.
  #
  # @return [void]
  def sv_stated_year
    stated = stated_year.to_s
    return if stated.strip.empty? || stated != year.to_s

    soft_validations.add(
      :base, "'Stated year' is not needed if identical to 'year'; applying the Fix will delete it",
      success_message: "'Stated year' was deleted",
      failure_message:  "Failed to delete 'Stated year'")
  end

  # Fix for sv_stated_year: clear stated_year and persist the change.
  #
  # The outcome of `save` is now reported; before, a save that returned false
  # (for example because the record is invalid) was still reported as a
  # successful fix.
  #
  # @return [Boolean]
  #   true only when the record was saved; false when save reports failure
  #   or an error is raised
  def sv_fix_stated_year
    saved = false
    Source.transaction do
      self.stated_year = nil
      saved = save
    end
    saved ? true : false
  rescue
    false
  end


  # Soft validation: warn when the title contains an unmatched <i> or </i>.
  #
  # Every complete <i>...</i> pair without tags inside it is removed first;
  # any <i> or </i> that is left over is reported on :title. Only italics
  # tags are checked.
  def sv_html_tags
    return unless title.present?

    leftover = title.squish.gsub(/\<i>[^<>]*?<\/i>/, '')
    return unless leftover.include?('<i>') || leftover.include?('</i>')

    soft_validations.add(:title, 'The title contains unmatched html tags')
  end
end