# lib/arx/entities/paper.rb
#
# Summary
#
# Maintainability
# A
# 0 mins
# Test Coverage
module Arx

  # Entity/model representing an arXiv paper.
  class Paper
    include HappyMapper
    include Inspector

    # The attributes of an arXiv paper.
    #
    # The list is frozen so that it cannot be modified by accident at runtime.
    # @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
    ATTRIBUTES = %i[
      id url version revision?
      title summary authors
      primary_category categories
      published_at updated_at
      comment? comment
      journal? journal
      pdf? pdf_url
      doi? doi_url
    ].freeze

    # HappyMapper mapping: a {Paper} is built from an +entry+ element of the parsed document.
    tag 'entry'

    # Maps the +id+ element, passing its value to the +clean+ parser of +Cleaner+.
    # NOTE(review): the parsed value is presumably stored in +@id+, which the
    # hand-written +id+ and +version+ methods read - confirm against HappyMapper.
    element :id, Cleaner, parser: :clean, tag: 'id'
    # Returns the arXiv identifier of the paper, extracted from the stored +@id+ value.
    #
    # @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
    # @example
    #   1705.01662v1
    #   cond-mat/0211034
    # @param version [Boolean] Whether or not to include the paper's version.
    # @return [String] The paper's identifier.
    def id(version = false)
      raw_identifier = @id
      Cleaner.extract_id(raw_identifier, version: version)
    end

    # Builds the URL of the paper's abstract page on the arXiv website.
    #
    # @example
    #   http://arxiv.org/abs/1705.01662v1
    #   http://arxiv.org/abs/cond-mat/0211034
    # @param version [Boolean] Whether or not to include the paper's version.
    # @return [String] The paper's arXiv URL.
    def url(version = false)
      identifier = id(version)
      "http://arxiv.org/abs/#{identifier}"
    end

    # Returns the version number of the paper, extracted from the stored +@id+ value.
    #
    # @return [Integer] The paper's version.
    def version
      raw_identifier = @id
      Cleaner.extract_version(raw_identifier)
    end

    # Tells whether this paper is a revision of an earlier submission.
    #
    # @note A paper is a revision if its {version} is greater than 1.
    # @return [Boolean]
    def revision?
      current_version = version
      current_version > 1
    end

    # @!method updated_at
    # The date that the paper was last updated.
    #
    # @return [DateTime]
    element :updated_at, DateTime, tag: 'updated'

    # @!method published_at
    # The original publish/submission date of the paper.
    #
    # @return [DateTime]
    element :published_at, DateTime, tag: 'published'

    # @!method title
    # The title of the paper.
    #
    # @return [String]
    element :title, Cleaner, parser: :clean, tag: 'title'

    # @!method authors
    # The authors of the paper.
    #
    # @return [Array<Author>]
    has_many :authors, Author, tag: 'author'

    # @!method primary_category
    # The primary category of the paper.
    #
    # @return [Category]
    element :primary_category, Category, tag: 'primary_category'
    # +category+ is an alias of +primary_category+.
    alias_method :category, :primary_category

    # @!method categories
    # The categories of the paper.
    #
    # @return [Array<Category>]
    has_many :categories, Category, tag: 'category'

    # @!method summary
    # The summary (or abstract) of the paper.
    #
    # @return [String]
    element :summary, Cleaner, parser: :clean, tag: 'summary'
    # +abstract+ is an alias of +summary+.
    alias_method :abstract, :summary

    # @!method comment?
    # Whether or not the paper has a comment.
    #
    # @return [Boolean]

    # @!method comment
    # The comment of the paper.
    #
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
    # @raise {Error::MissingField} If the paper does not have a comment.
    # @return [String]
    element :comment, Cleaner, parser: :clean, tag: 'comment'

    # @!method journal?
    # Whether or not the paper has a journal reference.
    #
    # @return [Boolean]

    # @!method journal
    # The journal reference of the paper.
    #
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
    # @raise {Error::MissingField} If the paper does not have a journal reference.
    # @return [String]
    element :journal, Cleaner, parser: :clean, tag: 'journal_ref'

    # For each optional field, define a predicate (+comment?+, +journal?+) and
    # replace the plain reader with one that raises when the field is absent.
    %i[comment journal].each do |optional|
      exists = :"#{optional}?"
      ivar = :"@#{optional}"

      define_method exists do
        value = instance_variable_get(ivar)
        # The variable is nil when the field was never populated (for example on
        # a paper that was not built from a document); treat nil like an empty
        # value instead of failing with NoMethodError.
        !(value.nil? || value.empty?)
      end

      define_method optional do
        raise Error::MissingField.new(id, optional) unless send(exists)

        instance_variable_get(ivar)
      end
    end

    has_many :links, Link, tag: 'link'

    # @!method pdf?
    # Whether or not the paper has a PDF link.
    #
    # @return [Boolean]

    # @!method pdf_url
    # Link to the PDF version of the paper.
    #
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
    # @raise {Error::MissingLink} If the paper does not have a PDF link.
    # @return [String]

    # @!method doi?
    # Whether or not the paper has a DOI (Digital Object Identifier) link.
    #
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @return [Boolean]

    # @!method doi_url
    # Link to the DOI (Digital Object Identifier) of the paper.
    #
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
    # @raise {Error::MissingLink} If the paper does not have a DOI link.
    # @return [String]

    # For each link type, define a predicate (+pdf?+, +doi?+) that asks every
    # link the same question, and a +*_url+ reader that returns the +href+ of
    # the first matching link or raises when there is none.
    %i[pdf doi].each do |link_type|
      predicate = :"#{link_type}?"

      define_method(predicate) { links.any?(&predicate) }

      define_method "#{link_type}_url" do
        raise Error::MissingLink.new(id, link_type.to_s.upcase) unless send(predicate)

        links.find(&predicate).href
      end
    end

    # Serializes the {Paper} object into a +Hash+.
    #
    # Every name in {ATTRIBUTES} becomes a key. Attributes whose reader raises
    # (for example an optional field that is missing) are left out of the result.
    #
    # @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
    # @return [Hash]
    def to_h(deep = false)
      hash = {}

      ATTRIBUTES.each do |attribute|
        begin
          hash[attribute] = send(attribute)
        rescue StandardError
          # Best-effort: an attribute that cannot be read is skipped rather than
          # aborting the whole serialization.
          next
        end
      end

      return hash unless deep

      # Build new arrays with +map+ instead of +map!+: the arrays stored in the
      # hash are the paper's own +authors+ and +categories+ arrays, and mutating
      # them in place would replace the paper's objects with plain hashes.
      hash[:authors] = hash[:authors].map(&:to_h)
      hash[:categories] = hash[:categories].map(&:to_h)
      hash[:primary_category] = hash[:primary_category].to_h
      hash
    end

    # Produces a JSON-compatible hash by round-tripping the paper through its JSON string.
    #
    # @note Deep-serializes {Author} and {Category} objects.
    # @return [Hash] The resulting JSON hash.
    def as_json
      serialized = to_json
      JSON.parse(serialized)
    end

    # Serializes the {Paper} object into a valid JSON string.
    #
    # @note Deep-serializes {Author} and {Category} objects.
    # @param args [Array] Optional generator arguments. The +json+ library passes its
    #   generator state when an object is nested inside an +Array+ or +Hash+ that is
    #   being serialized; accepting and forwarding it keeps such calls from failing
    #   with an +ArgumentError+.
    # @return [String] The resulting JSON string.
    def to_json(*args)
      to_h(true).to_json(*args)
    end

    # Compares this paper with another object.
    #
    # @note This only performs a basic equality check between the papers' identifiers (disregarding version).
    #   This means that a different version of the same paper will be viewed as equal.
    # @param paper [Paper] The paper to compare against.
    # @return [Boolean] +false+ when the argument is not a {Paper}.
    def ==(paper)
      return false unless paper.is_a?(Paper)

      id == paper.id
    end

    # Downloads the paper and saves it in PDF format at the specified path.
    #
    # The download completes before anything is written, so a failed download
    # (or a paper without a PDF link) leaves an existing file at +path+ untouched.
    # If writing fails, the partially written file is removed and the error is
    # re-raised.
    #
    # @param path [String] The file path to store the PDF at.
    # @raise {Error::MissingLink} If the paper does not have a PDF link.
    # @return [Integer] The number of bytes written.
    def save(path)
      # Block form closes the remote stream as soon as it has been read.
      pdf_content = URI.open(pdf_url, &:read)

      begin
        File.binwrite(path, pdf_content)
      rescue StandardError
        # Do not leave a truncated PDF behind.
        File.delete(path) if File.file?(path)
        raise
      end
    end

    # Builds a short, human-readable description of the {Paper}: versioned
    # identifier, publication date, at most two author names (followed by
    # +...+ when there are more) and the title.
    #
    # @return [String]
    def to_s
      versioned_id = id(true)
      published_on = published_at.strftime('%Y-%m-%d')
      names = authors.map(&:name)
      names = names.first(2) << '...' if names.size > 2
      author_list = names.join(', ')
      "Arx::Paper(id: #{versioned_id}, published_at: #{published_on}, authors: [#{author_list}], title: #{title})"
    end

    # Passes every name in ATTRIBUTES to +inspector+
    # (presumably provided by the Inspector mixin included in this class - confirm there).
    inspector(*ATTRIBUTES)
  end
end