# lib/arx/entities/paper.rb
module Arx
# Entity/model representing an arXiv paper.
class Paper
include HappyMapper
include Inspector
# The attributes of an arXiv paper, in the order they are visited by {#to_h}.
#
# The list is frozen so that this shared constant cannot be modified at runtime.
#
# @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
ATTRIBUTES = %i[
  id url version revision?
  title summary authors
  primary_category categories
  published_at updated_at
  comment? comment
  journal? journal
  pdf? pdf_url
  doi? doi_url
].freeze
# A paper is mapped from an `entry` element of the parsed XML.
tag 'entry'
# Raw identifier read from the entry's `id` element and passed through Cleaner's +clean+ parser.
# The value is stored in +@id+, which the {#id} and {#version} methods defined below read from.
element :id, Cleaner, parser: :clean, tag: 'id'
# Returns the arXiv identifier of this paper.
#
# @note The identifier follows either {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
# @example
#   1705.01662v1
#   cond-mat/0211034
# @param version [Boolean] Pass +true+ to keep the paper's version in the identifier.
# @return [String] The identifier extracted from the raw +id+ value.
def id(version = false)
  raw_identifier = @id
  Cleaner.extract_id(raw_identifier, version: version)
end
# Builds the address of the paper's abstract page on the arXiv website.
#
# @example
#   http://arxiv.org/abs/1705.01662v1
#   http://arxiv.org/abs/cond-mat/0211034
# @param version [Boolean] Pass +true+ to include the paper's version in the URL.
# @return [String] The paper's arXiv URL.
def url(version = false)
  identifier = id(version)
  "http://arxiv.org/abs/#{identifier}"
end
# Returns the version number of the paper, extracted from the raw +id+ value.
#
# @return [Integer] The paper's version.
def version
  raw_identifier = @id
  Cleaner.extract_version(raw_identifier)
end
# Tells whether this paper is a revised version of an earlier submission.
#
# @note A paper counts as a revision when its {version} is greater than 1.
# @return [Boolean]
def revision?
  current_version = version
  current_version > 1
end
# @!method updated_at
# The date that the paper was last updated.
#
# Read from the entry's `updated` element.
#
# @return [DateTime]
element :updated_at, DateTime, tag: 'updated'
# @!method published_at
# The original publish/submission date of the paper.
#
# Read from the entry's `published` element.
#
# @return [DateTime]
element :published_at, DateTime, tag: 'published'
# @!method title
# The title of the paper.
#
# Read from the entry's `title` element and passed through Cleaner's +clean+ parser.
#
# @return [String]
element :title, Cleaner, parser: :clean, tag: 'title'
# @!method authors
# The authors of the paper.
#
# Collected from every `author` element of the entry.
#
# @return [Array<Author>]
has_many :authors, Author, tag: 'author'
# @!method primary_category
# The primary category of the paper.
#
# Read from the entry's `primary_category` element.
#
# @return [Category]
element :primary_category, Category, tag: 'primary_category'
# +category+ is an alias for +primary_category+.
alias_method :category, :primary_category
# @!method categories
# The categories of the paper.
#
# Collected from every `category` element of the entry.
#
# @return [Array<Category>]
has_many :categories, Category, tag: 'category'
# @!method summary
# The summary (or abstract) of the paper.
#
# Read from the entry's `summary` element and passed through Cleaner's +clean+ parser.
#
# @return [String]
element :summary, Cleaner, parser: :clean, tag: 'summary'
# +abstract+ is an alias for +summary+.
alias_method :abstract, :summary
# @!method comment?
# Whether or not the paper has a comment.
#
# @return [Boolean] +false+ when the stored comment is +nil+ or empty.
# @!method comment
# The comment of the paper.
#
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
# @raise {Error::MissingField} If the paper does not have a comment.
# @return [String]
element :comment, Cleaner, parser: :clean, tag: 'comment'
# @!method journal?
# Whether or not the paper has a journal reference.
#
# @return [Boolean] +false+ when the stored journal reference is +nil+ or empty.
# @!method journal
# The journal reference of the paper.
#
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
# @raise {Error::MissingField} If the paper does not have a journal reference.
# @return [String]
element :journal, Cleaner, parser: :clean, tag: 'journal_ref'
# Generates, for each optional text field, a predicate (+comment?+, +journal?+) and a
# reader (+comment+, +journal+) that replaces the plain reader declared above.
%i[comment journal].each do |optional|
  exists = "#{optional}?"
  ivar = "@#{optional}"

  # The field counts as present only when it holds a non-empty value. An unset
  # instance variable reads as nil, which is treated as "missing" rather than
  # letting +nil.empty?+ raise NoMethodError.
  define_method exists do
    value = instance_variable_get(ivar)
    !(value.nil? || value.empty?)
  end

  # Returns the stored value, or raises when the predicate reports it missing.
  define_method optional do
    raise Error::MissingField.new(id, optional) unless send(exists)

    instance_variable_get(ivar)
  end
end
# Every `link` element of the entry, mapped to {Link} objects.
has_many :links, Link, tag: 'link'
# @!method pdf?
# Tells whether any of the paper's links is a PDF link.
#
# @return [Boolean]
# @!method pdf_url
# The address of the PDF version of the paper.
#
# @note This is an optional metadata field on an arXiv paper. Use {pdf?} to check for it first.
# @raise {Error::MissingLink} If the paper does not have a PDF link.
# @return [String]
# @!method doi?
# Tells whether any of the paper's links is a DOI (Digital Object Identifier) link.
#
# @see https://arxiv.org/help/jref#doi
# @see https://arxiv.org/help/prep#doi
# @return [Boolean]
# @!method doi_url
# The address of the paper's DOI (Digital Object Identifier).
#
# @see https://arxiv.org/help/jref#doi
# @see https://arxiv.org/help/prep#doi
# @note This is an optional metadata field on an arXiv paper. Use {doi?} to check for it first.
# @raise {Error::MissingLink} If the paper does not have a DOI link.
# @return [String]
%i[pdf doi].each do |link_type|
  # The same predicate name is defined on the paper and sent to each link.
  predicate = :"#{link_type}?"

  define_method(predicate) { links.any?(&predicate) }

  define_method("#{link_type}_url") do
    raise Error::MissingLink.new(id, link_type.to_s.upcase) unless send(predicate)

    links.find(&predicate).href
  end
end
# Serializes the {Paper} object into a +Hash+.
#
# Every name in {ATTRIBUTES} is called in order; attributes whose method raises
# (such as optional fields that are missing) are left out of the result.
#
# Deep serialization builds new arrays for +:authors+ and +:categories+, so the
# collections returned by the paper's own readers are never modified.
#
# @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
# @return [Hash]
def to_h(deep = false)
  hash = {}
  ATTRIBUTES.each do |attribute|
    begin
      hash[attribute] = send(attribute)
    rescue StandardError
      # Best effort: an attribute that cannot be read is simply omitted.
      next
    end
  end
  return hash unless deep

  # Use map (not map!) so the arrays held by the paper keep their original objects.
  %i[authors categories].each do |key|
    hash[key] = hash[key].map(&:to_h) if hash[key]
  end
  hash[:primary_category] = hash[:primary_category].to_h
  hash
end
# Converts the {Paper} object into a JSON-compatible hash by parsing the
# string produced by {#to_json}.
#
# @note Deep-serializes {Author} and {Category} objects.
# @return [Hash] The resulting JSON hash.
def as_json
  json_text = to_json
  JSON.parse(json_text)
end
# Serializes the {Paper} object into a valid JSON string.
#
# Any arguments are forwarded to +Hash#to_json+. This lets the JSON generator
# call +to_json(state)+ on a paper nested inside another structure without
# raising an ArgumentError.
#
# @note Deep-serializes {Author} and {Category} objects.
# @param args [Array] Optional generator state/options, passed through unchanged.
# @return [String] The resulting JSON string.
def to_json(*args)
  to_h(true).to_json(*args)
end
# Compares this paper with another object.
#
# @note Only the papers' identifiers (without version) are compared, so a different
#   version of the same paper is considered equal.
# @param paper [Paper] The paper to compare against.
# @return [Boolean] +false+ for anything that is not a {Paper}.
def ==(paper)
  return false unless paper.is_a?(Paper)

  id == paper.id
end
# Downloads the paper and saves it in PDF format at the specified path.
#
# The download is read inside a block so the opened stream is closed afterwards.
# If writing fails, the partially written file is removed. A file that already
# existed at +path+ is left untouched when the failure happens before writing
# starts (for example when the paper has no PDF link or the download fails).
#
# @param path [String] The file path to store the PDF at.
# @raise [StandardError] Re-raises whatever error interrupted the download or the write.
# @return [Integer] The number of bytes written.
def save(path)
  writing = false
  pdf_content = URI.open(pdf_url, &:read)
  # From here on the target file may be created or truncated by this method.
  writing = true
  File.open(path, 'wb') { |file| file.write pdf_content }
rescue StandardError
  File.delete(path) if writing && File.file?(path)
  raise
end
# A string representation of the {Paper} object.
#
# @return [String]
def to_s
_id = id true
_published_at = published_at.strftime("%Y-%m-%d")
_authors = authors.map(&:name)
_authors = [*_authors.first(2), '...'] if _authors.size > 2
"Arx::Paper(id: #{_id}, published_at: #{_published_at}, authors: [#{_authors.join(', ')}], title: #{title})"
end
# Passes every name in ATTRIBUTES to the +inspector+ macro.
# NOTE(review): presumably provided by the included Inspector module; confirm there.
inspector(*ATTRIBUTES)
end
end