relaton/relaton-bib

View on GitHub
lib/relaton_bib/bibtex_parser.rb

Summary

Maintainability
A
25 mins
Test Coverage
require "bibtex"
require "iso639"

module RelatonBib
  # @todo: move this class to the RelatonBib::Bibtex module
  class BibtexParser
    class << self
      # @param bibtex [String]
      # @return [Hash{String=>RelatonBib::BibliographicItem}]
      def from_bibtex(bibtex) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
        BibTeX.parse(bibtex).reduce({}) do |h, bt|
          h[bt.key] = BibliographicItem.new(
            id: bt.key,
            docid: fetch_docid(bt),
            fetched: fetch_fetched(bt),
            type: fetch_type(bt),
            title: fetch_title(bt),
            contributor: fetch_contributor(bt),
            date: fetch_date(bt),
            place: fetch_place(bt),
            biblionote: fetch_note(bt),
            relation: fetch_relation(bt),
            extent: fetch_extent(bt),
            edition: bt["edition"]&.to_s,
            series: fetch_series(bt),
            link: fetch_link(bt),
            language: fetch_language(bt),
            classification: fetch_classification(bt),
            keyword: fetch_keyword(bt),
          )
          h
        end
      end

      private

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(bibtex) # rubocop:disable Metrics/AbcSize
        docid = []
        docid << DocumentIdentifier.new(id: bibtex.isbn.to_s, type: "isbn") if bibtex["isbn"]
        docid << DocumentIdentifier.new(id: bibtex.lccn.to_s, type: "lccn") if bibtex["lccn"]
        docid << DocumentIdentifier.new(id: bibtex.issn.to_s, type: "issn") if bibtex["issn"]
        docid
      end

      # @param bibtex [BibTeX::Entry]
      # @return [String, nil]
      def fetch_fetched(bibtex)
        Date.parse(bibtex.timestamp.to_s) if bibtex["timestamp"]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [String]
      def fetch_type(bibtex)
        case bibtex.type
        when :mastersthesis, :phdthesis then "thesis"
        when :conference then "inproceedings"
        when :misc then "standard"
        else bibtex.type.to_s
        end
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<Hash>]
      def fetch_place(bibtex)
        bibtex["address"] ? [bibtex.address.to_s] : []
      end

      # @param bibtex [BibTeX::Entry]
      # @return [RelatonBib::TypedTitleStringCollection]
      def fetch_title(bibtex)
        title = []
        title << { type: "main", content: bibtex.convert(:latex).title.to_s } if bibtex["title"]
        title << { type: "main", content: bibtex.convert(:latex).subtitle.to_s } if bibtex["subtitle"]
        TypedTitleStringCollection.new title
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<Hash>]
      def fetch_contributor(bibtex) # rubocop:disable Metrics/AbcSize
        contribs = []
        fetch_person(bibtex, "author") { |author| contribs << author }
        fetch_person(bibtex, "editor") { |editor| contribs << editor }

        fetch_org(bibtex["publisher"], "publisher") { |pub| contribs << pub }
        fetch_org(bibtex["institution"], "distributor", "sponsor") { |distr| contribs << distr }
        fetch_org(bibtex["organization"], "distributor", "sponsor") { |org| contribs << org }
        fetch_org(bibtex["school"], "distributor", "sponsor") { |school| contribs << school }

        fetch_howpublished(bibtex) { |pub| contribs << pub }

        contribs
      end

      def fetch_howpublished(bibtex, &_)
        return unless bibtex["howpublished"]

        /\\publisher\{(?<name>.+)\},\\url\{(?<url>.+)\}/ =~ bibtex.howpublished.to_s
        return unless name && url

        name.gsub!(/\{\\?([^\\]+)\}/, '\1')
        org = Organization.new(name: name, url: url)
        yield entity: org, role: [{ type: "publisher" }]
      end

      def fetch_org(org, type, desc = nil, &_)
        return unless org

        role = { type: type }
        role[:description] = [desc] if desc
        yield entity: Organization.new(name: org.to_s), role: [role]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::Person>]
      def fetch_person(bibtex, role, &_) # rubocop:disable Metrics/AbcSize
        bibtex[role]&.each do |name|
          parts = name.split ", "
          surname = LocalizedString.new parts.first
          fname = parts.size > 1 ? parts[1].split : []
          forename = fname.map { |fn| Forename.new content: fn }
          name = FullName.new(surname: surname, forename: forename)
          yield entity: Person.new(name: name), role: [{ type: role }]
        end
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<Hash>]
      def fetch_date(bibtex)
        date = []
        if bibtex["year"]
          on = Date.new(bibtex.year.to_i, bibtex["month_numeric"]&.to_i || 1).to_s
          date << { type: "published", on: on }
        end

        if bibtex["urldate"]
          date << { type: "accessed", on: Date.parse(bibtex.urldate.to_s).to_s }
        end

        date
      end

      # @param bibtex [BibTeX::Entry]
      # @return [RelatonBib::BiblioNoteCollection]
      def fetch_note(bibtex) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
        bibtex.select do |k, _v|
          %i[annote howpublished comment note content].include? k
        end.reduce(BiblioNoteCollection.new([])) do |mem, note|
          type = case note[0]
                 when :note then nil
                 when :content then "tableOfContents"
                 else note[0].to_s
                 end
          next mem if type == "howpublished" && note[1].to_s.match?(/^\\publisher\{.+\},\\url\{.+\}$/)

          mem << BiblioNote.new(type: type, content: note[1].to_s)
        end
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<Hash>]
      def fetch_relation(bibtex)
        return [] unless bibtex["booktitle"]

        ttl = TypedTitleString.new(type: "main", content: bibtex.booktitle.to_s)
        title = TypedTitleStringCollection.new [ttl]
        [{ type: "partOf", bibitem: BibliographicItem.new(title: title) }]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::BibItemLocality>]
      def fetch_extent(bibtex) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
        bibtex.select do |k, _v|
          %i[chapter pages volume].include? k
        end.reduce([]) do |mem, loc|
          if loc[0] == :pages
            type = "page"
            from, to = loc[1].to_s.split "-"
          else
            type = loc[0].to_s
            from = loc[1].to_s
            to = nil
          end
          mem << BibItemLocality.new(type, from, to)
        end
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::Series>]
      def fetch_series(bibtex) # rubocop:disable Metrics/MethodLength
        series = []
        if bibtex["journal"]
          series << Series.new(
            type: "journal",
            title: TypedTitleString.new(content: bibtex.journal.to_s),
            number: bibtex["number"]&.to_s
          )
        end

        if bibtex["series"]
          title = TypedTitleString.new content: bibtex.series.to_s
          series << Series.new(title: title)
        end
        series
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<RelatonBib::TypedUri>]
      def fetch_link(bibtex) # rubocop:disable Metrics/AbcSize
        link = []
        link << TypedUri.new(type: "src", content: bibtex.url.to_s) if bibtex["url"]
        link << TypedUri.new(type: "doi", content: bibtex.doi.to_s) if bibtex["doi"]
        link << TypedUri.new(type: "file", content: bibtex.file2.to_s) if bibtex["file2"]
        link
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<String>]
      def fetch_language(bibtex)
        return [] unless bibtex["language"]

        [Iso639[bibtex.language.to_s].alpha2]
      end

      # @param bibtex [BibTeX::Entry]
      # @return [RelatonBib::Classification, nil]
      def fetch_classification(bibtex)
        cls = []
        cls << Classification.new(type: "type", value: bibtex["type"].to_s) if bibtex["type"]
        # cls << Classification.new(type: "keyword", value: bibtex.keywords.to_s) if bibtex["keywords"]
        if bibtex["mendeley-tags"]
          cls << Classification.new(type: "mendeley", value: bibtex["mendeley-tags"].to_s)
        end
        cls
      end

      # @param bibtex [BibTeX::Entry]
      # @return [Array<String>]
      def fetch_keyword(bibtex)
        bibtex["keywords"]&.split(/,\s?/) || []
      end
    end
  end
end