athityakumar/daru-io

View on GitHub
lib/daru/io/importers/json.rb

Summary

Maintainability
A
0 mins
Test Coverage
require 'daru/io/importers/base'

module Daru
  module IO
    module Importers
      # JSON Importer Class, that extends `from_json` and `read_json` methods
      # to `Daru::DataFrame`
      class JSON < Base
        Daru::DataFrame.register_io_module :from_json, self
        Daru::DataFrame.register_io_module :read_json, self

        # Checks for required gem dependencies of JSON Importer
        def initialize
          # json parses the payloads and open-uri provides remote reads; both
          # are loaded lazily so they are only required once an importer is built.
          %w[json open-uri].each { |library| require library }

          # jsonpath backs the JSON-path selectors resolved through JsonPath.
          # NOTE(review): optional_gem is not defined in this file - presumably
          # inherited from Base; confirm how it reports a missing gem.
          optional_gem 'jsonpath'
        end

        # Reads data from a json file / remote json response
        #
        # @!method self.read(path)
        #
        # @param path [String] Local / Remote path to json file, where the dataframe is to be imported
        #   from.
        #
        # @return [Daru::IO::Importers::JSON]
        #
        # @example Reading from simply nested remote json response
        #   url = 'https://data.nasa.gov/resource/2vr3-k9wn.json'
        #   simple_read_instance = Daru::IO::Importers::JSON.read(url)
        #
        # @example Reading from complexly nested remote json response
        #   url = 'http://api.tvmaze.com/singlesearch/shows?q=game-of-thrones&embed=episodes'
        #   complex_read_instance = Daru::IO::Importers::JSON.read(url)
        def read(path)
          # Kernel#open is deliberately avoided: since Ruby 3.0 it no longer
          # opens URLs (even with open-uri loaded), and for strings starting
          # with "|" it spawns a subprocess. The dispatch below mirrors what
          # open-uri used to do, minus the subprocess behaviour:
          #   * objects that already know how to open themselves (Pathname, URI),
          #   * "scheme://" strings whose parsed URI is openable (http, https, ftp),
          #   * anything else is treated as a local file path.
          source = path
          if path.is_a?(String) && path =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
            source = URI.parse(path)
          end

          # Block forms guarantee the underlying handle / tempfile is closed.
          contents =
            if source.respond_to?(:open)
              source.open(&:read)
            else
              File.open(path, &:read)
            end

          @file_data = ::JSON.parse(contents)
          @json      = @file_data
          self
        end

        # Loads from a Ruby structure of Hashes and / or Arrays
        #
        # @!method self.from(instance)
        #
        # @param instance [Hash, Array, String] A simple / complexly nested JSON structure,
        #   or a String containing JSON, which is parsed before use
        #
        # @return [Daru::IO::Importers::JSON]
        #
        # @example Loading from Ruby Hash of Arrays
        #   from_instance = Daru::IO::Importers::JSON.from({x: [1,4], y: [2,5], z: [3, 6]})
        def from(instance)
          # @file_data keeps exactly what the caller handed over; @json holds
          # the structure later used by #call (parsed when a String is given).
          @file_data = instance
          @json =
            case instance
            when String then ::JSON.parse(instance)
            else instance
            end
          self
        end

        # Imports a `Daru::DataFrame` from a JSON Importer instance
        #
        # @param columns [Array] JSON-path selectors to select specific fields
        #   from the JSON input.
        # @param order [String or Array] Either a JSON-path selector string, or
        #   an array containing the order of the `Daru::DataFrame`.
        # @param index [String or Array] Either a JSON-path selector string, or
        #   an array containing the index of the `Daru::DataFrame`.
        # @param named_columns [Hash] Column names mapped to the JSON-path selectors
        #   that select the corresponding fields from the JSON input.
        #
        # @return [Daru::DataFrame]
        #
        # @note For more information on using JSON-path selectors, have a look at
        #   the explanations {http://www.rubydoc.info/gems/jsonpath/0.5.8 here}
        #   and {http://goessner.net/articles/JsonPath/ here}.
        #
        # @example Importing without jsonpath selectors
        #   df = simple_read_instance.call
        #
        #   #=> #<Daru::DataFrame(202x10)>
        #   #    designation discovery_      h_mag      i_deg    moid_au orbit_clas  period_yr ...
        #   #  0 419880 (20 2011-01-07       19.7       9.65      0.035     Apollo       4.06  ...
        #   #  1 419624 (20 2010-09-17       20.5      14.52      0.028     Apollo          1  ...
        #   #  2 414772 (20 2010-07-28         19      23.11      0.333     Apollo       1.31  ...
        #   # ...        ...        ...        ...        ...        ...        ...       ...  ...
        #
        # @example Importing with jsonpath selectors
        #   df = complex_read_instance.call(
        #     "$.._embedded..episodes..name",
        #     "$.._embedded..episodes..season",
        #     "$.._embedded..episodes..number",
        #     index: (10..70).to_a,
        #     RunTime: "$.._embedded..episodes..runtime"
        #   )
        #
        #   #=> #<Daru::DataFrame(61x4)>
        #   #         name           season     number    RunTime
        #   #   10 Winter is           1          1         60
        #   #   11 The Kingsr          1          2         60
        #   #   12  Lord Snow          1          3         60
        #   #  ...        ...        ...        ...        ...
        #
        # @example Importing from `from` method
        #   df = from_instance.call
        #
        #   #=> #<Daru::DataFrame(2x3)>
        #   #       x   y   z
        #   #   0   1   2   3
        #   #   1   4   5   6
        def call(*columns, order: nil, index: nil, **named_columns)
          init_opts(*columns, order: order, index: index, **named_columns)

          @data  = fetch_data
          @index = at_jsonpath(@index)
          # When no order is supplied, positional selectors are labelled
          # 0, 1, 2, ... and named selectors are labelled by their keys.
          @order = at_jsonpath(@order) ||
                   (0...@columns.size).to_a + @named_columns.keys

          Daru::DataFrame.new(@data, order: @order, index: @index)
        end

        private

        # Resolves a JSON-path selector against the loaded JSON; anything that
        # is not a String (nil, an Array, ...) is passed through untouched.
        def at_jsonpath(jsonpath)
          return jsonpath unless jsonpath.is_a?(String)

          JsonPath.on(@json, jsonpath)
        end

        # Picks the data handed to the DataFrame, based on which selectors
        # were given to #call.
        def fetch_data
          if @named_columns.empty?
            # No selectors at all: the loaded JSON is used as it is.
            return @json if @columns.empty?

            # A lone unnamed selector is assumed to point at the entire dataset.
            return at_jsonpath(@columns.first) if @columns.size == 1
          end

          # Otherwise every selector (unnamed first, then named) yields one column.
          (@columns + @named_columns.values).map { |selector| at_jsonpath(selector) }
        end

        # Stores the arguments of #call on the importer and checks that they
        # can be used together.
        def init_opts(*columns, order: nil, index: nil, **named_columns)
          # Selectors: positional ones and name => selector pairs.
          @columns       = columns
          @named_columns = named_columns

          # Labelling options for the resulting DataFrame.
          @index         = index
          @order         = order

          validate_params
        end

        # Rejects calls that supply both an explicit order and named columns.
        #
        # @raise [ArgumentError] when order is non-nil and named columns are present
        def validate_params
          conflicting = !@order.nil? && !@named_columns.empty?
          return unless conflicting

          message = 'Do not pass on order and named columns together, at the same ' \
                    'function call. Please use only order or only named_columns.'
          raise ArgumentError, message
        end
      end
    end
  end
end