CartoDB/cartodb20

View on GitHub
services/datasources/lib/datasources/url/box.rb

Summary

Maintainability
D
2 days
Test Coverage
require_relative '../../../../../lib/carto/http/client'

module CartoDB
  module Datasources
    module Url
      # BoxAPI module is a replacement for Boxr, which requires Ruby >= 2.
      # Most of this code has been extracted from Boxr.
      # This should be migrated when we upgrade to Ruby 2.
      module BoxAPI
        class ExpiredTokenError < StandardError; end

        def self.oauth_url(state, options = {})
          host = options.fetch(:host, "app.box.com")
          response_type = options.fetch(:response_type, "code")
          scope = options[:scope]
          folder_id = options[:folder_id]
          client_id = options[:client_id]

          template = Addressable::Template.new("https://{host}/api/oauth2/authorize{?query*}")

          query = { "response_type" => "#{response_type}", "state" => "#{state}", "client_id" => "#{client_id}" }
          query["scope"] = "#{scope}" unless scope.nil?
          query["folder_id"] = "#{folder_id}" unless folder_id.nil?

          template.expand("host" => "#{host}", "query" => query)
        end

        def self.get_tokens(code, options = {})
          grant_type = options[:grant_type]
          assertion = options[:assertion]
          scope = options[:scope]
          username = options[:username]
          client_id = options[:client_id]
          client_secret = options[:client_secret]

          uri = "https://api.box.com/oauth2/token"
          body = "grant_type=#{grant_type}&client_id=#{client_id}&client_secret=#{client_secret}"
          body = body + "&code=#{code}" unless code.nil?
          body = body + "&scope=#{scope}" unless scope.nil?
          body = body + "&username=#{username}" unless username.nil?
          body = body + "&assertion=#{assertion}" unless assertion.nil?

          auth_post(uri, body)
        end

        def self.refresh_tokens(refresh_token, options = {})
          client_id = options[:client_id]
          client_secret = options[:client_secret]

          uri = "https://api.box.com/oauth2/token"
          body = "grant_type=refresh_token&refresh_token=#{refresh_token}&client_id=#{client_id}&client_secret=#{client_secret}"

          auth_post(uri, body)
        end

        def self.auth_post(uri, body, json_body = true)
          uri = Addressable::URI.encode(uri)

          res = post(uri, body: body)

          if res.response_code == 200
            json_body ? JSON.parse(res.response_body) : res.response_body
          else
            handle_error_response(res)
          end
        end

        def self.handle_error_response(res)
          body_json = JSON.parse(res.response_body)

          if body_json['error'] == 'invalid_grant'
            raise ExpiredTokenError.new(body_json.fetch('error_description', 'Expired token'))
          else
            raise "Box Error status: #{res.response_code}, body: #{res.response_body}, headers: #{res.response_headers}"
          end
        rescue StandardError => e
          CartoDB.notify_exception(e, self: inspect, response: res.inspect)
          raise e
        end

        def self.post(uri, options = {})
          http_client = Carto::Http::Client.get('box',
                                                connecttimeout: 60,
                                                timeout: 600
                                               )
          http_client.post(uri.to_s, options)
        end

        def self.get(uri, options = {})
          query = options[:query]
          header = options[:header]
          follow_redirect = options[:follow_redirect]

          http_client = Carto::Http::Client.get('box',
                                                connecttimeout: 60,
                                                timeout: 600)
          response = http_client.get(uri.to_s, headers: header, followlocation: follow_redirect, params: query)
          response
        end
      end

      module BoxAPI
        class Client

          API_URI = "https://api.box.com/2.0"
          SEARCH_URI = "#{API_URI}/search"
          FILES_URI = "#{API_URI}/files"

          def initialize(access_token, options = {})
            client_id = options[:client_id]
            client_secret = options[:client_secret]

            @access_token = access_token
            if @access_token.nil?
              raise CartoDB::Datasources::TokenExpiredOrInvalidError.new('Access token cannot be nil', DATASOURCE_NAME)
            end

            @client_id = client_id
            @client_secret = client_secret
          end

          def search(query, options = {})
            scope = options[:scope]
            file_extensions = options[:file_extensions]
            created_at_range = options[:created_at_range]
            updated_at_range = options[:updated_at_range]
            size_range = options[:size_range]
            owner_user_ids = options[:owner_user_ids]
            ancestor_folder_ids = options[:ancestor_folder_ids]
            content_types = options[:content_types]
            type = options[:type]
            limit = options.fetch(:limit, 30)
            offset = options.fetch(:offset, 0)

            query = { query: query }
            query[:scope] = scope unless scope.nil?
            query[:file_extensions] = file_extensions unless file_extensions.nil?
            query[:created_at_range] = created_at_range unless created_at_range.nil?
            query[:updated_at_range] = updated_at_range unless updated_at_range.nil?
            query[:size_range] = size_range unless size_range.nil?
            query[:owner_user_ids] = owner_user_ids unless owner_user_ids.nil?
            query[:ancestor_folder_ids] = ancestor_folder_ids unless ancestor_folder_ids.nil?
            query[:content_types] = content_types unless content_types.nil?
            query[:type] = type unless type.nil?
            query[:limit] = limit unless limit.nil?
            query[:offset] = offset unless offset.nil?

            if offset.present? && offset > 0
              # external pagination
              results, _response = get(SEARCH_URI, query: query)
              return results['entries']
            end

            entries = []
            offset = 0
            loop do
              query[:offset] = offset
              query[:limit] = limit - entries.size if limit.present? # possibly capped by the Box API
              results, _response = get(SEARCH_URI, query: query)
              new_entries = results['entries']
              entries += new_entries
              offset += new_entries.size
              break if entries.size >= results['total_count'] || entries.size >= limit
            end
            entries
          end

          def download_url(file, options = {})
            version = options[:version]

            download_file(file, version: version, follow_redirect: false)
          end

          def download_file(file, options = {})
            version = options[:version]
            follow_redirect = options.fetch(:follow_redirect, true)

            file_id = ensure_id(file)
            begin
              uri = "#{FILES_URI}/#{file_id}/content"
              query = {}
              query[:version] = version unless version.nil?

              # Boxr didn't have 200
              _body_json, response = get(uri, query: query, success_codes: [302, 202, 200], follow_redirect: false, process_response: false)

              if response.response_code == 302
                location = response.header['Location'][0]

                if follow_redirect
                  file, response = get(location, process_response: false)
                else
                  return location # simply return the url
                end
              elsif response.response_code == 202
                retry_after_seconds = response.header['Retry-After'][0]
                sleep retry_after_seconds.to_i
              elsif response.response_code == 200
                file = response.response_body
              end
            end until file

            file
          end

          FOLDER_AND_FILE_FIELDS = [:type, :id, :sequence_id, :etag, :name, :created_at, :modified_at, :description,
                                    :size, :path_collection, :created_by, :modified_by, :trashed_at, :purged_at,
                                    :content_created_at, :content_modified_at, :owned_by, :shared_link,
                                    :folder_upload_email,
                                    :parent, :item_status, :item_collection, :sync_state, :has_collaborations,
                                    :permissions, :tags,
                                    :sha1, :shared_link, :version_number, :comment_count, :lock, :extension,
                                    :is_package,
                                    :expiring_embed_link, :can_non_owners_invite]
          FOLDER_AND_FILE_FIELDS_QUERY = FOLDER_AND_FILE_FIELDS.join(',')

          def file_from_id(file_id, fields = [])
            file_id = ensure_id(file_id)
            uri = "#{FILES_URI}/#{file_id}"
            query = build_fields_query(fields, FOLDER_AND_FILE_FIELDS_QUERY)
            file, _response = get(uri, query: query)
            file
          end

          # Required for all providers
          DATASOURCE_NAME = 'box'

          def revoke_tokens(token)
            uri = "https://api.box.com/oauth2/revoke"
            body = "client_id=#{@client_id}&client_secret=#{@client_secret}&token=#{token}"

            BoxAPI::auth_post(uri, body, false)
          rescue StandardError => ex
            raise AuthError.new("revoke_token: #{ex.message}", DATASOURCE_NAME)
          end

          private

          def build_fields_query(fields, all_fields_query)
            if fields == :all
              { fields: all_fields_query }
            elsif fields.is_a?(Array) && fields.length > 0
              { fields: fields.join(',') }
            else
              {}
            end
          end

          def ensure_id(item)
            return item if item.class == String || item.class == Integer || item.nil?
            return item.id if item.respond_to?(:id)
            return item['id'] if item.class == Hash
            raise "Expecting an id of class String or Integer, or object that responds to :id"
          end

          def get(uri, options = {})
            query = options[:query]
            success_codes = options.fetch(:success_codes, [200])
            process_response = options.fetch(:process_response, true)
            if_match = options[:if_match]
            box_api_header = options[:box_api_header]
            follow_redirect = options.fetch(follow_redirect, true)

            headers = standard_headers
            headers['If-Match'] = if_match unless if_match.nil?
            headers['BoxApi'] = box_api_header unless box_api_header.nil?

            res = BoxAPI::get(uri, query: query, header: headers, follow_redirect: follow_redirect)

            check_response_status(res, success_codes)

            if process_response
              return JSON.parse(res.response_body)
            else
              return res.response_body, res
            end
          end

          def check_response_status(res, success_codes)
            unless success_codes.include?(res.response_code)
              raise "BoxError status: #{res.response_code}, body: #{res.response_body}, header: #{res.response_headers}"
            end
          end

          def standard_headers
            { "Authorization" => "Bearer #{@access_token}" }
          end

        end
      end

      class Box < BaseOAuth

        # Required for all providers
        DATASOURCE_NAME = 'box'

        # Constructor (hidden)
        # @param config
        # [
        #  'application_name'
        #  'client_id'
        #  'client_secret'
        # ]
        # @param user ::User
        # @throws UninitializedError
        # @throws MissingConfigurationError
        def initialize(config, user)
          super(config, user, %w{ application_name client_id client_secret box_host }, DATASOURCE_NAME)

          raise UninitializedError.new('missing user instance', DATASOURCE_NAME) if user.nil?

          @access_token = nil
          @refresh_token = nil
        end

        # Factory method
        # @param config {}
        # @param user ::User
        # @return CartoDB::Datasources::Url::GDrive
        def self.get_new(config, user)
          new(config, user)
        end

        # If will provide a url to download the resource, or requires calling get_resource()
        # @return bool
        def providers_download_url?
          false
        end

        # Return the url to be displayed or sent the user to to authenticate and get authorization code.
        # Older implementations had a use_callback_flow parameter that became deprecated. Not implemented.
        # @return string | nil
        def get_auth_url
          service_name = service_name_for_user(DATASOURCE_NAME, @user)
          state = CALLBACK_STATE_DATA_PLACEHOLDER.sub('service', service_name).sub('user', @user.username)
          BoxAPI::oauth_url(state,
                            host: config['box_host'],
                            response_type: "code",
                            scope: nil,
                            folder_id: nil,
                            client_id: config['client_id']).to_s
        end

        # Validates authorization code, sets access and refresh tokens for current instance and stores (refresh) token
        # @param auth_code : string
        # @return string : Refresh token
        # Older implementations had a use_callback_flow parameter that became deprecated. Not implemented.
        # @throws AuthError
        def validate_auth_code(auth_code)
          set_tokens(get_tokens(auth_code))
          @refresh_token
        end

        # Validates the authorization callback
        # @param params : mixed
        def validate_callback(params)
          if params[:error].present?
            raise AuthError.new("validate_callback: #{params[:error]}", DATASOURCE_NAME)
          end

          if params[:code]
            validate_auth_code(params[:code])
          else
            raise AuthError.new('validate_callback: Missing authorization code', DATASOURCE_NAME)
          end
        end

        # Store (refresh) token. If it's not valid both access_token and refresh_token will be nil.
        # Triggers generation of a valid access token for the lifetime of this instance
        # @param token string
        # @throws AuthError
        def token=(token)
          set_tokens(get_fresh_tokens(token))
        rescue CartoDB::Datasources::Url::BoxAPI::ExpiredTokenError => e
          CartoDB.notify_exception(e, self: inspect, token: token)
          set_tokens('access_token' => nil, 'refresh_token' => nil)
        end

        # Retrieve (refresh) token
        # @return string | nil
        def token
          @refresh_token
        end

        # Perform the listing and return results
        # @param filter Array : (Optional) filter to specify which resources to retrieve. Leave empty for all supported.
        # @return [ { :id, :title, :url, :service } ]
        # @throws TokenExpiredOrInvalidError
        # @throws DataDownloadError
        def get_resources_list(filter = [])
          self.filter = filter

          # Box doesn't have a way to "retrieve everything" but it supports whitespaces for multiple search terms
          result = client.search(supported_extensions.join(' '),
                                 scope: nil,
                                 file_extensions: nil,
                                 created_at_range: nil,
                                 updated_at_range: nil,
                                 size_range: nil,
                                 owner_user_ids: nil,
                                 ancestor_folder_ids: nil,
                                 content_types: nil,
                                 type: nil,
                                 limit: 2000,
                                 offset: 0)

          result = result.map { |i| format_item_data(i) }.sort { |x, y| y[:updated_at] <=> x[:updated_at] }

          unless @formats.nil? || @formats.empty?
            result = result.select { |item| item[:filename] =~ /.*(#{@formats.join(')|(')})$/i }
          end

          result
        end

        # Retrieves a resource and returns its contents
        # @param id string
        # @return mixed
        # @throws TokenExpiredOrInvalidError
        # @throws DataDownloadError
        def get_resource(id)
          file = client.file_from_id(id)
          client.download_file(file)
        end

        # @param id string
        # @return Hash
        # @throws TokenExpiredOrInvalidError
        # @throws DataDownloadError
        # @throws NotFoundDownloadError
        def get_resource_metadata(id)
          result = client.file_from_id(id)

          if result.nil?
            message = "Retrieving file #{id} metadata: #{result.inspect}, should stop syncing"
            raise NotFoundDownloadError.new(message, DATASOURCE_NAME)
          end

          if result['item_status'] != 'active'
            raise DataDownloadError.new("Retrieving file #{id} metadata: #{result.inspect}", DATASOURCE_NAME)
          end

          format_item_data(result)
        end

        # Retrieves current filters
        # @return {}
        def filter
          @formats
        end

        # Sets current filters
        # @param filter_data {}
        def filter=(filter_data = [])
          @formats = filter_data
        end

        # Just return datasource name
        # @return string
        def to_s
          DATASOURCE_NAME
        end

        # If this datasource accepts a data import instance
        # @return Boolean
        def persists_state_via_data_import?
          false
        end

        # Stores the data import item instance to use/manipulate it
        # @param value DataImport
        # Not implemented
        def data_import_item=(_value)
          nil
        end

        # Checks if token is still valid or has been revoked
        # @return bool
        # @throws AuthError
        def token_valid?
          raise 'invalid_token' unless token

          # Any call would do, we just want to see if communicates or refuses the token
          result = client.search('test search')
          !result.nil?
        rescue StandardError => e
          if e.message =~ /invalid_token/
            CartoDB.notify_debug('Box invalid_token', self: inspect)
            false
          else
            CartoDB.notify_exception(e, self: inspect)
            raise e
          end
        end

        # Revokes current set token
        def revoke_token
          client.revoke_tokens(token)
        end

        private

        def set_tokens(tokens)
          @access_token = tokens['access_token']
          @refresh_token = tokens['refresh_token']
        end

        def client
          @client ||= get_client
        end

        def get_client
          BoxAPI::Client.new(@access_token,
                             client_id: config['client_id'],
                             client_secret: config['client_secret'])
        end

        def get_tokens(code)
          BoxAPI::get_tokens(code,
                             grant_type: "authorization_code",
                             assertion: nil,
                             scope: nil,
                             username: nil,
                             client_id: config['client_id'],
                             client_secret: config['client_secret'])
        end

        def get_fresh_tokens(refresh_token)
          tokens = BoxAPI::refresh_tokens(refresh_token,
                                          client_id: config['client_id'],
                                          client_secret: config['client_secret'])
          # Box refresh tokens can only be used once
          update_user_oauth(tokens['refresh_token'])
          tokens
        end

        def update_user_oauth(refresh_token)
          carto_user = Carto::User.find(@user.id)
          oauth = carto_user.oauth_for_service('box')
          if oauth
            oauth.token = refresh_token
            oauth.save
          end
        end

        # Formats all data to comply with our desired format
        # @param item_data Hash : Single item returned from GDrive API
        # @return { :id, :title, :url, :service, :checksum, :size, :filename, :updated_at }
        def format_item_data(item_data)
          {
            id:           item_data['id'],
            title:        item_data['name'],
            service:      DATASOURCE_NAME,
            checksum:     checksum_of(item_data.fetch('modified_at')),
            filename: item_data['name'],
            size: item_data['size'].to_i,
            updated_at: DateTime.rfc3339(item_data['content_modified_at'])
          }
        end

      end
    end
  end
end