# opennorth/scraperwiki-api-ruby
#
# View on GitHub
# lib/scraperwiki-api/matchers.rb
#
# Summary
#
# Maintainability
# F
# 3 days
# Test Coverage
require 'time'

require 'rspec'
require 'yajl'

module ScraperWiki
  class API
    # @example
    #   require 'scraperwiki-api'
    #   api = ScraperWiki::API.new
    #
    #   info = api.scraper_getinfo('example-scraper').first
    #
    #   describe 'example-scraper' do
    #     include ScraperWiki::API::Matchers
    #     subject {info}
    #
    #     it {should be_protected}
    #     it {should be_editable_by('frabcus')}
    #     it {should run(:daily)}
    #     it {should_not be_broken}
    #     it {should have_a_table('swdata')}
    #     it {should have_a_row_count_of(42).on('swdata')}
    #
    #     # Check for missing keys:
    #     it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
    #
    #     # Check for extra keys:
    #     it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
    #   end
    #
    #   data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
    #
    #   describe 'example-scraper' do
    #     include ScraperWiki::API::Matchers
    #     subject {data}
    #
    #     it {should set_any_of(['name', 'first_name', 'last_name'])}
    #
    #     # Validate the values of individual fields:
    #     it {should_not have_blank_values.in('name')}
    #     it {should have_unique_values.in('email')}
    #     it {should have_values_of(['M', 'F']).in('gender')}
    #     it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
    #     it {should have_values_starting_with('http://').in('url')}
    #     it {should have_values_ending_with('Inc.').in('company_name')}
    #     it {should have_integer_values.in('year')}
    #
    #     # If you store a hash or an array of hashes in a field as a JSON string,
    #     # you can validate the values of these subfields by chaining on an +at+:
    #     it {should have_values_of(['M', 'F']).in('extra').at('gender')}
    #
    #     # Check for missing keys within subfields:
    #     it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
    #
    #     # Check for extra keys within subfields:
    #     it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
    #   end
    #
    # RSpec matchers for ScraperWiki scrapers.
    # @see http://rubydoc.info/gems/rspec-expectations/RSpec/Matchers
    module Matchers
      # Base class for every matcher in this module. It keeps the expected
      # value passed to the constructor and the subject passed to +matches?+ or
      # +does_not_match?+, and declares the two message methods that concrete
      # matchers must provide.
      class CustomMatcher
        # @param expected the value the subject is compared against
        def initialize(expected)
          @expected = expected
        end

        # Records the subject. Subclasses call +super+ and then return the
        # result of their own check.
        #
        # @param actual the subject under test
        # @return the subject
        def matches?(actual)
          @actual = actual
        end

        # Records the subject when the matcher is used in negated form.
        #
        # @param actual the subject under test
        # @return the subject
        def does_not_match?(actual)
          @actual = actual
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def failure_message
          # The constant used to be misspelled +NotImplementerError+, which
          # surfaced as a NameError instead of the intended error.
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def negative_failure_message
          raise NotImplementedError, 'Subclasses must implement this method'
        end
      end

      # Scraper matchers -------------------------------------------------------

      # Base class for matchers whose subject is a hash describing a scraper
      # (the result of +scraper_getinfo+ in the example at the top of this
      # file).
      class ScraperInfoMatcher < CustomMatcher
        # RSpec calls +does_not_match?+ for negated expectations whenever the
        # matcher responds to it. The inherited implementation returns the
        # subject, which is truthy, so negated scraper matchers such as
        # +should_not be_broken+ passed regardless of the scraper's state.
        # Negating the subclass's own +matches?+ restores the intended check.
        #
        # @param actual [Hash] the scraper info under test
        # @return [Boolean] whether the subject fails to match
        def does_not_match?(actual)
          !matches?(actual)
        end
      end

      # Matches a scraper whose +privacy_status+ equals the expected status.
      class PrivacyStatusMatcher < ScraperInfoMatcher
        # @param actual [Hash] scraper info with a +privacy_status+ key
        def matches?(actual)
          super
          status = actual['privacy_status']
          status == @expected
        end

        def failure_message
          name = @actual['short_name']
          "expected #{name} to be #{@expected}"
        end

        def negative_failure_message
          name = @actual['short_name']
          "expected #{name} to not be #{@expected}"
        end
      end
      # Builds a matcher that checks for the +'public'+ privacy status.
      #
      # @example
      #   it {should be_public}
      # @return [PrivacyStatusMatcher]
      def be_public
        PrivacyStatusMatcher.new('public')
      end
      # Builds a matcher that checks for the +'visible'+ privacy status.
      #
      # @example
      #   it {should be_protected}
      # @return [PrivacyStatusMatcher]
      def be_protected
        PrivacyStatusMatcher.new('visible')
      end
      # Builds a matcher that checks for the +'private'+ privacy status.
      #
      # @example
      #   it {should be_private}
      # @return [PrivacyStatusMatcher]
      def be_private
        PrivacyStatusMatcher.new('private')
      end

      # Matches a scraper that lists the expected user under the +owner+ or
      # +editor+ role in its +userroles+ hash.
      class UserRolesMatcher < ScraperInfoMatcher
        # @param actual [Hash] scraper info with a +userroles+ key
        # @return [Boolean] whether the expected user is an owner or an editor
        def matches?(actual)
          super
          roles = actual['userroles'] || {}
          %w(owner editor).any? do |userrole|
            # Treat a missing role list as empty rather than calling
            # +include?+ on nil.
            (roles[userrole] || []).include? @expected
          end
        end

        def failure_message
          "expected #{@actual['short_name']} to be editable by #{@expected}"
        end

        def negative_failure_message
          "expected #{@actual['short_name']} to not be editable by #{@expected}"
        end
      end
      # Builds a matcher that checks whether a user owns or can edit the
      # scraper.
      #
      # @example
      #   it {should be_editable_by 'frabcus'}
      # @param expected the user to look for
      # @return [UserRolesMatcher]
      def be_editable_by(expected)
        UserRolesMatcher.new(expected)
      end

      # Matches a scraper whose +run_interval+ equals the value registered
      # under the expected key in +ScraperWiki::API::RUN_INTERVALS+.
      class RunIntervalMatcher < ScraperInfoMatcher
        # @param actual [Hash] scraper info with a +run_interval+ key
        def matches?(actual)
          super
          actual['run_interval'] == ScraperWiki::API::RUN_INTERVALS[@expected]
        end

        def failure_message
          if never?
            "expected #{@actual['short_name']} to never run"
          else
            "expected #{@actual['short_name']} to run #{@expected}"
          end
        end

        def negative_failure_message
          if never?
            "expected #{@actual['short_name']} to run at some time"
          else
            "expected #{@actual['short_name']} to not run #{@expected}"
          end
        end

        private

        # The expected value is an interval key, and +never_run+ passes
        # +:never+. The messages used to compare it with -1, which a symbol
        # never equals, so the "never run" wording was unreachable.
        def never?
          @expected == :never
        end
      end
      # Builds a matcher that checks the scraper's run interval.
      #
      # @example
      #   it {should run(:daily)}
      # @param expected a key of +ScraperWiki::API::RUN_INTERVALS+
      # @return [RunIntervalMatcher]
      def run(expected)
        RunIntervalMatcher.new(expected)
      end
      # Builds a run-interval matcher for the +:never+ interval.
      #
      # @example
      #   it {should never_run}
      # @return [RunIntervalMatcher]
      def never_run
        RunIntervalMatcher.new(:never)
      end

      # Matches a scraper whose +last_run+ time lies within a given span of
      # now. The span is the expected number multiplied by the unit selected
      # through one of the chainable unit methods; when no unit is chained the
      # number is taken as seconds.
      class LastRunMatcher < ScraperInfoMatcher
        # Each unit method stores the number of seconds in one unit and returns
        # the matcher so that it can be handed to +should+.
        def months
          @multiplier = 2592000 # 30 days
          self
        end
        alias :month :months

        def weeks
          @multiplier = 604800
          self
        end
        alias :week :weeks

        def days
          @multiplier = 86400
          self
        end
        alias :day :days

        def hours
          @multiplier = 3600
          self
        end
        alias :hour :hours

        def minutes
          @multiplier = 60
          self
        end
        alias :minute :minutes

        def seconds
          # This was 3600, which made +seconds+ behave exactly like +hours+.
          @multiplier = 1
          self
        end
        alias :second :seconds

        # @todo +last_run+ seems to follow British Summer Time, in which case it
        #   will be +00, not +01, for part of the year.
        # @param actual [Hash] scraper info with a +last_run+ key
        # @return [Boolean] whether the last run is more recent than the span
        def matches?(actual)
          super
          Time.now - Time.parse("#{@actual['last_run']}+01") < span
        end

        # @return the allowed age of the last run, in seconds
        def span
          @expected * (@multiplier || 1)
        end

        def failure_message
          "expected #{@actual['short_name']} to have run within #{span} seconds"
        end

        def negative_failure_message
          "expected #{@actual['short_name']} to not have run within #{span} seconds"
        end
      end
      # Builds a matcher that checks how recently the scraper last ran. Chain
      # a unit method to choose the unit of +expected+.
      #
      # @example
      #   it {should have_run_within(7).days}
      # @param expected a number of time units
      # @return [LastRunMatcher]
      def have_run_within(expected)
        LastRunMatcher.new(expected)
      end

      # Matches a scraper whose data summary lists the expected table.
      class TableMatcher < ScraperInfoMatcher
        # @param actual [Hash] scraper info with +datasummary+ / +tables+ keys
        def matches?(actual)
          super
          tables = actual['datasummary']['tables']
          tables.key?(@expected)
        end

        def failure_message
          name = @actual['short_name']
          "expected #{name} to have a #{@expected} table"
        end

        def negative_failure_message
          name = @actual['short_name']
          "expected #{name} to not have a #{@expected} table"
        end
      end
      # Builds a matcher that checks for the presence of a table.
      #
      # @example
      #   it {should have_a_table('swdata')}
      # @param expected the table name
      # @return [TableMatcher]
      def have_a_table(expected)
        TableMatcher.new(expected)
      end

      # Base class for scraper matchers that inspect one particular table.
      class TablesMatcher < ScraperInfoMatcher
        # Chooses the table to inspect.
        #
        # @param table the table name
        # @return [TablesMatcher] the matcher itself, to allow chaining
        def on(table)
          tap { @table = table }
        end
      end

      # Base class for matchers that compare a table's keys with the expected
      # keys. Subclasses supply +difference+ and the two predicates used to
      # build the messages.
      class KeysMatcher < TablesMatcher
        # @param actual [Hash] scraper info
        # @return [Boolean] whether +difference+ is empty
        def matches?(actual)
          super
          difference.empty?
        end

        def failure_message
          "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
        end

        def negative_failure_message
          "#{@actual['short_name']} #{negative_failure_predicate}: #{difference.join ', '}"
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def failure_predicate
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # This stub used to be named +negative_failure_message+, which replaced
        # the working definition above with one that always raised.
        #
        # @raise [NotImplementedError] unless overridden by a subclass
        def negative_failure_predicate
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def difference
          raise NotImplementedError, 'Subclasses must implement this method'
        end
      end

      # Reports the expected keys that the chosen table lacks.
      class MissingKeysMatcher < KeysMatcher
        # @return [Array] expected keys not found in the table; a table that
        #   is absent from the data summary counts as having no keys
        def difference
          table = @actual['datasummary']['tables'][@table]
          present = table ? table['keys'] : []
          @expected - present
        end

        def failure_predicate
          'is missing keys'
        end

        def negative_failure_predicate
          "isn't missing keys"
        end
      end
      # Builds a matcher that checks for keys missing from a table.
      #
      # @example
      #   it {should have_at_least_the_keys(['fieldA', 'fieldB']).on('swdata')}
      # @param expected [Array] the keys the table must have
      # @return [MissingKeysMatcher]
      def have_at_least_the_keys(expected)
        MissingKeysMatcher.new(expected)
      end

      # Reports the keys of the chosen table that are not among the expected
      # keys.
      class ExtraKeysMatcher < KeysMatcher
        # @return [Array] table keys that were not expected; a table that is
        #   absent from the data summary counts as having no keys
        def difference
          table = @actual['datasummary']['tables'][@table]
          present = table ? table['keys'] : []
          present - @expected
        end

        def failure_predicate
          'has extra keys'
        end

        def negative_failure_predicate
          'has no extra keys'
        end
      end
      # Builds a matcher that checks for unexpected keys in a table.
      #
      # @example
      #   it {should have_at_most_the_keys(['fieldA', 'fieldB', 'fieldC', 'fieldD']).on('swdata')}
      # @param expected [Array] the keys the table may have
      # @return [ExtraKeysMatcher]
      def have_at_most_the_keys(expected)
        ExtraKeysMatcher.new(expected)
      end

      # Matches a scraper whose chosen table has the expected number of rows.
      class CountMatcher < TablesMatcher
        # @return the +count+ recorded for the table in the data summary, or 0
        #   when the table is not listed
        def count
          table = @actual['datasummary']['tables'][@table]
          table ? table['count'] : 0
        end

        # @param actual [Hash] scraper info
        def matches?(actual)
          super
          count == @expected
        end

        def failure_message
          name = @actual['short_name']
          "expected #{name} to have #{@expected} rows, not #{count}"
        end

        def negative_failure_message
          name = @actual['short_name']
          "expected #{name} to not have #{@expected} rows"
        end
      end
      # Builds a matcher that checks the number of rows in a table.
      #
      # @example
      #   it {should have_a_row_count_of(42).on('swdata')}
      # @param expected the expected row count
      # @return [CountMatcher]
      def have_a_row_count_of(expected)
        CountMatcher.new(expected)
      end

      # Base class for matchers that inspect the scraper's run events.
      class RunEventsMatcher < ScraperInfoMatcher
        # @return the first entry of the scraper's +runevents+
        def last_run
          events = @actual['runevents']
          events[0]
        end
      end

      # Matches a scraper whose most recent run event carries an exception
      # message.
      class ExceptionMessageMatcher < RunEventsMatcher
        # @param actual [Hash] scraper info with a +runevents+ key
        # @return the exception message (truthy) when the scraper is broken,
        #   nil otherwise
        def matches?(actual)
          super
          exception_message
        end

        # @return the +exception_message+ of the last run, or nil when there is
        #   no run event to read it from
        def exception_message
          event = last_run
          # Without any run event +last_run+ is nil; report "not broken"
          # instead of raising NoMethodError.
          event && event['exception_message']
        end

        # Shown when +should be_broken+ fails, i.e. the scraper is not broken.
        # The two messages used to be swapped, so a failing
        # +should_not be_broken+ claimed that the scraper "isn't broken".
        def failure_message
          "#{@actual['short_name']} isn't broken"
        end

        # Shown when +should_not be_broken+ fails, i.e. the scraper is broken.
        def negative_failure_message
          "#{@actual['short_name']} is broken: #{exception_message}"
        end
      end
      # Builds a matcher that checks whether the last run raised an exception.
      # The matcher takes no expected value.
      #
      # @example
      #   it {should_not be_broken}
      # @return [ExceptionMessageMatcher]
      def be_broken
        ExceptionMessageMatcher.new(nil)
      end

      # Datastore matchers -----------------------------------------------------

      # Base class for matchers whose subject is a set of datastore records,
      # given either as an array of hashes or as a hash with +keys+ and +data+
      # entries. Subclasses supply +mismatches+ (records that break the
      # expectation), +matches+ (records that satisfy it, used for negated
      # expectations) and the two failure descriptions.
      class DatastoreMatcher < CustomMatcher
        # Normalizes the subject to an array of record hashes.
        #
        # @return [Array<Hash>] the records
        # @raise [NotImplementedError] if the subject is neither an Array nor a
        #   Hash
        def items
          @items ||= if Array === @actual
            @actual
          elsif Hash === @actual
            if @actual['data']
              # Pair each row's values with the column names in +keys+.
              @actual['data'].map do |row|
                Hash[@actual['keys'].zip(row)]
              end
            else
              # No rows: return an empty list of records. This used to be an
              # empty Hash, unlike every other branch.
              []
            end
          else
            raise NotImplementedError, "Can only handle jsondict or jsonlist formats"
          end
        end

        # @param actual the datastore records
        # @return [Boolean] whether no record breaks the expectation
        def matches?(actual)
          super
          @mismatches = mismatches
          @mismatches.empty?
        end

        # @param actual the datastore records
        # @return [Boolean] whether no record satisfies the expectation
        def does_not_match?(actual)
          super
          @matches = matches
          @matches.empty?
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def matches
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def mismatches
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @return the offending records: the mismatches when +matches?+ has
        #   run, otherwise the matches collected by +does_not_match?+
        def failures
          @mismatches || @matches
        end

        # @return [Integer] the number of offending records
        def failure_size
          failures.size
        end

        def failure_message
          "#{failure_size} of #{items.size} #{failure_description}\n#{failures.map(&:inspect).join "\n"}"
        end

        def negative_failure_message
          "#{failure_size} of #{items.size} #{negative_failure_description}\n#{failures.map(&:inspect).join "\n"}"
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def failure_description
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def negative_failure_description
          raise NotImplementedError, 'Subclasses must implement this method'
        end
      end

      # Flags records in which every one of the expected fields is blank.
      class SetAnyOf < DatastoreMatcher
        # @return the records that set none of the expected fields
        def mismatches
          items.reject do |item|
            @expected.any? do |field|
              value = item[field]
              # A value is set when it is non-empty, or simply truthy when it
              # has no notion of emptiness.
              value.respond_to?(:empty?) ? !value.empty? : !!value
            end
          end
        end

        def failure_description
          fields = @expected.join(',')
          "records didn't set any of #{fields}"
        end

        def negative_failure_description
          fields = @expected.join(',')
          "records set any of #{fields}"
        end
      end
      # Builds a matcher that checks that each record sets at least one of the
      # given fields.
      #
      # @example
      #   it {should set_any_of(['name', 'first_name', 'last_name'])}
      # @param expected [Array] the field names
      # @return [SetAnyOf]
      def set_any_of(expected)
        SetAnyOf.new(expected)
      end

      # Base class for matchers that validate the values of one field, or of a
      # subfield inside a field that holds a JSON string. Subclasses supply
      # +match?+ and the two predicates used to build the messages.
      class FieldMatcher < DatastoreMatcher
        # Chooses the field to validate.
        #
        # @param field the field name
        # @return [FieldMatcher] the matcher itself, to allow chaining
        def in(field)
          @field = field
          self
        end

        # Chooses a subfield inside the JSON stored in the field.
        #
        # @param subfield the subfield name
        # @return [FieldMatcher] the matcher itself, to allow chaining
        def at(subfield)
          @subfield = subfield
          self
        end

        # Filters the records with +select+ or +reject+, using +match?+ as the
        # test. Blank values are never tested: they are dropped by both
        # filters, so they count neither as matches nor as mismatches.
        #
        # @note +@subfield+ can be a hash or an array of hashes
        # @param meth [Symbol] +:select+ or +:reject+
        # @raise [NotImplementedError] if a subfield is requested and the
        #   parsed field is neither a hash nor an array of hashes
        def matcher(meth)
          # The block result that drops a record under the given filter.
          blank_result = meth == :reject
          items.send(meth) do |item|
            value = item[@field]
            if blank? value
              blank_result
            elsif @subfield
              subfield_match? Yajl::Parser.parse(value), blank_result
            else
              match? value
            end
          end
        end

        # @return the records whose value satisfies +match?+
        def matches
          matcher :select
        end

        # @return the records whose value fails +match?+
        def mismatches
          matcher :reject
        end

        # @return whether the value is empty, or falsy when it has no notion
        #   of emptiness
        def blank?(v)
          v.respond_to?(:empty?) ? v.empty? : !v
        end

        def failure_description
          if @subfield
            "#{@field}:#{@subfield} values #{failure_predicate}"
          else
            "#{@field} values #{failure_predicate}"
          end
        end

        def negative_failure_description
          if @subfield
            "#{@field}:#{@subfield} values #{negative_failure_predicate}"
          else
            "#{@field} values #{negative_failure_predicate}"
          end
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def failure_predicate
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # @raise [NotImplementedError] unless overridden by a subclass
        def negative_failure_predicate
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        private

        # Tests the subfield of a parsed JSON value: the hash itself, or every
        # hash of an array.
        def subfield_match?(parsed, blank_result)
          if Hash === parsed
            subfield_value_match? parsed, blank_result
          elsif Array === parsed
            parsed.all? do |element|
              unless Hash === element
                raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
              end
              subfield_value_match? element, blank_result
            end
          else
            raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
          end
        end

        # Tests one hash's subfield value, skipping it when blank.
        def subfield_value_match?(hash, blank_result)
          value = hash[@subfield]
          blank?(value) ? blank_result : match?(value)
        end
      end

      # Detects records whose field (or subfield) value is blank.
      #
      # The generic FieldMatcher#matcher skips blank values before it ever
      # calls +match?+, so it can never report a blank value as a match: a
      # negated expectation (+should_not have_blank_values+) always passed.
      # This class therefore filters the records itself.
      class HaveBlankValues < FieldMatcher
        def match?(v)
          blank? v
        end

        # @return the records holding a blank value; for a subfield stored in
        #   an array of hashes, one blank entry is enough
        def matches
          items.select { |item| blank_value?(item, :any?) }
        end

        # @return the records holding a present value; for a subfield stored in
        #   an array of hashes, the record is dropped only when every entry is
        #   blank (the behavior of the generic filter)
        def mismatches
          items.reject { |item| blank_value?(item, :all?) }
        end

        def failure_predicate
          'are blank'
        end

        def negative_failure_predicate
          'are present'
        end

        private

        # @param item [Hash] a record
        # @param quantifier [Symbol] +:any?+ or +:all?+, applied to the hashes
        #   of an array-valued field
        # @raise [NotImplementedError] if a subfield is requested and the
        #   parsed field is neither a hash nor an array of hashes
        def blank_value?(item, quantifier)
          value = item[@field]
          return true if blank? value
          return false unless @subfield

          parsed = Yajl::Parser.parse value
          if Hash === parsed
            blank? parsed[@subfield]
          elsif Array === parsed
            parsed.send(quantifier) do |element|
              unless Hash === element
                raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
              end
              blank? element[@subfield]
            end
          else
            raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
          end
        end
      end
      # Builds a matcher that looks for blank values in a field. The matcher
      # takes no expected value.
      #
      # @example
      #   it {should_not have_blank_values.in('name')}
      # @return [HaveBlankValues]
      def have_blank_values
        HaveBlankValues.new(nil)
      end

      # Validates that values belong to the expected list.
      class HaveValuesOf < FieldMatcher
        # @return [Boolean] whether the expected list includes the value
        def match?(v)
          @expected.include?(v)
        end

        def failure_predicate
          choices = @expected.join(', ')
          "aren't one of #{choices}"
        end

        def negative_failure_predicate
          choices = @expected.join(', ')
          "are one of #{choices}"
        end
      end
      # Builds a matcher that restricts a field's values to a list.
      #
      # @example
      #   it {should have_values_of(['M', 'F']).in('gender')}
      # @param expected [Array] the allowed values
      # @return [HaveValuesOf]
      def have_values_of(expected)
        HaveValuesOf.new(expected)
      end

      # Validates values against the expected pattern.
      class HaveValuesMatching < FieldMatcher
        # @return the result of indexing the value with the expected pattern
        #   (for a string and a regular expression: the matched text or nil)
        def match?(v)
          v[@expected]
        end

        def failure_predicate
          pattern = @expected.inspect
          "don't match #{pattern}"
        end

        def negative_failure_predicate
          pattern = @expected.inspect
          "match #{pattern}"
        end
      end
      # Builds a matcher that validates a field's values against a pattern.
      #
      # @example
      #   it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
      # @param expected the pattern, e.g. a regular expression
      # @return [HaveValuesMatching]
      def have_values_matching(expected)
        HaveValuesMatching.new(expected)
      end

      # Detects values that occur more than once in a field, or in a subfield
      # of a field that holds a JSON string. Blank values are ignored.
      class HaveUniqueValues < FieldMatcher
        # @return [Array] the values that occur more than once
        # @raise [NotImplementedError] if a subfield is requested and a parsed
        #   field is neither a hash nor an array of hashes
        def mismatches
          counts = Hash.new 0
          items.each do |item|
            values_of(item).each { |value| counts[value] += 1 }
          end
          counts.select{|_,count| count > 1}.keys
        end

        def failure_predicate
          "aren't unique"
        end

        def negative_failure_predicate
          'are unique'
        end

        private

        # Collects the non-blank values one record contributes to the count:
        # the field value itself, or the subfield value of each hash found in
        # the parsed field.
        def values_of(item)
          raw = item[@field]
          return [] if blank? raw
          return [raw] unless @subfield

          parsed = Yajl::Parser.parse raw
          hashes = if Hash === parsed
            [parsed]
          elsif Array === parsed
            parsed
          else
            raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
          end

          values = hashes.map do |hash|
            unless Hash === hash
              raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
            end
            hash[@subfield]
          end
          values.reject { |value| blank? value }
        end
      end
      # Builds a matcher that looks for duplicate values in a field. The
      # matcher takes no expected value.
      #
      # @example
      #   it {should have_unique_values.in('email')}
      # @return [HaveUniqueValues]
      def have_unique_values
        HaveUniqueValues.new(nil)
      end

      # Validates that values begin with the expected prefix.
      class HaveValuesStartingWith < FieldMatcher
        # @return [Boolean] whether the value starts with the expected prefix
        def match?(v)
          v.start_with?(@expected)
        end

        def failure_predicate
          prefix = @expected
          "don't start with #{prefix}"
        end

        def negative_failure_predicate
          prefix = @expected
          "start with #{prefix}"
        end
      end
      # Builds a matcher that checks the prefix of a field's values.
      #
      # @example
      #   it {should have_values_starting_with('http://').in('url')}
      # @param expected the required prefix
      # @return [HaveValuesStartingWith]
      def have_values_starting_with(expected)
        HaveValuesStartingWith.new(expected)
      end

      # Validates that values end with the expected suffix.
      class HaveValuesEndingWith < FieldMatcher
        # @return [Boolean] whether the value ends with the expected suffix
        def match?(v)
          v.end_with?(@expected)
        end

        def failure_predicate
          suffix = @expected
          "don't end with #{suffix}"
        end

        def negative_failure_predicate
          suffix = @expected
          "end with #{suffix}"
        end
      end
      # Builds a matcher that checks the suffix of a field's values.
      #
      # @example
      #   it {should have_values_ending_with('Inc.').in('company_name')}
      # @param expected the required suffix
      # @return [HaveValuesEndingWith]
      def have_values_ending_with(expected)
        HaveValuesEndingWith.new(expected)
      end

      # Validates that values can be converted with +Kernel#Integer+.
      class HaveIntegerValues < FieldMatcher
        # @return the converted integer, or false when the conversion raises
        def match?(v)
          Integer(v)
        rescue StandardError
          false
        end

        def failure_predicate
          "aren't integers"
        end

        def negative_failure_predicate
          'are integers'
        end
      end
      # Builds a matcher that checks that a field's values are integers. The
      # matcher takes no expected value.
      #
      # @example
      #   it {should have_integer_values.in('year')}
      # @return [HaveIntegerValues]
      def have_integer_values
        HaveIntegerValues.new(nil)
      end

      # Base class for matchers that compare the keys of the hash, or of each
      # hash in the array, stored as a JSON string in a field. Subclasses
      # supply +difference+, +predicate+ and +negative_predicate+.
      class FieldKeyMatcher < FieldMatcher
        # @param v [String] the JSON string stored in the field
        # @return [Boolean] whether every hash has an empty +difference+
        # @raise [NotImplementedError] if the parsed value is neither a hash
        #   nor an array of hashes
        def match?(v)
          hashes_in(Yajl::Parser.parse(v)).all? do |hash|
            unless Hash === hash
              raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
            end
            difference(hash).empty?
          end
        end

        # @param v [Hash] one parsed hash
        # @raise [NotImplementedError] unless overridden by a subclass
        def difference(v)
          raise NotImplementedError, 'Subclasses must implement this method'
        end

        # The predicates used to call +difference+ without its argument, which
        # raised ArgumentError whenever a message was built. They now list the
        # differing keys gathered from the offending records.
        def failure_predicate
          "#{predicate}: #{reported_differences.join ', '}"
        end

        def negative_failure_predicate
          "#{negative_predicate}: #{reported_differences.join ', '}"
        end

        private

        # Wraps a parsed hash in an array and returns a parsed array as is.
        def hashes_in(parsed)
          if Hash === parsed
            [parsed]
          elsif Array === parsed
            parsed
          else
            raise NotImplementedError, 'Can only handle subfields that are hashes or arrays of hashes'
          end
        end

        # Union of the +difference+ of every hash found in the field of the
        # offending records. Values that are not hashes are skipped so that
        # building a message never raises.
        # NOTE(review): reads the field itself and ignores +@subfield+ —
        # confirm whether key matchers are meant to be chained with +at+.
        def reported_differences
          keys = []
          (failures || []).each do |item|
            next if blank? item[@field]
            parsed = Yajl::Parser.parse item[@field]
            candidates = Hash === parsed ? [parsed] : Array(parsed)
            candidates.each do |hash|
              keys |= difference(hash) if Hash === hash
            end
          end
          keys
        end
      end

      # Reports expected keys that a hash stored in the field lacks.
      class HaveValuesWithAtLeastTheKeys < FieldKeyMatcher
        # @param v [Hash] one parsed hash
        # @return [Array] the expected keys absent from the hash
        def difference(v)
          present = v.keys
          @expected - present
        end

        def predicate
          'are missing keys'
        end

        def negative_predicate
          "aren't missing keys"
        end
      end
      # Builds a matcher that checks for keys missing from the hashes stored
      # in a field.
      #
      # @example
      #   it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
      # @param expected [Array] the keys every hash must have
      # @return [HaveValuesWithAtLeastTheKeys]
      def have_values_with_at_least_the_keys(expected)
        HaveValuesWithAtLeastTheKeys.new(expected)
      end

      # Reports keys of a hash stored in the field that were not expected.
      class HaveValuesWithAtMostTheKeys < FieldKeyMatcher
        # @param v [Hash] one parsed hash
        # @return [Array] the keys of the hash that are not expected
        def difference(v)
          present = v.keys
          present - @expected
        end

        def predicate
          'have extra keys'
        end

        def negative_predicate
          'have no extra keys'
        end
      end
      # Builds a matcher that checks for unexpected keys in the hashes stored
      # in a field.
      #
      # @example
      #   it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
      # @param expected [Array] the keys a hash may have
      # @return [HaveValuesWithAtMostTheKeys]
      def have_values_with_at_most_the_keys(expected)
        HaveValuesWithAtMostTheKeys.new(expected)
      end
    end
  end
end