# heronshoes/red_amber
#
# View on GitHub
# benchmark/dataframe.yml
#
# Summary
#
# Maintainability
# Test Coverage
# Fixed iteration count of 3 for the benchmark scripts below.
# NOTE(review): assumes benchmark_driver semantics (each script is looped this
# many times instead of the runner picking a count) — confirm against the runner.
loop_count: 3

# Library variants to compare: two pinned red_amber gem releases and the
# local working tree. Version-like values are quoted so they always load as
# strings.
contexts:
  - name: '0.2.0'
    gems:
      red_amber: '0.2.0'
  - name: '0.3.0'
    gems:
      red_amber: '0.3.0'
  # HEAD pins no gem. Its own prelude pushes ./lib (resolved against the
  # current working directory) to the front of the load path, so that
  # `require 'red_amber'` is meant to pick up the checked-out sources.
  - name: HEAD
    prelude: |
      $LOAD_PATH.unshift(File.expand_path('lib'))

# Shared setup for every benchmark script: loads the libraries and builds the
# four data frames (diamonds, starwars, import_cars, simpsons_paradox_covid)
# that the scripts under `benchmark:` refer to by name.
prelude: |
  require 'red_amber'
  require 'datasets-arrow'

  diamonds = RedAmber::DataFrame.new(Datasets::Diamonds.new.to_arrow)

  starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)

  # The inline table is loaded with format: :tsv, so its fields must be
  # separated by real tab characters. Each run of spaces/tabs is collapsed
  # into a single tab here, which keeps the table loadable even when an
  # editor or formatter has expanded the tabs into spaces. This relies on
  # no cell being empty and no cell containing blanks, as is the case below.
  import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV.gsub(/[ \t]+/, "\t")), format: :tsv)
    Year    Audi    BMW    BMW_MINI    Mercedes-Benz    VW
    2017    28336    52527    25427    68221    49040
    2018    26473    50982    25984    67554    51961
    2019    24222    46814    23813    66553    46794
    2020    22304    35712    20196    57041    36576
    2021    22535    35905    18211    51722    35215
  TSV

  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
  simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)

# Benchmark scripts, keyed by display name. Each one works on a data frame
# created in the top-level prelude.
benchmark:
  # diamonds: slice on carat > 1, pick cut/price, group by cut and take the
  # mean, sort on '-mean(price)', rename that column, then assign a second
  # price column scaled by 110.0.
  'D01: Diamonds test': |
    mean_by_cut =
      diamonds.slice { v(:carat) > 1 }.pick(:cut, :price).group(:cut).mean
    mean_by_cut
      .sort('-mean(price)')
      .rename('mean(price)': :mean_price_USD)
      .assign { [:mean_price_JPY, v(:mean_price_USD) * 110.0] }

  # starwars: drop the keys ending in 'color', remove entries whose species
  # equals 'NA', group by species with count and mean of height/mass, then
  # slice on count > 1.
  'D02: Starwars test': |
    without_colors = starwars.drop { keys.select { |col| col.end_with?('color') } }
    without_colors
      .remove { v(:species) == 'NA' }
      .group(:species) { [count(:species), mean(:height, :mass)] }
      .slice { v(:count) > 1 }

  # import_cars: to_long keyed on Year, straight back with to_wide, then
  # transpose.
  'D03: Import cars test': |
    long_form = import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
    long_form.to_wide(name: :Manufacturer, value: :Num_of_imported).transpose

  # simpsons_paradox_covid: select age_group == 'under 50', count per
  # (vaccine_status, outcome), to_wide on vaccine_status using the last key of
  # the counted frame as the value column, then assign both percentage columns.
  'D04: Simpsons paradox test': |
    under_50 = simpsons_paradox_covid[:age_group] == 'under 50'
    counted = simpsons_paradox_covid[under_50].group(:vaccine_status, :outcome).count
    counted
      .to_wide(name: :vaccine_status, value: counted.keys[-1])
      .assign {
        [
          [:'vaccinated_%', 100.0 * v(:vaccinated) / v(:vaccinated).sum],
          [:'unvaccinated_%', 100.0 * v(:unvaccinated) / v(:unvaccinated).sum]
        ]
      }