# benchmark/dataframe.yml
# How many loops each benchmark script gets
# (presumably benchmark_driver's `loop_count` setting; TODO confirm).
loop_count: 3

# One entry per red_amber build being compared. The first two pin a
# released gem version; HEAD pins nothing and instead puts the `lib`
# directory (resolved against the current working directory) at the front
# of Ruby's load path.
contexts:
  - name: '0.2.0'
    gems:
      red_amber: '0.2.0'
  - name: '0.3.0'
    gems:
      red_amber: '0.3.0'
  - name: HEAD
    # Context-level prelude: belongs to the HEAD entry only and is distinct
    # from the top-level `prelude` key.
    prelude: |
      $LOAD_PATH.unshift(File.expand_path('lib'))
# Shared Ruby setup. It defines the local variables that the benchmark
# entries reference: `diamonds`, `starwars`, `import_cars` and
# `simpsons_paradox_covid`.
prelude: |
  require 'red_amber'
  require 'datasets-arrow'

  diamonds = RedAmber::DataFrame.new(Datasets::Diamonds.new.to_arrow)

  starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)

  # The table is loaded with `format: :tsv`, so its columns must be
  # tab-separated. Tabs are written as \t escapes (expanded by the <<~TSV
  # heredoc) so that editors or formatters converting tabs to spaces cannot
  # silently collapse the table into a single column.
  import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
    Year\tAudi\tBMW\tBMW_MINI\tMercedes-Benz\tVW
    2017\t28336\t52527\t25427\t68221\t49040
    2018\t26473\t50982\t25984\t67554\t51961
    2019\t24222\t46814\t23813\t66553\t46794
    2020\t22304\t35712\t20196\t57041\t36576
    2021\t22535\t35905\t18211\t51722\t35215
  TSV

  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
  simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
# Benchmark scripts, keyed by label. Each one is a Ruby snippet that uses a
# data frame variable defined in the top-level `prelude`.
benchmark:
  # diamonds: slice -> pick -> group/mean -> sort -> rename -> assign.
  'D01: Diamonds test': |
    diamonds
      .slice { v(:carat) > 1 }
      .pick(:cut, :price)
      .group(:cut)
      .mean
      .sort('-mean(price)')
      .rename('mean(price)': :mean_price_USD)
      .assign { [:mean_price_JPY, v(:mean_price_USD) * 110.0] }

  # starwars: drop keys ending in 'color', remove rows whose species equals
  # 'NA', group by species with count/mean, then slice on count > 1.
  'D02: Starwars test': |
    starwars
      .drop { keys.select { |key| key.end_with?('color') } }
      .remove { v(:species) == 'NA' }
      .group(:species) { [count(:species), mean(:height, :mass)] }
      .slice { v(:count) > 1 }

  # import_cars: to_long, then to_wide with the same name/value columns,
  # then transpose.
  'D03: Import cars test': |
    import_cars
      .to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
      .to_wide(name: :Manufacturer, value: :Num_of_imported)
      .transpose

  # simpsons_paradox_covid: select age_group == 'under 50', count per
  # (vaccine_status, outcome), widen by vaccine_status, then add each
  # column's share of its own total as a percentage.
  'D04: Simpsons paradox test': |
    simpsons_paradox_covid[simpsons_paradox_covid[:age_group] == 'under 50']
      .group(:vaccine_status, :outcome)
      .count
      .then { |df| df.to_wide(name: :vaccine_status, value: df.keys[-1]) }
      .assign do
        [
          [:'vaccinated_%', (100.0 * v(:vaccinated) / v(:vaccinated).sum)],
          [:'unvaccinated_%', (100.0 * v(:unvaccinated) / v(:unvaccinated).sum)]
        ]
      end