czuger/dd-next-encounters

View on GitHub
work/read_monsters_from_website.rb

Summary

Maintainability
A
0 mins
Test Coverage
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'

# Best Nokogiri tutorial:
# http://ruby.bastardsbook.com/chapters/html-parsing/

require_relative '../lib/monsters/monster'

# Site root; combined with next_page to build the URL of each page to fetch.
base_page = 'https://www.dndbeyond.com/'
# Location of the first listing page. The crawl loop further down replaces this
# with the href of each "next page" link, and stops once it becomes nil.
next_page = 'monsters'

# Turns the text of a challenge cell into a number.
#
# text - String taken from the 'div.monster-challenge' cell.
#
# Returns an Integer for whole ratings ("5"), a Float for fractions ("1/4"),
# or nil when the text contains no digits at all.
def parse_challenge( text )
  rating = text.match( %r{(\d+)(?:/(\d+))?} )
  return nil unless rating

  numerator, denominator = rating.captures
  return numerator.to_i unless denominator

  numerator.to_f / denominator.to_f
end

# Builds a Monster for every monster entry found on one parsed listing page.
#
# page     - parsed document responding to #css (a Nokogiri document in this
#            script).
# monsters - Array that receives the new Monster objects; it is mutated in
#            place.
#
# Entries whose challenge cell cannot be parsed are reported on stderr and
# skipped, so one odd row does not abort the whole crawl.
#
# Returns the same monsters array that was passed in.
def read_page( page, monsters )
  page.css( '//div[data-type=monsters]' ).each do |monster|
    name_cell = monster.css( 'div.monster-name' )
    # The name is the text of the cell's great-grandchildren; the source label
    # is the text of its last grandchild.
    name = name_cell.children.children.children.text
    source = name_cell.children.children.last.text

    challenge_text = monster.css( 'div.monster-challenge' ).children.children.text
    challenge = parse_challenge( challenge_text )
    unless challenge
      warn "skipping #{name.inspect}: no challenge rating in #{challenge_text.inspect}"
      next
    end

    type = monster.css( 'div.monster-type' ).children.children.text

    monsters << Monster.new( challenge, name, type, source )
  end
  monsters
end

# Crawl the paginated listing: fetch a page, collect its monsters, then follow
# the "next page" link until none is found.
monsters = []
while next_page
  puts 'reading ' + next_page

  # URI.join resolves the link against the site root, so relative paths,
  # root-relative paths ("/monsters?page=2") and absolute URLs all work.
  page_url = URI.join( base_page, next_page )

  # URI.open is called explicitly because Kernel#open no longer fetches URLs
  # from Ruby 3.0 on; the block form closes the remote handle after reading.
  bp = Nokogiri::HTML( URI.open( page_url, &:read ) )
  monsters = read_page( bp, monsters )

  # A missing link, or a link without an href, yields nil and ends the loop.
  next_link = bp.css( '//a[data-next-page]' ).first
  next_page = next_link && next_link['href']
  # break
end

# pp monsters

# The output path is relative to the current working directory.
File.write( 'data/monsters.yml', monsters.to_yaml )