muneebalam/scrapenhl2

View on GitHub
scrapenhl2/scrape/parse_toi.py

Summary

Maintainability
F
1 wk
Test Coverage
"""
This module contains methods for parsing TOI.
"""

import os.path
import re

import pandas as pd

import scrapenhl2.scrape.general_helpers as helpers
import scrapenhl2.scrape.organization as organization
import scrapenhl2.scrape.players as players
import scrapenhl2.scrape.schedules as schedules
import scrapenhl2.scrape.scrape_toi as scrape_toi
from numba import jit

def parse_season_toi(season, force_overwrite=False):
    """
    Parses toi from the given season. Final games covered only.

    :param season: int, the season
    :param force_overwrite: bool. If true, parses all games. If false, only previously unparsed ones

    :return:
    """

    if season is None:
        season = schedules.get_current_season()

    sch = schedules.get_season_schedule(season)
    games = sch[sch.Status == "Final"].Game.values
    games.sort()
    intervals = helpers.intervals(games)
    interval_j = 0
    for i, game in enumerate(games):
        try:
            if season >= 2010:
                parse_game_toi(season, game, force_overwrite)
            else:
                parse_game_toi_from_html(season, game, force_overwrite)
            if len(get_parsed_toi(season, game)) < 3600:
                parse_game_toi_from_html(season, game, force_overwrite)
        except Exception as e:
            try:
                parse_game_toi_from_html(season, game, force_overwrite)
            except Exception as e:
                pass  # ed.print_and_log('{0:d} {1:d} {2:s}'.format(season, game, str(e)), 'warn')
        if interval_j < len(intervals):
            if i == intervals[interval_j][0]:
                print('Done parsing toi through {0:d} {1:d} ({2:d}%)'.format(
                    season, game, round(intervals[interval_j][0] / len(games) * 100)))
                interval_j += 1


def parse_game_toi(season, game, force_overwrite=False):
    """
    Parses TOI from json for this game

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If True, will execute. If False, executes only if file does not exist yet.

    :return: nothing
    """
    filename = get_game_parsed_toi_filename(season, game)
    if not force_overwrite and os.path.exists(filename):
        return False

    # TODO for some earlier seasons I need to read HTML instead. Also for live games
    # Looks like 2010-11 is the first year where this feed supplies more than just boxscore data
    rawtoi = scrape_toi.get_raw_toi(season, game)
    try:
        parsedtoi = read_shifts_from_page(rawtoi, season, game)
    except ValueError as ve:
        # ed.print_and_log('Error with {0:d} {1:d}'.format(season, game), 'warning')
        # ed.print_and_log(str(ve), 'warning')  # TODO look through 2016, getting some errors
        parsedtoi = None

    if parsedtoi is None:
        return False

    # PbP doesn't have strengths, so let's add those in
    # Ok maybe leave strengths, scores, etc, for team logs
    # update_pbp_from_toi(parsedtoi, season, game)
    save_parsed_toi(parsedtoi, season, game)
    # ed.print_and_log('Parsed shifts for {0:d} {1:d}'.format(season, game))
    return True


def parse_game_toi_from_html(season, game, force_overwrite=False):
    """
    Parses TOI from the html shift log from this game.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If True, will execute. If False, executes only if file does not exist yet.

    :return: nothing
    """
    # TODO force_overwrite support
    filenames = (scrape_toi.get_home_shiftlog_filename(season, game),
                 scrape_toi.get_road_shiftlog_filename(season, game))
    if force_overwrite is False and os.path.exists(scrape_toi.get_home_shiftlog_filename(season, game)) and \
            os.path.exists(scrape_toi.get_home_shiftlog_filename(season, game)):
        return False

    gameinfo = schedules.get_game_data_from_schedule(season, game)
    try:
        parsedtoi = read_shifts_from_html_pages(scrape_toi.get_raw_html_toi(season, game, 'H'),
                                                scrape_toi.get_raw_html_toi(season, game, 'R'),
                                                gameinfo['Home'], gameinfo['Road'],
                                                season, game)
    except ValueError as ve:
        # ed.print_and_log('Error with {0:d} {1:d}'.format(season, game), 'warning')
        # ed.print_and_log(str(ve), 'warning')
        parsedtoi = None

    save_parsed_toi(parsedtoi, season, game)
    # ed.print_and_log('Parsed shifts for {0:d} {1:d}'.format(season, game))
    return True


def get_parsed_toi(season, game):
    """
    Loads the compressed json file containing this game's shifts from disk.

    :param season: int, the season
    :param game: int, the game

    :return: json, the json shifts
    """
    return pd.read_hdf(get_game_parsed_toi_filename(season, game))


def save_parsed_toi(toi, season, game):
    """
    Saves the pandas dataframe containing shift information to disk as an HDF5.

    :param toi: df, a pandas dataframe with the shifts of the game
    :param season: int, the season
    :param game: int, the game

    :return: nothing
    """
    if toi is None:
        print('None for TOI for', season, game)
        return
    toi = toi.drop_duplicates()  # TODO why do I need this? E.g. see 20008 second 329
    toi.to_hdf(get_game_parsed_toi_filename(season, game),
               key='T{0:d}0{1:d}'.format(season, game),
               mode='w', complib='zlib')


def read_shifts_from_html_pages(rawtoi1, rawtoi2, teamid1, teamid2, season, game):
    """
    Aggregates information from two html pages given into a dataframe with one row per second and one col per player.

    :param rawtoi1: str, html page of shift log for team id1
    :param rawtoi2: str, html page of shift log for teamid2
    :param teamid1: int, team id corresponding to rawtoi1
    :param teamid2: int, team id corresponding to rawtoi1
    :param season: int, the season
    :param game: int, the game

    :return: dataframe
    """

    from html_table_extractor.extractor import Extractor
    dflst = []
    for rawtoi, teamid in zip((rawtoi1, rawtoi2), (teamid1, teamid2)):
        extractor = Extractor(rawtoi)
        extractor.parse()
        tables = extractor.return_list()

        ids = []
        periods = []
        starts = []
        ends = []
        durationtime = []
        teams = []
        i = 0
        while i < len(tables):
            # A convenient artefact of this package: search for [p, p, p, p, p, p, p, p]
            if len(tables[i]) == 8 and helpers.check_number_last_first_format(tables[i][0]):
                pname = helpers.remove_leading_number(tables[i][0])
                pname = helpers.flip_first_last(pname)
                pid = players.player_as_id(pname)
                i += 2  # skip the header row
                while re.match('\d{1,2}', tables[i][0]):  # First entry is shift number
                    # print(tables[i])
                    shiftnum, per, start, end, dur, ev = tables[i]
                    # print(pname, pid, shiftnum, per, start, end)
                    ids.append(pid)
                    if per == 'OT':
                        per = 4
                    periods.append(int(per))
                    starts.append(start[:start.index('/')].strip())
                    ends.append(end[:end.index('/')].strip())
                    durationtime.append(helpers.mmss_to_secs(dur))
                    teams.append(teamid)
                    i += 1
                i += 1
            else:
                i += 1

        startmin = [x[:x.index(':')] for x in starts]
        startsec = [x[x.index(':') + 1:] for x in starts]
        starttimes = [1200 * (p - 1) + 60 * int(m) + int(s) + 1 for p, m, s in zip(periods, startmin, startsec)]
        # starttimes = [0 if x == 1 else x for x in starttimes]
        endmin = [x[:x.index(':')] for x in ends]
        endsec = [x[x.index(':') + 1:] for x in ends]
        # There is an extra -1 in endtimes to avoid overlapping start/end
        endtimes = [1200 * (p - 1) + 60 * int(m) + int(s) for p, m, s in zip(periods, endmin, endsec)]

        durationtime = [e - s for s, e in zip(starttimes, endtimes)]

        df = pd.DataFrame({'PlayerID': ids, 'Period': periods, 'Start': starttimes, 'End': endtimes,
                           'Team': teams, 'Duration': durationtime})
        dflst.append(df)

    return _finish_toidf_manipulations(pd.concat(dflst), season, game)


def read_shifts_from_page(rawtoi, season, game):
    """
    Turns JSON shift start-ends into TOI matrix with one row per second and one col per player

    :param rawtoi: dict, json from NHL API
    :param season: int, the season
    :param game: int, the game

    :return: dataframe
    """
    toi = rawtoi['data']
    if len(toi) == 0:
        return
    ids = ['' for _ in range(len(toi))]
    periods = [0 for _ in range(len(toi))]
    starts = ['0:00' for _ in range(len(toi))]
    ends = ['0:00' for _ in range(len(toi))]
    teams = ['' for _ in range(len(toi))]
    durations = [0 for _ in range(len(toi))]

    # The shifts are ordered shortest duration to longest.
    for i, dct in enumerate(toi):
        ids[i] = helpers.try_to_access_dict(dct, 'playerId', default_return='')
        periods[i] = helpers.try_to_access_dict(dct, 'period', default_return=0)
        starts[i] = helpers.try_to_access_dict(dct, 'startTime', default_return='0:00')
        ends[i] = helpers.try_to_access_dict(dct, 'endTime', default_return='0:00')
        durations[i] = helpers.try_to_access_dict(dct, 'duration', default_return=0)
        teams[i] = helpers.try_to_access_dict(dct, 'teamId', default_return='')

    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # I originally took start times at face value and subtract 1 from end times
    # This caused problems with joining events--when there's a shot and the goalie freezes immediately
    # then, when you join this to the pbp, you'll get the players on the ice for the next draw as having
    # been on ice for the shot.
    # So I switch to adding 1 to start times, and leaving end times as-are.
    # That means that when joining on faceoffs, add 1 to faceoff times.
    # Exception: start time 1 --> start time 0
    startmin = [x[:x.index(':')] for x in starts]
    startsec = [x[x.index(':') + 1:] for x in starts]
    starttimes = [1200 * (p - 1) + 60 * int(m) + int(s) + 1 for p, m, s in zip(periods, startmin, startsec)]
    # starttimes = [0 if x == 1 else x for x in starttimes]
    endmin = [x[:x.index(':')] for x in ends]
    endsec = [x[x.index(':') + 1:] for x in ends]
    # There is an extra -1 in endtimes to avoid overlapping start/end
    endtimes = [1200 * (p - 1) + 60 * int(m) + int(s) for p, m, s in zip(periods, endmin, endsec)]

    durationtime = [e - s for s, e in zip(starttimes, endtimes)]

    df = pd.DataFrame({'PlayerID': ids, 'Period': periods, 'Start': starttimes, 'End': endtimes,
                       'Team': teams, 'Duration': durationtime})

    return _finish_toidf_manipulations(df, season, game)


def _finish_toidf_manipulations(df, season, game):
    """
    Takes dataframe of shifts (one row per shift) and makes into a matrix of players on ice for each second.

    :param df: dataframe
    :param season: int, the season
    :param game: int, the game

    :return: dataframe
    """
    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # TODO don't read end times. Use duration, which has good coverage, to infer end. Then end + 1200 not needed below.
    # Sometimes shifts have the same start and time.
    # By the time we're here, they'll have start = end + 1
    # So let's remove shifts with duration -1
    df = df[df.Start != df.End + 1]

    # Sometimes you see goalies with a shift starting in one period and ending in another
    # This is to help in those cases.
    if sum(df.End < df.Start) > 0:
        # ed.print_and_log('Have to adjust a shift time', 'warn')
        # TODO I think I'm making a mistake with overtime shifts--end at 3900!
        # TODO also, maybe only go to the end of the period, not to 1200
        # ed.print_and_log(df[df.End < df.Start])
        df.loc[df.End < df.Start, 'End'] = df.loc[df.End < df.Start, 'End'] + 1200
    # One issue coming up is when the above line comes into play--missing times are filled in as 0:00
    tempdf = df[['PlayerID', 'Start', 'End', 'Team', 'Duration']].query("Duration > 0")
    tempdf = tempdf.assign(Time=tempdf.Start)
    # print(tempdf.head(20))

    # Let's filter out goalies for now. We can add them back in later.
    # This will make it easier to get the strength later
    pids = players.get_player_ids_file()
    tempdf = tempdf.merge(pids[['ID', 'Pos']], how='left', left_on='PlayerID', right_on='ID')

    # toi = pd.DataFrame({'Time': [i for i in range(0, max(df.End) + 1)]})
    toi = pd.DataFrame({'Time': [i for i in range(0, int(round(max(df.End))))]})

    # Originally used a hacky way to fill in times between shift start and end: increment tempdf by one, filter, join
    # Faster to work with base structures
    # Or what if I join each player to full df, fill backward on start and end, and filter out rows where end > time
    # toidict = toi.to_dict(orient='list')
    # players_by_sec = [[] for _ in range(min(toidict['Start'], toidict['End'] + 1))]
    # for i in range(len(players_by_sec)):
    #    for j in range(toidict['Start'][i], toidict['End'][i] + 1):
    #        players_by_sec[j].append(toidict['PlayerID'][i])
    # Maybe I can create a matrix with rows = time and columns = players
    # Loop over start and end, and use iloc[] to set booleans en masse.
    # Then melt and filter

    # Create one row per second
    alltimes = toi.Time
    newdf = pd.DataFrame(index=alltimes)

    # Add rows and set times to True simultaneously
    for i, (pid, start, end, team, duration, time, pid, pos) in tempdf.iterrows():
        newdf.loc[start:end, pid] = True

    # Fill NAs to False
    for col in newdf:
        newdf.loc[:, col] = newdf[col].fillna(False)

    # Go wide to long and then drop unneeded rows
    newdf = helpers.melt_helper(newdf.reset_index(), id_vars='Time', # value_vars=newdf.columns,  # cols with num colnames causing errors
                                var_name='PlayerID', value_name='OnIce')
    newdf = newdf[newdf.OnIce].drop('OnIce', axis=1)
    newdf = newdf.merge(tempdf.drop('Time', axis=1), how='left', on='PlayerID') \
        .query("Time <= End & Time >= Start") \
        .drop('ID', axis=1)

    # In case there were rows that were all missing, join onto TOI
    tempdf = toi.merge(newdf, how='left', on='Time')
    # TODO continue here--does newdf match tempdf after sort_values?

    # Old method
    # toidfs = []
    # while len(tempdf.index) > 0:
    #    temptoi = toi.merge(tempdf, how='inner', on='Time')
    #    toidfs.append(temptoi)

    #    tempdf = tempdf.assign(Time=tempdf.Time + 1)
    #    tempdf = tempdf.query('Time <= End')

    # tempdf = pd.concat(toidfs)
    # tempdf = tempdf.sort_values(by='Time')

    goalies = tempdf[tempdf.Pos == 'G'].drop({'Pos'}, axis=1)
    tempdf = tempdf[tempdf.Pos != 'G'].drop({'Pos'}, axis=1)

    # Append team name to start of columns by team
    home = str(gameinfo['Home'])
    road = str(gameinfo['Road'])

    # Goalies
    # Let's assume we get only one goalie per second per team.
    # TODO: flag if there are multiple listed and pick only one
    goalies.loc[:, 'GTeam'] = goalies.Team.apply(lambda x: 'HG' if str(int(x)) == home else 'RG')
    try:
        goalies2 = goalies[['Time', 'PlayerID', 'GTeam']] \
            .pivot(index='Time', columns='GTeam', values='PlayerID') \
            .reset_index()
    except ValueError:
        # Duplicate entries in index error.
        # ed.print_and_log('Multiple goalies for a team in {0:d} {1:d}, picking one with the most TOI'.format(
        #    season, game), 'warn')

        # Find times with multiple goalies
        too_many_goalies_h = goalies[goalies.GTeam == 'HG'][['Time']] \
            .assign(GoalieCount=1) \
            .groupby('Time').count() \
            .reset_index() \
            .query('GoalieCount > 1')

        too_many_goalies_r = goalies[goalies.GTeam == 'RG'][['Time']] \
            .assign(GoalieCount=1) \
            .groupby('Time').count() \
            .reset_index() \
            .query('GoalieCount > 1')

        # Find most common goalie for each team
        if len(too_many_goalies_h) == 0:
            problem_times_revised_h = goalies
        else:  # i.e. if len(too_many_goalies_h) > 0:
            top_goalie_h = goalies[goalies.GTeam == 'HG'][['PlayerID']] \
                .assign(GoalieCount=1) \
                .groupby('PlayerID').count() \
                .reset_index() \
                .sort_values('GoalieCount', ascending=False) \
                .PlayerID.iloc[0]
            # and now finally drop problem times
            problem_times_revised_h = goalies \
                .merge(too_many_goalies_h[['Time']], how='outer', on='Time', indicator=True)
            problem_times_revised_h.loc[:, 'ToDrop'] = (problem_times_revised_h._merge == 'both') & \
                                                       (problem_times_revised_h.PlayerID != top_goalie_h)
            problem_times_revised_h = problem_times_revised_h[problem_times_revised_h.ToDrop == False] \
                .drop({'_merge', 'ToDrop'}, axis=1)

        if len(too_many_goalies_r) == 0:
            problem_times_revised_r = problem_times_revised_h
        else:  # i.e. if len(too_many_goalies_r) > 0:
            top_goalie_r = goalies[goalies.GTeam == 'RG'][['PlayerID']] \
                .assign(GoalieCount=1) \
                .groupby('PlayerID').count() \
                .reset_index() \
                .sort_values('GoalieCount', ascending=False) \
                .PlayerID.iloc[0]
            problem_times_revised_r = problem_times_revised_h \
                .merge(too_many_goalies_r[['Time']], how='outer', on='Time', indicator=True)
            problem_times_revised_r.loc[:, 'ToDrop'] = (problem_times_revised_r._merge == 'both') & \
                                                       (problem_times_revised_r.PlayerID != top_goalie_r)
            problem_times_revised_r = problem_times_revised_r[problem_times_revised_r.ToDrop == False] \
                .drop({'_merge', 'ToDrop'}, axis=1)

        # Pivot again
        goalies2 = problem_times_revised_r[['Time', 'PlayerID', 'GTeam']] \
            .pivot(index='Time', columns='GTeam', values='PlayerID') \
            .reset_index()

    # Home
    hdf = tempdf.query('Team == "' + home + '"').sort_values(['Time', 'Duration'], ascending=[True, False])
    if len(hdf) == 0:
        # Earlier versions of pandas can have diff behavior
        hdf = tempdf.query('Team == ' + home).sort_values(['Time', 'Duration'], ascending=[True, False])
    hdf2 = hdf[['Time', 'Duration']].groupby('Time').rank(method='first', ascending=False)
    hdf2 = hdf2.rename(columns={'Duration': 'rank'})
    hdf2.loc[:, 'rank'] = hdf2['rank'].apply(lambda x: int(x))
    hdf.loc[:, 'rank'] = 'H' + hdf2['rank'].astype('str')

    rdf = tempdf.query('Team == "' + road + '"').sort_values(['Time', 'Duration'], ascending=[True, False])
    if len(rdf) == 0:
        rdf = tempdf.query('Team == ' + road).sort_values(['Time', 'Duration'], ascending=[True, False])
    rdf2 = rdf[['Time', 'Duration']].groupby('Time').rank(method='first', ascending=False)
    rdf2 = rdf2.rename(columns={'Duration': 'rank'})
    rdf2.loc[:, 'rank'] = rdf2['rank'].apply(lambda x: int(x))
    rdf.loc[:, 'rank'] = 'R' + rdf2['rank'].astype('str')

    # Remove values above 6--looking like there won't be many
    # But in those cases take shifts with longest durations
    # That's why we create hdf and rdf by also sorting by Time and Duration above, and select duration for rank()
    if len(hdf[hdf['rank'] == "H7"]) > 0:
        # ed.print_and_log('Some times from {0:d} {1:d} have too many home players; cutting off at 6'.format(
        #    season, game), 'warn')
        # ed.print_and_log('Longest shift being lost was {0:d} seconds'.format(
        #    hdf[hdf['rank'] == "H7"].Duration.max()), 'warn')
        pass
    if len(rdf[rdf['rank'] == "R7"]) > 0:
        # ed.print_and_log('Some times from {0:d} {1:d} have too many road players; cutting off at 6'.format(
        #    season, game), 'warn')
        # ed.print_and_log('Longest shift being lost was {0:d} seconds'.format(
        #    rdf[rdf['rank'] == "H7"].Duration.max()), 'warn')
        pass

    hdf = hdf.pivot(index='Time', columns='rank', values='PlayerID').iloc[:, 0:6]
    hdf.reset_index(inplace=True)  # get time back as a column
    rdf = rdf.pivot(index='Time', columns='rank', values='PlayerID').iloc[:, 0:6]
    rdf.reset_index(inplace=True)

    toi = toi.merge(hdf, how='left', on='Time') \
        .merge(rdf, how='left', on='Time') \
        .merge(goalies2, how='left', on='Time')

    column_order = list(toi.columns.values)
    column_order = ['Time'] + [x for x in sorted(column_order[1:])]  # First entry is Time; sort rest
    toi = toi[column_order]
    # Now should be Time, H1, H2, ... HG, R1, R2, ..., RG

    # For games in the first, HG and RG may not exist yet. Have dummy replacements in there.
    # Will be wrong for when goalie is pulled in first, but oh well...
    if 'HG' not in toi.columns:
        newcol = [0 for _ in range(len(toi))]
        toi.insert(loc=toi.columns.get_loc('R1'), column='HG', value=newcol)
    if 'RG' not in toi.columns:
        toi.loc[:, 'RG'] = 0

    toi.loc[:, 'HomeSkaters'] = 0
    for col in toi.loc[:, 'H1':'HG'].columns[:-1]:
        toi.loc[:, 'HomeSkaters'] = toi[col].notnull() + toi.HomeSkaters
    toi.loc[:, 'HomeSkaters'] = 100 * toi['HG'].notnull() + toi.HomeSkaters  # a hack to make it easy to recognize
    toi.loc[:, 'RoadSkaters'] = 0
    for col in toi.loc[:, 'R1':'RG'].columns[:-1]:
        toi.loc[:, 'RoadSkaters'] = toi[col].notnull() + toi.RoadSkaters
    toi.loc[:, 'RoadSkaters'] = 100 * toi['RG'].notnull() + toi.RoadSkaters  # a hack to make it easy to recognize

    # This is how we label strengths: 5 means 5 skaters plus goalie; five skaters w/o goalie is 4+1.
    toi.loc[:, 'HomeStrength'] = toi.HomeSkaters.apply(
        lambda x: '{0:d}'.format(x - 100) if x >= 100 else '{0:d}+1'.format(x - 1))
    toi.loc[:, 'RoadStrength'] = toi.RoadSkaters.apply(
        lambda x: '{0:d}'.format(x - 100) if x >= 100 else '{0:d}+1'.format(x - 1))

    toi.drop({'HomeSkaters', 'RoadSkaters'}, axis=1, inplace=True)

    # Also drop -1+1 and 0+1 cases, which are clearly errors, and the like.
    # Need at least 3 skaters apiece, 1 goalie apiece, time, and strengths to be non-NA = 11 non NA values
    toi2 = toi.dropna(axis=0, thresh=11)  # drop rows without at least 11 non-NA values
    if len(toi2) < len(toi):
        # ed.print_and_log('Dropped {0:d}/{1:d} times in {2:d} {3:d} because of invalid strengths'.format(
        #    len(toi) - len(toi2), len(toi), season, game), 'warn')
        pass

    # TODO data quality check that I don't miss times in the middle of the game

    return toi2


def get_game_parsed_toi_filename(season, game):
    """
    Returns the filename of the parsed toi folder

    :param season: int, current season
    :param game: int, game

    :return: str, /scrape/data/parsed/toi/[season]/[game].zlib
    """
    return os.path.join(organization.get_season_parsed_toi_folder(season), str(game) + '.h5')


def get_melted_home_road_5v5_toi(season, game):
    """
    Reads parsed TOI for this game, filters for 5v5 TOI, and melts from wide to long on player

    :param season: int, the season
    :param game: int, the game

    :return: (home_df, road_df), each with columns Time, PlayerID, and Team (which will be H or R)
    """
    toi = get_parsed_toi(season, game)
    fives = toi[(toi.HomeStrength == "5") & (toi.RoadStrength == "5")]
    home = helpers.melt_helper(fives[['Time', 'H1', 'H2', 'H3', 'H4', 'H5']],
                               id_vars='Time', var_name='P', value_name='PlayerID') \
        .drop('P', axis=1) \
        .assign(Team='H')
    road = helpers.melt_helper(fives[['Time', 'R1', 'R2', 'R3', 'R4', 'R5']],
                               id_vars='Time', var_name='P', value_name='PlayerID') \
        .drop('P', axis=1) \
        .assign(Team='R')
    return home, road


def parse_toi_setup():
    """
    Creates parsed toi folders if need be

    :return:
    """
    for season in range(2005, schedules.get_current_season() + 1):
        organization.check_create_folder(organization.get_season_parsed_toi_folder(season))


parse_toi_setup()