muneebalam/scrapenhl2

View on GitHub
scrapenhl2/plot/team_score_state_toi.py

Summary

Maintainability
A
1 hr
Test Coverage
"""
This module contains methods for making a stacked bar graph indicating how much TOI each team spends in score states.
"""

import matplotlib.pyplot as plt
import numpy as np

import scrapenhl2.manipulate.manipulate as manip
import scrapenhl2.scrape.team_info as team_info
import scrapenhl2.plot.visualization_helper as vhelper

def score_state_graph(season):
    """
    Generates a horizontal stacked bar graph showing how much 5v5 TOI each team has played in each score state
    for given season.

    One bar is drawn per team, split into segments for the score states -3 ("Trail 3+") through 3 ("Up 3+").
    Bar geometry (Y, Left, Width) comes from _score_state_graph_bar_positions, and team names are annotated
    at x=0 on each bar. The figure is shown with plt.show().

    :param season: int, the season

    :return: nothing
    """
    # TODO make kwargs match other methods: startseason, startdate, etc

    # Collapse the per-game summary into one row per team and score state
    state_toi = manip.team_5v5_score_state_summary_by_game(season) \
        .drop('Game', axis=1) \
        .groupby(['Team', 'ScoreState'], as_index=False).sum()

    bar_positions = _score_state_graph_bar_positions(state_toi)
    # Replace the column through assign rather than an in-place .loc write, so the column's dtype may change
    # freely when the team identifiers are converted to strings
    bar_positions = bar_positions.assign(Team=bar_positions.Team.apply(team_info.team_as_str))

    plt.clf()

    # Base colors for tied / lead by 1 / trail by 1; larger margins get progressively lighter shades
    tiedcolor, leadcolor, trailcolor = plt.rcParams['axes.prop_cycle'].by_key()['color'][:3]
    colors = {0: tiedcolor, 1: leadcolor, -1: trailcolor}
    for i in range(2, 4):
        colors[i] = vhelper.make_color_lighter(colors[i - 1])
        colors[-1 * i] = vhelper.make_color_lighter(colors[-1 * i + 1])

    alpha = 0.5
    for score in (-3, -2, -1, 0, 1, 2, 3):  # bar_positions.ScoreState.unique():
        if score == 3:
            label = 'Up 3+'
        elif score > 0:
            label = 'Up {0:d}'.format(score)
        elif score == 0:
            label = 'Tied'
        elif score == -3:
            label = 'Trail 3+'
        else:
            label = 'Trail {0:d}'.format(-1 * score)

        temp = bar_positions[bar_positions.ScoreState == score]
        # Pass y positions and widths positionally: the `bottom` keyword of barh was removed in matplotlib 3.0
        # (renamed to `y`), while the positional order (y, width) is the same in old and new versions
        plt.barh(temp.Y.values, temp.Width.values, left=temp.Left.values, label=label, alpha=alpha,
                 color=colors[score])

    # Team names are written on the bars (at x=0) instead of as y-tick labels
    for _, y, team in bar_positions[['Y', 'Team']].drop_duplicates().itertuples():
        plt.annotate(team, xy=(0, y), ha='center', va='center', fontsize=6)

    plt.ylim(-1, len(bar_positions.Team.unique()))
    plt.legend(loc='lower right', fontsize=8)
    plt.yticks([])
    for spine in ['right', 'left', 'top', 'bottom']:
        plt.gca().spines[spine].set_visible(False)
    plt.title(get_score_state_graph_title(season))

    # X positions are signed fractions; label them as unsigned percentages on both sides of zero
    lst = list(np.arange(-0.6, 0.61, 0.2))
    plt.xticks(lst, ['{0:d}%'.format(abs(int(round(100 * x)))) for x in lst])
    plt.show()


def _order_for_score_state_graph(toidf):
    """
    Want to arrange teams so top team has most time leading minus trailing.

    This method computes (seconds leading - seconds trailing) for each team, sorts, and arranges so the team with
    the largest difference has the highest Y. Tied time contributes zero. A team with no leading time, no trailing
    time, or neither is treated as having zero seconds in the missing state(s), so every team in toidf gets a Y.

    :param toidf: dataframe, unique on team and score state, with columns Team, ScoreState and Secs

    :return: dataframe with team and Y, where Y runs from 0 (lowest difference) to number of teams - 1
    """
    # Sign each row's seconds by its score state: positive when leading, negative when trailing, zero when tied.
    # Summing these per team gives lead minus trail directly, with no NaN for states a team never reached.
    signed = toidf.assign(Diff=toidf.Secs * np.sign(toidf.ScoreState))
    diffs = signed.groupby('Team', as_index=False)['Diff'].sum()

    # Stable sort so teams with equal differences keep a deterministic (by-team) order
    diffs = diffs.sort_values('Diff', kind='mergesort')
    return diffs.assign(Y=np.arange(len(diffs)))[['Team', 'Y']]


def _score_state_graph_bar_positions(toidf):
    """
    Figures out where bars should start and stop so that the y-axis bisects the "tied" bar.

    Score states are clamped to the range -3 to 3 and re-aggregated, seconds are converted to each team's fraction
    of its total TOI, and each team's segments are laid end to end in score-state order. The whole bar is then
    shifted so the middle of the tied (ScoreState == 0) segment sits at x=0. The input dataframe is not modified.

    :param toidf: dataframe with columns Team, ScoreState and Secs

    :return: dataframe with one row per team and (clamped) score state, with columns Width (fraction of the
        team's TOI), Left (left edge of the segment) and Y (bar position from _order_for_score_state_graph),
        sorted by Y
    """

    totaltoi = toidf[['Team', 'Secs']].groupby('Team', as_index=False).sum().rename(columns={'Secs': 'TotalTOI'})

    # Trim score states to -3 to 3. Work on a copy (assign) so the caller's dataframe is left untouched.
    toidf = toidf.assign(ScoreState=toidf.ScoreState.clip(lower=-3, upper=3)) \
        .groupby(['Team', 'ScoreState'], as_index=False).sum()

    # Change numbers to fractions of 100%
    df = toidf.merge(totaltoi, how='left', on='Team')
    df = df.assign(Width=df.Secs / df.TotalTOI) \
        .drop(['Secs', 'TotalTOI'], axis=1) \
        .sort_values('ScoreState')

    # Take cumsums for the left in a barh. The cumsum is inclusive of the current segment, so subtract the
    # segment's own width to get where it starts.
    df['Left'] = df.groupby('Team').Width.cumsum() - df.Width

    # Find how far each team must be shifted so the center of the tied bar is at zero
    zeroes = df[df.ScoreState == 0]
    zeroes = zeroes.assign(Shift=zeroes.Left + zeroes.Width / 2)[['Team', 'Shift']]

    # Shift
    df = df.merge(zeroes, how='left', on='Team')
    df['Left'] = df.Left - df.Shift
    df = df.drop('Shift', axis=1)

    # Check that zeroes are centered: left edge and right edge of each tied segment should be symmetric about 0
    tempdf = df[df.ScoreState == 0]
    # sometimes have little float nonzeroes, like 1e-16
    assert np.allclose(tempdf.Left * 2 + tempdf.Width, 0), 'Tied bars are not centered at zero'

    return df.merge(_order_for_score_state_graph(toidf), how='left', on='Team').sort_values('Y')


def get_score_state_graph_title(season):
    """
    Builds the title for the score state graph, naming the season as start year plus the two-digit end year.

    :param season: int, the season

    :return: str, the title; for example, season 2017 is written as 2017-18
    """
    # Everything after the first two characters of the following year, e.g. 2018 -> '18'
    end_year_suffix = str(season + 1)[2:]
    template = 'Team 5v5 TOI by score state in {0:d}-{1:s}'
    return template.format(season, end_year_suffix)