muneebalam/scrapenhl2

View on GitHub
scrapenhl2/plot/game_h2h.py

Summary

Maintainability
F
4 days
Test Coverage
"""
This module contains methods for creating a game H2H chart.
"""

import matplotlib.pyplot as plt
import numpy as np  # standard scientific python stack
import pandas as pd  # standard scientific python stack

from scrapenhl2.manipulate import manipulate as manip
from scrapenhl2.scrape import schedules, team_info, players
from scrapenhl2.plot import visualization_helper


def live_h2h(team1, team2, update=True, save_file=None, season=2017):
    """
    A convenience method that updates data then displays h2h for most recent game between specified teams.

    :param team1: str or int, team
    :param team2: str or int, other team
    :param update: bool, should data be updated first?
    :param save_file: str, specify a valid filepath to save to file. If None, merely shows on screen.
    :param season: int, the season handed to game_h2h together with the most recent game ID.
        Defaults to 2017, the value that used to be hard-coded here, so existing callers are unaffected.

    :return: whatever game_h2h returns for that game
    """
    # Imports are kept local, as in the original, so that importing this module does not pull in
    # the scraping machinery until it is actually needed.
    if update:
        from scrapenhl2.scrape import autoupdate
        autoupdate.autoupdate()
    from scrapenhl2.scrape import games
    game = games.most_recent_game_id(team1, team2)
    return game_h2h(season, game, save_file)


def game_h2h(season, game, save_file=None):
    """
    Builds the grid head-to-head (H2H) chart for one game, as seen on @muneebalamcu.

    Loads the H2H TOI and Corsi tables for the game, keeps only the home-vs-road rows, works out the
    player order for each team, and hands everything to the plotting routine.

    :param season: int, the season
    :param game: int, the game
    :param save_file: str, specify a valid filepath to save to file. If None, merely shows on screen.

    :return: the result of the plotting routine
    """
    # Only rows where player 1 is on the home team and player 2 is on the road team are charted
    home_vs_road = 'Team1 == "H" & Team2 == "R"'
    toi_df = manip.get_game_h2h_toi(season, game).query(home_vs_road)
    corsi_df = manip.get_game_h2h_corsi(season, game).query(home_vs_road)

    home_order, home_numf = _get_h2h_chart_player_order(season, game, 'H')
    road_order, road_numf = _get_h2h_chart_player_order(season, game, 'R')

    # TODO create chart and filter out RH, HH, and RR
    # TODO link players by ID. When I link by name have issue with Mike Green for example
    return _game_h2h_chart(season, game, corsi_df, toi_df, home_order, road_order,
                           home_numf, road_numf, save_file)


def _game_h2h_chart(season, game, corsi, toi, orderh, orderr, numf_h=None, numf_r=None, save_file=None):
    """
    This method actually does the plotting for game_h2h

    :param season: int, the season
    :param game: int, the game
    :param corsi: df of P1, P2, Corsi +/- for P1 (columns used here: PlayerID1, PlayerID2, HomeCorsi)
    :param toi: df of P1, P2, H2H TOI (columns used here: PlayerID1, PlayerID2, Secs, Min)
    :param orderh: list of player IDs, player order on y-axis, top to bottom
    :param orderr: list of player IDs, player order on x-axis, left to right
    :param numf_h: int. Number of forwards for home team. Used to add horizontal bold line between F and D
    :param numf_r: int. Number of forwards for road team. Used to add vertical bold line between F and D.
    :param save_file: str of file to save the figure to, None to simply display, or 'fig' to get the
        figure object back without showing or saving it

    :return: the matplotlib figure if save_file == 'fig', otherwise None
    """

    hname = team_info.team_as_str(schedules.get_home_team(season, game), True)
    homename = team_info.team_as_str(schedules.get_home_team(season, game), False)
    rname = team_info.team_as_str(schedules.get_road_team(season, game), True)
    roadname = team_info.team_as_str(schedules.get_road_team(season, game), False)

    plt.close('all')
    fig, ax = plt.subplots(1, figsize=[11, 7])

    # Convert dataframes to coordinates
    # Home players are reversed so the first player in orderh ends up at the top of the y-axis
    horderdf = pd.DataFrame({'PlayerID1': orderh[::-1], 'Y': list(range(len(orderh)))})
    rorderdf = pd.DataFrame({'PlayerID2': orderr, 'X': list(range(len(orderr)))})
    plotdf = toi.merge(corsi, how='left', on=['PlayerID1', 'PlayerID2']) \
        .merge(horderdf, how='left', on='PlayerID1') \
        .merge(rorderdf, how='left', on='PlayerID2')

    # Hist2D of TOI
    # I make the bins a little weird so my coordinates are centered in them. Otherwise, they're all on the edges.
    _, _, _, image = ax.hist2d(x=plotdf.X, y=plotdf.Y, bins=(np.arange(-0.5, len(orderr) + 0.5, 1),
                                                             np.arange(-0.5, len(orderh) + 0.5, 1)),
                               weights=plotdf.Min, cmap=plt.cm.summer)

    # Convert IDs to names and label axes and axes ticks
    ax.set_xlabel(roadname)
    ax.set_ylabel(homename)
    xorder = players.playerlst_as_str(orderr)
    yorder = players.playerlst_as_str(orderh)[::-1]  # need to go top to bottom, so reverse order
    ax.set_xticks(range(len(xorder)))
    ax.set_yticks(range(len(yorder)))
    ax.set_xticklabels(xorder, fontsize=10, rotation=45, ha='right')
    ax.set_yticklabels(yorder, fontsize=10)
    ax.set_xlim(-0.5, len(orderr) - 0.5)
    ax.set_ylim(-0.5, len(orderh) - 0.5)

    # Hide the little ticks on the axes by setting their length to 0
    ax.tick_params(axis='both', which='both', length=0)

    # Add dividing lines between rows and columns
    for x in np.arange(0.5, len(orderr) - 0.5, 1):
        ax.plot([x, x], [-0.5, len(orderh) - 0.5], color='k')
    for y in np.arange(0.5, len(orderh) - 0.5, 1):
        ax.plot([-0.5, len(orderr) - 0.5], [y, y], color='k')

    # Add a bold line between F and D.
    if numf_r is not None:
        ax.plot([numf_r - 0.5, numf_r - 0.5], [-0.5, len(orderh) - 0.5], color='k', lw=3)
    if numf_h is not None:
        ax.plot([-0.5, len(orderr) - 0.5], [len(orderh) - numf_h - 0.5, len(orderh) - numf_h - 0.5], color='k', lw=3)

    # Colorbar for TOI
    cbar = fig.colorbar(image, pad=0.1)
    cbar.ax.set_ylabel('TOI (min)')

    # Add trademark
    cbar.ax.set_xlabel('Muneeb Alam\n@muneebalamcu', labelpad=20)

    # Add labels for Corsi and circle negatives
    neg_x = []
    neg_y = []
    for y in range(len(orderh)):
        hpid = orderh[len(orderh) - y - 1]  # row y (from the bottom) holds this home player
        for x in range(len(orderr)):
            rpid = orderr[x]

            cf = corsi[(corsi.PlayerID1 == hpid) & (corsi.PlayerID2 == rpid)]
            if len(cf) == 0:  # In this case, player will not have been on ice for a corsi event
                cf = 0
            else:
                cf = int(cf.HomeCorsi.iloc[0])

            if cf == 0:
                cf = '0'
            elif cf > 0:
                cf = '+' + str(cf)  # Easier to pick out positives with plus sign
            else:
                cf = str(cf)
                neg_x.append(x)
                neg_y.append(y)

            ax.annotate(cf, xy=(x, y), ha='center', va='center')

    # Circle negative numbers by making a scatterplot with black edges and transparent faces
    ax.scatter(neg_x, neg_y, marker='o', edgecolors='k', s=200, facecolors='none')

    # Add TOI and Corsi totals at end of rows/columns
    # NOTE(review): the division by 5 below presumably undoes counting each second/attempt once per
    # opposing skater at 5v5 -- confirm against manip.get_game_h2h_toi / get_game_h2h_corsi.
    topax = ax.twiny()
    topax.set_xticks(range(len(xorder)))
    rtotals = pd.DataFrame({'PlayerID2': orderr}) \
        .merge(toi[['PlayerID2', 'Secs']].groupby('PlayerID2').sum().reset_index(),
               how='left', on='PlayerID2') \
        .merge(corsi[['PlayerID2', 'HomeCorsi']].groupby('PlayerID2').sum().reset_index(),
               how='left', on='PlayerID2')
    rtotals.loc[:, 'HomeCorsi'] = rtotals.HomeCorsi.fillna(0)
    # Sign is flipped because HomeCorsi is from the home team's perspective and these are road players
    rtotals.loc[:, 'CorsiLabel'] = rtotals.HomeCorsi.apply(lambda x:
                                                           visualization_helper.format_number_with_plus(-1 *
                                                                                                        int(x / 5)))
    rtotals.loc[:, 'TOILabel'] = rtotals.Secs.apply(lambda x: manip.time_to_mss(x / 5))
    toplabels = ['{0:s} in {1:s}'.format(x, y) for x, y, in zip(list(rtotals.CorsiLabel), list(rtotals.TOILabel))]

    ax.set_xticks(range(len(xorder)))
    topax.set_xticklabels(toplabels, fontsize=6, rotation=45, ha='left')
    topax.set_xlim(-0.5, len(orderr) - 0.5)
    topax.tick_params(axis='both', which='both', length=0)

    rightax = ax.twinx()
    rightax.set_yticks(range(len(yorder)))
    htotals = pd.DataFrame({'PlayerID1': orderh[::-1]}) \
        .merge(toi[['PlayerID1', 'Secs']].groupby('PlayerID1').sum().reset_index(),
               how='left', on='PlayerID1') \
        .merge(corsi[['PlayerID1', 'HomeCorsi']].groupby('PlayerID1').sum().reset_index(),
               how='left', on='PlayerID1')
    htotals.loc[:, 'HomeCorsi'] = htotals.HomeCorsi.fillna(0)
    htotals.loc[:, 'CorsiLabel'] = htotals.HomeCorsi.apply(lambda x:
                                                           visualization_helper.format_number_with_plus(int(x / 5)))
    htotals.loc[:, 'TOILabel'] = htotals.Secs.apply(lambda x: manip.time_to_mss(x / 5))
    rightlabels = ['{0:s} in {1:s}'.format(x, y) for x, y, in zip(list(htotals.CorsiLabel), list(htotals.TOILabel))]

    rightax.set_yticks(range(len(yorder)))
    rightax.set_yticklabels(rightlabels, fontsize=6)
    rightax.set_ylim(-0.5, len(orderh) - 0.5)
    rightax.tick_params(axis='both', which='both', length=0)

    # Add brief explanation for the top left cell at the bottom
    explanation = []
    row1name = yorder.iloc[-1]  # yorder is reversed, so its last entry is the top row
    col1name = xorder.iloc[0]

    # The top-left pair may have no row in either table (the annotation loop above already treats a
    # missing Corsi row as 0), so fall back to 0 rather than raising IndexError on an empty selection.
    toi_cell = toi[(toi.PlayerID1 == orderh[0]) & (toi.PlayerID2 == orderr[0])]
    timeh2h = int(toi_cell.Secs.iloc[0]) if len(toi_cell) > 0 else 0
    corsi_cell = corsi[(corsi.PlayerID1 == orderh[0]) & (corsi.PlayerID2 == orderr[0])]
    shoth2h = int(corsi_cell.HomeCorsi.iloc[0]) if len(corsi_cell) > 0 else 0

    explanation.append('The top left cell indicates {0:s} (row 1) faced {1:s} (column 1) for {2:s}.'.format(
        row1name, col1name, manip.time_to_mss(timeh2h)))
    if shoth2h == 0:
        explanation.append('During that time, {0:s} and {1:s} were even in attempts.'.format(hname, rname))
    elif shoth2h > 0:
        explanation.append('During that time, {0:s} out-attempted {1:s} by {2:d}.'.format(hname, rname, shoth2h))
    else:
        explanation.append('During that time, {1:s} out-attempted {0:s} by {2:d}.'.format(hname, rname, -1 * shoth2h))
    explanation = '\n'.join(explanation)

    # Hacky way to annotate: add this to x-axis label
    ax.set_xlabel(ax.get_xlabel() + '\n\n' + explanation)

    # Leave room for the rotated tick labels, the explanation text and the multi-line title
    plt.subplots_adjust(bottom=0.27, left=0.17, top=0.82, right=1.0)

    # Add title
    # NOTE(review): /25 is presumably 5 home skaters x 5 road skaters per 5v5 second/attempt -- confirm.
    plt.title(_get_game_h2h_chart_title(season, game, corsi.HomeCorsi.sum() / 25, toi.Secs.sum() / 25),
              y=1.1, va='bottom')

    # FigureCanvasBase.set_window_title was deprecated in matplotlib 3.4 and later removed; the
    # figure manager carries the method instead. Fall back to the canvas for older installations.
    window_title = '{0:d} {1:d} H2H.png'.format(season, game)
    canvas = plt.gcf().canvas
    manager = getattr(canvas, 'manager', None)
    if manager is not None and hasattr(manager, 'set_window_title'):
        manager.set_window_title(window_title)
    elif hasattr(canvas, 'set_window_title'):
        canvas.set_window_title(window_title)

    if save_file is None:
        plt.show()
    elif save_file == 'fig':
        return plt.gcf()
    else:
        plt.savefig(save_file)
    return None


def _get_game_h2h_chart_title(season, game, homecf_diff=None, totaltoi=None):
    """
    Builds the multi-line title shown above the H2H chart.

    Line 1 names the season and game, line 2 gives the score (flagging OT/SO results) and the game
    status, and an optional line 3 summarises the home team's 5v5 attempt differential and the TOI.

    :param season: int, the season
    :param game: int, the game
    :param homecf_diff: int. The home team corsi advantage
    :param totaltoi: int. The TOI played so far.

    :return: str, the title lines joined with newlines
    """
    # Only overtime and shootout results get a marker after the score
    result_prefix = schedules.get_game_result(season, game)[:2]
    if result_prefix in ('OT', 'SO'):
        result_suffix = ' ({0:s})'.format(result_prefix)
    else:
        result_suffix = ''

    header_line = 'H2H Corsi and TOI for {0:d}-{1:s} Game {2:d}'.format(season, str(season + 1)[2:], game)

    road_team = team_info.team_as_str(schedules.get_road_team(season, game), abbreviation=False)
    road_goals = schedules.get_road_score(season, game)
    home_team = team_info.team_as_str(schedules.get_home_team(season, game), abbreviation=False)
    home_goals = schedules.get_home_score(season, game)
    status = schedules.get_game_status(season, game)
    score_line = '{0:s} {1:d} at {2:s} {3:d}{4:s} ({5:s})'.format(
        road_team, road_goals, home_team, home_goals, result_suffix, status)

    lines = [header_line, score_line]

    # The summary line is added only when both numbers were supplied
    if not (homecf_diff is None or totaltoi is None):
        summary_team = team_info.team_as_str(schedules.get_home_team(season, game))
        diff_label = visualization_helper.format_number_with_plus(int(homecf_diff))
        toi_label = manip.time_to_mss(int(totaltoi) + 1)
        lines.append('{0:s} {1:s} in 5v5 attempts in {2:s}'.format(summary_team, diff_label, toi_label))

    return '\n'.join(lines)


def _order_players_by_unit(toi, units, id_cols):
    """
    Orders the players in toi so that members of the same line/pair sit next to each other.

    Repeatedly takes the remaining player with the most TOI, finds the unit (row of units) containing
    that player with the most Secs, and emits that unit's still-unplaced members in descending TOI.
    A player who appears in no remaining unit is emitted alone.

    :param toi: df with PlayerID and Secs columns, one row per player
    :param units: df with the columns named in id_cols plus Secs, one row per line or pair
    :param id_cols: list of str, the player ID columns of units (e.g. ['PlayerID1', 'PlayerID2'])

    :return: list of player IDs
    """
    # Sort explicitly (stable, so ties keep their incoming order) so that "first row" always means
    # "most TOI", regardless of how the caller's dataframe happened to be ordered.
    remaining = toi.sort_values(by='Secs', ascending=False, kind='mergesort')
    ordered = []

    while len(remaining) > 0:
        next_player = remaining.PlayerID.iloc[0]
        has_player = (units[id_cols] == next_player).any(axis=1)
        candidates = units[has_player].sort_values(by='Secs', ascending=False)

        if len(candidates) == 0:  # sometimes this happens. Special case
            members = [next_player]
            ordered.append(next_player)
        else:
            members = [candidates[col].iloc[0] for col in id_cols]
            # remaining is already in descending TOI, so the selection is too
            ordered += list(remaining.PlayerID[remaining.PlayerID.isin(members)].values)

        # Drop placed players with a boolean filter. An outer merge with indicator=True would also
        # remove them, but it re-sorts the rows by PlayerID and so breaks the TOI ordering that the
        # next iteration relies on.
        remaining = remaining[~remaining.PlayerID.isin(members)]
        # Drop every unit that includes any of the placed players
        units = units[~units[id_cols].isin(members).any(axis=1)]

    return ordered


def _get_h2h_chart_player_order(season, game, homeroad='H'):
    """
    Reads lines and pairs for this game and finds arrangement using this algorithm:

    - Top player in TOI
    - First player's top line combination, player with more total TOI
    - First player's top line combination, player with less total TOI
    - Top player in TOI not already listed
    - (etc)

    Forwards are listed first, then defensemen (using pairings instead of line combinations).

    :param season: int, the season
    :param game: int, the game
    :param homeroad: str, 'H' for home or 'R' for road

    :return: [list of IDs], NumFs
    """
    combos = manip.get_line_combos(season, game, homeroad)
    pairs = manip.get_pairings(season, game, homeroad)

    # forwards
    ftoi = manip.get_player_toi(season, game, 'F', homeroad)
    playerlist = _order_players_by_unit(ftoi, combos, ['PlayerID1', 'PlayerID2', 'PlayerID3'])

    # Everything placed so far is a forward; the chart draws its F/D divider after this many players
    numf = len(playerlist)

    # defensemen
    dtoi = manip.get_player_toi(season, game, 'D', homeroad)
    playerlist += _order_players_by_unit(dtoi, pairs, ['PlayerID1', 'PlayerID2'])

    return playerlist, numf