muneebalam/scrapenhl2

View on GitHub
examples/5v5 TOI for teams and players/5v5 TOI for teams and players.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from scrapenhl2.scrape import autoupdate, schedules, team_info, players\n",
    "from scrapenhl2.manipulate import manipulate as manip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "The purpose of this notebook is to get game-by-game 5v5 TOI (time on ice) counts by player and team for every game since 2012-13. We can easily get this information from the 5v5 player log."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PlayerID</th>\n",
       "      <th>Game</th>\n",
       "      <th>TOION</th>\n",
       "      <th>TeamTOI</th>\n",
       "      <th>TOIOFF</th>\n",
       "      <th>CAON</th>\n",
       "      <th>CFON</th>\n",
       "      <th>TeamCA</th>\n",
       "      <th>TeamCF</th>\n",
       "      <th>CFOFF</th>\n",
       "      <th>...</th>\n",
       "      <th>E-OtF</th>\n",
       "      <th>N</th>\n",
       "      <th>NDL</th>\n",
       "      <th>NDR</th>\n",
       "      <th>NOL</th>\n",
       "      <th>NOR</th>\n",
       "      <th>OL</th>\n",
       "      <th>OR</th>\n",
       "      <th>TeamID</th>\n",
       "      <th>Season</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8460542.0</td>\n",
       "      <td>20006</td>\n",
       "      <td>0.222500</td>\n",
       "      <td>3034.0</td>\n",
       "      <td>0.620278</td>\n",
       "      <td>6.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8460542.0</td>\n",
       "      <td>20030</td>\n",
       "      <td>0.196389</td>\n",
       "      <td>2515.0</td>\n",
       "      <td>0.502222</td>\n",
       "      <td>12.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8460542.0</td>\n",
       "      <td>20052</td>\n",
       "      <td>0.176111</td>\n",
       "      <td>2416.0</td>\n",
       "      <td>0.495000</td>\n",
       "      <td>5.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>...</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8460542.0</td>\n",
       "      <td>20067</td>\n",
       "      <td>0.219444</td>\n",
       "      <td>2936.0</td>\n",
       "      <td>0.596111</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8460542.0</td>\n",
       "      <td>20067</td>\n",
       "      <td>0.219444</td>\n",
       "      <td>2936.0</td>\n",
       "      <td>0.596111</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 62 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    PlayerID   Game     TOION  TeamTOI    TOIOFF  CAON  CFON  TeamCA  TeamCF  \\\n",
       "0  8460542.0  20006  0.222500   3034.0  0.620278   6.0  17.0    31.0    45.0   \n",
       "1  8460542.0  20030  0.196389   2515.0  0.502222  12.0   9.0    32.0    24.0   \n",
       "2  8460542.0  20052  0.176111   2416.0  0.495000   5.0  10.0    27.0    29.0   \n",
       "3  8460542.0  20067  0.219444   2936.0  0.596111   4.0  14.0    45.0    29.0   \n",
       "4  8460542.0  20067  0.219444   2936.0  0.596111   4.0  14.0    45.0    29.0   \n",
       "\n",
       "   CFOFF   ...    E-OtF    N  NDL  NDR  NOL  NOR   OL   OR  TeamID  Season  \n",
       "0   28.0   ...     15.0  2.0  0.0  2.0  2.0  0.0  0.0  0.0       1    2012  \n",
       "1   15.0   ...      9.0  0.0  5.0  3.0  0.0  0.0  0.0  5.0       1    2012  \n",
       "2   19.0   ...     10.0  5.0  0.0  2.0  0.0  0.0  0.0  2.0       1    2012  \n",
       "3   15.0   ...     11.0  7.0  2.0  0.0  0.0  0.0  2.0  4.0       1    2012  \n",
       "4   15.0   ...     11.0  7.0  2.0  0.0  0.0  0.0  2.0  4.0       1    2012  \n",
       "\n",
       "[5 rows x 62 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Update data\n",
    "# Uncomment either call below (looping over seasons as required) to refresh stale data first:\n",
    "# autoupdate.autoupdate()\n",
    "# manip.get_5v5_player_log(2017, force_create)\n",
    "\n",
    "# Build one frame per season, tagged with its Season value, then stack them\n",
    "seasons = range(2012, 2018)\n",
    "log_by_season = [manip.get_5v5_player_log(yr).assign(Season=yr) for yr in seasons]\n",
    "sch_by_season = [schedules.get_season_schedule(yr).assign(Season=yr) for yr in seasons]\n",
    "log = pd.concat(log_by_season)\n",
    "sch = pd.concat(sch_by_season)\n",
    "log.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All we need to do is:\n",
    "- Sum TOION and TOIOFF, and take a single value per game (the max, which sidesteps floating-point noise) to get team counts\n",
    "- Take TOION for individual counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Season</th>\n",
       "      <th>Game</th>\n",
       "      <th>HR</th>\n",
       "      <th>Team</th>\n",
       "      <th>TOI(min)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012</td>\n",
       "      <td>20001</td>\n",
       "      <td>Home</td>\n",
       "      <td>PHI</td>\n",
       "      <td>46.816667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012</td>\n",
       "      <td>20001</td>\n",
       "      <td>Road</td>\n",
       "      <td>PIT</td>\n",
       "      <td>46.816667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012</td>\n",
       "      <td>20002</td>\n",
       "      <td>Home</td>\n",
       "      <td>WPG</td>\n",
       "      <td>46.016667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012</td>\n",
       "      <td>20002</td>\n",
       "      <td>Road</td>\n",
       "      <td>OTT</td>\n",
       "      <td>46.016667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012</td>\n",
       "      <td>20003</td>\n",
       "      <td>Home</td>\n",
       "      <td>LAK</td>\n",
       "      <td>43.583333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Season   Game    HR Team   TOI(min)\n",
       "0    2012  20001  Home  PHI  46.816667\n",
       "1    2012  20001  Road  PIT  46.816667\n",
       "2    2012  20002  Home  WPG  46.016667\n",
       "3    2012  20002  Road  OTT  46.016667\n",
       "4    2012  20003  Home  LAK  43.583333"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Teams\n",
    "game_keys = ['Season', 'Game']\n",
    "\n",
    "# Per-row total is on-ice plus off-ice time; collapse to one value per game with max,\n",
    "# because tiny floating point differences between rows could defeat drop_duplicates\n",
    "log_with_total = log.assign(TOI=log.TOION + log.TOIOFF)\n",
    "toi_per_game = (\n",
    "    log_with_total[game_keys + ['TOI']]\n",
    "    .groupby(game_keys, as_index=False)\n",
    "    .max()\n",
    ")\n",
    "\n",
    "# Reshape the schedule to one row per (game, side), then attach the game total to both sides\n",
    "sides = sch[game_keys + ['Home', 'Road']].melt(id_vars=game_keys, var_name='HR', value_name='TeamID')\n",
    "teamtoi = sides.merge(toi_per_game, how='inner', on=game_keys).drop_duplicates()\n",
    "\n",
    "# Team IDs become strings, and TOI goes from hours to minutes\n",
    "teamtoi['Team'] = teamtoi['TeamID'].apply(team_info.team_as_str)\n",
    "teamtoi['TOI(min)'] = teamtoi['TOI'] * 60\n",
    "teamtoi = teamtoi.drop(['TeamID', 'TOI'], axis=1)\n",
    "teamtoi.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:337: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[key] = _infer_fill_value(value)\n",
      "/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Season</th>\n",
       "      <th>Game</th>\n",
       "      <th>Player</th>\n",
       "      <th>Team</th>\n",
       "      <th>TOI(min)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012</td>\n",
       "      <td>20006</td>\n",
       "      <td>Patrik Elias</td>\n",
       "      <td>NJD</td>\n",
       "      <td>13.350000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012</td>\n",
       "      <td>20030</td>\n",
       "      <td>Patrik Elias</td>\n",
       "      <td>NJD</td>\n",
       "      <td>11.783333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012</td>\n",
       "      <td>20052</td>\n",
       "      <td>Patrik Elias</td>\n",
       "      <td>NJD</td>\n",
       "      <td>10.566667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012</td>\n",
       "      <td>20067</td>\n",
       "      <td>Patrik Elias</td>\n",
       "      <td>NJD</td>\n",
       "      <td>13.166667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012</td>\n",
       "      <td>20067</td>\n",
       "      <td>Patrik Elias</td>\n",
       "      <td>NJD</td>\n",
       "      <td>13.166667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Season   Game        Player Team   TOI(min)\n",
       "0    2012  20006  Patrik Elias  NJD  13.350000\n",
       "1    2012  20030  Patrik Elias  NJD  11.783333\n",
       "2    2012  20052  Patrik Elias  NJD  10.566667\n",
       "3    2012  20067  Patrik Elias  NJD  13.166667\n",
       "4    2012  20067  Patrik Elias  NJD  13.166667"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Individuals\n",
    "# The player log can contain repeated player-game rows, so drop exact duplicates up front.\n",
    "# The explicit copy makes indivtoi its own frame, so adding columns below does not\n",
    "# trigger pandas' SettingWithCopyWarning.\n",
    "indivtoi = (\n",
    "    log[['Season', 'Game', 'PlayerID', 'TOION', 'TeamID']]\n",
    "    .drop_duplicates()\n",
    "    .copy()\n",
    ")\n",
    "\n",
    "# IDs to names and TOI from hours to minutes\n",
    "indivtoi['Player'] = players.playerlst_as_str(indivtoi.PlayerID.values)\n",
    "indivtoi['Team'] = indivtoi.TeamID.apply(team_info.team_as_str)\n",
    "indivtoi['TOI(min)'] = indivtoi.TOION * 60\n",
    "\n",
    "indivtoi = indivtoi.drop(['TeamID', 'TOION', 'PlayerID'], axis=1)\n",
    "indivtoi.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Write to file\n",
    "# Resolve the Desktop folder from the current user's home directory instead of\n",
    "# hard-coding one machine's absolute path, and create it if it does not exist.\n",
    "output_dir = Path.home() / 'Desktop'\n",
    "output_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# str() keeps to_csv working on pandas versions that expect a plain string path\n",
    "teamtoi.to_csv(str(output_dir / 'teamtoi.csv'))\n",
    "indivtoi.to_csv(str(output_dir / 'indivtoi.csv'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}