examples/5v5 TOI for teams and players/5v5 TOI for teams and players.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"from scrapenhl2.scrape import autoupdate, schedules, team_info, players\n",
"from scrapenhl2.manipulate import manipulate as manip"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"The purpose of this script is to get game-by-game 5v5 toi counts by player and team for every game since 2012-13. We can get this information from the 5v5 player log easily."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PlayerID</th>\n",
" <th>Game</th>\n",
" <th>TOION</th>\n",
" <th>TeamTOI</th>\n",
" <th>TOIOFF</th>\n",
" <th>CAON</th>\n",
" <th>CFON</th>\n",
" <th>TeamCA</th>\n",
" <th>TeamCF</th>\n",
" <th>CFOFF</th>\n",
" <th>...</th>\n",
" <th>E-OtF</th>\n",
" <th>N</th>\n",
" <th>NDL</th>\n",
" <th>NDR</th>\n",
" <th>NOL</th>\n",
" <th>NOR</th>\n",
" <th>OL</th>\n",
" <th>OR</th>\n",
" <th>TeamID</th>\n",
" <th>Season</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8460542.0</td>\n",
" <td>20006</td>\n",
" <td>0.222500</td>\n",
" <td>3034.0</td>\n",
" <td>0.620278</td>\n",
" <td>6.0</td>\n",
" <td>17.0</td>\n",
" <td>31.0</td>\n",
" <td>45.0</td>\n",
" <td>28.0</td>\n",
" <td>...</td>\n",
" <td>15.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>2012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8460542.0</td>\n",
" <td>20030</td>\n",
" <td>0.196389</td>\n",
" <td>2515.0</td>\n",
" <td>0.502222</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>32.0</td>\n",
" <td>24.0</td>\n",
" <td>15.0</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>2012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8460542.0</td>\n",
" <td>20052</td>\n",
" <td>0.176111</td>\n",
" <td>2416.0</td>\n",
" <td>0.495000</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" <td>27.0</td>\n",
" <td>29.0</td>\n",
" <td>19.0</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>1</td>\n",
" <td>2012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8460542.0</td>\n",
" <td>20067</td>\n",
" <td>0.219444</td>\n",
" <td>2936.0</td>\n",
" <td>0.596111</td>\n",
" <td>4.0</td>\n",
" <td>14.0</td>\n",
" <td>45.0</td>\n",
" <td>29.0</td>\n",
" <td>15.0</td>\n",
" <td>...</td>\n",
" <td>11.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>2012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8460542.0</td>\n",
" <td>20067</td>\n",
" <td>0.219444</td>\n",
" <td>2936.0</td>\n",
" <td>0.596111</td>\n",
" <td>4.0</td>\n",
" <td>14.0</td>\n",
" <td>45.0</td>\n",
" <td>29.0</td>\n",
" <td>15.0</td>\n",
" <td>...</td>\n",
" <td>11.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>2012</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 62 columns</p>\n",
"</div>"
],
"text/plain": [
" PlayerID Game TOION TeamTOI TOIOFF CAON CFON TeamCA TeamCF \\\n",
"0 8460542.0 20006 0.222500 3034.0 0.620278 6.0 17.0 31.0 45.0 \n",
"1 8460542.0 20030 0.196389 2515.0 0.502222 12.0 9.0 32.0 24.0 \n",
"2 8460542.0 20052 0.176111 2416.0 0.495000 5.0 10.0 27.0 29.0 \n",
"3 8460542.0 20067 0.219444 2936.0 0.596111 4.0 14.0 45.0 29.0 \n",
"4 8460542.0 20067 0.219444 2936.0 0.596111 4.0 14.0 45.0 29.0 \n",
"\n",
" CFOFF ... E-OtF N NDL NDR NOL NOR OL OR TeamID Season \n",
"0 28.0 ... 15.0 2.0 0.0 2.0 2.0 0.0 0.0 0.0 1 2012 \n",
"1 15.0 ... 9.0 0.0 5.0 3.0 0.0 0.0 0.0 5.0 1 2012 \n",
"2 19.0 ... 10.0 5.0 0.0 2.0 0.0 0.0 0.0 2.0 1 2012 \n",
"3 15.0 ... 11.0 7.0 2.0 0.0 0.0 0.0 2.0 4.0 1 2012 \n",
"4 15.0 ... 11.0 7.0 2.0 0.0 0.0 0.0 2.0 4.0 1 2012 \n",
"\n",
"[5 rows x 62 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Update data\n",
"# autoupdate.autoupdate() # Comment in if needed, and loop if needed\n",
"# manip.get_5v5_player_log(2017, force_create) # Comment in if needed, and loop if needed\n",
"log = pd.concat([manip.get_5v5_player_log(season).assign(Season=season) for season in range(2012, 2018)])\n",
"sch = pd.concat([schedules.get_season_schedule(season).assign(Season=season) for season in range(2012, 2018)])\n",
"log.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"All we need to do is:\n",
"- Sum TOION and TOIOFF, and take distinct values to get team counts\n",
"- Take TOION for individual counts"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Season</th>\n",
" <th>Game</th>\n",
" <th>HR</th>\n",
" <th>Team</th>\n",
" <th>TOI(min)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2012</td>\n",
" <td>20001</td>\n",
" <td>Home</td>\n",
" <td>PHI</td>\n",
" <td>46.816667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2012</td>\n",
" <td>20001</td>\n",
" <td>Road</td>\n",
" <td>PIT</td>\n",
" <td>46.816667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2012</td>\n",
" <td>20002</td>\n",
" <td>Home</td>\n",
" <td>WPG</td>\n",
" <td>46.016667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2012</td>\n",
" <td>20002</td>\n",
" <td>Road</td>\n",
" <td>OTT</td>\n",
" <td>46.016667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2012</td>\n",
" <td>20003</td>\n",
" <td>Home</td>\n",
" <td>LAK</td>\n",
" <td>43.583333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Season Game HR Team TOI(min)\n",
"0 2012 20001 Home PHI 46.816667\n",
"1 2012 20001 Road PIT 46.816667\n",
"2 2012 20002 Home WPG 46.016667\n",
"3 2012 20002 Road OTT 46.016667\n",
"4 2012 20003 Home LAK 43.583333"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Teams\n",
"teamtoi = log.assign(TOI=log.TOION + log.TOIOFF) \\\n",
" [['Season', 'Game', 'TOI']] \\\n",
" .groupby(['Season', 'Game'], as_index=False) \\\n",
" .max() # take max to avoid floating point errors that may fell drop_duplicates\n",
"teamtoi = sch[['Season', 'Game', 'Home', 'Road']] \\\n",
" .melt(id_vars=['Season', 'Game'], var_name='HR', value_name='TeamID') \\\n",
" .merge(teamtoi, how='inner', on=['Season', 'Game']) \\\n",
" .drop_duplicates()\n",
" \n",
"# Make names into str, and convert TOI from hours to minutes\n",
"teamtoi.loc[:, 'Team'] = teamtoi.TeamID.apply(lambda x: team_info.team_as_str(x))\n",
"teamtoi.loc[:, 'TOI(min)'] = teamtoi.TOI * 60\n",
"teamtoi = teamtoi.drop(['TeamID', 'TOI'], axis=1)\n",
"teamtoi.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:337: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[key] = _infer_fill_value(value)\n",
"/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Season</th>\n",
" <th>Game</th>\n",
" <th>Player</th>\n",
" <th>Team</th>\n",
" <th>TOI(min)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2012</td>\n",
" <td>20006</td>\n",
" <td>Patrik Elias</td>\n",
" <td>NJD</td>\n",
" <td>13.350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2012</td>\n",
" <td>20030</td>\n",
" <td>Patrik Elias</td>\n",
" <td>NJD</td>\n",
" <td>11.783333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2012</td>\n",
" <td>20052</td>\n",
" <td>Patrik Elias</td>\n",
" <td>NJD</td>\n",
" <td>10.566667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2012</td>\n",
" <td>20067</td>\n",
" <td>Patrik Elias</td>\n",
" <td>NJD</td>\n",
" <td>13.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2012</td>\n",
" <td>20067</td>\n",
" <td>Patrik Elias</td>\n",
" <td>NJD</td>\n",
" <td>13.166667</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Season Game Player Team TOI(min)\n",
"0 2012 20006 Patrik Elias NJD 13.350000\n",
"1 2012 20030 Patrik Elias NJD 11.783333\n",
"2 2012 20052 Patrik Elias NJD 10.566667\n",
"3 2012 20067 Patrik Elias NJD 13.166667\n",
"4 2012 20067 Patrik Elias NJD 13.166667"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Individuals\n",
"indivtoi = log[['Season', 'Game', 'PlayerID', 'TOION', 'TeamID']]\n",
"\n",
"# IDs to names and TOI from hours to minutes\n",
"indivtoi.loc[:, 'Player'] = players.playerlst_as_str(indivtoi.PlayerID.values)\n",
"indivtoi.loc[:, 'Team'] = indivtoi.TeamID.apply(lambda x: team_info.team_as_str(x))\n",
"indivtoi.loc[:, 'TOI(min)'] = indivtoi.TOION * 60\n",
"\n",
"indivtoi = indivtoi.drop(['TeamID', 'TOION', 'PlayerID'], axis=1)\n",
"indivtoi.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write to file\n",
"teamtoi.to_csv('/Users/muneebalam/Desktop/teamtoi.csv')\n",
"indivtoi.to_csv('/Users/muneebalam/Desktop/indivtoi.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}