Skip to content

Instantly share code, notes, and snippets.

@bendominguez0111
Created April 18, 2020 16:52
Show Gist options
  • Save bendominguez0111/2cd04a721eee67071e4a25ea5378f52f to your computer and use it in GitHub Desktop.
Save bendominguez0111/2cd04a721eee67071e4a25ea5378f52f to your computer and use it in GitHub Desktop.
ml_post_one.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"colab": {
"name": "ml_post_one.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/fantasydatapros/2cd04a721eee67071e4a25ea5378f52f/ml_post_one.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "UqI0wH4yH48v",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.linear_model import LinearRegression\n",
"from matplotlib import pyplot as plt\n",
"import warnings\n",
"import numpy as np\n",
"\n",
"warnings.filterwarnings('ignore')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "kls58FVBH49B",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 313
},
"outputId": "13ac4efd-3e9c-4054-faa0-f652ca70b499"
},
"source": [
"# Load the 2019 season stats and drop the columns this analysis does not use\n",
"df = pd.read_csv('2019.csv').drop(\n",
"    columns=['Rk', '2PM', '2PP', 'DKPt', 'FDPt', 'VBD', 'PosRank', 'OvRank', 'PPR', 'Fmb', 'GS']\n",
")\n",
"\n",
"# Fix name formatting: keep only the text before the first '*' or backslash\n",
"df['Player'] = df['Player'].apply(lambda name: name.split('*')[0].split('\\\\')[0])\n",
"\n",
"# Rename the stat columns to descriptive names\n",
"df = df.rename(columns={\n",
"    'TD': 'PassingTD',\n",
"    'TD.1': 'RushingTD',\n",
"    'TD.2': 'ReceivingTD',\n",
"    'TD.3': 'TotalTD',\n",
"    'Yds': 'PassingYDs',\n",
"    'Yds.1': 'RushingYDs',\n",
"    'Yds.2': 'ReceivingYDs',\n",
"    'Att': 'PassingAtt',\n",
"    'Att.1': 'RushingAtt',\n",
"})\n",
"\n",
"# Separate DataFrames by position\n",
"rb_df = df[df['FantPos'].eq('RB')]\n",
"qb_df = df[df['FantPos'].eq('QB')]\n",
"wr_df = df[df['FantPos'].eq('WR')]\n",
"te_df = df[df['FantPos'].eq('TE')]\n",
"\n",
"rb_df.head()"
],
"execution_count": 28,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Player</th>\n",
" <th>Tm</th>\n",
" <th>FantPos</th>\n",
" <th>Age</th>\n",
" <th>G</th>\n",
" <th>Cmp</th>\n",
" <th>PassingAtt</th>\n",
" <th>PassingYDs</th>\n",
" <th>PassingTD</th>\n",
" <th>Int</th>\n",
" <th>RushingAtt</th>\n",
" <th>RushingYDs</th>\n",
" <th>Y/A</th>\n",
" <th>RushingTD</th>\n",
" <th>Tgt</th>\n",
" <th>Rec</th>\n",
" <th>ReceivingYDs</th>\n",
" <th>Y/R</th>\n",
" <th>ReceivingTD</th>\n",
" <th>FL</th>\n",
" <th>TotalTD</th>\n",
" <th>FantPt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Christian McCaffrey</td>\n",
" <td>CAR</td>\n",
" <td>RB</td>\n",
" <td>23</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>287</td>\n",
" <td>1387</td>\n",
" <td>4.83</td>\n",
" <td>15</td>\n",
" <td>142</td>\n",
" <td>116</td>\n",
" <td>1005</td>\n",
" <td>8.66</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>355.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Derrick Henry</td>\n",
" <td>TEN</td>\n",
" <td>RB</td>\n",
" <td>25</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>303</td>\n",
" <td>1540</td>\n",
" <td>5.08</td>\n",
" <td>16</td>\n",
" <td>24</td>\n",
" <td>18</td>\n",
" <td>206</td>\n",
" <td>11.44</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>18</td>\n",
" <td>277.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Aaron Jones</td>\n",
" <td>GNB</td>\n",
" <td>RB</td>\n",
" <td>25</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>236</td>\n",
" <td>1084</td>\n",
" <td>4.59</td>\n",
" <td>16</td>\n",
" <td>68</td>\n",
" <td>49</td>\n",
" <td>474</td>\n",
" <td>9.67</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>19</td>\n",
" <td>266.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Ezekiel Elliott</td>\n",
" <td>DAL</td>\n",
" <td>RB</td>\n",
" <td>24</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>301</td>\n",
" <td>1357</td>\n",
" <td>4.51</td>\n",
" <td>12</td>\n",
" <td>71</td>\n",
" <td>54</td>\n",
" <td>420</td>\n",
" <td>7.78</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>14</td>\n",
" <td>258.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Dalvin Cook</td>\n",
" <td>MIN</td>\n",
" <td>RB</td>\n",
" <td>24</td>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>250</td>\n",
" <td>1135</td>\n",
" <td>4.54</td>\n",
" <td>13</td>\n",
" <td>63</td>\n",
" <td>53</td>\n",
" <td>519</td>\n",
" <td>9.79</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>13</td>\n",
" <td>239.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Player Tm FantPos Age ... ReceivingTD FL TotalTD FantPt\n",
"0 Christian McCaffrey CAR RB 23 ... 4 0 19 355.0\n",
"2 Derrick Henry TEN RB 25 ... 2 3 18 277.0\n",
"3 Aaron Jones GNB RB 25 ... 3 2 19 266.0\n",
"4 Ezekiel Elliott DAL RB 24 ... 2 2 14 258.0\n",
"5 Dalvin Cook MIN RB 24 ... 0 2 13 239.0\n",
"\n",
"[5 rows x 22 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "L0dLUr7lH49P",
"colab_type": "code",
"colab": {}
},
"source": [
"rushing_columns = ['RushingAtt', 'RushingYDs', 'Y/A', 'RushingTD']\n",
"receiving_columns = ['Tgt', 'Rec', 'ReceivingYDs', 'Y/R', 'ReceivingTD']\n",
"\n",
"def transform_columns(df, new_column_list):\n",
"    \"\"\"Return a copy of df limited to the base columns, the given stat columns, and 'FL'.\n",
"\n",
"    The result's columns are ordered: Player, Tm, Age, G, FantPt, then the\n",
"    entries of new_column_list, then FL. An explicit copy is returned so that\n",
"    callers can add columns to the result without pandas raising\n",
"    SettingWithCopyWarning, and without any risk of touching the input frame.\n",
"    \"\"\"\n",
"    selected = ['Player', 'Tm', 'Age', 'G', 'FantPt'] + list(new_column_list) + ['FL']\n",
"    return df[selected].copy()\n",
"\n",
"rb_df = transform_columns(rb_df, rushing_columns+receiving_columns)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "B1ZSF_1SH49Z",
"colab_type": "code",
"colab": {}
},
"source": [
"# Keep running backs with more than 20 rushing attempts, then add the derived\n",
"# columns with .assign(), which returns a new DataFrame rather than writing\n",
"# into a slice of another frame (the cause of SettingWithCopyWarning).\n",
"rb_df = rb_df[rb_df['RushingAtt'] > 20].assign(**{\n",
"    # 0.1 points per rushing/receiving yard, 6 per touchdown, -2 per fumble lost\n",
"    'FantasyPoints': lambda d: (\n",
"        d['RushingYDs']*0.1 + d['RushingTD']*6\n",
"        + d['ReceivingYDs']*0.1 + d['ReceivingTD']*6\n",
"        - d['FL']*2\n",
"    ),\n",
"    # Rushing attempts plus targets\n",
"    'Total Usage': lambda d: d['RushingAtt'] + d['Tgt'],\n",
"})"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EiQe9bLOH49i",
"colab_type": "code",
"colab": {}
},
"source": [
"# Single feature (total usage) and target (fantasy points), each as an (n, 1) column array\n",
"x = rb_df[['Total Usage']].values\n",
"y = rb_df[['FantasyPoints']].values\n",
"\n",
"# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    x, y, test_size=0.2, random_state=0\n",
")\n",
"\n",
"# Fit the linear model on the training rows, then predict the held-out rows\n",
"regressor = LinearRegression().fit(x_train, y_train)\n",
"y_pred = regressor.predict(x_test)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "_g9JvUesH49r",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"outputId": "95259503-f0df-4c56-d956-ac045d7fa13d"
},
"source": [
"# Line up the held-out actual values with the model's predictions.\n",
"# NOTE: this rebinds `df`, replacing the stats table loaded from the CSV earlier.\n",
"df = pd.DataFrame({\n",
"    'Actual': y_test.ravel(),\n",
"    'Predicted': y_pred.ravel(),\n",
"})\n",
"df.head()"
],
"execution_count": 32,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual</th>\n",
" <th>Predicted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>265.8</td>\n",
" <td>199.907703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>111.1</td>\n",
" <td>95.167927</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>143.2</td>\n",
" <td>171.037124</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>165.3</td>\n",
" <td>173.051350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>171.6</td>\n",
" <td>176.408394</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Actual Predicted\n",
"0 265.8 199.907703\n",
"1 111.1 95.167927\n",
"2 143.2 171.037124\n",
"3 165.3 173.051350\n",
"4 171.6 176.408394"
]
},
"metadata": {
"tags": []
},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "tlLXzvu_H49z",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
},
"outputId": "c300b1f2-831d-4340-8c79-e371d2632aac"
},
"source": [
"# Held-out points in gray, with the fitted regression line drawn in red\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(x_test, y_test, color='gray')\n",
"ax.plot(x_test, y_pred, color='red', linewidth=2)\n",
"ax.set_xlabel('Usage')\n",
"ax.set_ylabel('Total FantasyPoints')\n",
"plt.show()"
],
"execution_count": 37,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hAtBPkpMH494",
"colab_type": "code",
"colab": {}
},
"source": [
"#This is for part two of Machine Learning for Fantasy Football\n",
"def rmse(actual, predicted):\n",
"    \"\"\"Calculates RMSE (root mean squared error) from actual and predicted values.\n",
"\n",
"    Both inputs are converted to float arrays and compared position by position.\n",
"    If they hold the same number of values but have different shapes (for\n",
"    example (n, 1) and (n,)), both are flattened first; otherwise NumPy would\n",
"    broadcast them to an (n, n) grid and silently return the wrong number.\n",
"\n",
"    Returns the square root of the mean squared difference.\n",
"    \"\"\"\n",
"    actual = np.asarray(actual, dtype=float)\n",
"    predicted = np.asarray(predicted, dtype=float)\n",
"    if actual.shape != predicted.shape and actual.size == predicted.size:\n",
"        actual, predicted = actual.ravel(), predicted.ravel()\n",
"    return np.sqrt(np.mean((actual - predicted)**2))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Sh-bjCg0H499",
"colab_type": "code",
"colab": {},
"outputId": "2400b06d-08c0-40a0-9181-91f0fdff8a35"
},
"source": [
"rmse(y_test,y_pred)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"26.049902859860683"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ex709xluH4-H",
"colab_type": "text"
},
"source": [
"Root mean squared error (RMSE) is the standard deviation of the residuals (the errors in our predictions). Essentially, this means our predictions are typically within about 26 fantasy points of the actual value. If this doesn't make sense, just remember: the lower this number, the better. It is important to look at error when you build models so you can evaluate how well your predictions are doing."
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment