Skip to content

Instantly share code, notes, and snippets.

@abFunctions
Created June 25, 2021 01:13
Show Gist options
  • Save abFunctions/276e145cfb704c8136e9a829f342408d to your computer and use it in GitHub Desktop.
Save abFunctions/276e145cfb704c8136e9a829f342408d to your computer and use it in GitHub Desktop.
ml_01 - music project
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" <td>HipHop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" <td>HipHop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>HipHop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>26</td>\n",
" <td>1</td>\n",
" <td>Jazz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>Jazz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>Jazz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>31</td>\n",
" <td>1</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>Dance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>Dance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>Dance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" <td>Acoustic</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>27</td>\n",
" <td>0</td>\n",
" <td>Acoustic</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>Acoustic</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>31</td>\n",
" <td>0</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>Classical</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age gender genre\n",
"0 20 1 HipHop\n",
"1 23 1 HipHop\n",
"2 25 1 HipHop\n",
"3 26 1 Jazz\n",
"4 29 1 Jazz\n",
"5 30 1 Jazz\n",
"6 31 1 Classical\n",
"7 33 1 Classical\n",
"8 37 1 Classical\n",
"9 20 0 Dance\n",
"10 21 0 Dance\n",
"11 25 0 Dance\n",
"12 26 0 Acoustic\n",
"13 27 0 Acoustic\n",
"14 30 0 Acoustic\n",
"15 31 0 Classical\n",
"16 34 0 Classical\n",
"17 35 0 Classical"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"music_data = pd.read_csv('music.csv')\n",
"music_data"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>26</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>31</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>26</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>27</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age gender\n",
"0 20 1\n",
"1 23 1\n",
"2 25 1\n",
"3 26 1\n",
"4 29 1\n",
"5 30 1\n",
"6 31 1\n",
"7 33 1\n",
"8 37 1\n",
"9 20 0\n",
"10 21 0\n",
"11 25 0\n",
"12 26 0\n",
"13 27 0\n",
"14 30 0\n",
"15 31 0\n",
"16 34 0\n",
"17 35 0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"music_data = pd.read_csv('music.csv')\n",
"\n",
"# split data into two sets (training and test)\n",
"X = music_data.drop(columns=['genre']) # X (capital X) is used to represent the training set\n",
"y = music_data['genre'] # y is used for the test/output data set\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 HipHop\n",
"1 HipHop\n",
"2 HipHop\n",
"3 Jazz\n",
"4 Jazz\n",
"5 Jazz\n",
"6 Classical\n",
"7 Classical\n",
"8 Classical\n",
"9 Dance\n",
"10 Dance\n",
"11 Dance\n",
"12 Acoustic\n",
"13 Acoustic\n",
"14 Acoustic\n",
"15 Classical\n",
"16 Classical\n",
"17 Classical\n",
"Name: genre, dtype: object"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Next step is to build a model using an algorithm"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['HipHop', 'Dance'], dtype=object)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.tree import DecisionTreeClassifier # this class implements the decision tree classifier\n",
"\n",
"music_data = pd.read_csv('music.csv')\n",
"X = music_data.drop(columns=['genre']) \n",
"y = music_data['genre'] \n",
"\n",
"model = DecisionTreeClassifier() # Create new object ('model') and set it to an instance of \n",
"\n",
"# then we have to train the model to learn patterns in the data\n",
"\n",
"model.fit(X,y) # this method takes two data sets (INPUT_SET, OUTPUT_SET) = (X,y)\n",
"\n",
"\n",
"# then we have to ask the model to make a prediction\n",
"# use model.predict method; pass new input set (like a new record in the table)\n",
"predictions = model.predict([[21,1],[22,0]]) # this method takes a 2D array\n",
"predictions"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['HipHop', 'Dance'], dtype=object)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# USE FOR PREDICTING VALUES\n",
"\n",
"import pandas as pd\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"music_data = pd.read_csv('music.csv')\n",
"X = music_data.drop(columns=['genre']) \n",
"y = music_data['genre'] \n",
"\n",
"model = DecisionTreeClassifier()\n",
"model.fit(X,y)\n",
"\n",
"predictions = model.predict([[21,1],[22,0]])\n",
"predictions"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# MEASURE THE ACCURACY OF THE MODEL \n",
"# GENERAL RULE OF THUMB (allocate 70-80% of the data set for training and the other 20% for testing)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.tree import DecisionTreeClassifier # this class implements the decision tree classifier\n",
"\n",
"music_data = pd.read_csv('music.csv')\n",
"X = music_data.drop(columns=['genre']) \n",
"y = music_data['genre'] \n",
"\n",
"model = DecisionTreeClassifier() # Create new object ('model') and set it to an instance of \n",
"\n",
"# then we have to train the model to learn patterns in the data\n",
"\n",
"model.fit(X,y) # this method takes two data sets (INPUT_SET, OUTPUT_SET) = (X,y)\n",
"\n",
"\n",
"# then we have to ask the model to make a prediction\n",
"# use model.predict method; pass new input set (like a new record in the table)\n",
"predictions = model.predict([[21,1],[22,0]]) # this method takes a 2D array\n",
"predictions\n"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# USE TO VALIDATE PREDICTION ACCURACY\n",
"\n",
"import pandas as pd\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"music_data = pd.read_csv('music.csv')\n",
"X = music_data.drop(columns=['genre']) \n",
"y = music_data['genre'] \n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)\n",
"\n",
"model = DecisionTreeClassifier() \n",
"model.fit(X_train,y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"score = accuracy_score(y_test,predictions)\n",
"score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# STORE MODEL - so you don't have to run it each time once it's trained and use memory\n",
"\n",
"# Import joblib as jb"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['music-recommender.joblib']"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# STORE MODEL\n",
"\n",
"import pandas as pd\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"import joblib as jb\n",
"\n",
"music_data = pd.read_csv('music.csv')\n",
"X = music_data.drop(columns=['genre']) \n",
"y = music_data['genre'] \n",
"\n",
"model = DecisionTreeClassifier() # create a model\n",
"model.fit(X,y) # training the model\n",
"# after training the model, export it to a file\n",
"\n",
"jb.dump(model,'music-recommender.joblib') # use .dump and give it two arguements: (model,'filename') # (the model, and a name for the file in which to store the model)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment