Skip to content

Instantly share code, notes, and snippets.

@tobitech
Last active June 27, 2020 17:06
Show Gist options
  • Save tobitech/411d6b63dde22b3932bdc92ea58c4581 to your computer and use it in GitHub Desktop.
Save tobitech/411d6b63dde22b3932bdc92ea58c4581 to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# Load the data set; every column except 'genre' is an input feature and
# 'genre' is the target the model learns to predict.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']
# Train a decision tree on the whole data set.
model = DecisionTreeClassifier()
model.fit(X, y)
# Ask for two predictions at once. The samples are wrapped in a DataFrame that
# reuses the training columns: a model fitted on a DataFrame remembers its
# feature names, and recent scikit-learn releases warn when it is later handed
# a bare nested list without them.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Read the CSV into a DataFrame, then separate the target column ('genre')
# from the remaining input columns.
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop('genre', axis=1)
# Hold back 20% of the rows for testing. The result unpacks in the order:
# training inputs, testing inputs, training outputs, testing outputs.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(X, y, test_size=0.2)
# Fit on the training portion only, so the test rows stay unseen by the model.
model = DecisionTreeClassifier().fit(X_train, y_train)
# Predict the genre for every held-back test row.
predictions = model.predict(X_test)
# Compare the expected outputs with the predictions; the result is the
# fraction (between 0 and 1) of test rows that were predicted correctly.
# No random_state is passed to the split, so the score can vary between runs.
score = accuracy_score(y_true=y_test, y_pred=predictions)
score  # bare expression: shown as the cell output when run in a notebook
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib
# Load the data set: 'genre' is the target, all other columns are inputs.
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop('genre', axis=1)
# Train on the full data set (fit hands back the fitted estimator itself).
model = DecisionTreeClassifier().fit(X, y)
# Write the trained model to disk so it can be reloaded later without
# having to train it again.
joblib.dump(model, 'music-recommender.joblib')
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib
# Load the already-trained model that was saved to disk earlier.
# NOTE: joblib files are pickle-based, so only load files you created or trust.
model = joblib.load('music-recommender.joblib')
# Build the sample as a DataFrame carrying the feature names the model recorded
# during training (when it has any). A model fitted on a DataFrame warns in
# recent scikit-learn releases if it is handed a bare nested list instead.
# Older releases do not record the names; columns=None then yields default
# integer column labels, which predict() accepts.
feature_names = getattr(model, 'feature_names_in_', None)
sample = pd.DataFrame([[21, 1]], columns=feature_names)
# ask it to make a prediction for the single sample
predictions = model.predict(sample)
predictions  # bare expression: shown as the cell output when run in a notebook
# Visualize model decision tree in a graph
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
# Load the data set and train the decision tree on all of it.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']
model = DecisionTreeClassifier()
model.fit(X, y)
# Export the fitted tree in .dot format (the Graphviz graph description
# language) so it can be rendered as a diagram.
# Feature and class names are taken from the training data and the fitted
# model rather than typed by hand, so the node labels always line up with the
# column order of X and with the class order the model uses internally.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=list(X.columns),
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,
    filled=True,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment