tobitech/music1.txt

## music1.txt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Load the data set and separate the input columns from the target
# ('genre') column.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Train a decision tree on the complete data set.
model = DecisionTreeClassifier()
model.fit(X, y)

# The model was fitted on a DataFrame, so the samples to classify are wrapped
# in a DataFrame carrying the same column names. Passing a bare list of lists
# makes scikit-learn (1.0+) warn that the input has no valid feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)

## music2.txt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the CSV into a DataFrame (a table of rows and columns, much like a
# spreadsheet).
music_data = pd.read_csv('music.csv')
y = music_data['genre']  # indexing with [] selects a single column: the expected outputs
X = music_data.drop(columns=['genre'])  # every remaining column is an input

# Hold back 20% of the rows for testing. train_test_split returns the four
# pieces in one sequence, which is unpacked straight into separate names.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train on the training portion only, so that the held-back rows stay unseen
# by the model. fit() takes the input set and the output set and returns the
# fitted estimator.
model = DecisionTreeClassifier().fit(X_train, y_train)

# Predict a genre for every row of the held-back input set.
predictions = model.predict(X_test)

# Accuracy compares the expected outputs of the test set with the predictions
# and yields a fraction between 0 and 1 (1.0 means every prediction matched).
# The split above is random, so the value can change from run to run.
score = accuracy_score(y_true=y_test, y_pred=predictions)
score

## music3.txt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# Load the data set: 'genre' is the target, all other columns are the inputs.
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop(columns=['genre'])

# Fit a decision tree on the full data set (fit() returns the fitted estimator).
model = DecisionTreeClassifier().fit(X, y)

# Write the trained model to disk so it can be reloaded later without retraining.
joblib.dump(value=model, filename='music-recommender.joblib')

## music4.txt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# Load the previously trained model from disk.
# NOTE: joblib.load() unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
model = joblib.load('music-recommender.joblib')

# Ask the model for a prediction. If the model recorded the column names it
# was trained with, the sample is given the same names so scikit-learn does
# not warn about missing feature names; otherwise pandas falls back to
# default positional column labels.
feature_names = getattr(model, 'feature_names_in_', None)
sample = pd.DataFrame([[21, 1]], columns=feature_names)
predictions = model.predict(sample)
predictions

## music5.txt
# Visualize model decision tree in a graph

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Load the data set and separate the input columns from the target
# ('genre') column.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Train a decision tree on the complete data set.
model = DecisionTreeClassifier()
model.fit(X, y)

# Export the fitted tree in .dot format (the Graphviz graph description
# language). Feature names are taken from the training columns and class
# names from the fitted model, so the labels in the graph always match the
# data the tree was actually trained on instead of a hard-coded list.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=list(X.columns),
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,
    filled=True,
)
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier

	# Load the data set and separate the input columns from the target
	# ('genre') column.
	music_data = pd.read_csv('music.csv')
	X = music_data.drop(columns=['genre'])
	y = music_data['genre']

	# Train a decision tree on the complete data set.
	model = DecisionTreeClassifier()
	model.fit(X, y)

	# The model was fitted on a DataFrame, so the samples to classify are wrapped
	# in a DataFrame carrying the same column names. Passing a bare list of lists
	# makes scikit-learn (1.0+) warn that the input has no valid feature names.
	samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
	predictions = model.predict(samples)
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score

	# Read the CSV into a DataFrame (a table of rows and columns, much like a
	# spreadsheet).
	music_data = pd.read_csv('music.csv')
	y = music_data['genre']  # indexing with [] selects a single column: the expected outputs
	X = music_data.drop(columns=['genre'])  # every remaining column is an input

	# Hold back 20% of the rows for testing. train_test_split returns the four
	# pieces in one sequence, which is unpacked straight into separate names.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

	# Train on the training portion only, so that the held-back rows stay unseen
	# by the model. fit() takes the input set and the output set and returns the
	# fitted estimator.
	model = DecisionTreeClassifier().fit(X_train, y_train)

	# Predict a genre for every row of the held-back input set.
	predictions = model.predict(X_test)

	# Accuracy compares the expected outputs of the test set with the predictions
	# and yields a fraction between 0 and 1 (1.0 means every prediction matched).
	# The split above is random, so the value can change from run to run.
	score = accuracy_score(y_true=y_test, y_pred=predictions)
	score
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	import joblib

	# Load the data set: 'genre' is the target, all other columns are the inputs.
	music_data = pd.read_csv('music.csv')
	y = music_data['genre']
	X = music_data.drop(columns=['genre'])

	# Fit a decision tree on the full data set (fit() returns the fitted estimator).
	model = DecisionTreeClassifier().fit(X, y)

	# Write the trained model to disk so it can be reloaded later without retraining.
	joblib.dump(value=model, filename='music-recommender.joblib')
	# Visualize model decision tree in a graph

	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn import tree

	# Load the data set and separate the input columns from the target
	# ('genre') column.
	music_data = pd.read_csv('music.csv')
	X = music_data.drop(columns=['genre'])
	y = music_data['genre']

	# Train a decision tree on the complete data set.
	model = DecisionTreeClassifier()
	model.fit(X, y)

	# Export the fitted tree in .dot format (the Graphviz graph description
	# language). Feature names are taken from the training columns and class
	# names from the fitted model, so the labels in the graph always match the
	# data the tree was actually trained on instead of a hard-coded list.
	tree.export_graphviz(
		model,
		out_file='music-recommender.dot',
		feature_names=list(X.columns),
		class_names=[str(label) for label in model.classes_],
		label='all',
		rounded=True,
		filled=True,
	)