Skip to content

Instantly share code, notes, and snippets.

@johanvanderkuijl
Created February 5, 2019 20:58
Show Gist options
  • Save johanvanderkuijl/fc0cad135559c40dd88e681a160becb0 to your computer and use it in GitHub Desktop.
Save johanvanderkuijl/fc0cad135559c40dd88e681a160becb0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Train a classifier on the r2d3 "part 1" CSV and print its accuracy on a
# held-out test split.

# One seed shared by the split and the model, so repeated runs print the
# same score instead of a different one every time.
RANDOM_STATE = 0
# Fraction of the rows held out for evaluation.
TEST_SIZE = 0.1

url = "https://raw.githubusercontent.com/jadeyee/r2d3-part-1-data/master/part_1_data.csv"
# header=2: the column names are read from the third line of the file
# (0-indexed row 2); the data starts on the line after it.
df = pd.read_csv(url, header=2)

# X = the data: every column except the first.
X = df.iloc[:, 1:].to_numpy()
# Y = the labels: the first column.
Y = df.iloc[:, 0].to_numpy()

# Split the set. Seeding the shuffle makes the train/test partition (and
# therefore the reported score) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Alternative models to experiment with; swap one in for the random forest.
#clf = GaussianNB()
#clf = KNeighborsClassifier(n_neighbors=3)
clf = RandomForestClassifier(
    n_estimators=100, max_depth=2, random_state=RANDOM_STATE
)
clf.fit(X_train, y_train)

# For scikit-learn classifiers, score() is the mean accuracy on the given
# test data and labels.
score = clf.score(X_test, y_test)
print(score)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment