dmorgan-github/f1.py

## f1.py
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

# Demo: fit a random forest on synthetic data, then print a confusion
# matrix and the per-class F1 scores for a held-out slice of the rows.
target = 'e'
features = ['a', 'b', 'c', 'd']
cols = len(features)
rows = 1000

# Feature columns hold random integers in [1, 5); the target is a random
# 0/1 label drawn independently of the features, so there is no real
# signal for the classifier to learn.
df = pd.DataFrame(np.random.randint(1, 5, (rows, cols)), columns=features)
df[target] = np.random.randint(2, size=rows)

# split the dataset: the first 75% of the rows train, the remainder tests.
# Positional .iloc slicing replaces the DataFrame.ix indexer, which has been
# removed from pandas; on the default integer index it selects the same rows.
trainLen = int(rows * 0.75)
testLen = rows - trainLen  # remainder, so int() truncation never drops rows
training = df.iloc[:trainLen]
test = df.iloc[trainLen:trainLen + testLen]

X_train = training[features]
y_train = training[target]
X_test = test[features]
y_test = test[target]

# No random_state is set, so the fitted forest differs from run to run.
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
preds = clf.predict(X_test)

# show confusion matrix (rows: actual labels, columns: predicted labels)
ct = pd.crosstab(y_test, preds, rownames=['actual'], colnames=['preds'])
# print() with a single argument behaves the same on Python 2 and Python 3
print(ct)

# show f1 score; average=None yields one score per class, not one average
f1 = f1_score(y_test, preds, average=None)
print(f1)
	import pandas as pd
	import numpy as np
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import f1_score

	# Demo: fit a random forest on synthetic data, then print a confusion
	# matrix and the per-class F1 scores for a held-out slice of the rows.
	target = 'e'
	features = ['a', 'b', 'c', 'd']
	cols = len(features)
	rows = 1000

	# Feature columns hold random integers in [1, 5); the target is a random
	# 0/1 label drawn independently of the features, so there is no real
	# signal for the classifier to learn.
	df = pd.DataFrame(np.random.randint(1, 5, (rows, cols)), columns=features)
	df[target] = np.random.randint(2, size=rows)

	# split the dataset: the first 75% of the rows train, the remainder tests.
	# Positional .iloc slicing replaces the DataFrame.ix indexer, which has been
	# removed from pandas; on the default integer index it selects the same rows.
	trainLen = int(rows * 0.75)
	testLen = rows - trainLen  # remainder, so int() truncation never drops rows
	training = df.iloc[:trainLen]
	test = df.iloc[trainLen:trainLen + testLen]

	X_train = training[features]
	y_train = training[target]
	X_test = test[features]
	y_test = test[target]

	# No random_state is set, so the fitted forest differs from run to run.
	clf = RandomForestClassifier()
	clf.fit(X_train, y_train)
	preds = clf.predict(X_test)

	# show confusion matrix (rows: actual labels, columns: predicted labels)
	ct = pd.crosstab(y_test, preds, rownames=['actual'], colnames=['preds'])
	# print() with a single argument behaves the same on Python 2 and Python 3
	print(ct)

	# show f1 score; average=None yields one score per class, not one average
	f1 = f1_score(y_test, preds, average=None)
	print(f1)