Skip to content

Instantly share code, notes, and snippets.

@Lougarou
Created June 30, 2017 09:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lougarou/101c39a0a60ab02c16ee9d405d8c457f to your computer and use it in GitHub Desktop.
Save Lougarou/101c39a0a60ab02c16ee9d405d8c457f to your computer and use it in GitHub Desktop.
# Train a random forest on Casualties_2015.csv, report its accuracy, and print
# a predicted severity label for every row of test.csv.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score, accuracy_score

try:
    # Current scikit-learn releases expose these helpers in model_selection.
    from sklearn.model_selection import cross_val_score, train_test_split
except ImportError:
    # Fall back to the legacy module name for very old installations.
    from sklearn.cross_validation import cross_val_score, train_test_split

try:
    # The exploratory plot below refers to `sns`, which was never imported.
    # Keep it optional so the modelling steps still run without seaborn.
    import seaborn as sns
except ImportError:
    sns = None

casualties = pd.read_csv("Casualties_2015.csv", index_col=0)
print(casualties.describe())

# Exploratory pair plot on a random subset of rows. The sample size is clamped
# to the number of rows because DataFrame.sample raises when asked for more
# rows than exist (without replacement).
sample = casualties[['Sex_of_Casualty', 'Age_of_Casualty', 'Casualty_Severity']].sample(
    min(1000, len(casualties))
)
if sns is not None:
    sns.set(style="ticks")
    # NOTE: the figure is built but never displayed; the original show() call
    # was commented out.
    sns.pairplot(sample, hue="Casualty_Severity")

# Columns used as model inputs; 'Casualty_Severity' is the prediction target.
features = [
    'Sex_of_Casualty',
    'Age_Band_of_Casualty',
    'Pedestrian_Location',
    'Pedestrian_Movement',
    'Car_Passenger',
    'Bus_or_Coach_Passenger',
    'Pedestrian_Road_Maintenance_Worker',
]

# Hold out 25% of the rows to measure accuracy on data the model has not seen.
data_x_train, data_x_test, data_y_train, data_y_test = train_test_split(
    casualties[features],
    casualties['Casualty_Severity'],
    test_size=0.25,
    random_state=42,
)

clf = RandomForestClassifier(n_estimators=16)
clf.fit(data_x_train, data_y_train)

# predict() returns class labels (not probabilities), which is what
# accuracy_score expects.
predictions = clf.predict(data_x_test)
score = accuracy_score(data_y_test, predictions)
print("Single Score: %f" % score)

# Importances are printed in the same order as `features`.
print("Features & Importance:")
print(clf.feature_importances_)

# 5-fold cross-validation over the full data set; cross_val_score fits its own
# copies of the estimator, so `clf` keeps the fit from the training split.
scores = cross_val_score(clf, casualties[features], casualties['Casualty_Severity'], cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

# Predict a severity for each row of the separate test file and print its label.
test = pd.read_csv("test.csv", index_col=False)
severities = clf.predict(test[features])
severity_verbose = {1: "Fatal", 2: "Serious", 3: "Slight"}
for severity in severities:
    print(severity_verbose.get(severity))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment