Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jennyonjourney/df3af2928265799f3e9cc6cb8b892de3 to your computer and use it in GitHub Desktop.
Save jennyonjourney/df3af2928265799f3e9cc6cb8b892de3 to your computer and use it in GitHub Desktop.
bike-sharing-demand (kaggle competition)
import numpy as np
import pandas as pd
# Load the training set a single time, parsing "datetime" into timestamps so
# the .dt accessor can be used for feature extraction further down. (The file
# used to be read twice; the first, unparsed copy was immediately discarded.)
train = pd.read_csv("data/train (bike).csv", parse_dates=["datetime"])
print(train.shape)
train.head()
## Data processing
# Break the parsed timestamp into one column per calendar/clock component.
# The column names match the pandas .dt attribute names, so a single loop
# covers all five.
timestamp_parts = train["datetime"].dt
for part_name in ("year", "month", "day", "hour", "minute"):
    train[part_name] = getattr(timestamp_parts, part_name)
print(train.shape)
train[["datetime", "year", "month", "day"]].head()
# One boolean indicator column per season code: 1 -> season_spring,
# 2 -> season_summer, 3 -> season_fall, 4 -> season_winter.
for season_code, season_label in enumerate(("spring", "summer", "fall", "winter"), start=1):
    train["season_" + season_label] = train["season"] == season_code
train[["season", "season_spring", "season_summer", "season_fall", "season_winter"]].head()
# windspeed_fillin is windspeed with every exact-zero reading replaced by the
# mean of the windspeed column; non-zero readings are kept unchanged.
# NOTE(review): the mean is computed over all rows, zeros included -- confirm
# that this is the intended fill value.
train["windspeed_fillin"] = train["windspeed"].mask(
    train["windspeed"] == 0,
    train["windspeed"].mean(),
)
train.loc[:, ["windspeed", "windspeed_fillin"]]
# A row is flagged as a weekend when it is neither a working day nor a
# holiday, i.e. both flags are 0. The parentheses spell out the precedence
# the expression already had (sum first, then compare with 0).
train["weekend"] = (train["workingday"] + train["holiday"]) == 0
# The season_* indicator columns are already created earlier in the script
# from the same "season" column, so they are not recomputed here.
print(train.shape)
train[["season", "season_spring", "season_summer", "season_fall", "season_winter"]].head()
# Load the test set with "datetime" parsed, exactly like the training set.
test = pd.read_csv("data/test (bike).csv", parse_dates=["datetime"])
print(test.shape)

# Mirror the training-set feature engineering on the test set. Without these
# columns, selecting feature_names from `test` further down raises KeyError
# ("year", "month", "hour", "weekend" and "windspeed_fillin" do not exist in
# the raw file).
test["year"] = test["datetime"].dt.year
test["month"] = test["datetime"].dt.month
test["day"] = test["datetime"].dt.day
test["hour"] = test["datetime"].dt.hour
test["minute"] = test["datetime"].dt.minute

# Same definition as for train: neither a working day nor a holiday.
test["weekend"] = (test["workingday"] + test["holiday"]) == 0

# Replace zero wind readings with the *training* windspeed mean, so the test
# feature uses the same fill value the model saw during fitting and no
# statistic is derived from the test data itself.
test["windspeed_fillin"] = test["windspeed"].mask(
    test["windspeed"] == 0,
    train["windspeed"].mean(),
)
test.head()
## Train
# Input columns for the model: raw columns plus the engineered ones.
feature_names = [
    "year",
    "month",
    "hour",
    "holiday",
    "season",
    "weather",
    "weekend",
    "workingday",
    "temp",
    "atemp",
    "humidity",
    "windspeed",
    "windspeed_fillin",
]
feature_names

# Feature matrix for fitting.
X_train = train.loc[:, feature_names]
print(X_train.shape)
X_train.head()

# Feature matrix for prediction; it must expose the same columns as X_train.
X_test = test.loc[:, feature_names]
print(X_test.shape)
X_test.head()

# Regression target: the "count" column of the training data.
label_name = "count"
y_train = train.loc[:, label_name]
print(y_train.shape)
y_train.head()
## Use a random forest regressor (an ensemble of decision trees)
from sklearn.ensemble import RandomForestRegressor
# A fixed random_state keeps the forest's randomness reproducible across runs.
model = RandomForestRegressor(random_state=777)
model
## model verification
X_train
from sklearn.model_selection import cross_val_score

# 20-fold cross-validation scored with mean absolute error. scikit-learn
# reports this metric negated ("neg_..."), so the sign is flipped back to get
# a positive error value.
fold_scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=20,
    scoring="neg_mean_absolute_error",
)
score = -fold_scores.mean()
print("Score = {0:.5f}".format(score))

# Fit on the full training set, then predict for the test features.
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(predictions.shape)
predictions
# Start from the sample submission file and overwrite its "count" column with
# the model's predictions.
# NOTE(review): this assumes the sample file's rows are in the same order as
# the test set -- confirm.
submission = pd.read_csv("data/sampleSubmission.csv")
print(submission.shape)
submission.head()
submission = submission.assign(count=predictions)
print(submission.shape)
submission.head()
# Write the result without the index column.
submission.to_csv("data/baseline-script(2).csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment