Fast.ai notes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Lib versions
#Python Version: 3.6.7
#Pandas Version: 0.24.2
#Numpy Version: 1.16.2
#FastAI Version: 1.0.50.post1
#PyTorch Version: 1.0.1.post2
#GPU Acceleration
#GPU: NVIDIA K80
#CUDA V10.0.130
##############################
# Partition the columns of `df` into continuous and categorical feature lists.
#
# Expects `df` (with a "Date" column in first position) and `dep_var` (the
# label column passed to label_from_df further down) to be defined already.

# Snapshot the column names before the date column is expanded.
original_columns = df.columns.values.tolist()
original_column_set = set(original_columns)

# dep_var may be a single column or a list/tuple of columns; normalise to a set.
target_names = set(dep_var) if isinstance(dep_var, (list, tuple)) else {dep_var}

# (1) Continuous features: every original column except the first one ("Date")
#     and except the target. Leaving the target in the feature list would feed
#     the label to the model as an input.
cont_names = [name for name in original_columns[1:] if name not in target_names]

# (2) Convert "Date" into date-part columns, as suggested by the docs
#     (add_datepart modifies df in place).
add_datepart(df, 'Date')
column_names = df.columns.values.tolist()

# Categorical features: exactly the columns add_datepart created, minus the
# `Elapsed` column it appends last. Deriving them by name replaces the former
# positional slice, which silently depended on the number of original columns.
cat_names = [
    name
    for name in column_names
    if name not in original_column_set
    and name != 'Elapsed'
    and name not in target_names
]

# Inspect the resulting split.
print("Nr. of all columns: " + str(len(column_names)))
print("Nr. of categorial data columns: " + str(len(cat_names)))
print("Nr. of continous data columns: " + str(len(cont_names)))
# Columns that are neither categorical nor continuous features; the target
# column(s) and `Elapsed` are expected to be counted here.
delta = len(column_names) - (len(cat_names) + len(cont_names))
print("Difference between all columns and all cat / cont columns: " + str(delta))

## Output recorded from an earlier run, made before the target was excluded
## from the feature lists -- current counts will differ.
#Nr. of all columns: 19
#Nr. of categorial data columns: 12
#Nr. of continous data columns: 7
#Difference between all columns and all cat / cont columns: 0
# (3) Pre-processing steps handed to the training TabularList
procs = [FillMissing, Categorify, Normalize]

# (4) Validation split by position: the last 1000 rows of df
n_rows = len(df)
valid_idx = range(n_rows - 1000, n_rows)

# (5) Test set as a TabularList (built without procs), taken from a copy of
#     every row except the last 2000.
# NOTE(review): these rows are also part of the training split (everything
# outside valid_idx) -- confirm this overlap is intended.
test_frame = df.iloc[:-2000].copy()
test = TabularList.from_df(test_frame, cat_names=cat_names, cont_names=cont_names)

# (6) Assemble the databunch step by step: features -> split -> labels -> test
feature_list = TabularList.from_df(
    df,
    cat_names=cat_names,
    cont_names=cont_names,
    procs=procs,
)
split_lists = feature_list.split_by_idx(valid_idx=valid_idx)
labelled_lists = split_lists.label_from_df(cols=dep_var)
data = labelled_lists.add_test(test, label=0).databunch()

# Looks okay, although I'm not sure why the dependent variable is still there.
# NOTE(review): show_batch presumably prints the label next to the features --
# confirm whether that explains it, or whether dep_var is also in cont_names.
data.show_batch(rows=3)
# (7) Create a tabular learner with two hidden layers (200 and 100 units).
# accuracy_long works around a type casting error. I believe the dependent
# variable must be of type long to avoid needing this workaround. TODO for later
# NOTE(review): if dep_var is a continuous target, accuracy is not a meaningful
# metric -- confirm whether a regression metric is wanted instead.
learn = tabular_learner(data, layers=[200, 100], metrics=accuracy_long)

epochs = 8
lr = 1e-3
# Pass the learning rate explicitly; it was previously assigned but never used,
# so training ran with the library default instead of 1e-3.
learn.fit(epochs, lr=lr)
## Output
#Total time: 00:09
#epoch train_loss valid_loss accuracy_long time
#0 3623.272461 536329.562500 0.000000 00:00
#1 2522.631348 166452.062500 0.000000 00:00
#2 1433.353516 15016.925781 0.000000 00:01
#3 679.439087 12283.965820 0.000000 00:01
#4 282.636536 20604.240234 0.000000 00:01
#5 136.765137 9877.943359 0.000000 00:01
#6 76.523834 6982.699219 0.000000 00:01
#7 56.457199 17809.328125 0.000000 00:01
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment