Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Fast.ai notes
#Lib versions
#Python Version: 3.6.7
#Pandas Version: 0.24.2
#Numpy Version: 1.16.2
#FastAI Version: 1.0.50.post1
#PyTorch Version: 1.0.1.post2
#GPU Acceleration
#GPU: NVDIA K80
#Cuda V10.0.130
##############################
# Split the DataFrame's columns into continuous vs. categorical lists for
# fastai's tabular API. Assumes `df` holds the raw data with "Date" as its
# first column and that fastai's `add_datepart` is in scope.
column_names = df.columns.values.tolist()
# (1) Separate columns into continuous data columns and categorical data columns
cont_names = column_names[1:]  # store everything except the first column "Date"
# (2) Convert the "Date" column to engineered date parts, as suggested by the Docs.
# add_datepart drops "Date" and appends derived columns (Year, Month, ..., Elapsed).
add_datepart(df, 'Date')
column_names = df.columns.values.tolist()
# Treat indices 6 .. len-2 as categorical, i.e. everything except the first
# six columns and the last element.
# NOTE(review): cont_names holds 7 names (new indices 0-6) while cat_names
# starts at index 6 — if add_datepart appends its columns at the end, one
# column lands in BOTH lists and the final column in neither. The delta
# check below cannot catch this (12 + 7 = 19 either way) — verify.
cat_names = column_names[6:len(column_names)-1]
# Inspect the split: delta should be 0 if cat + cont together cover all columns.
print("Nr. of all columns: " + str(len(column_names)))
print("Nr. of categorial data columns: " + str(len(cat_names)))
print("Nr. of continous data columns: " + str(len(cont_names)))
delta = len(column_names) - (len(cat_names) + len(cont_names))
print("Difference between all columns and all cat / cont columns: " + str(delta))
## Output
#Nr. of all columns: 19
#Nr. of categorial data columns: 12
#Nr. of continous data columns: 7
#Difference between all columns and all cat / cont columns: 0
# (3) Data processors applied by fastai to the tabular data:
# fill missing values, encode categoricals, normalize continuous columns.
procs = [FillMissing, Categorify, Normalize]
# (4) Split data by index: the last 1000 rows become the validation set.
valid_idx = range(len(df)-1000, len(df))
# (5) A test data set as TabularList.
# NOTE(review): df.iloc[:-2000] is everything EXCEPT the last 2000 rows, so
# this "test" set fully overlaps the training rows — confirm this is intended.
# NOTE(review): built without procs= — presumably add_test re-applies the
# training processors; verify against the fastai docs.
test = TabularList.from_df(df.iloc[:-2000].copy(),
#path=path,
cat_names=cat_names,
cont_names=cont_names)
# (6) Created a databunch: label from dep_var, attach the test set, bundle.
data = (TabularList.from_df(df,
#path=path,
cat_names=cat_names,
cont_names=cont_names,
procs=procs)
.split_by_idx(valid_idx=valid_idx)
.label_from_df(cols=dep_var)
.add_test(test, label=0)
.databunch())
data.show_batch(rows=3)# looks okay, although I'm not sure why the dependent variable is still there.
# (7) Created a tabular learner with two hidden layers (200 and 100 units).
learn = tabular_learner(data, layers=[200,100], metrics=accuracy_long)
# accuracy_long fixes a type casting error. I believe the dependent variable must be type long to avoid this fix. TODO for later
epochs = 8
lr = 1e-3
# NOTE(review): lr is defined but never passed to fit() — fastai's default
# learning rate is used here; confirm whether learn.fit(epochs, lr) was meant.
learn.fit(epochs)
## Output
#Total time: 00:09
#epoch train_loss valid_loss accuracy_long time
#0 3623.272461 536329.562500 0.000000 00:00
#1 2522.631348 166452.062500 0.000000 00:00
#2 1433.353516 15016.925781 0.000000 00:01
#3 679.439087 12283.965820 0.000000 00:01
#4 282.636536 20604.240234 0.000000 00:01
#5 136.765137 9877.943359 0.000000 00:01
#6 76.523834 6982.699219 0.000000 00:01
#7 56.457199 17809.328125 0.000000 00:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment