from fastai import *
from fastai.tabular import *
from fastai.vision import *
# Base directory: used to locate the CSV file and passed as `path` to the
# fastai data objects built in this script.
PATH = os.path.abspath('..')

# distinguish categorical and continuous variables, and dependent variable
cat_names = ['cat1', 'cat2', 'cat3']
cont_names = ['cont1', 'cont2']
dep_var = 'target'

# transformations for image augmentation
# NOTE(review): this call was cut off after `do_flip=True,` in this copy of
# the script, so any further keyword arguments are unknown. It is closed here
# with only the argument that survived -- confirm against the original.
tfms = get_transforms(do_flip=True)
def get_dataframe():
    """Returns DataFrame containing tabular data, image names, and targets.

    Reads the CSV file at ``{PATH}/path/to/data.csv`` and keeps only the
    columns named in ``cat_names`` and ``cont_names``, the image file-name
    column ``img_fn`` and the dependent variable ``dep_var``.
    """
    # main data set
    df = pd.read_csv(f'{PATH}/path/to/data.csv')

    # isolate useful columns; the categorical columns have to be kept too,
    # because the tabular data is built from this frame with
    # `cat_names=cat_names`
    df = df[cat_names + cont_names + ['img_fn'] + [dep_var]]

    return df
def get_val_idxs(n, seed=1234, val_pct=0.2):
    """Return a reproducible random subset of ``range(n)`` for validation.

    n: int
        number of rows to choose from
    seed: int, optional
        seed of the random generator used for the shuffle (default is 1234)
    val_pct: float, optional
        fraction of the ``n`` indices to return (default is 0.2)

    Returns a 1-D integer array of ``int(val_pct * n)`` distinct indices.
    """
    # A dedicated RandomState makes the result depend only on `seed` and
    # leaves numpy's global random state untouched.
    rng = np.random.RandomState(seed)
    return rng.permutation(n)[:int(val_pct * n)]
class TabConvDataset(Dataset):
    """A Dataset of combined tabular data, image names, and targets.

    Item ``i`` is the pair ``((x_tab[i], x_img[i]), y[i])``; the length of the
    dataset is the length of ``y``.
    """

    def __init__(self, x_tab, x_img, y):
        self.x_tab = x_tab
        self.x_img = x_img
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, i):
        inputs = (self.x_tab[i], self.x_img[i])
        return inputs, self.y[i]
class TabConvModel(nn.Module):
    """A combined neural network using the convnet and tabular model.

    The outputs of the two sub-models are concatenated along dim 1 and passed
    through a final stack of layers built with `bn_drop_lin`.

    tab_model: nn.Module
        model applied to the tabular inputs
    img_model: nn.Module
        model applied to the image inputs
    layers: list of int
        sizes of the final stack, input size first
    drops: list of float
        dropout value passed to `bn_drop_lin` for each layer of the stack
    """

    def __init__(self, tab_model, img_model, layers, drops):
        # nn.Module has to be initialised before sub-modules can be assigned
        super().__init__()
        self.tab_model = tab_model
        self.img_model = img_model

        # ReLU after every layer of the stack except the last, which gets none
        activs = [nn.ReLU(inplace=True)] * (len(layers) - 2) + [None]
        lst_layers = []
        for n_in, n_out, p, actn in zip(layers[:-1], layers[1:], drops, activs):
            lst_layers += bn_drop_lin(n_in, n_out, p=p, actn=actn)
        self.layers = nn.Sequential(*lst_layers)

    def forward(self, *x):
        # x[0] is unpacked into the tabular model's positional inputs,
        # x[1] is the input of the image model
        x_tab = self.tab_model(*x[0])
        x_img = self.img_model(x[1])

        # join both outputs along dim 1 before the final stack
        x = torch.cat([x_tab, x_img], dim=1)
        return self.layers(x)
def initialize_combined_model(n_lin_tab=16, n_lin_conv=32, ps_final=0.25, bs=64, sz=502,
                              seed=1234, val_pct=0.2, img_tfms=tfms):
    """Initialize a combined model that can learn from both tabular and image data.

    Parameters
    ----------
    n_lin_tab: int
        number of linear nodes in the single hidden layer for the tabular model
        (default is 16)
    n_lin_conv: int
        number of linear nodes in the final dense layer for the convolutional model
        (default is 32)
    ps_final: float
        dropout fraction in the final linear layers (default is 0.25)
    bs: int
        batchsize for loading combined TabConvData (default is 64)
    sz: int
        image size (default is 502)
    seed: int, optional
        random seed passed to numpy (default is 1234)
    val_pct: float, optional
        fraction of data used for validation (default is 0.2)
    img_tfms: Transforms, optional
        set of transformations for image augmentation (global default set)

    Returns
    -------
    learn: Learner
        combined `Learner` object built on top of the TabConvModel and
        supplied data
    """
    df = get_dataframe()

    # One seeded split shared by the tabular and the image data, so that both
    # hold out the same rows. It is computed here so that `seed` and `val_pct`
    # are both honoured.
    n_rows = len(df)
    val_idxs = np.random.RandomState(seed).permutation(n_rows)[:int(val_pct * n_rows)]

    # preprocessing
    # NOTE(review): Categorify was added because `cat_names` is non-empty and
    # fastai v1 reads category codes from those columns -- confirm.
    procs = [Categorify, Normalize]

    # set up tabular data and learner
    # NOTE(review): the split and databunch steps were missing from this copy
    # of the script and have been reconstructed -- confirm.
    tab_data = (TabularList.from_df(df, path=PATH, cat_names=cat_names, cont_names=cont_names, procs=procs)
                .split_by_idx(val_idxs)
                .label_from_df(cols=dep_var, label_cls=FloatList)
                .databunch(bs=bs))

    # NOTE(review): a user in the discussion below this script reported a dtype
    # error with root_mean_squared_error and switched to MSELossFlat.
    tab_learn = tabular_learner(tab_data, layers=[n_lin_tab], loss_func=root_mean_squared_error)

    # set up image data and learner
    # NOTE(review): everything after `path=PATH,` was lost in this copy. The
    # folder and suffix follow the example layout given in the discussion
    # (`data/images/<img_fn>.jpg`); the normalization is assumed -- confirm.
    img_data = (ImageList.from_df(path=PATH, df=df, cols='img_fn', folder='data/images', suffix='.jpg')
                .split_by_idx(val_idxs)
                .label_from_df(cols=dep_var, label_cls=FloatList)
                .transform(img_tfms, size=sz)
                .databunch(bs=bs)
                .normalize(imagenet_stats))

    # NOTE(review): arguments after `models.resnet34,` were lost; the loss
    # function mirrors the tabular learner -- confirm.
    img_learn = cnn_learner(img_data, models.resnet34, loss_func=root_mean_squared_error)

    # combined data
    train_ds = TabConvDataset(tab_data.train_ds.x, img_data.train_ds.x, tab_data.train_ds.y)
    valid_ds = TabConvDataset(tab_data.valid_ds.x, img_data.valid_ds.x, tab_data.valid_ds.y)

    train_dl = DataLoader(train_ds, bs)
    valid_dl = DataLoader(valid_ds, 2 * bs)

    data = DataBunch(train_dl, valid_dl, path=PATH)

    # chop off final layers from both models
    tab_learn.model.layers = tab_learn.model.layers[:-2]
    # keep the head minus its last five layers, then map 1024 features to
    # n_lin_conv
    img_learn.model[-1] = nn.Sequential(*img_learn.model[-1][:-5],
                                        nn.Linear(1024, n_lin_conv, bias=True),
                                        nn.ReLU(inplace=True))

    # final stack: concatenated features -> single output
    lin_layers = [n_lin_tab + n_lin_conv, 1]
    ps = [ps_final]

    # initialize model
    model = TabConvModel(tab_learn.model, img_learn.model, lin_layers, ps)

    # Layer groups: the two body groups of the convnet, then one group holding
    # the convnet head, the tabular model and the final stack.
    # The head is read from the modified model rather than from
    # `img_learn.layer_groups[2]`: that group was built before the head was
    # replaced above, so it does not contain the new nn.Linear.
    # NOTE(review): the middle group and the tail of the last group were lost
    # in this copy and have been reconstructed -- confirm.
    layer_groups = [nn.Sequential(*flatten_model(img_learn.layer_groups[0])),
                    nn.Sequential(*flatten_model(img_learn.layer_groups[1])),
                    nn.Sequential(*(flatten_model(model.img_model[-1]) +
                                    flatten_model(model.tab_model) +
                                    flatten_model(model.layers)))]

    # combined learner
    # NOTE(review): arguments after `model,` were lost; reconstructed from the
    # names defined above -- confirm.
    learn = Learner(data, model, loss_func=root_mean_squared_error, layer_groups=layer_groups)

    return learn
if __name__ == '__main__':
    learn = initialize_combined_model()

    # Train in three stages and store the learner under a new name after each.
    # NOTE(review): in this copy each fit call was fused with the remains of a
    # second call that took the name; `learn.save` is the assumed original.
    learn.fit_one_cycle(1, 1e-2)
    learn.save('combined-init-train-1')

    learn.fit_one_cycle(1, 1e-2)
    learn.save('combined-init-train-2')

    learn.fit_one_cycle(20, 1e-3)
    learn.save('combined-init-train-3')
Can you also share a sample data file? It would make it a bit easier to work with the sample code. Thanks

jwuphysics commented Feb 7, 2020

Can you also share a sample data file? It would make it a bit easier to work with the sample code. Thanks

I didn't actually try this with an example data set, but imagine you would want to access a tabular data file in the location {PATH}/data/catalog.csv, which could contain data like the following:

cat1, cat2, cat3, cont1, cont2, img_fn, target
A, middle, True, 6.4, -3.1, object_13702, 0.45
B, lower, True, 1.1, 2.1, object_11204, 0.98
C, upper, False, 0.8, -3.3, object_11092, 0.2

Your directory structure would have to look something like:

└── data
    ├── catalog.csv
    └── images
        ├── object_13702.jpg 
        ├── object_11204.jpg
        └── ...

This is obviously a contrived example but hopefully it makes more sense this way. Note of course that the path to the CSV file in line 25 would need to be updated.

noob9000 commented May 6, 2020

wanted to comment that I got a dtype error when using loss_func=root_mean_squared_error and had to switch to MSELossFlat instead

I am having trouble getting learn.predict() to work with TabConvDataset. How did you get predictions outside of training?

Sorry, this code is now considerably out of date. It uses fastai version 1.0 and Pytorch 1.0. There may be a better version for fastai v2.x on the forums.

