# Narrow the frame to the fields listed in `cols`; every other column is
# dropped when `df` is rebound to the selection.
cols = [
    "family",
    "variants",
    "subsets",
    "category",
]
df = df[cols]
# df.head(5)
# Remove any spaces from the family string so that it matches the file-name
# convention, i.e. apply name.replace(' ', '') to each family name.
