Skip to content

Instantly share code, notes, and snippets.

@jjone36
Created February 16, 2019 14:05
Show Gist options
  • Save jjone36/a032cde755fbc967259eb8b2370a1503 to your computer and use it in GitHub Desktop.
Save jjone36/a032cde755fbc967259eb8b2370a1503 to your computer and use it in GitHub Desktop.
# Create a function to vectorize all the ingredients and run t-SNE in one step
def cosmetic_map(option_1, option_2):
    """Build a 2-D t-SNE ingredient map for one label / flag-column pair.

    Rows of the module-level ``cosm`` DataFrame are kept when their
    ``'Label'`` equals ``option_1`` and the column named ``option_2``
    equals 1. Each kept row's ``'Ingredients'`` string is lower-cased and
    split on ``', '``; the rows are then encoded as a binary
    document-term matrix (one column per distinct ingredient token) and
    embedded with t-SNE.

    Args:
        option_1: Value matched against ``cosm['Label']``.
        option_2: Name of a ``cosm`` column; rows where it equals 1 are kept.

    Returns:
        The filtered rows after ``reset_index()`` (so the previous index
        is kept as a column), with ``'X'`` and ``'Y'`` columns holding
        columns 0 and 1 of the t-SNE output.
    """
    # Combine both conditions into one mask. Chaining two boolean
    # selections indexes the already-filtered frame with a mask built on
    # the full frame, which forces pandas to re-align it and emits a
    # "Boolean Series key will be reindexed" warning.
    mask = (cosm['Label'] == option_1) & (cosm[option_2] == 1)
    df = cosm[mask].reset_index()

    # Tokenise every ingredient list.
    corpus = [text.lower().split(', ') for text in df['Ingredients']]

    # Give each distinct ingredient a column index, in first-seen order.
    word_index_map = {}
    for tokens in corpus:
        for token in tokens:
            word_index_map.setdefault(token, len(word_index_map))

    # Binary document-term matrix: A[d, n] == 1 when item d lists
    # ingredient n.
    D = len(corpus)          # number of items
    N = len(word_index_map)  # total number of distinct ingredients
    A = np.zeros((D, N))
    for row, tokens in enumerate(corpus):
        A[row, [word_index_map[token] for token in tokens]] = 1

    # Dimensionality reduction with t-SNE.
    tsne = TSNE(learning_rate=200)
    tsne_features = tsne.fit_transform(A)
    df['X'] = tsne_features[:, 0]
    df['Y'] = tsne_features[:, 1]
    return df
# Apply the function to all combinations of the two options.
# Build one combined frame holding the t-SNE map of every
# (option_1, option_2) pair. The per-pair frames are collected first and
# concatenated once, so every pair ends up in `df_all` and the growing
# result is not re-copied on each iteration.
frames = []
for a in option_1:
    for b in option_2:
        temp = cosmetic_map(a, b)
        # Tag each row with the pair it came from, e.g. "<a>_<b>".
        temp['Label'] = a + '_' + b
        frames.append(temp)

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no combinations to process.
df_all = pd.concat(frames, axis=0) if frames else pd.DataFrame()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment