Skip to content

Instantly share code, notes, and snippets.

@ikatsov
Created March 15, 2020 16:26
Show Gist options
  • Save ikatsov/c380cd6baa37bb170e6fc2eb229ba4a4 to your computer and use it in GitHub Desktop.
Save ikatsov/c380cd6baa37bb170e6fc2eb229ba4a4 to your computer and use it in GitHub Desktop.
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances
# Prepare inputs for t-SNE: one embedding row per vocabulary item, in the
# same order as `vocab` (later steps rely on that ordering).
# NOTE(review): `.vocab` is the gensim 3.x attribute; gensim 4 replaced it
# with `key_to_index` -- confirm which gensim version this script targets.
word_vectors = model.wv
vocab = list(word_vectors.vocab)
# Per-item lookup table, retained in case later code wants single vectors.
item2vector_dict = {arg: word_vectors[arg] for arg in vocab}
# Indexing the keyed vectors with a list of keys returns the stacked 2-D
# matrix directly, avoiding a dict -> DataFrame -> transpose round trip.
X = word_vectors[vocab]
# Perform t-SNE on a precomputed cosine-distance matrix.
distance_matrix = pairwise_distances(X, X, metric='cosine', n_jobs=-1)
# `init="random"` is stated explicitly: a precomputed distance matrix has no
# feature space for PCA initialisation, and scikit-learn releases whose
# default is init="pca" reject that combination with metric="precomputed".
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;
# confirm against the installed version before upgrading.
tsne = TSNE(metric="precomputed", init="random", n_components=2,
            verbose=1, perplexity=30, n_iter=500)
tsne_results = tsne.fit_transform(distance_matrix)
# Assemble the plotting frame: one row per vocabulary item carrying its
# integer product id and its two t-SNE coordinates.
df_semantic_item = pd.DataFrame({
    'product_id': pd.Series(vocab).astype(int),
    'tsne-2d-one': tsne_results[:, 0],
    'tsne-2d-two': tsne_results[:, 1],
})
# Left-join product, aisle and department lookups in turn so every embedded
# item keeps its row even when a lookup table has no match for it.
df_semantic_item = (
    df_semantic_item
    .merge(data_dict['products'], on='product_id', how='left')
    .merge(data_dict['aisles'], on='aisle_id', how='left')
    .merge(data_dict['departments'], on='department_id', how='left')
)
# Scatter the 2-D t-SNE embedding, colouring each item by its department.
sns.scatterplot(
    data=df_semantic_item,
    x="tsne-2d-one",
    y="tsne-2d-two",
    hue='department',
    # One "hls" colour per department; n_department is defined elsewhere.
    palette=sns.color_palette("hls", n_department),
    alpha=0.3,  # translucent markers so overlapping points stay visible
    legend="full",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment