Skip to content

Instantly share code, notes, and snippets.

@abhijeet-talaulikar
Last active September 10, 2023 14:56
Show Gist options
  • Save abhijeet-talaulikar/f41b5ad72324ba7e94b4a4ac3803e657 to your computer and use it in GitHub Desktop.
Save abhijeet-talaulikar/f41b5ad72324ba7e94b4a4ac3803e657 to your computer and use it in GitHub Desktop.
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV
# Build the input array from the per-review embeddings.
# NOTE(review): assumes every entry of 'ada_embedding' is an equal-length
# numeric sequence, so the result is 2-D (one row per review) - confirm.
matrix = np.array(review_data['ada_embedding'].to_list())

# Grid search to find best n_components - number of clusters.
# For each candidate count (3 through 10) fit a Gaussian mixture and record
# its AIC and BIC on the same data; lower values indicate a better trade-off
# between fit quality and model complexity.
components, aic, bic = [], [], []
for i in range(3, 11):
    # Fixed seed: the mixture's initialisation is random, so without it the
    # AIC/BIC curves (and the chosen cluster count) change from run to run.
    gmm = GaussianMixture(n_components=i, random_state=0)
    gmm.fit(matrix)
    components.append(i)
    aic.append(gmm.aic(matrix))
    bic.append(gmm.bic(matrix))

# One row per candidate component count.
df_search = pd.DataFrame({
    "Components": components,
    "AIC": aic,
    "BIC": bic,
})

sns.set_style("darkgrid")
# `markers=True` is only honoured together with a `style` variable; passing
# `marker="o"` through to matplotlib actually draws a point per candidate.
sns.lineplot(data=df_search, x="Components", y="AIC", marker="o")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment