Skip to content

Instantly share code, notes, and snippets.

@georgeodsc
Created December 3, 2017 20:18
Show Gist options
  • Save georgeodsc/b3454f71b69f47ca65ec36496cb80abc to your computer and use it in GitHub Desktop.
Save georgeodsc/b3454f71b69f47ca65ec36496cb80abc to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Upper bound on the vocabulary size handed to the vectorizer\n",
"no_features = 6000\n",
"\n",
"# Initialize the tf-idf vectorizer\n",
"tfidf_vectorizer = TfidfVectorizer(\n",
"    max_df=0.95,\n",
"    min_df=4,\n",
"    max_features=no_features,\n",
"    stop_words='english',\n",
")\n",
"\n",
"# Fit tf-idf on the corpus and transform it in one step\n",
"tfidf = tfidf_vectorizer.fit_transform(docs)\n",
"\n",
"# get_feature_names() was removed in scikit-learn 1.2; use its replacement\n",
"# when it exists and fall back to the old method on older releases.\n",
"if hasattr(tfidf_vectorizer, 'get_feature_names_out'):\n",
"    tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()\n",
"else:\n",
"    tfidf_feature_names = tfidf_vectorizer.get_feature_names()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"\n",
"no_topics = 7\n",
"\n",
"# Fit an NMF model with no_topics components on the tf-idf matrix.\n",
"# NOTE(review): the `alpha` keyword was removed from NMF in scikit-learn 1.2\n",
"# (superseded by alpha_W / alpha_H) -- confirm the installed version accepts it.\n",
"nmf = NMF(\n",
"    n_components=no_topics,\n",
"    init='nndsvd',\n",
"    alpha=.1,\n",
"    l1_ratio=.5,\n",
"    random_state=1,\n",
").fit(tfidf)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Helper that prints the top-weighted words of every topic\n",
"def display_topics(model, feature_names, no_top_words):\n",
"    \"\"\"Print the highest-weighted words of each topic in a fitted model.\n",
"\n",
"    model: object exposing components_, iterated here one row per topic.\n",
"    feature_names: sequence indexed by the positions returned by argsort().\n",
"    no_top_words: how many words to print for every topic.\n",
"    \"\"\"\n",
"    for topic_idx, topic in enumerate(model.components_):\n",
"        print(\"Topic %d:\" % (topic_idx + 1))\n",
"        # argsort() is ascending, so the reversed tail slice walks the\n",
"        # no_top_words largest weights from highest to lowest.\n",
"        top_indices = topic.argsort()[:-no_top_words - 1:-1]\n",
"        print(\" \".join([feature_names[i] for i in top_indices]))\n",
"        print(\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Topic 1:\n",
"ai intelligence artificial human machines marketing humans tasks business startups\n",
"\n",
"\n",
"Topic 2:\n",
"data science analytics big business scientists spark scientist skills hadoop\n",
"\n",
"\n",
"Topic 3:\n",
"learning machine deep algorithms ml learn systems computer google data\n",
"\n",
"\n",
"Topic 4:\n",
"new said technology like company people google says world companies\n",
"\n",
"\n",
"Topic 5:\n",
"iot devices internet things connected security smart sensors samsung home\n",
"\n",
"\n",
"Topic 6:\n",
"model neural regression data function network class training classification set\n",
"\n",
"\n",
"Topic 7:\n",
"customer chatbots chatbot customers bot bots marketing service messaging banks\n",
"\n",
"\n"
]
}
],
"source": [
"# Print the top words of each topic found by the fitted NMF model\n",
"no_top_words = 10\n",
"display_topics(model=nmf,\n",
"               feature_names=tfidf_feature_names,\n",
"               no_top_words=no_top_words)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment