Last active
August 23, 2016 01:09
-
-
Save korymath/33d0f36f1810b53891230441fe71721d to your computer and use it in GitHub Desktop.
Quick n-gram discovery on the NIPS 2016 accepted papers, from https://nips.cc/Conferences/2016/AcceptedPapers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quick n-gram frequency count over a plain-text file.
#
# Reads 'accepted-papers.txt' from the current working directory, breaks the
# text into word n-grams with scikit-learn's CountVectorizer analyzer, and
# prints the 50 most frequent n-grams with their counts.
from collections import Counter

import numpy as np  # not used by the statements below; kept in case other code relies on it
from sklearn.feature_extraction.text import CountVectorizer

with open('accepted-papers.txt', 'r') as myfile:
    # Join lines with a space rather than nothing, so the last word of one
    # line is not fused with the first word of the next line into a single
    # bogus token before the text is tokenized.
    data = myfile.read().replace('\n', ' ')

lectures = data

# Despite the variable name, ngram_range=(2, 10) yields every n-gram from
# 2 up to 10 words long, not just bigrams. English stop words are dropped
# before the n-grams are assembled.
bigram_vectorizer = CountVectorizer(ngram_range=(2, 10), stop_words='english')

# build_analyzer() returns the callable that turns a raw string into its
# list of n-grams; no vocabulary fitting is needed for a simple tally.
analyze = bigram_vectorizer.build_analyzer()
output = analyze(lectures)

# Tally the n-grams and show the 50 most frequent as (ngram, count) pairs.
print(Counter(output).most_common(50))
# [(u'stanford university', 54), (u'carnegie mellon', 45), (u'mellon university', 44), (u'carnegie mellon university', 44), (u'google deepmind', 43), (u'princeton university', 37), (u'uc berkeley', 35), (u'neural networks', 28), (u'microsoft research', 25), (u'university washington', 24), (u'cornell university', 18), (u'eth zurich', 17), (u'university michigan', 16), (u'university texas', 16), (u'university pennsylvania', 16), (u'university toronto', 15), (u'columbia university', 15), (u'duke university', 14), (u'tsinghua university', 14), (u'university oxford', 14), (u'university texas austin', 13), (u'texas austin', 13), (u'state university', 13), (u'university montreal', 12), (u'ut austin', 12), (u'university cambridge', 11), (u'peking university', 11), (u'university california', 10), (u'university minnesota', 10), (u'stochastic gradient', 10), (u'university wisconsin', 9), (u'institute science', 9), (u'deep learning', 8), (u'national university', 8), (u'university chicago', 8), (u'bayesian optimization', 8), (u'convolutional neural', 8), (u'convex optimization', 8), (u'harvard university', 8), (u'virginia tech', 7), (u'national laboratory', 7), (u'san diego', 7), (u'chen university', 7), (u'supervised learning', 7), (u'alamos national', 7), (u'alamos national laboratory', 7), (u'new york', 7), (u'recurrent neural networks', 7), (u'los alamos national laboratory', 7), (u'university david', 7)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment