Skip to content

Instantly share code, notes, and snippets.

@Randl
Created May 21, 2018 15:53
Show Gist options
  • Save Randl/80efead5dd9dc4582f6ae356001cdb89 to your computer and use it in GitHub Desktop.
Save Randl/80efead5dd9dc4582f6ae356001cdb89 to your computer and use it in GitHub Desktop.
Parse ICML submissions, get some statistics
import pickle
from collections import Counter
from contextlib import closing
from timeit import default_timer as timer

from selenium.common.exceptions import TimeoutException
from selenium.webdriver import Firefox, FirefoxProfile
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
unis = {'Google Deepmind': 'Deepmind', 'Google Brain': 'Google', 'Cmu': 'Carnegie Mellon University',
'Cargenie Mellon University': 'Carnegie Mellon University',
'Carnegie Mellen University': 'Carnegie Mellon University',
'Carnegie Mellon University': 'Carnegie Mellon University',
'Facebook Ai Research': 'Facebook', 'University Of Virginia--> Ucla': 'University Of Virginia',
'Openai / Uc Berkeley': 'Openai',
'Mpi For Intelligent Systems Tübingen, Germany': 'Max Planck Institute For Intelligent Systems',
'Mit Csail': 'Mit', 'Fair': 'Facebook', 'University Of Oxford': 'Oxford', 'Harvard University': 'Harvard',
'Uber Ai Labs': 'Uber', 'Uber Ai Labs & University Of Central Florida': 'Uber',
'Uber Atg / University Of Toronto': 'Uber', 'Uber/Cmu': 'Uber',
'University Of Cambridge & Uber': 'University Of Cambridge', 'Cambridge/Mpi': 'University Of Cambridge',
'University Of Cambridge And Mpi Tübingen': 'University Of Cambridge',
'University Of Cambridge, Alan Turing Institute': 'University Of Cambridge',
'Columbia University In The City Of New York': 'Columbia University',
'Columbia University Medical Center': 'Columbia University',
'Aalto University & Nvidia': 'Aalto University',
'Alan Turing Institute & University Of Warwick': 'Alan Turing Institute',
'Amazon / Ucsb': 'Amazon',
'Amazon Ai & Caltech': 'Amazon', 'Amazon Research': 'Amazon', 'Amazon Research Tübingen': 'Amazon',
'Amazon.Com': 'Amazon',
'Ant Financial Services Group': 'Ant Financial',
'Artificial Intelligence Department, Ant Financial': 'Ant Financial',
'Apple Inc.': 'Apple',
'Cornell University': 'Cornell',
'Deepmind/University Of Alberta': 'Deepmind',
'Google Inc': 'Google', 'Google Inc.': 'Google', 'Google Llc': 'Google', 'Google Research': 'Google',
'Google Research, Ny': 'Google', 'Google Uk': 'Google', 'Google, Inc.': 'Google', 'Google, Usa': 'Google',
'Facebook / Nyu': 'Facebook', 'Facebook Ai Research And Tel Aviv University': 'Facebook',
'Facebook Ai Research, Nyu': 'Facebook', 'Facebook Artificial Intelligence Research': 'Facebook',
'Facebook Research': 'Facebook',
'Oxford And Deepmind': 'Oxford', 'Oxford, Deepmind': 'Oxford',
'Northwestern': 'Northwestern University', 'Northwestern U': 'Northwestern University',
'Ut Austin': 'University Of Texas At Austin', 'Ut Austin & Amazon': 'University Of Texas At Austin',
'Ut Austin - Sentient Technologies': 'University Of Texas At Austin',
'Ut-Austin': 'University Of Texas At Austin',
'Weizmanninstitute': 'Weizmann Institute Of Science',
'Tu Darmstadt + Max Planck Institute For Intelligent Systems': 'Tu Darmstadt',
'Eecs, Uc Berkeley': 'University Of California, Berkeley',
'Eecs Department, University Of California, Berkeley': 'University Of California, Berkeley',
'Berkeley': 'University Of California, Berkeley',
'Uc Berkeley': 'University Of California, Berkeley',
'University Of California At Berkeley': 'University Of California, Berkeley',
'University Of California Berkeley': 'University Of California, Berkeley',
'Google / U. Michigan': 'Google', 'Google Ai': 'Google', 'Google Brain / Cornell University': 'Google',
'Google Brain And Princeton University': 'Google', 'Google Brain Robotics': 'Google',
'Google Deepmind And Inria': 'Deepmind',
'Deep Mind': 'Deepmind', 'Deepmind, University Of Oxford': 'Deepmind',
'Yandex; Msu': 'Yandex',
'University At Albany, State University Of New York': 'University At Albany',
'Massachusetts Institute Of Technology': 'Mit',
'Nyu': 'New York University',
'Skoltech & Criteo': 'Skoltech',
'Zhejiang University & Tencent Ai Lab': 'Zhejiang University',
'Mcgill University / Facebook': 'Mcgill University',
'U Oxford': 'Oxford',
'University Of California At San Diego': 'University Of California San Diego',
'Microsoft Research Ai': 'Microsoft', 'Microsoft Ai & Research': 'Microsoft', 'Microsoft Research': 'Microsoft',
'Baidu Research, Usa': 'Baidu', 'Baidu Research Usa': 'Baidu', 'Baidu Research': 'Baidu',
'Microsoft Maluuba': 'Microsoft',
'Technion – Israel Institute Of Technology': 'Technion',
'Technion Israeli Institute Of Technology': 'Technion',
'Okinawa Institute Of Science And Technology Graduate University': 'Okinawa Institute Of Science And Technology',
'Nvidia Research': 'Nvidia', 'Microsoft Research Cambridge': 'Microsoft', 'Stanford': 'Stanford University',
'Tel Aviv University, Google': 'Tel Aviv University', 'Stanford University & Google': 'Stanford University',
'Mit, Tau': 'Mit', 'Magic Leap, Inc': 'Magic Leap',
'Magic Leap Inc.': 'Magic Leap',
'Magic Leap, Inc.': 'Magic Leap', 'Princeton University And Google Brain': 'Princeton University',
'Univ Of Toronto | Toronto': 'University Of Toronto',
'Department Of Electrical And Computer Engineering, University Of Toronto': 'University Of Toronto',
'Hebrew University Of Jerusalem, Israel': 'Hebrew University',
'University Of Illinois Uc': 'University Of Illinois Urbana-Champaign',
'Mcgill': 'Mcgill University', 'Stanford University, California': 'Stanford',
'Microsoft Research Asia': 'Microsoft', 'The University Of Oxford': 'Oxford',
'Princeton University And Institute For Advanced Study': 'Princeton University',
'Princeton': 'Princeton University', 'Princeton Univerisity': 'Princeton University',
'The University Of Texas At Austin': 'University Of Texas At Austin',
'Epfl': 'École Polytechnique Fédérale De Lausanne',
'École Polytechnique Fédérale D': 'École Polytechnique Fédérale De Lausanne',
'University Of Toronto And Vector Institute': 'Univeristy Of Toronto',
'Universita Di Pisa': 'University Of Pisa',
'Cambridge': 'University Of Cambridge',
'Columbia': 'Columbia University',
'Department Of Statistics, Columbia University': 'Columbia University',
'Eth Zurich And University Of Zurich': 'Eth Zurich',
'Eth Zurich - Max-Planck-Institute': 'Eth Zurich',
'Eth Zürich': 'Eth Zurich',
'Ethz': 'Eth Zurich',
'Openai / University Of Edinburgh': 'Openai',
'Iiis, Tsinghua University': 'Tsinghua University',
'California Institute Of Technology': 'Caltech',
'Georgia Tech': 'Georgia Institute Of Technology',
'Georgia Institute Of Technology / Facebook Ai Research': 'Georgia Institute Of Technology',
'Tecent Ai Lab': 'Tencent Ai Lab', 'Tencent': 'Tencent Ai Lab',
}
url = 'https://icml.cc/Conferences/2018/AcceptedPapersInitial'
driver_timeout = 15  # handed to WebDriverWait as its timeout (seconds)


def _split_author(entry):
    """Split one 'Name (Institution)' entry into a normalised (name, institution) pair.

    Both parts are stripped and title-cased so the institution can be matched
    against the keys of ``unis``.  An entry without a parenthesised affiliation
    yields an empty institution instead of raising IndexError.
    """
    pieces = entry.split('(')
    name = pieces[0].strip().lower().title()
    institution = ''
    if len(pieces) > 1:
        institution = pieces[1].split(')')[0].strip().lower().title()
    return name, institution


# Each record is (title, [(author, institution), ...], {authors}, {institutions},
# first author, last author).
papers = []
browser = Firefox()
try:
    browser.get(url)
    # Block until an element containing the 'Successful Page Load' text exists.
    # The generic find_element(s)(By..., ...) calls are used throughout because
    # the find_element(s)_by_* helpers were removed in Selenium 4.3.
    WebDriverWait(browser, timeout=driver_timeout).until(
        lambda drv: drv.find_elements(By.XPATH, "//*[contains(text(), 'Successful Page Load')]"))
    text = 'ICML 2018 Accepted Papers'
    list_of_papers = browser.find_elements(By.XPATH, '//*[contains(text(), "' + text + '")]')
    # Entries are the <p> elements under the heading's parent; the first <p> is skipped.
    for el in list_of_papers[0].find_element(By.XPATH, '..').find_elements(By.TAG_NAME, 'p')[1:]:
        paper_name = el.find_element(By.TAG_NAME, 'b').text
        paper_autors = el.find_element(By.TAG_NAME, 'i').text.split('·')
        authors = []
        names = set()
        institutions = set()
        for author in paper_autors:
            aut, institution = _split_author(author)
            if not aut:
                # Empty fragment (e.g. a stray separator): nothing to record.
                continue
            institution = unis.get(institution, institution)
            authors.append((aut, institution))
            names.add(aut)
            if institution:
                # Do not count a missing affiliation as an institution.
                institutions.add(institution)
        if not authors:
            # Without authors there is no first/last author to index below.
            continue
        papers.append((paper_name, authors, names, institutions, authors[0][0], authors[-1][0]))
finally:
    # quit() ends the driver session and its process; close() (what
    # contextlib.closing would call) only closes the current window.
    browser.quit()

# Everything in ``papers`` is plain text by now, so the browser is no longer needed.
with open('papers.pickle', 'wb') as handle:
    pickle.dump(papers, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Tallies over all scraped papers.  Counter is a dict subclass, so the
# ``.items()``-based reporting further down works unchanged.
authors_count = Counter()        # papers per author (any position)
institution_count = Counter()    # papers per institution
authors_first_count = Counter()  # papers per first author
authors_last_count = Counter()   # papers per last author
for _title, _pairs, paper_names, paper_institutions, first_author, last_author in papers:
    # Both are sets, so each author/institution counts at most once per paper.
    authors_count.update(paper_names)
    institution_count.update(paper_institutions)
    authors_first_count[first_author] += 1
    authors_last_count[last_author] += 1
def _print_ranked(heading, counts, minimum):
    """Print *heading*, then each key of *counts* whose count is at least *minimum*.

    Rows are printed as ``key count``, highest count first; ties keep the
    order in which the keys were inserted into *counts*.
    """
    print(heading)
    kept = [(name, total) for name, total in counts.items() if total >= minimum]
    kept.sort(key=lambda row: row[1], reverse=True)
    for name, total in kept:
        print(name, total)


# Sample output recorded by the author:
# Tong Zhang 8
# Lawrence Carin 7
# Jun Zhu 6
# Quanquan Gu 6
# Le Song 6
# Remi Munos 6
# Sergey Levine 6
# Pieter Abbeel 6
# Bernhard Schölkopf 5
# Amin Karbasi 5
# Shimon Whiteson 5
_print_ranked('Authors', authors_count, 5)

# Sample output recorded by the author:
# Google 48
# Carnegie Mellon University 32
# University Of California, Berkeley 31
# Deepmind 31
# Mit 28
# Stanford University 28
# Microsoft 27
# Princeton University 20
# Oxford 19
# Facebook 18
# Cornell 16
# University Of Texas At Austin 16
# École Polytechnique Fédérale De Lausanne 15
# University Of Toronto 15
# University Of Cambridge 14
# Eth Zurich 14
# Columbia University 13
# Tsinghua University 12
# Georgia Institute Of Technology 11
# University Of Southern California 10
# Duke University 10
_print_ranked('Institutions', institution_count, 10)

_print_ranked('First authors', authors_first_count, 2)

_print_ranked('Last authors', authors_last_count, 4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment