Skip to content

Instantly share code, notes, and snippets.

@kailashbuki
Created March 17, 2019 22:53
Show Gist options
  • Save kailashbuki/305c22c579800274764d1aff389c5e98 to your computer and use it in GitHub Desktop.
Save kailashbuki/305c22c579800274764d1aff389c5e98 to your computer and use it in GitHub Desktop.
Extract the authors of research papers from every edition of IEEE ICDM so far (2001-2018) and print the most prolific ones.
#!/usr/bin/python3
import requests
from collections import Counter
from bs4 import BeautifulSoup
# Script: download the dblp table-of-contents page of each ICDM edition,
# collect the author names listed on every paper entry, and print the
# authors with the most entries.

# Number of top authors to print in the final report.
top_k = 10
# One element per author occurrence: a name is appended once for every
# entry it appears on, so its multiplicity is that author's entry count.
authors = list()
# range() excludes its end point, so this covers the 2001..2018 editions.
for year in range(2001, 2019):
    print(year)  # progress indicator: one line per edition fetched
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(
        "https://dblp.org/db/conf/icdm/icdm%d.html" % year, timeout=30
    )
    # Fail loudly on an HTTP error instead of parsing an error page, which
    # would silently contribute zero authors for that year.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, features="html.parser")
    # The list items carrying this class string are what the script counts
    # as papers.
    entries = soup.find_all("li", {"class": "entry inproceedings"})
    for entry in entries:
        name_tags = entry.find_all("span", {"itemprop": "author"})
        authors.extend(name_tag.text for name_tag in name_tags)
# Report the top_k most frequent names together with their entry counts.
for author, npub in Counter(authors).most_common(top_k):
    print("%s -> %d" % (author, npub))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment