@emallson · Last active July 6, 2020 16:11

Stumbled on this old pile of code from the mid-point of my PhD work. I wanted to see which venues WWW-published authors also published in, because I wanted to be published in WWW, but a single publication deadline per year is not enough.

adjacent-confs.py downloads that data from DBLP. No idea if it still works; it was last edited in December 2017. Similar Venues.ipynb does some visualization. If it won't load on GitHub, you can view it here.
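For reference, the script pages through DBLP's public publication-search API and keys off the @total and @sent counters in each JSON response. A minimal sketch of the kind of request it issues (venue and year are illustrative, and the response schema may have shifted since 2017):

import requests

# One page of results from the same DBLP endpoint the script queries.
# f is the offset of the first hit and c=0 suppresses completion suggestions,
# matching the FMT string in the script below.
url = "http://dblp.org/search/publ/api?q=venue:WWW:year:2017:&format=json&f=0&c=0"
hits = requests.get(url).json()["result"]["hits"]

print(hits["@total"])            # total number of matching publications
print(hits["@sent"])             # how many hits this response actually contains
for pub in hits.get("hit", []):  # each hit's metadata lives under "info"
    print(pub["info"].get("title"), pub["info"].get("type"))

The full script follows.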

#!/usr/bin/env python3
"""List the most frequently published-in conferences by authors publishing in <conf>.

Usage:
    adjacent-confs.py download <conf> <year> [options]
    adjacent-confs.py file <dump> [options]
    adjacent-confs.py (-h | --help)

Options:
    -h --help      Show this screen.
    --dump <out>   Save output dataframe to <out>.
"""
import requests
from requests.exceptions import Timeout, RequestException
from docopt import docopt
import pandas as pd
from time import sleep
from json.decoder import JSONDecodeError


def get_json(url, retry_limit=10):
    """Fetch a URL and decode its JSON body, retrying on transient failures."""
    tries = 0
    while tries < retry_limit:
        tries += 1
        try:
            r = requests.get(url)
            return r.json()
        except (Timeout, RequestException, JSONDecodeError) as e:
            print("Request to {} failed: {}".format(url, e))
            sleep(5)
    raise Exception("request failed after {} tries".format(tries))


def list_authors(conf, year):
    """Collect the set of authors with a (non-editorship) publication at <conf> in <year>."""
    FMT = "http://dblp.org/search/publ/api?q=venue:{venue}:year:{year}:&format=json&f={first}&c=0"
    r = get_json(FMT.format(venue=conf, year=year, first=0))
    authors = set()
    total = int(r["result"]["hits"]["@total"])
    recvd = int(r["result"]["hits"]["@sent"])
    while True:
        print("{} of {} received".format(recvd, total))
        for pub in r["result"]["hits"]["hit"]:
            if pub["info"]["type"] != "Editorship":
                try:
                    auth = pub["info"]["authors"]["author"]
                    if isinstance(auth, list):
                        authors |= set(auth)
                    elif isinstance(auth, str):
                        authors.add(auth)
                    else:
                        print("Unknown authors list {} of type {}".format(auth, type(auth)))
                except Exception as e:
                    print("Unable to read authors from pub {}: {}".format(pub, e))
        if recvd == total:
            return authors
        else:
            # Fetch the next page, offsetting by the number of hits received so far.
            r = get_json(FMT.format(venue=conf, year=year, first=recvd))
            recvd += int(r["result"]["hits"]["@sent"])


def list_venues(author):
    """Map each venue the author has published in to the list of years of those publications."""
    FMT = "http://dblp.org/search/publ/api?q=author:{author}:&format=json&f={first}&c=0"
    r = get_json(FMT.format(author=author.replace(" ", "_"), first=0))
    venues = {}
    total = int(r["result"]["hits"]["@total"])
    recvd = int(r["result"]["hits"]["@sent"])
    if total == 0:
        print("No publications listed for author {}".format(author))
        return {}
    while True:
        print("{} of {} received".format(recvd, total))
        for pub in r["result"]["hits"]["hit"]:
            if pub["info"]["type"] in ["Conference and Workshop Papers", "Journal Articles"]:
                try:
                    venue = pub["info"]["venue"]
                    year = pub["info"]["year"]
                    if venue not in venues:
                        venues[venue] = [year]
                    else:
                        venues[venue] += [year]
                except Exception as e:
                    print("Could not get info from pub {}: {}".format(pub, e))
        if recvd == total:
            return venues
        else:
            r = get_json(FMT.format(author=author.replace(" ", "_"), first=recvd))
            recvd += int(r["result"]["hits"]["@sent"])


if __name__ == "__main__":
    args = docopt(__doc__, version="0.1")
    if args["download"]:
        authors = list_authors(args["<conf>"], args["<year>"])
        print(authors)
        df = pd.DataFrame(columns=["author", "venue", "year"])
        for author in authors:
            try:
                vs = list_venues(author)
                if len(vs) > 0:
                    # Note: DataFrame.append was removed in pandas 2.0; on newer
                    # versions, collect records in a list and pd.concat them instead.
                    df = df.append([{"author": author, "venue": v, "year": y}
                                    for v, yrs in vs.items() for y in yrs])
            except Exception as e:
                print("Downloading pubs by {} failed (reason: {}), skipping.".format(author, e))
        print(df)
        if args["--dump"] is not None:
            df.to_csv(args["--dump"])
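For example, after a run along the lines of `adjacent-confs.py download WWW 2017 --dump www-adjacent.csv` (conference, year, and filename all illustrative), the dumped CSV could be summarized roughly like this. The actual analysis lives in Similar Venues.ipynb, so treat this as a sketch:

import pandas as pd

# Rank venues by how many distinct <conf> authors also published there.
# "www-adjacent.csv" is the hypothetical file written via --dump above.
df = pd.read_csv("www-adjacent.csv")
counts = (df.drop_duplicates(["author", "venue"])   # one vote per author per venue
            .groupby("venue")["author"]
            .count()
            .sort_values(ascending=False))
print(counts.head(20))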