Created
April 12, 2017 11:51
-
-
Save fno2010/7bbe6234a522bec8c8747401826afb27 to your computer and use it in GitHub Desktop.
A utility script to fetch bibtex from top conferences of CS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""DBLP Bib Fetcher | |
A utility script for fetching bibtex from DBLP. | |
Dependencies: | |
pyquery | |
""" | |
import os
import urllib2
from contextlib import closing

from pyquery import PyQuery as pq
# URL of a single bibtex record; %s is filled with the 'id' attribute of an
# entry element found on the listing page.
BIB_URL_TEMPLATE = 'http://dblp.uni-trier.de/rec/bib1/%s.bib'
# CSS selector picking the 'inproceedings' entries out of the page's
# publication lists.
BIB_URL_SELECTOR = '#main ul.publ-list>li.entry.inproceedings'
# Directory the generated .bib files are written into.
BIB_CACHE = 'bib'
def _readUrl(url):
    """Return the raw body served at url, closing the connection afterwards."""
    with closing(urllib2.urlopen(url)) as response:
        return response.read()


def fetchBibtex(url):
    """Fetch the bibtex entries of one DBLP page into a file.

    Every element of the page matched by BIB_URL_SELECTOR is turned into a
    bibtex URL through BIB_URL_TEMPLATE; the downloaded records are written,
    in page order, to a file inside BIB_CACHE named after the page
    (e.g. '.../sigcomm2016.html' -> 'sigcomm2016.bib'). An existing file of
    that name is overwritten.

    The BIB_CACHE directory must already exist; fetchBibtexs creates it
    before calling this function.

    url: the url of a dblp conference/journal homepage.
    """
    # Last path segment with everything after its first dot replaced by '.bib'.
    name = url.split('/')[-1].split('.')[0] + '.bib'
    print('Downloading %s -> file %s' % (url, name))

    # Route pyquery's page download through _readUrl so that the connection
    # is closed as soon as the body has been read.
    page = pq(url=url, opener=lambda url, **kw: _readUrl(url))
    bibs = page(BIB_URL_SELECTOR)
    bib_urls = bibs.map(lambda i, e: BIB_URL_TEMPLATE % pq(e).attr('id'))

    # The with-statement closes the file; no explicit close() is needed.
    with open(os.path.join(BIB_CACHE, name), 'w') as f:
        for bib_url in bib_urls:
            f.write(_readUrl(bib_url))

    print('%d bib entries are written into the file.' % len(bib_urls))
def fetchBibtexs(urls):
    """Download the bibtex of every DBLP page listed in urls.

    The BIB_CACHE directory is created first when nothing exists at that
    path; each url is then handed to fetchBibtex in the given order.
    """
    cache_missing = not os.path.exists(BIB_CACHE)
    if cache_missing:
        os.mkdir(BIB_CACHE)

    for page_url in urls:
        fetchBibtex(page_url)
if __name__ == '__main__':
    # Each entry pairs a DBLP proceedings URL pattern with the years to fetch
    # (the end of each range is exclusive).
    sources = [
        ('http://dblp.uni-trier.de/db/conf/sigcomm/sigcomm%d.html',
         range(2012, 2017)),
        ('http://dblp.uni-trier.de/db/conf/nsdi/nsdi%d.html',
         range(2012, 2018)),
        ('http://dblp.uni-trier.de/db/conf/icnp/icnp%d.html',
         range(2012, 2017)),
    ]

    # Expand conference by conference, keeping the years ascending.
    urls = []
    for pattern, years in sources:
        urls.extend([pattern % year for year in years])

    print('Downloading bibtex from:')
    for url in urls:
        print(url)

    fetchBibtexs(urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment