Skip to content

Instantly share code, notes, and snippets.

@fno2010
Created April 12, 2017 11:51
Show Gist options
  • Save fno2010/7bbe6234a522bec8c8747401826afb27 to your computer and use it in GitHub Desktop.
Save fno2010/7bbe6234a522bec8c8747401826afb27 to your computer and use it in GitHub Desktop.
A utility script to fetch bibtex from top conferences of CS
#!/usr/bin/env python
"""DBLP Bib Fetcher
A utility script for fetching bibtex from DBLP.
Dependencies:
pyquery
"""
import urllib2
import os
from pyquery import PyQuery as pq
BIB_URL_TEMPLATE = 'http://dblp.uni-trier.de/rec/bib1/%s.bib'
BIB_URL_SELECTOR = '#main ul.publ-list>li.entry.inproceedings'
BIB_CACHE = 'bib'
def _readUrl(url):
    """Return the raw body of `url`, always closing the HTTP response."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so the
        # handle is released explicitly even when read() raises.
        response.close()


def fetchBibtex(url):
    """Fetch bibtex entries into the file

    url: the url of a dblp conference/journal homepage.

    The output file lives in BIB_CACHE (created here if it is missing) and
    is named after the last path component of `url`, cut at its first '.',
    plus the '.bib' extension.  Only the page elements matched by
    BIB_URL_SELECTOR are downloaded; the `id` attribute of each matched
    element is substituted into BIB_URL_TEMPLATE to build the bibtex url.
    Any download error propagates to the caller.
    """
    ext = '.bib'
    name = url.split('/')[-1].split('.')[0] + ext
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement as the bare form.
    print('Downloading %s -> file %s' % (url, name))
    page = pq(url=url,
              opener=lambda url, **kw: _readUrl(url))
    bibs = page(BIB_URL_SELECTOR)
    bib_urls = bibs.map(lambda i, e: BIB_URL_TEMPLATE % pq(e).attr('id'))
    # Do not rely on fetchBibtexs() having created the cache directory, so
    # that this function also works when called on its own.
    if not os.path.isdir(BIB_CACHE):
        os.makedirs(BIB_CACHE)
    with open(os.path.join(BIB_CACHE, name), 'w') as f:
        for bib_url in bib_urls:
            f.write(_readUrl(bib_url))
    print('%d bib entries are written into the file.' % len(bib_urls))
def fetchBibtexs(urls):
    """Download the bibtex entries of every page in `urls`.

    urls: an iterable of dblp page urls; each one is handed to
    fetchBibtex() in order.  The BIB_CACHE directory is created first
    when no path of that name exists yet.
    """
    cache_missing = not os.path.exists(BIB_CACHE)
    if cache_missing:
        os.mkdir(BIB_CACHE)
    for page_url in urls:
        fetchBibtex(page_url)
if __name__ == '__main__':
    # One (url pattern, years) pair per conference; the end year of each
    # range is exclusive.  Order here is the download order.
    venues = (
        ('http://dblp.uni-trier.de/db/conf/sigcomm/sigcomm%d.html',
         range(2012, 2017)),
        ('http://dblp.uni-trier.de/db/conf/nsdi/nsdi%d.html',
         range(2012, 2018)),
        ('http://dblp.uni-trier.de/db/conf/icnp/icnp%d.html',
         range(2012, 2017)),
    )
    urls = []
    for url_pattern, years in venues:
        urls.extend([url_pattern % year for year in years])
    # Show the full work list before starting any download.
    print('Downloading bibtex from:')
    for url in urls:
        print(url)
    fetchBibtexs(urls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment