Skip to content

Instantly share code, notes, and snippets.

@ryul99
Last active December 20, 2019 06:28
Show Gist options
  • Save ryul99/ef6fad0def581ad57e08690356d01e74 to your computer and use it in GitHub Desktop.
Save ryul99/ef6fad0def581ad57e08690356d01e74 to your computer and use it in GitHub Desktop.
# made by ryul99 at github
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm
# Placeholder: replace this with the full URL of the paper list page to crawl.
paper_index_page = 'whole url of NeaurIPS paper list page'

print('Crawling Paper List...')
req = requests.get(paper_index_page)
# Fail fast on an HTTP error rather than parsing an error page as the index.
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, 'html.parser')

# Collect the href of every anchor whose target contains 'paper/'.
# Anchors without an href yield None from .get(), so they are filtered out
# before the substring test (which would otherwise raise TypeError).
hrefs = (obj.get('href') for obj in soup.find_all('a'))
links = [href for href in hrefs if href and 'paper/' in href]

for link in tqdm(links):
    paper_link = 'https://papers.nips.cc/' + link + '.pdf'
    # The local file is named after the last path segment of the link.
    paper_name = link.split('/')[-1] + '.pdf'
    # stream=True lets iter_content pull the body in chunks instead of
    # buffering the whole PDF in memory; the with-block releases the
    # connection once the download finishes or is skipped.
    with requests.get(paper_link, stream=True) as paper:
        if not paper.ok:
            # Do not save an HTTP error body under a .pdf name; report the
            # failure without breaking the progress bar and move on.
            tqdm.write('Skipping %s (HTTP %d)' % (paper_link, paper.status_code))
            continue
        with open(paper_name, 'wb') as f:
            for chunk in paper.iter_content(chunk_size=2000):
                f.write(chunk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment