Last active
December 20, 2019 06:28
-
-
Save ryul99/ef6fad0def581ad57e08690356d01e74 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# made by ryul99 at github
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
paper_index_page = 'whole url of NeaurIPS paper list page' | |
print('Crawling Paper List...') | |
req = requests.get(paper_index_page) | |
html = req.text | |
soup = BeautifulSoup(html, 'html.parser') | |
links = [obj.get('href') for obj in soup.find_all('a') if 'paper/' in obj.get('href')] | |
for link in tqdm(links): | |
paper_link = 'https://papers.nips.cc/' + link + '.pdf' | |
paper = requests.get(paper_link) | |
paper_name = link.split('/')[-1] + '.pdf' | |
with open(paper_name, 'wb') as f: | |
for chunk in paper.iter_content(chunk_size=2000): | |
f.write(chunk) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment