Skip to content

Instantly share code, notes, and snippets.

@omarayad1
Created September 18, 2014 18:32
Show Gist options
  • Save omarayad1/7c89d5cda3d54271620b to your computer and use it in GitHub Desktop.
Save omarayad1/7c89d5cda3d54271620b to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
import os
import tarfile
import sys
# Mirror the problem statements of the ICPC Live Archive into ./archive/.
#
# The site is a three-level tree of HTML tables:
#   root page -> regions -> contests -> problems
# Every table row carries the CSS class 'sectiontableentry1' or
# 'sectiontableentry2', and its first <a> links to the next level.  Each
# problem page in turn links to the PDF that is saved as
#   archive/<region>/<contest>/<problem title>.pdf
# Files that already exist on disk are skipped, so the script can be re-run
# to resume an interrupted download.

BASE_URL = 'https://icpcarchive.ecs.baylor.edu/'
ROOT_URL = BASE_URL + 'index.php?option=com_onlinejudge&Itemid=8&category=0'
OUTPUT_DIR = 'archive'
# Position of the PDF link among all <a> tags of a problem page.  This is
# tied to the site's page layout; pages that do not match are reported and
# skipped instead of aborting the crawl.
PDF_ANCHOR_INDEX = 18


def _ensure_dir(path):
    """Create *path* (including parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # An existing directory is the expected case; anything else
        # (permissions, a regular file in the way) would only fail later
        # when writing, so surface it here.
        if not os.path.isdir(path):
            raise


def _table_links(page_url):
    """Fetch *page_url* and return ``[(link text, absolute URL), ...]``.

    One pair is returned per table row ('sectiontableentry1' rows first,
    then 'sectiontableentry2' rows).  Rows without a usable link are
    skipped.
    """
    # No explicit parser is passed, as in the original script: switching
    # parsers could change how the site's markup is interpreted.
    soup = BeautifulSoup(requests.get(page_url).content)
    rows = soup.find_all(class_='sectiontableentry1')
    rows += soup.find_all(class_='sectiontableentry2')
    links = []
    for row in rows:
        anchor = row.find('a')
        if anchor is None or not anchor.get('href'):
            continue
        links.append((anchor.text, BASE_URL + anchor['href']))
    return links


def _pdf_url(problem_url):
    """Return the absolute URL of the PDF linked from a problem page.

    Raises IndexError or KeyError when the page does not have the
    expected layout.
    """
    anchors = BeautifulSoup(requests.get(problem_url).content).find_all('a')
    return BASE_URL + anchors[PDF_ANCHOR_INDEX]['href']


_ensure_dir(OUTPUT_DIR)

# Level 1: region name -> region page URL.
regions = dict(_table_links(ROOT_URL))
for region in regions:
    _ensure_dir('%s/%s' % (OUTPUT_DIR, region))

# Level 2: '<region>/<contest>' -> contest page URL.
contests = {}
for region, region_url in regions.items():
    for contest_name, contest_url in _table_links(region_url):
        contests['%s/%s' % (region, contest_name)] = contest_url

# Level 3: download every problem of every contest.
for contest, contest_url in contests.items():
    print('--- Downloading Region/Contest: %s' % contest)
    contest_dir = '%s/%s' % (OUTPUT_DIR, contest)
    _ensure_dir(contest_dir)

    problems = {
        '%s/%s.pdf' % (contest_dir, title): url
        for title, url in _table_links(contest_url)
    }
    for filename, url in problems.items():
        if os.path.isfile(filename):
            continue  # already downloaded on a previous run

        try:
            pdf_url = _pdf_url(url)
        except (IndexError, KeyError, TypeError):
            # Unexpected page layout: report the problem page and move on.
            print(url)
            continue

        response = requests.get(pdf_url)
        if not response.ok:
            # Do not store an HTTP error page as a .pdf; it would be
            # mistaken for a finished download on the next run.
            print(url)
            continue

        # Binary mode: PDF bytes must be written untranslated.
        with open(filename, 'wb') as pdf:
            pdf.write(response.content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment