Skip to content

Instantly share code, notes, and snippets.

@liveashish
Forked from shshank/Hibiscur_scrawl.py
Created March 5, 2014 20:38
Show Gist options
  • Save liveashish/9376088 to your computer and use it in GitHub Desktop.
Save liveashish/9376088 to your computer and use it in GitHub Desktop.
# Maps a branch name to the prefix code that starts the ids of that branch.
BRANCH_ID = {
    'btech_CSE': 'b1',
    'btech_ETC': 'b2',
    'btech_EEE': 'b3',
    'btech_IT': 'b4',
    # NOTE(review): the original author marked the two codes below as
    # unverified guesses -- confirm them before relying on these entries.
    'mtech_CSE': 'A1',
    'faculty': 'FP',
}
class DoesNotExistError(Exception):
    """Signals that a requested user could not be found.

    Attributes:
        value: The payload describing the failure; defaults to
            ``'User not found.'``.
    """

    def __init__(self, value='User not found.'):
        # Forward the payload to Exception so that ``args``, pickling and
        # tracebacks behave like they do for any other exception.
        super(DoesNotExistError, self).__init__(value)
        self.value = value

    def __str__(self):
        """Return the ``repr`` of the stored value."""
        return repr(self.value)
def download_file(picutre_url):
    """Open the picture at the given URL and return its file-like response.

    Args:
        picutre_url: Absolute URL of the picture, or a path relative to the
            Hibiscus photo directory. (Resolving relative paths against the
            photo directory is an assumption based on the base URL declared
            here -- confirm against real picture links.)

    Returns:
        The file-like object returned by ``urlopen``. Call ``.read()`` on it
        to get the image bytes; the caller is responsible for closing it.

    Raises:
        Whatever ``urlopen`` raises (for example ``URLError``) when the
        resource cannot be opened.
    """
    # Function-scope imports: the file has no import block, and the module
    # names differ between Python 2 and Python 3.
    try:
        from urllib.parse import urljoin
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib2 import urlopen
        from urlparse import urljoin

    base_url = "http://172.16.1.30/Hibiscus/docs/iiit/Photos/"
    # urljoin leaves absolute URLs untouched and resolves relative ones
    # against the photo directory.
    return urlopen(urljoin(base_url, picutre_url))
def scrape_picture(page_url):
    """Scrape the given page for the picture and return the picture's URL.

    This is still a stub: the structure of the page is not known here, so
    the scraping logic has not been written. It fails loudly instead of
    returning an undefined name.

    Args:
        page_url: URL of the page to scrape.

    Raises:
        NotImplementedError: Always, until the scraping logic is written.
    """
    raise NotImplementedError(
        'scrape_picture is not implemented yet (page: %s)' % (page_url,))
def make_url(branch, id):
    """Return the Hibiscus page URL for the given branch name and id.

    This is still a stub: the URL scheme of the site is not known here, so
    it fails loudly instead of returning an undefined name.

    Args:
        branch: Branch name.
        id: Identifier to build the URL for. (The name shadows the builtin
            ``id`` but is kept so existing keyword callers keep working.)

    Raises:
        NotImplementedError: Always, until the URL scheme is filled in.
    """
    raise NotImplementedError(
        'make_url is not implemented yet (branch: %s, id: %s)' % (branch, id))
def get_images_of_branch_year(branch, year):
    """Download the pictures of one branch and year into a local folder.

    Pictures are written to ``hib_pics/students/<branch>/<year>/``, one file
    per student id. A student id is the branch code from ``BRANCH_ID``
    followed by ``str(year)[2:]`` and a three-digit serial number
    (001 through 079).

    Args:
        branch: A key of ``BRANCH_ID``, e.g. ``'btech_CSE'``.
        year: Year of the batch; presumably four digits, since only
            ``str(year)[2:]`` goes into the id -- confirm with callers.

    Raises:
        KeyError: If ``branch`` is not a key of ``BRANCH_ID``.
    """
    import os  # local import: the file has no top-level import block

    pics_dir = 'hib_pics/students/%s/%s' % (branch, year)
    if not os.path.exists(pics_dir):
        os.makedirs(pics_dir)

    base_id = '%s%s' % (BRANCH_ID[branch], str(year)[2:])
    for serial in range(1, 80):
        student_id = '%s%03d' % (base_id, serial)
        image_path = os.path.join(pics_dir, student_id)
        page_url = None  # stays None if make_url itself fails
        try:
            # NOTE(review): make_url is documented as taking "branch name
            # and id"; the full student id is passed here on the assumption
            # that this is the id it expects -- confirm once it is written.
            page_url = make_url(branch, student_id)
            image_file_url = scrape_picture(page_url)
            response = download_file(image_file_url)
            try:
                image_file_content = response.read()
            finally:
                response.close()
        except DoesNotExistError as e:
            print('Encountered an error for %s, Moving on to next user'
                  % (page_url or student_id,))
            print('Error %s' % (e,))
            continue

        # Write only after a successful download so a missing user does not
        # leave an empty file behind; binary mode keeps image bytes intact.
        with open(image_path, 'wb') as f:
            f.write(image_file_content)
def main():
    """Fetch the pictures of every branch for every year in the range.

    Iterates ``range(START_YEAR, END_YEAR)`` (so ``END_YEAR`` itself is
    excluded) and, for each year, every key of ``BRANCH_ID``.

    NOTE(review): ``START_YEAR`` and ``END_YEAR`` are not defined in the
    visible part of this file; they must exist at module level before this
    function is called.
    """
    for year in range(START_YEAR, END_YEAR):
        for branch in BRANCH_ID:
            print('Getting images for %s, for the year %s' % (branch, year))
            get_images_of_branch_year(branch, year)
# Run the scraper only when executed as a script, so importing this module
# does not start downloading.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment