Created
March 5, 2014 20:17
-
-
Save shshank/9375700 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maps a branch name to the code that prefixes the ids of that branch
# (see how get_images_of_branch_year builds base_id from it).
BRANCH_ID = {
    'btech_CSE': 'b1',
    'btech_ETC': 'b2',
    'btech_EEE': 'b3',
    'btech_IT': 'b4',
    'mtech_CSE': 'A1',  # TODO: placeholder, the real code for this branch is not known
    'faculty': 'FP',  # TODO: placeholder, the real code is not known yet; to be filled in
}
class DoesNotExistError(Exception):
    '''
    raised when a requested user cannot be found

    value -- the message describing what was not found
             (defaults to 'User not found.'); also stored as self.value
    '''

    def __init__(self, value='User not found.'):
        # Hand the message to Exception as well, so that e.args is populated
        # and repr(), pickling and logging do not lose it.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # str(error) is the repr of the message, i.e. the message in quotes.
        return repr(self.value)
def download_file(picutre_url):
    '''
    downloads the file at the given url and returns the image's file object

    picutre_url -- url of the picture (the parameter keeps its original
                   spelling so that existing callers are not broken).
                   A relative url is resolved against the hibiscus photo
                   directory; an absolute url is used unchanged.

    returns the open response object; the caller reads the image bytes from
    it with .read() and should close it afterwards.
    '''
    # Imported locally because the file has no top-level import section;
    # the fallback keeps the function usable on Python 2 as well.
    try:
        from urllib.parse import urljoin
        from urllib.request import urlopen
    except ImportError:
        from urlparse import urljoin
        from urllib2 import urlopen

    # NOTE(review): assumes scraped picture urls are either absolute or
    # relative to this directory -- confirm against the real pages.
    base_url = "http://172.16.1.30/Hibiscus/docs/iiit/Photos/"
    return urlopen(urljoin(base_url, picutre_url))
def scrape_picture(page_url):
    '''
    scrapes the page for the image url and returns the image url

    page_url -- url of the page to look for the picture on

    NOT IMPLEMENTED YET: the original body returned a name that was never
    assigned, which failed with a NameError. Until the scraping logic is
    written this raises NotImplementedError so that the failure is explicit.
    '''
    # TODO: fetch page_url, locate the picture url in the page and return it
    # (raise DoesNotExistError when the page has no such user).
    raise NotImplementedError(
        'scrape_picture is not implemented yet (page: %s)' % (page_url,))
def make_url(branch, id):
    '''
    returns the hibiscus url for the given branch name and id

    branch -- branch name
    id     -- id of the user whose page is wanted

    NOT IMPLEMENTED YET: the original body returned a name that was never
    assigned, which failed with a NameError. Until the url scheme is filled
    in this raises NotImplementedError so that the failure is explicit.
    '''
    # TODO: build the page url from branch and id and return it.
    raise NotImplementedError(
        'make_url is not implemented yet (branch: %s, id: %s)' % (branch, id))
def get_images_of_branch_year(branch, year):
    '''
    downloads the pictures of the users of one branch and year into
    hib_pics/students/<branch>/<year>/ (the directory is created if missing)

    branch -- a key of BRANCH_ID; an unknown branch raises KeyError
    year   -- the year; everything after its first two characters is used
              in the id (for a four digit year, its last two digits)

    Each id is <branch code><year part><3 digit serial> for serials 1 to 79,
    and the picture is saved under that id as the file name. A user that
    raises DoesNotExistError is reported and skipped; any other error
    propagates to the caller.
    '''
    # Imported locally because the file has no top-level import for os.
    import os

    pics_dir = 'hib_pics/students/%s/%s' % (branch, year)
    if not os.path.exists(pics_dir):
        os.makedirs(pics_dir)

    base_id = '%s%s' % (BRANCH_ID[branch], str(year)[2:])
    for serial in range(1, 80):
        student_id = '%s%03d' % (base_id, serial)
        image_path = os.path.join(pics_dir, student_id)
        # Bound before the try so the error message below can always use it.
        page_url = None
        try:
            # The full id is passed (not the year) so that every iteration
            # asks for a different page.
            # NOTE(review): confirm this is the id make_url expects.
            page_url = make_url(branch, student_id)
            image_file_url = scrape_picture(page_url)
            response = download_file(image_file_url)
            try:
                image_file_content = response.read()
            finally:
                response.close()
        except DoesNotExistError as e:
            print('Encountered an error for %s, Moving on to next user' % (page_url,))
            print('Error %s' % (e,))
            continue

        # Written only after a successful download, so a missing user does
        # not leave an empty file behind; 'wb' because the content is an image.
        with open(image_path, 'wb') as image_file:
            image_file.write(image_file_content)
def main():
    '''
    gets the pictures of every branch in BRANCH_ID for every year in
    range(START_YEAR, END_YEAR), i.e. END_YEAR itself is not included
    '''
    # NOTE(review): START_YEAR and END_YEAR are not defined in the visible
    # part of this file; they must exist at module level before this runs.
    for year in range(START_YEAR, END_YEAR):
        for branch in BRANCH_ID:
            print('Getting images for %s, for the year %s' % (branch, year))
            get_images_of_branch_year(branch, year)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment