Created
September 22, 2015 13:31
-
-
Save jbwincek/feb515dfb5618d1b909c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import getpass | |
from os.path import exists | |
from os import makedirs | |
import requests | |
from robobrowser import RoboBrowser | |
import shutil | |
def parse_filename(url, number):
    """Build a numbered local filename from the last path segment of *url*.

    Args:
        url: Address of the resource to be saved.
        number: Position of the resource in the download list; it is
            right-aligned and zero-padded to at least three characters so
            files sort in download order.

    Returns:
        A string of the form ``'<number>_<last path segment>'``.
    """
    # Drop the fragment and query string first: they are not part of the
    # resource name, may contain '/' themselves, and characters such as '?'
    # are not valid in filenames on every platform.
    path = url.split('#', 1)[0].split('?', 1)[0]
    # A trailing slash would otherwise leave an empty last segment.
    filename = path.rstrip('/').split('/')[-1]
    return '{i:0>3}_{fn}'.format(i=number, fn=filename)
def download(url, path):
    """Stream the resource at *url* into the file at *path*.

    The file is only created when the server answers with HTTP 200; any
    other status leaves *path* untouched.

    Args:
        url: Address of the resource to fetch.
        path: Destination file path; opened in binary write mode.

    Returns:
        True if the body was written to *path*, False if the response
        status was not 200.
    """
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            return False
        # Ask the raw stream to undo any content encoding (gzip/deflate)
        # so the bytes written to disk are the decoded payload.
        r.raw.decode_content = True
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        return True
    finally:
        # With stream=True the connection is not released until the
        # response is closed, so close it on every exit path.
        r.close()
def make_path(folder, filename):
    """Return the path of *filename* inside *folder*.

    Args:
        folder: Destination directory, with or without a trailing separator.
        filename: Bare file name (no directory components) to place in
            *folder*.

    Returns:
        The joined path as a string.
    """
    # Local import: only `exists` is imported from os.path at file level.
    from os.path import join

    # join() inserts a separator only when *folder* does not already end
    # with one, so 'downloads' and 'downloads/' both work.
    return join(folder, filename)
def _collect_image_urls(browser):
    """Return the full-size image URLs found on the page *browser* has open.

    Only ``<a class="thumb ismature">`` tags that carry a
    ``data-super-full-img`` attribute contribute a URL; tags without the
    attribute are skipped.
    """
    return [
        tag.attrs['data-super-full-img']
        for tag in browser.find_all("a", class_="thumb ismature")
        if 'data-super-full-img' in tag.attrs
    ]


def main():
    """Log in, collect image URLs from a gallery, and download them.

    Command-line arguments:
        username: Account name entered into the login form.
        url: Gallery URL to scrape.
        -f / --folder: Destination directory (default ``./downloads/``).

    The password is prompted for interactively so it never appears on the
    command line. Images are saved under names produced by
    ``parse_filename`` so they keep their gallery order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('username', help='Your username')
    parser.add_argument('url', help='Gallery URL')
    parser.add_argument('-f', '--folder', help='Folder to download to',
                        default='./downloads/')
    args = parser.parse_args()
    password = getpass.getpass()

    browser = RoboBrowser(parser='lxml')
    browser.open(args.url)
    form = browser.get_forms()[1]  # [1] happens to be the login form
    form['username'].value = args.username
    form['password'].value = password
    browser.submit_form(form)

    # Re-open the gallery now that the session is authenticated.
    browser.open(args.url)
    urls_to_download = _collect_image_urls(browser)
    browser.open(args.url, params={'offset': '24'})  # Switch to second page
    urls_to_download.extend(_collect_image_urls(browser))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    makedirs(args.folder, exist_ok=True)
    for index, url in enumerate(urls_to_download):
        download(url, make_path(args.folder, parse_filename(url, index)))
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment