Created
March 2, 2017 02:15
-
-
Save sebastian-code/8f8733e5d841029dd149ebf1c85a36c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This code requires the BeautifulSoup4 and Requests libraries; to install them,
use:
pip install beautifulsoup4 requests
""" | |
import sys | |
import time | |
import shutil | |
import urllib2 | |
import requests | |
from urlparse import urljoin | |
import bs4 as bs | |
def make_soup(url):
    """Fetch ``url`` and return its body parsed as a BeautifulSoup tree.

    The request is sent with a custom ``User-Agent`` header, and the raw
    response bytes are parsed with the ``html.parser`` backend.
    """
    request_headers = {'User-Agent': "Magic Browser"}
    page = requests.get(url, headers=request_headers)
    markup = page.content
    return bs.BeautifulSoup(markup, 'html.parser')
def _save_image(src, filename):
    """Stream the resource at ``src`` into the local file ``filename``.

    Raises whatever ``requests`` or the file system raises (including
    ``requests.exceptions.HTTPError`` for 4xx/5xx responses); the caller
    decides how to report it.
    """
    response = requests.get(src, stream=True)
    try:
        # Without this an HTML error page would be saved as if it were
        # the image.
        response.raise_for_status()
        # ``response.raw`` bypasses requests' own content decoding; ask
        # urllib3 to undo gzip/deflate so the file holds the real bytes.
        response.raw.decode_content = True
        # delay to avoid corrupted previews
        time.sleep(1)
        with open(filename, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        # Streamed responses hold their connection until closed.
        response.close()


def get_images(url):
    """Download every ``<img>`` referenced by the page at ``url``.

    Each image is saved in the current working directory under the last
    path segment of its ``src`` attribute. Relative ``src`` values are
    resolved against ``url``. Tags without a usable ``src`` or file name
    are skipped. A failure on one image is reported and does not stop
    the remaining downloads.

    Every ``print`` call takes a single pre-formatted string so the
    output is the same under Python 2 and Python 3.
    """
    soup = make_soup(url)
    images = soup.findAll('img')
    print('%d images found.' % len(images))
    print('Downloading the images')
    for img in images:
        # ``get`` returns None when the tag has no src attribute.
        link = (img.get('src') or '').strip()
        if not link:
            continue
        filename = link.split('/')[-1].strip()
        if not filename:
            # src ends with '/': there is no file name to save under.
            continue
        src = urljoin(url, link)
        print('Getting: %s' % filename)
        try:
            _save_image(src, filename)
        except Exception as err:
            # Best-effort download: report and carry on. ``Exception``
            # (unlike a bare ``except``) lets Ctrl-C and SystemExit through.
            print('An error has been detected... Don\'t worry')
            print('  %s: %s' % (src, err))
    print('Done.')
if __name__ == '__main__':
    # The page URL is required as the first command-line argument; exit
    # with a usage message (non-zero status) instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit('Usage: python %s <page-url>' % sys.argv[0])
    url = sys.argv[1]
    get_images(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
pip install BeatifulSoup (de esa forma está mal escrito)
Correctamente es: pip install beautifulsoup4