Skip to content

Instantly share code, notes, and snippets.

@CodeDotJS
Created November 18, 2015 03:56
Show Gist options
  • Save CodeDotJS/8ae61cdc3b5904f4162a to your computer and use it in GitHub Desktop.
Save CodeDotJS/8ae61cdc3b5904f4162a to your computer and use it in GitHub Desktop.
import os
import re
import urllib2
from os.path import basename
from posixpath import basename,dirname
from urlparse import urljoin
from urlparse import urlparse
from urlparse import urlsplit
def process_url(raw_url):
    """Return *raw_url* with its spaces made safe for use in a request.

    If the string ends with a space, that one trailing space is dropped.
    Every remaining space is then replaced with ``%20``.

    Args:
        raw_url: URL string to clean; may be empty.

    Returns:
        The cleaned URL string. An empty input yields an empty string.
    """
    # endswith() is safe on an empty string, unlike indexing raw_url[-1],
    # which raises IndexError.
    if raw_url.endswith(' '):
        raw_url = raw_url[:-1]
    return raw_url.replace(' ', '%20')
# Page to scrape and the directory the downloaded images are written to.
url = 'http://localhost:8000/Instagram'
output_dir = 'images'

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

# Fetch the page; close the connection even if the read fails.
page = urllib2.urlopen(url)
try:
    urlcontent = page.read()
finally:
    page.close()

# Collect the src attribute of every <img> tag (double-quoted values only).
imgurls = re.findall(r'img .*?src="(.*?)"', urlcontent)

for imgurl in imgurls:
    # An empty src would resolve to the page URL itself; nothing to download.
    if not imgurl.strip():
        continue
    # Downloads are best-effort: one bad image must not abort the rest.
    # Catching Exception (not a bare except) keeps Ctrl-C working, and the
    # failure is reported instead of being silently discarded.
    try:
        # Resolve relative and protocol-relative src values against the page.
        imgurl = urljoin(url, process_url(imgurl))
        filename = basename(urlsplit(imgurl)[2])
        if not filename:
            # URL path ends in '/', so there is no file name to save under.
            continue
        response = urllib2.urlopen(imgurl)
        try:
            imgdata = response.read()
        finally:
            response.close()
        # Save inside the output directory; 'with' closes the file even if
        # the write fails.
        with open(os.path.join(output_dir, filename), 'wb') as output:
            output.write(imgdata)
    except Exception as err:
        print('Skipping %s: %s' % (imgurl, err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment