moskomule/save_downloaded_images.py

## save_downloaded_images.py
from html.parser import HTMLParser
import urllib.request
import re

def download(url, path):
    """Download ``url`` into the directory ``path``.

    The local file name is the last segment of the URL's *path* component,
    so a query string or fragment (``pic.png?size=large#top``) no longer
    ends up in the file name.

    Args:
        url: Address of the resource to fetch; anything
            ``urllib.request.urlopen`` accepts as a string.
        path: Existing directory in which the file is written.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If the URL path has no usable last segment (for example
            it ends in ``/``), so no file name can be derived.
        OSError: If the download or the local write fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    import os
    import posixpath
    import shutil
    from urllib.parse import urlsplit

    name = posixpath.basename(urlsplit(url).path)
    if name in ("", ".", ".."):
        # Fail before touching the network: there is nothing sensible to
        # call the local file.
        raise ValueError("cannot derive a file name from URL: {!r}".format(url))

    file_name = os.path.join(path, name)
    # Open the remote side first so a failed request does not leave an empty
    # local file behind; copy in chunks instead of buffering the whole body.
    with urllib.request.urlopen(url) as file, open(file_name, 'wb') as local:
        shutil.copyfileobj(file, local)
    return file_name

class TestParser(HTMLParser):
    """HTML parser that downloads the target of ``<link rel="image_src">`` tags.

    Feed it a page with ``feed()``; every ``<link>`` start tag whose ``rel``
    attribute contains the token ``image_src`` and that has an ``href`` is
    passed to the module-level ``download`` function.

    Args:
        path: Directory handed to ``download``. When omitted, a module-level
            variable named ``path`` is used if one exists (this is what the
            parser read before the argument was added); otherwise the
            current directory ``"."`` is used.
    """

    def __init__(self, path=None):
        super().__init__()
        self.url = ""
        self.path = path

    def handle_starttag(self, tag, attrs):
        """Download the ``href`` of an ``image_src`` link; ignore other tags."""
        if tag != "link":
            return
        attrs = dict(attrs)
        # ``rel`` is a space-separated token list and may be missing or
        # valueless (None); indexing it directly raised KeyError on ordinary
        # <link> tags and aborted parsing of the whole page.
        rel_tokens = (attrs.get("rel") or "").lower().split()
        href = attrs.get("href")
        if "image_src" not in rel_tokens or not href:
            return
        dest = self.path
        if dest is None:
            # Backward compatibility with the module-level ``path`` variable.
            dest = globals().get("path", ".")
        download(href, dest)

    def handle_endtag(self, tag):
        """Clear ``self.url`` once a tag closes, if it holds an https URL.

        Nothing in this class assigns a non-empty value to ``self.url``, so
        this only has an effect when outside code sets the attribute.
        """
        if self.url and self.url.startswith("https"):
            self.url = ""

if __name__ == "__main__":
    # Script entry point: read page URLs (one per line) from a text file,
    # fetch each page and let TestParser download its image_src link.
    import sys

    # ``csv`` (the URL list file) and ``path`` (the output directory) were
    # used without ever being defined; take them from the command line.
    # ``path`` must remain a module-level name because
    # TestParser.handle_starttag looks it up as a global.
    if len(sys.argv) != 3:
        sys.exit("usage: save_downloaded_images.py URL_LIST_FILE OUTPUT_DIR")
    csv, path = sys.argv[1], sys.argv[2]

    parser = TestParser()

    with open(csv) as f:
        # Strip line endings and skip blank lines so only real URLs remain.
        urls = [line.strip() for line in f if line.strip()]

    for url in urls:
        try:
            with urllib.request.urlopen(url) as response:
                # Honour the charset announced by the server; undecodable
                # bytes are replaced instead of discarding the whole page.
                charset = response.headers.get_content_charset() or 'utf-8'
                page = response.read().decode(charset, errors='replace')
            # Drop leftovers from a previous page whose parse was interrupted.
            parser.reset()
            parser.feed(page)
            parser.close()
        except Exception as exc:
            # Best effort: one bad URL must not stop the batch, but say so
            # instead of failing silently.
            print("skipped {}: {!r}".format(url, exc), file=sys.stderr)
	from html.parser import HTMLParser
	import urllib.request
	import re

	def download(url, path):
	    """Download ``url`` into the directory ``path``.

	    The local file name is the last segment of the URL's *path* component,
	    so a query string or fragment (``pic.png?size=large#top``) no longer
	    ends up in the file name.

	    Args:
	        url: Address of the resource to fetch; anything
	            ``urllib.request.urlopen`` accepts as a string.
	        path: Existing directory in which the file is written.

	    Returns:
	        The path of the file that was written.

	    Raises:
	        ValueError: If the URL path has no usable last segment (for example
	            it ends in ``/``), so no file name can be derived.
	        OSError: If the download or the local write fails
	            (``urllib.error.URLError`` is an ``OSError`` subclass).
	    """
	    import os
	    import posixpath
	    import shutil
	    from urllib.parse import urlsplit

	    name = posixpath.basename(urlsplit(url).path)
	    if name in ("", ".", ".."):
	        # Fail before touching the network: there is nothing sensible to
	        # call the local file.
	        raise ValueError("cannot derive a file name from URL: {!r}".format(url))

	    file_name = os.path.join(path, name)
	    # Open the remote side first so a failed request does not leave an empty
	    # local file behind; copy in chunks instead of buffering the whole body.
	    with urllib.request.urlopen(url) as file, open(file_name, 'wb') as local:
	        shutil.copyfileobj(file, local)
	    return file_name

	class TestParser(HTMLParser):
	    """HTML parser that downloads the target of ``<link rel="image_src">`` tags.

	    Feed it a page with ``feed()``; every ``<link>`` start tag whose ``rel``
	    attribute contains the token ``image_src`` and that has an ``href`` is
	    passed to the module-level ``download`` function.

	    Args:
	        path: Directory handed to ``download``. When omitted, a module-level
	            variable named ``path`` is used if one exists (this is what the
	            parser read before the argument was added); otherwise the
	            current directory ``"."`` is used.
	    """

	    def __init__(self, path=None):
	        super().__init__()
	        self.url = ""
	        self.path = path

	    def handle_starttag(self, tag, attrs):
	        """Download the ``href`` of an ``image_src`` link; ignore other tags."""
	        if tag != "link":
	            return
	        attrs = dict(attrs)
	        # ``rel`` is a space-separated token list and may be missing or
	        # valueless (None); indexing it directly raised KeyError on ordinary
	        # <link> tags and aborted parsing of the whole page.
	        rel_tokens = (attrs.get("rel") or "").lower().split()
	        href = attrs.get("href")
	        if "image_src" not in rel_tokens or not href:
	            return
	        dest = self.path
	        if dest is None:
	            # Backward compatibility with the module-level ``path`` variable.
	            dest = globals().get("path", ".")
	        download(href, dest)

	    def handle_endtag(self, tag):
	        """Clear ``self.url`` once a tag closes, if it holds an https URL.

	        Nothing in this class assigns a non-empty value to ``self.url``, so
	        this only has an effect when outside code sets the attribute.
	        """
	        if self.url and self.url.startswith("https"):
	            self.url = ""

	if __name__ == "__main__":
	    # Script entry point: read page URLs (one per line) from a text file,
	    # fetch each page and let TestParser download its image_src link.
	    import sys

	    # ``csv`` (the URL list file) and ``path`` (the output directory) were
	    # used without ever being defined; take them from the command line.
	    # ``path`` must remain a module-level name because
	    # TestParser.handle_starttag looks it up as a global.
	    if len(sys.argv) != 3:
	        sys.exit("usage: save_downloaded_images.py URL_LIST_FILE OUTPUT_DIR")
	    csv, path = sys.argv[1], sys.argv[2]

	    parser = TestParser()

	    with open(csv) as f:
	        # Strip line endings and skip blank lines so only real URLs remain.
	        urls = [line.strip() for line in f if line.strip()]

	    for url in urls:
	        try:
	            with urllib.request.urlopen(url) as response:
	                # Honour the charset announced by the server; undecodable
	                # bytes are replaced instead of discarding the whole page.
	                charset = response.headers.get_content_charset() or 'utf-8'
	                page = response.read().decode(charset, errors='replace')
	            # Drop leftovers from a previous page whose parse was interrupted.
	            parser.reset()
	            parser.feed(page)
	            parser.close()
	        except Exception as exc:
	            # Best effort: one bad URL must not stop the batch, but say so
	            # instead of failing silently.
	            print("skipped {}: {!r}".format(url, exc), file=sys.stderr)