Skip to content

Instantly share code, notes, and snippets.

@ultimatecoder
Created March 30, 2017 13:25
Show Gist options
  • Save ultimatecoder/45f73f58086041e1d68d13cfb60ffbf7 to your computer and use it in GitHub Desktop.
Save ultimatecoder/45f73f58086041e1d68d13cfb60ffbf7 to your computer and use it in GitHub Desktop.
A webpage downloader I wrote while I was learning Python. It is not intended for production use; it is kept here for reference only.
http://www.inc.com/ss/jill-krasny/7-food-delivery-startups-watch?slide=1
http://techcrunch.com/tag/food/
http://www.npr.org/blogs/thesalt/2014/08/18/336877182/for-food-start-ups-incubators-help-dish-up-success
http://yourstory.com/2014/10/techsparks-hackathon-theme-mobile/
import urllib2
from sys import exit, argv
from os import path
class Fetcher:
    """Download every URL listed in a text file into an output directory.

    The URL file is read once, at construction time. ``start()`` then
    fetches the URLs in order and stores each response body as
    ``<index>.html`` (``0.html``, ``1.html``, ...) in the output directory.
    """

    # Class-level defaults; both names are rebound per instance in __init__.
    _urls = []
    _output = ''

    def _geturls(self, file):
        """Return the whitespace-separated tokens read from *file*.

        *file* is any object with a ``read()`` method returning text.
        Splitting on arbitrary whitespace means blank lines and
        surrounding spaces never produce empty entries.
        """
        return file.read().split()

    def __init__(self, ifpath, ofpath):
        """Load the URL list from *ifpath* and remember *ofpath*.

        ifpath: path of the text file holding whitespace-separated URLs.
        ofpath: directory the downloaded pages are written into.

        Raises whatever ``open`` raises when *ifpath* cannot be read.
        """
        # The context manager guarantees the URL file is closed once read.
        with open(ifpath, 'r') as url_file:
            self._urls = self._geturls(url_file)
        self._output = ofpath

    def _getwebpage(self, url):
        """Fetch *url* with ``urllib2.urlopen`` and return the response body.

        Errors raised by ``urlopen`` or ``read`` propagate to the caller.
        """
        # Announce the URL before the request so a failure can be attributed.
        print("fetching : {}".format(url))
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()

    def _writewebpage(self, webpage, fname):
        """Write *webpage* to the file *fname* inside the output directory.

        *webpage* may be raw bytes (what ``_getwebpage`` returns) or text;
        text is encoded as UTF-8 first. The file is opened in binary mode
        so the downloaded bytes are stored unmodified.
        """
        if not isinstance(webpage, bytes):
            webpage = webpage.encode('utf-8')
        with open(path.join(self._output, fname), 'wb') as out:
            out.write(webpage)

    def start(self):
        """Download every loaded URL, saving page *i* as ``<i>.html``."""
        for index, url in enumerate(self._urls):
            self._writewebpage(self._getwebpage(url), '{}.html'.format(index))
def main():
    """Command-line entry point: ``--urlFile <url.txt> <webpages-dir>``.

    Reads the arguments from ``argv``, validates them, then runs a
    ``Fetcher`` over the URL file, writing pages into the given directory.

    Every invalid invocation prints a message and exits with status 1:
    no arguments, an unrecognised first argument, fewer than two values
    after ``--urlFile``, a URL file path that does not exist, or an output
    path that is not an existing directory.
    """
    usage = 'usage: [--urlFile] url.txt webpages-dir-path'
    args = argv[1:]

    # Only the --urlFile form is implemented; anything else is a usage
    # error and must be reported with a non-zero status like the others.
    if not args or args[0] != '--urlFile':
        print(usage)
        exit(1)

    if len(args) < 3:
        print("ERROR: Please provide enough arguments")
        print(usage)
        exit(1)
    urls, webpages = args[1], args[2]

    if not (path.exists(urls) and path.isdir(webpages)):
        print("ERROR : In url file path or out put webpages dir.")
        exit(1)

    fetcher = Fetcher(urls, webpages)
    fetcher.start()
    print("Fetcher Task completed !")
# Run the downloader only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment