Created
March 30, 2017 13:25
-
-
Save ultimatecoder/45f73f58086041e1d68d13cfb60ffbf7 to your computer and use it in GitHub Desktop.
A webpage downloader I wrote while learning Python. It will not serve any production purpose; it is kept here purely for reference.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://www.inc.com/ss/jill-krasny/7-food-delivery-startups-watch?slide=1 | |
http://techcrunch.com/tag/food/ | |
http://www.npr.org/blogs/thesalt/2014/08/18/336877182/for-food-start-ups-incubators-help-dish-up-success | |
http://yourstory.com/2014/10/techsparks-hackathon-theme-mobile/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
from sys import exit, argv | |
from os import path | |
class Fetcher:
    """Download every URL listed in a file and save each response as a
    numbered ``<index>.html`` file inside an output directory."""

    def __init__(self, ifpath, ofpath):
        """Load the URL list from *ifpath*; remember *ofpath* as the
        output directory.

        Fix: the input file is now closed deterministically (the original
        opened it and never closed it), and the URL list / output path are
        instance attributes instead of shared mutable class attributes.
        """
        with open(ifpath, 'r') as url_file:
            self._urls = self._geturls(url_file)
        self._output = ofpath

    def _geturls(self, file):
        # URLs may be separated by any whitespace (spaces or newlines).
        return file.read().split()

    def _getwebpage(self, url):
        """Return the body of *url* as a string.

        NOTE(review): urllib2 is Python 2 only, and no timeout is set, so
        a hung server blocks indefinitely.
        """
        response = urllib2.urlopen(url)
        print("fetching : {}".format(url))
        return response.read()

    def _writewebpage(self, webpage, fname):
        # path.join handles separators correctly (the original hard-coded
        # '/'); the context manager guarantees the file is flushed/closed.
        with open(path.join(self._output, fname), 'w') as f:
            f.write(webpage)

    def start(self):
        """Fetch every URL and write it to ``<output>/<index>.html``."""
        for i, url in enumerate(self._urls):
            self._writewebpage(self._getwebpage(url), str(i) + '.html')
def main():
    """Command-line entry point.

    usage: [--urlFile] url.txt webpages-dir-path

    Exits with status 1 on any usage or path error. Fix: the original
    exited with status 0 (success) when the first argument was not
    ``--urlFile``; all error paths now exit non-zero, consistent with
    the other branches.
    """
    usage = 'usage: [--urlFile] url.txt webpages-dir-path'
    args = argv[1:]

    # Guard clauses instead of the original nested if/else pyramid.
    if not args:
        print(usage)
        exit(1)
    if args[0] != '--urlFile':
        print(usage)
        exit(1)  # fix: unknown flag is an error, not a success

    try:
        urls = args[1]
        webpages = args[2]
    except IndexError:
        print("ERROR: Please provide enough arguments")
        print(usage)
        exit(1)

    if path.exists(urls) and path.isdir(webpages):
        fetcher = Fetcher(urls, webpages)
        fetcher.start()
        print("Fetcher Task completed !")
    else:
        print("ERROR : In url file path or out put webpages dir.")
        exit(1)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment