Created
September 16, 2012 10:36
-
-
Save krishnacode/3731908 to your computer and use it in GitHub Desktop.
Url Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
#
# Author: Krishna Mohan Sinha
# Project: Website Downloader
#
# How To Run : ./main url dirname
# Example    : ./main http://www.google.com /krishna
import os | |
import re | |
import sys | |
import urllib.request | |
import urllib.error | |
import urllib | |
class myurl:
    """Empty placeholder type; it defines no attributes or methods.

    NOTE(review): nothing in this file instantiates it -- presumably it is
    used by the companion module; confirm before removing.
    """
def main():
    """Fetch the URL given on the command line and list the links it contains.

    Reads the target URL from ``sys.argv[1]``, opens it, prints every
    response header as ``name   value`` and then prints each ``href``
    target found in the response body (as raw ``bytes``).

    Exits with status 1 (via ``sys.exit``) when no URL argument is given,
    when the URL is malformed, when the server answers with an HTTP error,
    or when the server cannot be reached.  Diagnostics for the failure
    cases are written to ``sys.stderr``.
    """
    if len(sys.argv) < 2:
        print("Usage : ./main url dirname")
        sys.exit(1)

    url = sys.argv[1]
    try:
        ures = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first.
        # print() is used because file.write() accepts a single string only;
        # passing the code/reason as a second argument raised TypeError.
        print("The server couldn't fulfill the request.", file=sys.stderr)
        print("Error code:", e.code, file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print("We Failed To Reach Server", file=sys.stderr)
        print("Reason -", e.reason, file=sys.stderr)
        sys.exit(1)
    except ValueError as e:
        # urlopen raises ValueError for strings that are not URLs at all
        # (e.g. a missing scheme); report it instead of dumping a traceback.
        print("Invalid URL -", e, file=sys.stderr)
        sys.exit(1)

    # Connection established; the with-block guarantees the response is closed.
    with ures:
        urlinfo = ures.info()
        print()
        for key in urlinfo:
            print(key, " ", urlinfo[key])
        print()
        print("Retrieving all links from URL")
        # The body is bytes, so the pattern is a bytes pattern as well.
        pat = re.compile(b"href=[\"\'](.[^\"\']+)[\"\']", re.I)
        links = pat.findall(ures.read())

    # Links found
    for link in links:
        print(link)
    print()
    # Rest of the module lives in file2
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment