Skip to content

Instantly share code, notes, and snippets.

@krishnacode
Created September 16, 2012 10:36
Show Gist options
  • Save krishnacode/3731908 to your computer and use it in GitHub Desktop.
Save krishnacode/3731908 to your computer and use it in GitHub Desktop.
Url Downloader
#!/usr/bin/python3
#
# Author:Krishna Mohan Sinha
# Project:Website Downloader
#
# How to run: ./main url dirname
# Example:    ./main http://www.google.com /krishna
import os
import re
import sys
import urllib.request
import urllib.error
import urllib
class myurl:
    """Empty placeholder type; it defines no attributes or methods."""
# Captures the target of an href="..." / href='...' attribute, ignoring case.
# It is a bytes pattern because the response body is searched undecoded.
_HREF_PATTERN = re.compile(b"href=[\"\'](.[^\"\']+)[\"\']", re.I)


def _extract_links(html):
    """Return every href target found in the raw page bytes *html*, in order."""
    return _HREF_PATTERN.findall(html)


def main():
    """Fetch the URL named on the command line; print its headers and links.

    The URL is read from ``sys.argv[1]``.  The usage text also mentions a
    ``dirname`` argument, but this function never reads it.

    Output on stdout: a blank line, one ``name   value`` line per response
    header, a blank line, a status message, every href target found in the
    body (as bytes objects, one per line), and a final blank line.

    Exits with status 1 when no URL is supplied, when the server answers
    with an HTTP error, or when the server cannot be reached; the two
    failure cases report the error code / reason on stderr.
    """
    if len(sys.argv) < 2:
        print("Usage : ./main url dirname")
        sys.exit(1)
    url = sys.argv[1]

    # Only the network call sits in the try body, so unrelated errors are
    # not mistaken for connection failures.
    try:
        response = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print("The server couldn't fulfill the request.", file=sys.stderr)
        print(f"Error code: {e.code}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print("We Failed To Reach Server", file=sys.stderr)
        print(f"Reason - {e.reason}", file=sys.stderr)
        sys.exit(1)

    # Connection established; the with-block closes the response when done.
    with response:
        print()
        # items() yields each header line once, including repeated names.
        for name, value in response.info().items():
            print(name, " ", value)
        print()
        print("Retrieving all links from URL")
        links = _extract_links(response.read())

    # Links found; they are printed as the bytes objects the regex returned.
    for link in links:
        print(link)
    print()
    # Rest Module in file2
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment