Last active
September 24, 2015 01:37
-
-
Save siddhant3s/647237 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
VERSION='0.1.2' | |
from BeautifulSoup import BeautifulSoup as BS | |
from urllib import urlopen,urlencode,basejoin,urlretrieve | |
from urlparse import urlparse | |
import sys,time,os | |
def keepvid_get(loc, keepvid='http://keepvid.com', i=-1):
    """keepvid_get(loc, keepvid='http://keepvid.com', i=-1) -> (url, filename)

    Ask KeepVid for the direct download links of the video page ``loc``
    and return one of them.

    Arguments:
    loc     -- location (URL) of the page hosting the video.
    keepvid -- base URL of the KeepVid service; optional.
    i       -- index of the link to pick among the ones KeepVid lists.
               Defaults to -1, i.e. the last link (usually the one marked
               'High Quality'); pass 0, 1, 2, ... for the first, second,
               third, ... link.

    Returns a tuple ``(abs_url, filename)``: the absolute download URL and
    the last component of its path.

    Raises IndexError when the result page has no matching link or ``i``
    is out of range.
    """
    data = urlencode({'url': loc})  # query string of the form "url=<loc>"
    page = urlopen(keepvid + '/?' + data)  # fetch the result page via HTTP GET
    try:
        soup = BS(page)  # parse using BeautifulSoup
    finally:
        # Release the HTTP connection once the page has been handed to the
        # parser, even if parsing fails.
        page.close()
    # KeepVid marks the hyperlinks to the actual URLs with class="link",
    # e.g. <a href="actual URL" class="link" target="_blank">
    links = soup.findAll('a', {'class': 'link'})
    if not links:
        raise IndexError("no download link found for %r" % (loc,))
    rel_url = links[i]['href'].strip()
    abs_url = basejoin(keepvid, rel_url)
    # Return a tuple of the absolute URL and the file name.
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def get_url_spankwire(url):
    """Look up ``url`` through downloadspankwirevideos.com.

    Fetches the helper page for ``url`` and returns the first double-quoted
    string that follows the ``var flashvars = {`` marker in it.

    Returns '' when the marker, the opening quote or the closing quote is
    missing, so callers can test the result for emptiness.
    """
    # NOTE(review): url is interpolated unescaped, as before; confirm whether
    # the service expects it percent-encoded.
    page = urlopen("http://www.downloadspankwirevideos.com/?videoraw=%s" % url)
    try:
        html = page.read()
    finally:
        page.close()  # do not leak the HTTP connection
    _, marker, rest = html.partition("var flashvars = {")
    if not marker:
        return ''
    _, opening, rest = rest.partition('"')
    if not opening:
        return ''
    value, closing, _ = rest.partition('"')
    if not closing:
        # Unterminated string: treat as "not found" instead of returning a
        # truncated value.
        return ''
    return value
def get_url_pornhub(url):
    """Look up ``url`` through downloadpornhubvideos.com.

    Fetches the helper page for ``url`` and returns the first double-quoted
    string that follows the ``var flashvars = {`` marker in it.

    Returns '' when the marker, the opening quote or the closing quote is
    missing, so callers can test the result for emptiness.
    """
    # NOTE(review): url is interpolated unescaped, as before; confirm whether
    # the service expects it percent-encoded.
    page = urlopen("http://www.downloadpornhubvideos.com/?videoraw=%s" % url)
    try:
        html = page.read()
    finally:
        page.close()  # do not leak the HTTP connection
    _, marker, rest = html.partition("var flashvars = {")
    if not marker:
        return ''
    _, opening, rest = rest.partition('"')
    if not opening:
        return ''
    value, closing, _ = rest.partition('"')
    if not closing:
        # Unterminated string: treat as "not found" instead of returning a
        # truncated value.
        return ''
    return value
def get_url(url):
    """Resolve ``url`` to a direct video URL and a local file name.

    get_url_pornhub is tried first; if it returns an empty string,
    get_url_spankwire is tried next.

    Returns a tuple ``(abs_url, filename)`` where ``filename`` is the last
    component of the path of ``abs_url``.

    If neither resolver finds a URL, an error is written to stderr and the
    program terminates with exit status 1 (raises SystemExit).
    """
    abs_url = get_url_pornhub(url) or get_url_spankwire(url)
    if not abs_url:
        # Report on stderr and exit non-zero so that shells and scripts can
        # tell the failure apart from a successful run.
        sys.stderr.write("Error. No Video found. Exiting\n")
        sys.exit(1)
    # Return a tuple of the absolute URL and the file name.
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def timer():
    """Yield the number of seconds elapsed since the previous next() call.

    The first value is measured against a baseline of 0, so it equals the
    current Unix timestamp rather than a real interval; it only serves to
    start the clock.
    """
    last_tick = 0
    while True:
        now = time.time()
        elapsed = now - last_tick
        last_tick = now
        yield elapsed


# Shared stopwatch: progress_printer reads it once per call to time blocks.
speedtimer = timer()


def progress_printer(transferred, block_size, total_size):
    """Print a one-line progress report, overwriting the previous one.

    transferred -- number of blocks transferred so far.
    block_size  -- size of each block in bytes.
    total_size  -- total size of the file in bytes; a value <= 0 means the
                   size is unknown (or the file is empty), in which case
                   only the byte count is shown.

    The signature matches the reporthook callback of urlretrieve.
    """
    speed = 0
    try:
        # Bytes of one block divided by the time since the previous call.
        speed = float(block_size) / next(speedtimer) / 1000
    except ZeroDivisionError:
        # Two calls landed on the same clock tick; report 0 instead.
        pass
    completed = transferred * block_size
    if total_size > 0:
        # The last block is usually partial, so counting whole blocks can
        # overshoot the real size; never report more than 100%.
        completed = min(completed, total_size)
        # Float arithmetic so the two decimals are meaningful on Python 2.
        percent = completed * 100.0 / total_size
        sys.stdout.write("\r%.2f%% complete[%d/%d bytes] @ %.2f KBytes/sec" %
                         (percent, completed, total_size, speed))
    else:
        # No usable total: a percentage cannot be computed.
        sys.stdout.write("\r%d bytes received @ %.2f KBytes/sec" %
                         (completed, speed))
    sys.stdout.flush()
def no_progress_printer(transferred, block_size, total_size):
    """Silent stand-in for progress_printer.

    Accepts the same three arguments and deliberately ignores them, so it
    can be passed as the progress callback when silent mode is turned on.
    """
    return None
if __name__ == '__main__':
    import getopt
    import subprocess

    def usage():
        """Print the command-line help text."""
        print("""Usage: downvid [options] [location1 location2 .....]
location1 location2 ..... should be surrounded in quotes.
Options:
 -h, --help              Display Usage
 -v, --version           Display the version number
 -i, --input-file=FILE   Input the location from a file. Each location
                         should be written in a new line. These locations
                         will be downloaded after the locations specified
                         as the arguments.
 -o, --output-name=NAME  Downloaded videos will be renamed to NAME_<number>
                         where <number> will be the order in which the file
                         was being downloaded.
 -s, --silent            Silent.No output.
 -w, --wget              Use wget to download the file. Much more reliable
                         and has ability to resume downloads""")

    if len(sys.argv) == 1:  # no command line parameter was passed
        usage()
        sys.exit()

    inputfile = None
    output_prefix = None
    silent = False  # if True, the program will not output anything to stdout
    usewget = False

    # Parse the command line. Long options that take a value must end with
    # '=' or getopt rejects "--input-file=FILE" / "--output-name=NAME".
    try:
        optlist, args = getopt.getopt(
            sys.argv[1:], 'hvswi:o:',
            ["help", "version", "silent", "wget",
             "input-file=", "output-name="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for o, a in optlist:
        if o in ('-i', "--input-file"):
            inputfile = a
        elif o in ("-s", "--silent"):
            silent = True
        elif o in ("-w", "--wget"):
            usewget = True
        elif o in ("-o", "--output-name"):
            output_prefix = a
        elif o in ('-h', "--help"):
            usage()
            sys.exit()
        elif o in ('-v', "--version"):
            print(VERSION)
            sys.exit()

    # Locations given as arguments come first, then those from the file.
    location = list(args)
    if inputfile is not None:
        try:
            f = open(inputfile)
        except IOError:
            print("No such file as %s exists in the current directory"
                  % inputfile)
            sys.exit(3)
        with f:
            for line in f:
                entry = line.strip()
                if entry:  # skip blank lines instead of resolving ''
                    location.append(entry)

    # At this point location[] contains every location to be downloaded.
    # Resolve each one with get_url and fetch it with wget or urlretrieve.
    loc_dw = []  # the locations that were downloaded successfully
    for position, x in enumerate(location, 1):
        new_loc, filename = get_url(x)
        if output_prefix:
            # position is the order of the file; used only with -o
            filename = output_prefix + '_' + str(position)
        if usewget:
            wgetcommand = ["wget", "-c", "-O", filename, new_loc]
            if silent:
                wgetcommand.insert(1, "-q")  # keep wget quiet as well
            else:
                print("Executing... %s" % (wgetcommand,))
            # wget inherits this process's stdout/stderr.
            status = subprocess.call(wgetcommand)
            if status != 0:
                # Do not list a failed transfer as downloaded.
                sys.stderr.write("wget failed (exit status %d) for %s\n"
                                 % (status, new_loc))
                continue
        else:
            hooker = no_progress_printer
            if not silent:
                print("Now Downloading the Location:" + new_loc +
                      ' as ' + filename)
                hooker = progress_printer
            urlretrieve(new_loc, filename, hooker)
            if not silent:
                # progress_printer rewrites one line with '\r' and never
                # ends it; finish the line before printing anything else.
                sys.stdout.write("\n")
        loc_dw.append(new_loc)

    if not silent:
        print("The following location were downloaded successfully:")
        for x in loc_dw:
            print(x)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment