Skip to content

Instantly share code, notes, and snippets.

@siddhant3s
Last active September 24, 2015 01:37
Show Gist options
  • Save siddhant3s/647237 to your computer and use it in GitHub Desktop.
Save siddhant3s/647237 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
VERSION='0.1.2'
from BeautifulSoup import BeautifulSoup as BS
from urllib import urlopen,urlencode,basejoin,urlretrieve
from urlparse import urlparse
import sys,time,os
def keepvid_get(loc,keepvid='http://keepvid.com',i=-1):
    """keepvid_get(loc, keepvid='http://keepvid.com', i=-1) -> (abs_url, name)

    Resolve the direct download URL of a video hosted on YouTube,
    Dailymotion, or any other site supported by KeepVid.com.

    loc     -- URL of the video page to resolve.
    keepvid -- base URL of the KeepVid service (optional).
    i       -- index into the list of candidate links KeepVid offers.
               The default -1 selects the last one, which is usually the
               'High Quality' variant; pass 0, 1, 2, ... to select the
               first, second, third candidate instead.

    Returns a tuple of (absolute URL, basename of that URL's path).
    """
    query = urlencode({'url': loc})              # encodes as "url=<loc>"
    page = urlopen(keepvid + '/?' + query)       # HTTP GET of the result page
    soup = BS(page)
    # KeepVid tags each direct-download hyperlink with class="link", e.g.
    #   <a href="actual URL" class="link" target="_blank">
    candidates = soup.findAll('a', {'class': 'link'})
    rel_url = candidates[i]['href'].strip()
    abs_url = basejoin(keepvid, rel_url)
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def _scrape_flashvars_url(html):
    """Extract the first double-quoted string following 'var flashvars = {'
    in *html*; returns '' when the marker or the quotes are missing.

    Both downloader sites embed the direct video URL as the first quoted
    value of a JavaScript ``flashvars`` object.  The slicing below is kept
    exactly as in the original so that a failed find() (-1) still degrades
    to the empty string rather than raising.
    """
    html = html[html.find("""var flashvars = {"""):]
    html = html[html.find('"'):][1:]
    return html[:html.find('"')]

def get_url_spankwire(url):
    """Resolve *url* through downloadspankwirevideos.com.

    Returns the direct video URL as a string, or '' when the site could
    not resolve it."""
    page = urlopen("http://www.downloadspankwirevideos.com/?videoraw=%s" % url).read()
    return _scrape_flashvars_url(page)

def get_url_pornhub(url):
    """Resolve *url* through downloadpornhubvideos.com.

    Returns the direct video URL as a string, or '' when the site could
    not resolve it."""
    page = urlopen("http://www.downloadpornhubvideos.com/?videoraw=%s" % url).read()
    return _scrape_flashvars_url(page)
def get_url(url):
    """Resolve *url* to a direct video URL, trying the pornhub resolver
    first and falling back to the spankwire one.

    Exits the program with an error message when neither resolver finds
    a video.  Returns a tuple of (absolute URL, basename of its path),
    mirroring keepvid_get."""
    abs_url = get_url_pornhub(url)
    if abs_url == '':
        abs_url = get_url_spankwire(url)
    if abs_url == '':
        # Same bytes as the original `print` statement (message + newline).
        sys.stdout.write("Error. No Video found. Exiting\n")
        sys.exit()
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def timer():
    """Infinite generator yielding the seconds elapsed since it was last
    advanced.

    NOTE: the previous timestamp starts at 0, so the very first value is
    the time since the Unix epoch — only the second and later values are
    meaningful intervals."""
    prev = 0
    while True:
        elapsed = time.time() - prev
        prev = time.time()
        yield elapsed

# Module-level timer shared by progress_printer to estimate download speed.
speedtimer = timer()
def progress_printer(transferred,block_size,total_size):
    """urlretrieve reporthook: print an in-place progress line to stdout.

    transferred -- number of blocks transferred so far
    block_size  -- size of each block in bytes
    total_size  -- total file size in bytes; urlretrieve passes a
                   non-positive value when the server sends no
                   Content-Length, which we must guard against.
    """
    speed = 0
    try:
        # Seconds since the previous hook call -> KBytes/sec.
        # next() builtin instead of the Python-2-only .next() method.
        # NOTE: the first interval from speedtimer is the time since the
        # epoch, so the first reported speed is effectively 0.
        speed = float(block_size) / next(speedtimer) / 1000
    except ZeroDivisionError:
        pass
    completed = transferred * block_size
    if total_size > 0:
        # The final block usually overshoots the exact file size; clamp
        # so the display never exceeds 100%.  Float math avoids the
        # integer-truncated percentage of the original.
        completed = min(completed, total_size)
        percent = completed * 100.0 / total_size
    else:
        percent = 0.0  # unknown length: no percentage can be computed
    sys.stdout.write("\r%.2f%% complete[%d/%d bytes] @ %.2f KBytes/sec" % \
        (percent, completed, total_size, speed))
    sys.stdout.flush()
def no_progress_printer(transferred,block_size,total_size):
    """No-op urlretrieve reporthook used in silent mode.

    Accepts the same three arguments as progress_printer and ignores
    them all."""
    return None
if __name__=='__main__':
    def usage():
        # Command-line help; keep in sync with the getopt spec below.
        print("""Usage: downvid [options] [location1 location2 .....]
       location1 location2 ..... should be surrounded in quotes.
Options:
    -h, --help            Display Usage
    -v, --version         Print the version number and exit
    -i, --input-file=FILE Input the location from a file. Each location
                          should be written in a new line. These locations
                          will be downloaded after the locations specified
                          as the arguments.
    -o, --output-name     Downloaded videos will be renamed to NAME_<number>
                          where <number> will be the order in which the file
                          was being downloaded.
    -s, --silent          Silent. No output.
    -w, --wget            Use wget to download the file. Much more reliable
                          and has ability to resume downloads""")
    import getopt
    if len(sys.argv) == 1:       # no command line parameter at all
        usage()
        sys.exit()
    inputfile = None             # path supplied with -i/--input-file
    output_prefix = None         # prefix supplied with -o/--output-name
    location = []                # every URL we have been asked to download
    silent = False               # True => no output at all (-s)
    usewget = False              # True => delegate transfer to GNU wget (-w)
    # Parse the command line.
    try:
        # BUG FIX: long options that take a value need a trailing '=' in the
        # getopt spec; without it --input-file/--output-name were parsed as
        # bare flags and their arguments were silently lost.
        optlist, args = getopt.getopt(sys.argv[1:], 'hvswi:o:',
            ["help", "version", "silent", "wget", "input-file=", "output-name="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for o, a in optlist:
        if o in ('-i', "--input-file"):
            inputfile = a
        elif o in ("-s", "--silent"):
            silent = True
        elif o in ("-w", "--wget"):
            usewget = True
        elif o in ("-o", "--output-name"):
            output_prefix = a
        elif o in ('-h', "--help"):
            usage()
            sys.exit()
        elif o in ('-v', "--version"):
            print(VERSION)
            sys.exit()
    # Positional arguments first, then the locations read from -i FILE.
    location.extend(args)
    if inputfile is not None:
        try:
            f = open(inputfile)
        except IOError:
            print("No such file as %s exists in the current directory" % inputfile)
            sys.exit(3)
        for line in f:
            location.append(line.strip())
        f.close()
    # At this point location[] holds every URL to be downloaded, in order.
    # Resolve each one with get_url() and fetch it with wget or urlretrieve.
    loc_dw = []   # URLs that were downloaded successfully
    i = 0         # running number used for -o renaming
    for x in location:
        new_loc, filename = get_url(x)
        if output_prefix:
            i += 1
            filename = output_prefix + '_' + str(i)
        if usewget:
            import subprocess
            wgetcommand = ["wget", "-c", "-O", filename, new_loc]
            print("Executing... %s" % wgetcommand)
            p = subprocess.Popen(wgetcommand, stdout=sys.stdout)
            print(p.communicate()[0])
        else:
            hooker = no_progress_printer
            if not silent:
                print("Now Downloading the Location:" + new_loc + ' as ' + filename)
                hooker = progress_printer
            urlretrieve(new_loc, filename, hooker)
        loc_dw.append(new_loc)
    if not silent:
        print("The following locations were downloaded successfully:")
        for x in loc_dw:
            print(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment