Skip to content

Instantly share code, notes, and snippets.

@siddhant3s
Last active September 24, 2015 01:37
Show Gist options
  • Save siddhant3s/647237 to your computer and use it in GitHub Desktop.
Save siddhant3s/647237 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
VERSION='0.1.2'
from BeautifulSoup import BeautifulSoup as BS
from urllib import urlopen,urlencode,basejoin,urlretrieve
from urlparse import urlparse
import sys,time,os
def keepvid_get(loc,keepvid='http://keepvid.com',i=-1):
    """keepvid_get(loc, keepvid='http://keepvid.com', i=-1) -> (abs_url, name)

    Resolve the direct download URL of a video hosted on YouTube,
    Dailymotion, or any other site supported by KeepVid.com.

    loc     -- URL of the video page to resolve.
    keepvid -- base URL of the KeepVid service (optional).
    i       -- index into the list of candidate links KeepVid offers.
               The default -1 selects the last one, which is usually the
               'High Quality' variant; pass 0, 1, 2, ... to select the
               first, second, third candidate instead.

    Returns a tuple of (absolute URL, basename of that URL's path).
    """
    query = urlencode({'url': loc})              # encodes as "url=<loc>"
    page = urlopen(keepvid + '/?' + query)       # HTTP GET of the result page
    soup = BS(page)
    # KeepVid tags each direct-download hyperlink with class="link", e.g.
    #   <a href="actual URL" class="link" target="_blank">
    candidates = soup.findAll('a', {'class': 'link'})
    rel_url = candidates[i]['href'].strip()
    abs_url = basejoin(keepvid, rel_url)
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def _scrape_flashvars_url(html):
    """Extract the first double-quoted string following 'var flashvars = {'
    in *html*; returns '' when the marker or the quotes are missing.

    Both downloader sites embed the direct video URL as the first quoted
    value of a JavaScript ``flashvars`` object.  The slicing below is kept
    exactly as in the original so that a failed find() (-1) still degrades
    to the empty string rather than raising.
    """
    html = html[html.find("""var flashvars = {"""):]
    html = html[html.find('"'):][1:]
    return html[:html.find('"')]

def get_url_spankwire(url):
    """Resolve *url* through downloadspankwirevideos.com.

    Returns the direct video URL as a string, or '' when the site could
    not resolve it."""
    page = urlopen("http://www.downloadspankwirevideos.com/?videoraw=%s" % url).read()
    return _scrape_flashvars_url(page)

def get_url_pornhub(url):
    """Resolve *url* through downloadpornhubvideos.com.

    Returns the direct video URL as a string, or '' when the site could
    not resolve it."""
    page = urlopen("http://www.downloadpornhubvideos.com/?videoraw=%s" % url).read()
    return _scrape_flashvars_url(page)
def get_url(url):
    """Resolve *url* to a direct video URL, trying the pornhub resolver
    first and falling back to the spankwire one.

    Exits the program with an error message when neither resolver finds
    a video.  Returns a tuple of (absolute URL, basename of its path),
    mirroring keepvid_get."""
    abs_url = get_url_pornhub(url)
    if abs_url == '':
        abs_url = get_url_spankwire(url)
    if abs_url == '':
        # Same bytes as the original `print` statement (message + newline).
        sys.stdout.write("Error. No Video found. Exiting\n")
        sys.exit()
    return (abs_url, os.path.basename(urlparse(abs_url).path))
def timer():
    """Infinite generator yielding the seconds elapsed since it was last
    advanced.

    NOTE: the previous timestamp starts at 0, so the very first value is
    the time since the Unix epoch — only the second and later values are
    meaningful intervals."""
    prev = 0
    while True:
        elapsed = time.time() - prev
        prev = time.time()
        yield elapsed

# Module-level timer shared by progress_printer to estimate download speed.
speedtimer = timer()
def progress_printer(transferred,block_size,total_size):
    """urlretrieve reporthook: print an in-place progress line to stdout.

    transferred -- number of blocks transferred so far
    block_size  -- size of each block in bytes
    total_size  -- total file size in bytes; urlretrieve passes a
                   non-positive value when the server sends no
                   Content-Length, which we must guard against.
    """
    speed = 0
    try:
        # Seconds since the previous hook call -> KBytes/sec.
        # next() builtin instead of the Python-2-only .next() method.
        # NOTE: the first interval from speedtimer is the time since the
        # epoch, so the first reported speed is effectively 0.
        speed = float(block_size) / next(speedtimer) / 1000
    except ZeroDivisionError:
        pass
    completed = transferred * block_size
    if total_size > 0:
        # The final block usually overshoots the exact file size; clamp
        # so the display never exceeds 100%.  Float math avoids the
        # integer-truncated percentage of the original.
        completed = min(completed, total_size)
        percent = completed * 100.0 / total_size
    else:
        percent = 0.0  # unknown length: no percentage can be computed
    sys.stdout.write("\r%.2f%% complete[%d/%d bytes] @ %.2f KBytes/sec" % \
        (percent, completed, total_size, speed))
    sys.stdout.flush()
def no_progress_printer(transferred,block_size,total_size):
    """No-op urlretrieve reporthook used in silent mode.

    Accepts the same three arguments as progress_printer and ignores
    them all."""
    return None
if __name__=='__main__':
    def usage():
        # Command-line help; keep in sync with the getopt spec below.
        print("""Usage: downvid [options] [location1 location2 .....]
       location1 location2 ..... should be surrounded in quotes.
Options:
    -h, --help            Display Usage
    -v, --version         Print the version number and exit
    -i, --input-file=FILE Input the location from a file. Each location
                          should be written in a new line. These locations
                          will be downloaded after the locations specified
                          as the arguments.
    -o, --output-name     Downloaded videos will be renamed to NAME_<number>
                          where <number> will be the order in which the file
                          was being downloaded.
    -s, --silent          Silent. No output.
    -w, --wget            Use wget to download the file. Much more reliable
                          and has ability to resume downloads""")
    import getopt
    if len(sys.argv) == 1:       # no command line parameter at all
        usage()
        sys.exit()
    inputfile = None             # path supplied with -i/--input-file
    output_prefix = None         # prefix supplied with -o/--output-name
    location = []                # every URL we have been asked to download
    silent = False               # True => no output at all (-s)
    usewget = False              # True => delegate transfer to GNU wget (-w)
    # Parse the command line.
    try:
        # BUG FIX: long options that take a value need a trailing '=' in the
        # getopt spec; without it --input-file/--output-name were parsed as
        # bare flags and their arguments were silently lost.
        optlist, args = getopt.getopt(sys.argv[1:], 'hvswi:o:',
            ["help", "version", "silent", "wget", "input-file=", "output-name="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for o, a in optlist:
        if o in ('-i', "--input-file"):
            inputfile = a
        elif o in ("-s", "--silent"):
            silent = True
        elif o in ("-w", "--wget"):
            usewget = True
        elif o in ("-o", "--output-name"):
            output_prefix = a
        elif o in ('-h', "--help"):
            usage()
            sys.exit()
        elif o in ('-v', "--version"):
            print(VERSION)
            sys.exit()
    # Positional arguments first, then the locations read from -i FILE.
    location.extend(args)
    if inputfile is not None:
        try:
            f = open(inputfile)
        except IOError:
            print("No such file as %s exists in the current directory" % inputfile)
            sys.exit(3)
        for line in f:
            location.append(line.strip())
        f.close()
    # At this point location[] holds every URL to be downloaded, in order.
    # Resolve each one with get_url() and fetch it with wget or urlretrieve.
    loc_dw = []   # URLs that were downloaded successfully
    i = 0         # running number used for -o renaming
    for x in location:
        new_loc, filename = get_url(x)
        if output_prefix:
            i += 1
            filename = output_prefix + '_' + str(i)
        if usewget:
            import subprocess
            wgetcommand = ["wget", "-c", "-O", filename, new_loc]
            print("Executing... %s" % wgetcommand)
            p = subprocess.Popen(wgetcommand, stdout=sys.stdout)
            print(p.communicate()[0])
        else:
            hooker = no_progress_printer
            if not silent:
                print("Now Downloading the Location:" + new_loc + ' as ' + filename)
                hooker = progress_printer
            urlretrieve(new_loc, filename, hooker)
        loc_dw.append(new_loc)
    if not silent:
        print("The following locations were downloaded successfully:")
        for x in loc_dw:
            print(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment