Skip to content

Instantly share code, notes, and snippets.

@mopemope

mopemope/xvideos.py

Created Oct 4, 2011
Embed
What would you like to do?
xvideos downloader
import os
from os import path
from werkzeug import secure_filename
from tobikko.core import *
from tobikko.patch import *
patch_all()
import urllib2
from pyquery import PyQuery as pq
import urllib
from urlparse import urlparse
import re
# Search keywords, in crawl order. Each keyword is listed exactly once: the
# original URL list repeated the "tnss" and "nade" entries, which made the
# crawler request the same result pages twice.
_SEARCH_KEYWORDS = [
    "fad", "fax", "nsps", "vema", "kop", "tnss", "bkd", "saq", "natr",
    "sprd", "oksn", "kbkd", "nade", "tdmj", "momj", "mdyd", "shkd", "vagu",
    "anb", "aed", "uuru", "etc", "vec", "kk", "lhby", "fffd", "yone",
    "jufd", "twl", "xkk", "juc", "venu", "hone", "dse", "okas", "emav",
    "emaf", "crc", "tlso", "jrzd", "deju", "atgo", "mkd", "_cj_", "scd",
    "havd", "nxg", "abs", "cyf", "mcsr", "crpd", "madv", "bbm", "rbd",
    "nhdt", "lhjf", "ddb", "jukd", "gar", "cmc", "blk", "pts", "emu",
    "dasd", "drs", "mgic", "axam", "tnsd", "vnds", "fpjr", "ugss", "sbnr",
    "aofa", "caoy", "tos", "hima", "wnz", "hthd", "asw", "matu", "mdc",
]

# Sources that were disabled (commented out) in the original list:
#   keyword searches: sama, mama, 000, 001, 002, 003, 004
#   tag listings of the form
#   "http://www.xvideos.com/tags/<tag>/%s/s:uploaddate/m:10min_more" for the
#   tags: japan, japanese, hotjav, jap, jpavgod, creampie, jav

# One URL template per keyword. The remaining "%s" placeholder is filled in
# with the page number when the template is fetched.
search_urls = [
    "http://www.xvideos.com/?k=" + keyword + "&p=%s&durf=10min_more"
    for keyword in _SEARCH_KEYWORDS
]
# Neither of these two names is referenced in the visible part of this file.
detail_urls = []
id_mode = True

# Directory the .flv files are written to. Nothing in this script creates
# it (the mkdir below is disabled), so it has to exist before running.
save_path = "/video/xvideo/"
#os.mkdir(save_path)

# Pool shared by every page-list and download task. Pool is provided by one
# of the tobikko star imports; 20 is presumably its concurrency limit --
# TODO confirm against tobikko.
pool = Pool(20)

# Video page URLs waiting to be downloaded: filled by get_pagelist(),
# drained by start().
q = set()

# Files whose Content-Length is below this many bytes (80 MiB) are skipped.
MINSIZE = 1024 * 1024 * 80

# Captures the (still URL-encoded) value of the "flv_url" parameter found in
# a video page. Raw string so the regex escapes are not treated as string
# escapes; the compiled pattern is unchanged.
download_re = re.compile(r"flv_url=([\w\d.:/_%]*)&", re.M)
def get_pagelist(url, page=1):
    """Fetch one search-result page and queue the video links found on it.

    url is a template containing one "%s" placeholder, which is filled in
    with page. The href of every element matching the ".miniature" selector
    is added to the module-level set q. Returns None.

    Errors raised by urllib2.urlopen() or read() propagate to the caller.
    """
    conn = urllib2.urlopen(url % page)
    try:
        html = conn.read()
    finally:
        # Release the connection even when read() fails.
        conn.close()
    for anchor in pq(html)(".miniature"):
        href = pq(anchor).attr.href
        # Elements without an href yield None; queueing that would only
        # produce a failed download attempt later.
        if href:
            q.add(href)
def get_download_url(url):
    """Resolve a video page URL to its direct .flv URL and a local file name.

    Fetches url, looks for the "flv_url=...&" parameter with download_re and
    URL-decodes its value. The file name is built from the last two segments
    of the page URL's path as "<second-to-last>_<last>.flv".

    Returns a (url, download_url, file_name) tuple, or (None, None, None)
    when the page contains no flv_url parameter or the URL path has fewer
    than two segments. Errors raised by urllib2 propagate to the caller.
    """
    conn = urllib2.urlopen(url)
    try:
        page = conn.read()
    finally:
        # Release the connection even when read() fails.
        conn.close()

    url_match = download_re.search(page)
    if not url_match:
        return None, None, None

    segments = urlparse(url).path.split("/")
    if len(segments) < 2:
        # No "<id>/<title>" pair to build a file name from.
        return None, None, None

    file_name = "%s_%s.flv" % (segments[-2], segments[-1])
    download_url = urllib.unquote(url_match.group(1))
    return url, download_url, file_name
def download_flv(url, down_url, file_name):
    """Download down_url into save_path/file_name, resuming a partial file.

    url is the video page URL and is only used in the progress messages.
    Nothing is downloaded when file_name is empty, when the reported
    Content-Length is below MINSIZE or above 900 MiB, or when the local file
    is already at least as large as the Content-Length.

    If a smaller local file exists, the remainder is requested with a Range
    header and appended. Errors while opening the connection are printed and
    swallowed; errors while copying the data propagate to the caller.
    Returns None.
    """
    # Check before building the path: path.join() fails on a None name.
    if not file_name:
        return
    out_path = path.join(save_path, file_name)
    max_size = 1024 * 1024 * 900  # upper size limit, same value as before

    partial = False
    conn = None
    try:
        try:
            conn = urllib2.urlopen(down_url)
            length = int(conn.info()['Content-Length'])
            if length < MINSIZE or length > max_size:
                return
            if path.exists(out_path):
                size = path.getsize(out_path)
                if size >= length:
                    # Already fully downloaded.
                    return
                # Drop the full-body response before asking for the tail.
                conn.close()
                conn = None
                req = urllib2.Request(
                    down_url, headers={"Range": "bytes=%s-" % size})
                conn = urllib2.urlopen(req)
                # Append only if the server honoured the Range request
                # (206 Partial Content). On a plain 200 the body is the
                # whole file, so start over instead of corrupting it.
                partial = conn.getcode() == 206
        except Exception:
            import traceback
            print(traceback.format_exc())
            return

        out = open(out_path, "ab" if partial else "wb")
        try:
            print("'%s' == Start '%s' size %s ==" % (url, file_name, length))
            while True:
                data = conn.read(1024 * 4)
                if not data:
                    break
                out.write(data)
        finally:
            # Flush and release the file even when the copy fails midway.
            out.close()
        print("'%s' == Finish '%s' ==" % (url, file_name))
    finally:
        if conn is not None:
            conn.close()
def download(url):
    """Resolve one video page URL and download its .flv file, best effort.

    Calls get_download_url() and, when it finds a download URL, passes the
    result to download_flv(). Any Exception is printed as a traceback and
    then dropped so that one failing video does not affect the others.
    Returns None.
    """
    try:
        url, download_url, file_name = get_download_url(url)
        if url and download_url:
            download_flv(url, download_url, file_name)
    except Exception:
        # Still best effort, but report the failure instead of hiding it;
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        import traceback
        print(traceback.format_exc())
def start(url, min_page=0, max_page=10):
    """Schedule the crawl of one search URL template.

    Spawns a get_pagelist task on the shared pool for every page number from
    min_page to max_page inclusive, then spawns a download task for each URL
    currently in the module-level set q, emptying it. Returns None.
    """
    for i in xrange(min_page, max_page + 1):
        try:
            pool.spawn(get_pagelist, url, i)
        except Exception:
            # Retry once; a second failure propagates to the caller.
            # Exception rather than a bare except, so Ctrl-C is not
            # turned into a retry.
            pool.spawn(get_pagelist, url, i)
    # NOTE(review): q is drained right after the page-list tasks were
    # spawned. If pool.spawn() does not run them to completion first, this
    # loop only sees URLs queued by earlier tasks -- confirm against the
    # Pool implementation.
    while q:
        pool.spawn(download, q.pop())
if __name__ == '__main__':
    # Schedule pages 1..10 of every search template, then wait for the pool.
    for url in search_urls:
        start(url=url, min_page=1, max_page=10)
    pool.join()
    # Page-list tasks that finished after the last start() call may have
    # left URLs in q that nothing consumed. Keep draining until it is
    # empty; this is a no-op when q is already empty.
    while q:
        while q:
            pool.spawn(download, q.pop())
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.