Skip to content

Instantly share code, notes, and snippets.

@mopemope
Created November 9, 2010 08:50
Show Gist options
  • Save mopemope/668874 to your computer and use it in GitHub Desktop.
Save mopemope/668874 to your computer and use it in GitHub Desktop.
from os import path
from werkzeug import secure_filename
import eventlet
from eventlet.green import urllib2
from pyquery import PyQuery as pq
from urlparse import urlparse
import psyco
psyco.full()
# Listing-page URL templates to crawl. Each template contains one `%s`
# placeholder that get_pagelist() fills with the page number (`url % page`),
# so any literal percent sign in a template must be written as `%%`.
# Only the uncommented entries are crawled; the rest are kept as presets.
search_urls = [
'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=rd',
#'http://www.empflix.com/browsecat.php?page=%s&chid=17',
#'http://www.empflix.com/search.php?page=%s&what=Mondomuyou',
#'http://www.empflix.com/search.php?page=%s&what=Mondo64',
#'http://www.empflix.com/search.php?page=%s&what=trg',
#'http://www.empflix.com/search.php?page=%s&what=smr',
#'http://www.empflix.com/search.php?page=%s&what=tkyo',
#'http://www.empflix.com/search.php?page=%s&what=manko',
#'http://www.empflix.com/search.php?page=%s&what=omanko',
#'http://www.empflix.com/search.php?page=%s&what=rhj',
#'http://www.empflix.com/search.php?page=%s&what=Tokyo',
#'http://www.empflix.com/search.php?page=%s&what=TokyoHot',
#'http://www.empflix.com/search.php?page=%s&what=Tora',
#'http://www.empflix.com/search.php?page=%s&what=Sky+Angel',
#'http://www.empflix.com/search.php?page=%s&what=Santa+Gal',
#'http://www.empflix.com/search.php?page=%s&what=Mugen',
#'http://www.empflix.com/search.php?page=%s&what=XVN',
#'http://www.empflix.com/search.php?page=%s&what=Asami',
#'http://www.empflix.com/search.php?page=%s&what=haruka',
#'http://www.empflix.com/search.php?page=%s&what=Asuka',
#'http://www.empflix.com/search.php?page=%s&what=Maki',
#'http://www.empflix.com/search.php?page=%s&what=Nao',
#'http://www.empflix.com/search.php?page=%s&what=Yui',
#'http://www.empflix.com/search.php?page=%s&what=Yuki',
#'http://www.empflix.com/search.php?page=%s&what=Yuka',
#'http://www.empflix.com/search.php?page=%s&what=Saki',
#'http://www.empflix.com/search.php?page=%s&what=Rika',
#'http://www.empflix.com/search.php?page=%s&what=Riko',
#'http://www.empflix.com/search.php?page=%s&what=sara%%20Part2&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=pakopako',
#'http://www.empflix.com/search.php?page=%s&what=pacopaco',
#'http://www.empflix.com/search.php?page=%s&what=Miku',
#'http://www.empflix.com/search.php?page=%s&what=0930',
#'http://www.empflix.com/search.php?page=%s&what=h0930',
#'http://www.empflix.com/search.php?page=%s&what=4610',
#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=JAV%%20Amateur&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=jav%%20creampie&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=Serina',
#'http://www.empflix.com/search.php?page=%s&what=hikaru',
#'http://www.empflix.com/search.php?page=%s&what=tsubaki',
#'http://www.empflix.com/search.php?page=%s&what=mikado',
#'http://www.empflix.com/search.php?page=%s&what=catwalk',
#'http://www.empflix.com/search.php?page=%s&what=Samurai',
#'http://www.empflix.com/search.php?page=%s&what=Jeans+Fetish',
#'http://www.empflix.com/search.php?page=%s&what=red+hot+fetish',
#'http://www.empflix.com/search.php?page=%s&what=pink+puncher',
#'http://www.empflix.com/search.php?page=%s&what=nakadashi&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
#'http://www.empflix.com/search.php?page=%s&what=tokyo&sort=relevance',
#'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=mr',
#'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance',
#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian',
#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Creampie',
#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian&adv_category[]=Creampie',
#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Mature',
]
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance'
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=pacopaco%%20sara%%20Part2&sort=relevance'
# Detail-page URLs collected by read_detail_urls(); only consumed if the
# commented-out lines in the __main__ block are re-enabled.
detail_urls = []
# When true, download() prefixes every saved file name with the id taken
# from the detail-page URL's query string.
id_mode = True
# Directory that download_flv() joins each file name onto.
save_path = "/home/ma2/Public/empflix/"
# Green-thread pool (size 2) shared by start() and download_flv()'s retry.
pool = eventlet.GreenPool(2)
import re

# Matches the player setup call ``so.addVariable('config', '<url>');`` inside
# a detail page and captures the (possibly percent-encoded) config URL in
# group 1.  Written as a raw string so the backslash escapes reach the regex
# engine untouched, with the dot in ``so.addVariable`` escaped so it only
# matches a literal '.'.
download_re = re.compile(
    r"\s*so\.addVariable\('config',\s*'([\w\d\.:/%=_-]*)'\);", re.M)
def get_pagelist(url, page=1):
    """Return the detail-page links found on one listing page.

    ``url`` is a template containing a single ``%s`` placeholder, which is
    filled with ``page``.  The page is fetched, and the ``href`` of the first
    ``<a>`` directly under every ``.thumb`` element is collected in document
    order.  Thumbs without an anchor or without an ``href`` are skipped so
    that callers never receive ``None`` entries.
    """
    from contextlib import closing

    # Close the response as soon as the body has been read.
    with closing(urllib2.urlopen(url % page)) as conn:
        html = conn.read()

    links = []
    for thumb in pq(html)(".thumb"):
        anchor = thumb.find("a")
        if anchor is None:
            continue
        href = pq(anchor).attr.href
        if href:
            links.append(href)
    return links
def _get_flv(page):
    """Resolve the video link through the player config referenced by a page.

    ``page`` is the HTML of a detail page.  The config URL is extracted with
    ``download_re``, percent-decoded and fetched; the text of its ``file``
    element is the download URL.  The file name is the page's first ``<h2>``
    text plus ``.flv``, passed through ``secure_filename``.

    Returns a ``(download_url, file_name)`` tuple, or ``(None, None)`` when
    the page contains no player config, so callers can always unpack the
    result.
    """
    import urllib
    from contextlib import closing

    match = download_re.search(page)
    if not match:
        return None, None

    #url = "http://cdn.tnaflix.com/" + url
    config_url = urllib.unquote(match.group(1))
    # Close the config response as soon as the body has been read.
    with closing(urllib2.urlopen(config_url)) as conn:
        config = conn.read()

    download_url = pq(config)("file").text()
    file_name = secure_filename(pq(page)("h2:first").text() + ".flv")
    return download_url, file_name
def get_download_url(url):
    """Fetch a detail page and work out where its video can be downloaded.

    The ``href`` of the ``.downloadButton`` element is preferred; its file
    name is the last path segment of that link.  If the page has no such
    link, the player config is consulted through ``_get_flv``.

    Returns ``(url, download_url, file_name)``.  ``download_url`` and
    ``file_name`` are ``None`` when neither source yields a link.
    """
    from contextlib import closing

    # Close the response as soon as the body has been read.
    with closing(urllib2.urlopen(url)) as conn:
        page = conn.read()

    #download_url = d(".linkRight a:first").attr.href
    download_url = pq(page)(".downloadButton").attr.href
    if download_url:
        file_name = urlparse(download_url).path.split("/")[-1]
    else:
        # Tolerate a bare "nothing found" result instead of failing to unpack.
        found = _get_flv(page)
        download_url, file_name = found if found else (None, None)
    return url, download_url, file_name
def download_flv(url, down_url, file_name):
    """Download ``down_url`` into ``save_path``/``file_name``, resuming if possible.

    ``url`` is the detail-page URL; it is used for log output and to re-queue
    the job on failure.  Nothing is downloaded when ``file_name`` is empty,
    when the reported Content-Length is under 100 MiB or over 900 MiB, or
    when a file of at least that length already exists.  A shorter existing
    file is resumed with an HTTP Range request.

    Any error while probing the remote file prints a traceback and re-spawns
    ``download(url)`` on the shared pool (retries are not bounded).  Errors
    while streaming the body propagate to the caller.
    """
    import traceback
    from contextlib import closing

    print("'%s' ---- Try Download ----" % url)
    if not file_name:
        print("'%s' ** Not Found Link ** " % url)
        return
    out_path = path.join(save_path, file_name)

    min_length = 1024 * 1024 * 100
    max_length = 1024 * 1024 * 900
    partial = False
    conn = None
    try:
        conn = urllib2.urlopen(down_url)
        length = int(conn.info()['Content-Length'])
        if length < min_length or length > max_length:
            print("*** '%s' is small! Skip!!!'%s' ***" % (url, length))
            conn.close()
            return
        if path.exists(out_path):
            size = path.getsize(out_path)
            if size >= length:
                print("'%s' == Downloaded '%s' ==" % (url, file_name))
                conn.close()
                return
            # Swap the probe connection for a ranged request.
            req = urllib2.Request(down_url, headers={"Range": "bytes=%s-" % size})
            conn.close()
            conn = urllib2.urlopen(req)
            # Only append if the server honoured the range (206); a plain
            # 200 carries the whole file, so it must overwrite instead.
            if conn.getcode() == 206:
                print("'%s' == Resume!! '%s' ==" % (url, file_name))
                print("'%s' == File '%s' Size: %d/%d'" % (url, file_name, size, length))
                partial = True
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt still stops the run.
        print(traceback.format_exc())
        if conn is not None:
            conn.close()
        pool.spawn_n(download, url)
        return

    # Both the response and the output file are closed even if streaming fails.
    with closing(conn):
        with open(out_path, "ab" if partial else "wb") as out:
            print("'%s' == Start '%s' ==" % (url, file_name))
            while True:
                data = conn.read(1024 * 512)
                if not data:
                    break
                out.write(data)
                #per = path.getsize(out_path) / float(length) * 100.0
                #print "'%s' == '%s' %d%% done. ==" % (url, file_name, per)
    print("'%s' == Finish '%s' ==" % (url, file_name))
def download(url):
    """Resolve one detail page and download its video.

    Pages on ``premium.empflix.com`` are ignored.  Otherwise the download
    link and file name are looked up with ``get_download_url``.  When
    ``id_mode`` is set, the file name is prefixed with the page URL's query
    string minus its first three characters (presumably an ``id=`` prefix;
    verify against real URLs).  Links starting with ``#`` and file names
    containing ``mosaic`` (case-insensitive) are skipped.
    """
    if "premium.empflix.com" in url:
        return

    url, download_url, file_name = get_download_url(url)
    if not download_url:
        # No link was found; report it instead of crashing on None below.
        print("'%s' ** Not Found Link ** " % url)
        return
    file_name = file_name or ""

    if id_mode:
        video_id = urlparse(url).query[3:]
        file_name = video_id + "_" + file_name

    if download_url.startswith('#'):
        return
    if 'mosaic' in file_name.lower():
        return
    #print download_url, file_name
    download_flv(url, download_url, file_name)
# Pending detail-page URLs; filled and drained by start().
q = []


def start(url, min_page=66, max_page=70):
    """Crawl listing pages ``min_page``..``max_page`` (inclusive) and queue downloads.

    Every detail link returned by ``get_pagelist`` is appended to the shared
    queue ``q``; the queue is then drained in the order the links were
    found, spawning ``download`` on the shared pool for each.
    """
    # Alternative page window kept from the original: min_page=14, max_page=24
    for page_no in xrange(min_page, max_page + 1):
        q.extend(get_pagelist(url, page=page_no))

    # Reversing first makes pop() hand links back in discovery order.
    q.reverse()
    while q:
        pool.spawn_n(download, q.pop())
def read_detail_urls(file='empflix.txt'):
    """Append every non-blank line of ``file`` to ``detail_urls``.

    Lines are stripped of surrounding whitespace first.  The file is closed
    when reading finishes, even on error.
    """
    with open(file) as lines:
        for line in lines:
            href = line.strip()
            if href:
                detail_urls.append(href)
if __name__ == '__main__':
    # Optional: seed the queue from a saved list of detail pages instead.
    #read_detail_urls()
    #detail_urls.reverse()
    #q.extend(detail_urls)

    # Crawl each active search template, then wait for all downloads to end.
    for url in search_urls:
        start(url=url)
    pool.waitall()
@askanyogesh
Copy link

i like your web site

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment