# mopemope/empflix.py

## empflix.py
from os import path
from werkzeug import secure_filename
import eventlet
from eventlet.green import urllib2
from pyquery import PyQuery as pq
from urlparse import urlparse
import psyco
psyco.full()

# Listing-page URL templates to crawl.  Each template carries one ``%s``
# placeholder that get_pagelist() fills with the page number, which is why
# literal percent signs are written as ``%%``.  Only uncommented entries are
# crawled; the commented-out ones are alternative queries kept for reference.
search_urls = [
        'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=rd',
    #'http://www.empflix.com/browsecat.php?page=%s&chid=17',
    #'http://www.empflix.com/search.php?page=%s&what=Mondomuyou',
    #'http://www.empflix.com/search.php?page=%s&what=Mondo64',
    #'http://www.empflix.com/search.php?page=%s&what=trg',
    #'http://www.empflix.com/search.php?page=%s&what=smr',
    #'http://www.empflix.com/search.php?page=%s&what=tkyo',
    #'http://www.empflix.com/search.php?page=%s&what=manko',
    #'http://www.empflix.com/search.php?page=%s&what=omanko',
    #'http://www.empflix.com/search.php?page=%s&what=rhj',
    #'http://www.empflix.com/search.php?page=%s&what=Tokyo',
    #'http://www.empflix.com/search.php?page=%s&what=TokyoHot',
    #'http://www.empflix.com/search.php?page=%s&what=Tora',
    #'http://www.empflix.com/search.php?page=%s&what=Sky+Angel',
    #'http://www.empflix.com/search.php?page=%s&what=Santa+Gal',
    #'http://www.empflix.com/search.php?page=%s&what=Mugen',
    #'http://www.empflix.com/search.php?page=%s&what=XVN',
    #'http://www.empflix.com/search.php?page=%s&what=Asami',
    #'http://www.empflix.com/search.php?page=%s&what=haruka',
    #'http://www.empflix.com/search.php?page=%s&what=Asuka',
    #'http://www.empflix.com/search.php?page=%s&what=Maki',
    #'http://www.empflix.com/search.php?page=%s&what=Nao',
    #'http://www.empflix.com/search.php?page=%s&what=Yui',
    #'http://www.empflix.com/search.php?page=%s&what=Yuki',
    #'http://www.empflix.com/search.php?page=%s&what=Yuka',
    #'http://www.empflix.com/search.php?page=%s&what=Saki',
    #'http://www.empflix.com/search.php?page=%s&what=Rika',
    #'http://www.empflix.com/search.php?page=%s&what=Riko',
    #'http://www.empflix.com/search.php?page=%s&what=sara%%20Part2&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=pakopako',
    #'http://www.empflix.com/search.php?page=%s&what=pacopaco',
    #'http://www.empflix.com/search.php?page=%s&what=Miku',
    #'http://www.empflix.com/search.php?page=%s&what=0930',
    #'http://www.empflix.com/search.php?page=%s&what=h0930',
    #'http://www.empflix.com/search.php?page=%s&what=4610',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=JAV%%20Amateur&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=jav%%20creampie&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=Serina',
    #'http://www.empflix.com/search.php?page=%s&what=hikaru',
    #'http://www.empflix.com/search.php?page=%s&what=tsubaki',
    #'http://www.empflix.com/search.php?page=%s&what=mikado',
    #'http://www.empflix.com/search.php?page=%s&what=catwalk',
    #'http://www.empflix.com/search.php?page=%s&what=Samurai',
    #'http://www.empflix.com/search.php?page=%s&what=Jeans+Fetish',
    #'http://www.empflix.com/search.php?page=%s&what=red+hot+fetish',
    #'http://www.empflix.com/search.php?page=%s&what=pink+puncher',
    #'http://www.empflix.com/search.php?page=%s&what=nakadashi&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=tokyo&sort=relevance',
    #'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=mr',
    #'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Creampie',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian&adv_category[]=Creampie',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Mature',
    ]
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance'
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=pacopaco%%20sara%%20Part2&sort=relevance'

# Detail-page URLs collected by read_detail_urls().
detail_urls = []

# When true, download() prefixes each saved file name with the part of the
# detail URL's query string after its first three characters (presumably an
# ``id=`` prefix -- TODO confirm).
id_mode = True

# Directory that download_flv() writes into.
save_path = "/home/ma2/Public/empflix/"
# Green-thread pool of size 2 on which download() tasks are spawned.
pool = eventlet.GreenPool(2)

import re

# Matches the player-setup call ``so.addVariable('config', '<url>');`` found in
# a detail page and captures the config URL (as written in the page) as
# group 1.  Written as a raw string so the regex escapes are not also treated
# as string escapes; the compiled pattern is unchanged.
download_re = re.compile(
    r"\s*so.addVariable\('config',\s*'([\w\d\.:/%=_-]*)'\);", re.M)

def get_pagelist(url, page=1):
    """Fetch one listing page and return the detail-page links found on it.

    ``url`` is a template with one ``%s`` placeholder, filled with ``page``.
    For every ``.thumb`` element the ``href`` of the ``<a>`` returned by
    ``find("a")`` is collected, in document order.  Thumbs without such a
    link (or without an ``href``) are skipped, so the returned list never
    contains ``None``.
    """
    conn = urllib2.urlopen(url % page)
    try:
        html = conn.read()
    finally:
        # Release the connection even when the read fails.
        conn.close()

    links = []
    for thumb in pq(html)(".thumb"):
        anchor = thumb.find("a")
        # Compare with None explicitly: the thumb may simply have no link.
        if anchor is None:
            continue
        href = pq(anchor).attr.href
        if href:
            links.append(href)
    return links

def _get_flv(page):
    """Resolve the video URL and file name from a detail page's player config.

    ``page`` is the detail page's HTML.  The config URL captured by
    ``download_re`` is unquoted and fetched; the text of its ``file`` element
    becomes the download URL.  The file name is the text of the page's first
    ``<h2>`` plus ``.flv``, passed through ``secure_filename``.

    Returns ``(download_url, file_name)``, or ``(None, None)`` when the page
    contains no player config, so callers can always unpack the result.
    """
    import urllib

    match = download_re.search(page)
    if not match:
        return None, None

    config_url = urllib.unquote(match.group(1))
    conn = urllib2.urlopen(config_url)
    try:
        config = conn.read()
    finally:
        # Release the connection even when the read fails.
        conn.close()

    download_url = pq(config)("file").text()
    title = pq(page)("h2:first").text()
    file_name = secure_filename(title + ".flv")
    return download_url, file_name

def get_download_url(url):
    """Fetch a detail page and work out where its video can be downloaded.

    The ``href`` of the ``.downloadButton`` element is preferred, with the
    file named after the last path segment of that link.  Without such a
    link the player config is consulted through ``_get_flv``.

    Returns ``(url, download_url, file_name)`` with ``url`` passed through
    unchanged; the last two are ``None`` when nothing could be resolved.
    """
    conn = urllib2.urlopen(url)
    try:
        page = conn.read()
    finally:
        # Release the connection even when the read fails.
        conn.close()

    download_url = pq(page)(".downloadButton").attr.href
    if download_url:
        file_name = urlparse(download_url).path.split("/")[-1]
    else:
        # Tolerate a helper that reports "nothing found" as a bare None.
        found = _get_flv(page)
        download_url, file_name = found if found else (None, None)
    return url, download_url, file_name

def download_flv(url, down_url, file_name):
    """Download ``down_url`` to ``save_path``/``file_name``, resuming if possible.

    ``url`` is the detail-page URL; it appears in the log output and is what
    gets re-queued through ``download`` when preparing the transfer fails.

    Nothing is written when ``file_name`` is empty, when the reported
    ``Content-Length`` is under 100 MiB or over 900 MiB, or when a local file
    of at least that length already exists.  A shorter local file is continued
    with an HTTP ``Range`` request.  Always returns ``None``.
    """
    print("'%s' ---- Try Download ----" % url)

    # Validate the name before joining: path.join() cannot take a missing name.
    if not file_name:
        print("'%s' ** Not Found Link ** " % url)
        return
    out_path = path.join(save_path, file_name)

    partial = False
    conn = None
    try:
        conn = urllib2.urlopen(down_url)
        length = int(conn.info()['Content-Length'])
        # The same message is logged for files that are too large.
        if length < 1024 * 1024 * 100 or length > 1024 * 1024 * 900:
            print("*** '%s' is small! Skip!!!'%s' ***" % (url, length))
            conn.close()
            return

        if path.exists(out_path):
            size = path.getsize(out_path)
            if size >= length:
                print("'%s' == Downloaded '%s' ==" % (url, file_name))
                conn.close()
                return

            # Swap the full-body response for one that starts at the first
            # missing byte.
            conn.close()
            conn = None
            req = urllib2.Request(down_url, headers={"Range": "bytes=%s-" % size})
            conn = urllib2.urlopen(req)
            # Append only when the server answered 206 Partial Content; any
            # other status is treated as a full body that must overwrite.
            partial = getattr(conn, "code", None) == 206
            if partial:
                print("'%s' == Resume!! '%s' ==" % (url, file_name))
                print("'%s' == File     '%s' Size: %d/%d'" % (url, file_name, size, length))
    except Exception:
        # NOTE: the page is re-queued without a retry limit, so a permanent
        # failure here keeps being retried.
        import traceback
        print(traceback.format_exc())
        if conn is not None:
            conn.close()
        pool.spawn_n(download, url)
        return

    try:
        if partial:
            f = open(out_path, "rb+")
            f.seek(0, 2)  # continue after the bytes already on disk
        else:
            f = open(out_path, "wb")
        try:
            print("'%s' == Start '%s' ==" % (url, file_name))
            while True:
                data = conn.read(1024 * 512)
                if not data:
                    break
                f.write(data)
        finally:
            # Close the file so buffered data reaches the disk.
            f.close()
    finally:
        conn.close()
    print("'%s' == Finish '%s' ==" % (url, file_name))

def download(url):
    """Resolve a detail page to its video and hand it to ``download_flv``.

    Pages on ``premium.empflix.com`` are ignored, as are download links that
    start with ``#`` and files whose name contains ``mosaic`` (any case).
    When ``id_mode`` is set the file name is prefixed with ``<id>_``, where
    ``<id>`` is the URL's query string minus its first three characters.
    Pages that yield no download link or no file name are reported and
    skipped.  Returns ``None``.
    """
    if not url or "premium.empflix.com" in url:
        return

    url, download_url, file_name = get_download_url(url)
    if not download_url or not file_name:
        # Nothing usable on the page; bail out before string methods are
        # called on a missing value.
        print("'%s' ** Not Found Link ** " % url)
        return

    if id_mode:
        # Assumes a query of the form "id=<value>" -- TODO confirm.
        video_id = urlparse(url).query[3:]
        file_name = video_id + "_" + file_name
    if download_url.startswith('#'):
        return
    if 'mosaic' in file_name.lower():
        return
    download_flv(url, download_url, file_name)

# Queue of detail-page URLs waiting to be handed to the pool.
q = []


def start(url, min_page=66, max_page=70):
    """Crawl listing pages ``min_page``..``max_page`` and spawn their downloads.

    The links of every page are appended to the module-level queue ``q``.
    The queue is then reversed and emptied from its tail, so entries are
    scheduled in the order they were added, each as ``download(entry)`` on
    ``pool``.
    """
    # An alternative range kept from an earlier run: min_page=14, max_page=24.
    for page_no in xrange(min_page, max_page + 1):
        q.extend(get_pagelist(url, page=page_no))

    q.reverse()
    while q:
        pool.spawn_n(download, q.pop())

def read_detail_urls(file='empflix.txt'):
    """Append every non-blank line of ``file`` to ``detail_urls``.

    Each line is stripped of surrounding whitespace first; lines that are
    empty after stripping are ignored.  Returns ``None``.
    """
    handle = open(file)
    try:
        for line in handle:
            href = line.strip()
            if href:
                detail_urls.append(href)
    finally:
        # Close the file even if reading fails part-way.
        handle.close()

if __name__ == '__main__':
    # Optional: pre-load the queue from a saved list of detail pages.
    #read_detail_urls()
    #detail_urls.reverse()
    #q.extend(detail_urls)

    for search_url in search_urls:
        start(search_url)
    # Wait for the spawned downloads before exiting.
    pool.waitall()
	from os import path
	from werkzeug import secure_filename
	import eventlet
	from eventlet.green import urllib2
	from pyquery import PyQuery as pq
	from urlparse import urlparse
	import psyco
	psyco.full()

	# Listing-page URL templates to crawl.  Each template carries one ``%s``
	# placeholder for the page number, which is why literal percent signs are
	# written as ``%%``.  Only uncommented entries are active; the
	# commented-out ones are alternative queries kept for reference.
	search_urls = [
	'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=rd',
	#'http://www.empflix.com/browsecat.php?page=%s&chid=17',
	#'http://www.empflix.com/search.php?page=%s&what=Mondomuyou',
	#'http://www.empflix.com/search.php?page=%s&what=Mondo64',
	#'http://www.empflix.com/search.php?page=%s&what=trg',
	#'http://www.empflix.com/search.php?page=%s&what=smr',
	#'http://www.empflix.com/search.php?page=%s&what=tkyo',
	#'http://www.empflix.com/search.php?page=%s&what=manko',
	#'http://www.empflix.com/search.php?page=%s&what=omanko',
	#'http://www.empflix.com/search.php?page=%s&what=rhj',
	#'http://www.empflix.com/search.php?page=%s&what=Tokyo',
	#'http://www.empflix.com/search.php?page=%s&what=TokyoHot',
	#'http://www.empflix.com/search.php?page=%s&what=Tora',
	#'http://www.empflix.com/search.php?page=%s&what=Sky+Angel',
	#'http://www.empflix.com/search.php?page=%s&what=Santa+Gal',
	#'http://www.empflix.com/search.php?page=%s&what=Mugen',
	#'http://www.empflix.com/search.php?page=%s&what=XVN',
	#'http://www.empflix.com/search.php?page=%s&what=Asami',
	#'http://www.empflix.com/search.php?page=%s&what=haruka',
	#'http://www.empflix.com/search.php?page=%s&what=Asuka',
	#'http://www.empflix.com/search.php?page=%s&what=Maki',
	#'http://www.empflix.com/search.php?page=%s&what=Nao',
	#'http://www.empflix.com/search.php?page=%s&what=Yui',
	#'http://www.empflix.com/search.php?page=%s&what=Yuki',
	#'http://www.empflix.com/search.php?page=%s&what=Yuka',
	#'http://www.empflix.com/search.php?page=%s&what=Saki',
	#'http://www.empflix.com/search.php?page=%s&what=Rika',
	#'http://www.empflix.com/search.php?page=%s&what=Riko',
	#'http://www.empflix.com/search.php?page=%s&what=sara%%20Part2&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=pakopako',
	#'http://www.empflix.com/search.php?page=%s&what=pacopaco',
	#'http://www.empflix.com/search.php?page=%s&what=Miku',
	#'http://www.empflix.com/search.php?page=%s&what=0930',
	#'http://www.empflix.com/search.php?page=%s&what=h0930',
	#'http://www.empflix.com/search.php?page=%s&what=4610',
	#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=JAV%%20Amateur&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=jav%%20creampie&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=Serina',
	#'http://www.empflix.com/search.php?page=%s&what=hikaru',
	#'http://www.empflix.com/search.php?page=%s&what=tsubaki',
	#'http://www.empflix.com/search.php?page=%s&what=mikado',
	#'http://www.empflix.com/search.php?page=%s&what=catwalk',
	#'http://www.empflix.com/search.php?page=%s&what=Samurai',
	#'http://www.empflix.com/search.php?page=%s&what=Jeans+Fetish',
	#'http://www.empflix.com/search.php?page=%s&what=red+hot+fetish',
	#'http://www.empflix.com/search.php?page=%s&what=pink+puncher',
	#'http://www.empflix.com/search.php?page=%s&what=nakadashi&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
	#'http://www.empflix.com/search.php?page=%s&what=tokyo&sort=relevance',
	#'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=mr',
	#'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance',
	#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian',
	#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Creampie',
	#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian&adv_category[]=Creampie',
	#'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Mature',
	]
	#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance'
	#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=pacopaco%%20sara%%20Part2&sort=relevance'

	# Detail-page URLs collected by read_detail_urls().
	detail_urls = []

	# When true, download() prefixes each saved file name with the part of the
	# detail URL's query string after its first three characters (presumably
	# an ``id=`` prefix -- TODO confirm).
	id_mode = True

	# Directory that download_flv() writes into.
	save_path = "/home/ma2/Public/empflix/"
	# Green-thread pool of size 2 on which download() tasks are spawned.
	pool = eventlet.GreenPool(2)

	import re

	# Matches the player-setup call ``so.addVariable('config', '<url>');`` found
	# in a detail page and captures the config URL as group 1.  Both ``\s``
	# take a ``*`` quantifier so the call is matched with any amount of
	# whitespace (including none) before ``so`` and after the comma.  Written
	# as a raw string so regex escapes are not also treated as string escapes.
	download_re = re.compile(
		r"\s*so.addVariable\('config',\s*'([\w\d\.:/%=_-]*)'\);", re.M)

	def get_pagelist(url, page=1):
		"""Fetch one listing page and return the detail-page links found on it.

		``url`` is a template with one ``%s`` placeholder, filled with
		``page``.  For every ``.thumb`` element the ``href`` of the ``<a>``
		returned by ``find("a")`` is collected, in document order.  Thumbs
		without such a link (or without an ``href``) are skipped, so the
		returned list never contains ``None``.
		"""
		conn = urllib2.urlopen(url % page)
		try:
			html = conn.read()
		finally:
			# Release the connection even when the read fails.
			conn.close()

		links = []
		for thumb in pq(html)(".thumb"):
			anchor = thumb.find("a")
			# Compare with None explicitly: the thumb may have no link.
			if anchor is None:
				continue
			href = pq(anchor).attr.href
			if href:
				links.append(href)
		return links

	def _get_flv(page):
		"""Resolve the video URL and file name from a page's player config.

		``page`` is the detail page's HTML.  The config URL captured by
		``download_re`` is unquoted and fetched; the text of its ``file``
		element becomes the download URL.  The file name is the text of the
		page's first ``<h2>`` plus ``.flv``, passed through
		``secure_filename``.

		Returns ``(download_url, file_name)``, or ``(None, None)`` when the
		page contains no player config, so callers can always unpack it.
		"""
		import urllib

		match = download_re.search(page)
		if not match:
			return None, None

		config_url = urllib.unquote(match.group(1))
		conn = urllib2.urlopen(config_url)
		try:
			config = conn.read()
		finally:
			# Release the connection even when the read fails.
			conn.close()

		download_url = pq(config)("file").text()
		title = pq(page)("h2:first").text()
		file_name = secure_filename(title + ".flv")
		return download_url, file_name

	def get_download_url(url):
		"""Fetch a detail page and work out where its video can be downloaded.

		The ``href`` of the ``.downloadButton`` element is preferred, with the
		file named after the last path segment of that link.  Without such a
		link the player config is consulted through ``_get_flv``.

		Returns ``(url, download_url, file_name)`` with ``url`` passed through
		unchanged; the last two are ``None`` when nothing could be resolved.
		"""
		conn = urllib2.urlopen(url)
		try:
			page = conn.read()
		finally:
			# Release the connection even when the read fails.
			conn.close()

		download_url = pq(page)(".downloadButton").attr.href
		if download_url:
			file_name = urlparse(download_url).path.split("/")[-1]
		else:
			# Tolerate a helper that reports "nothing found" as a bare None.
			found = _get_flv(page)
			download_url, file_name = found if found else (None, None)
		return url, download_url, file_name

	def download_flv(url, down_url, file_name):
		"""Download ``down_url`` to ``save_path``/``file_name``, resuming if possible.

		``url`` is the detail-page URL; it appears in the log output and is
		what gets re-queued through ``download`` when preparing the transfer
		fails.

		Nothing is written when ``file_name`` is empty, when the reported
		``Content-Length`` is under 100 MiB or over 900 MiB, or when a local
		file of at least that length already exists.  A shorter local file is
		continued with an HTTP ``Range`` request.  Always returns ``None``.
		"""
		print("'%s' ---- Try Download ----" % url)

		# Validate the name before joining: path.join() cannot take a missing
		# name.
		if not file_name:
			print("'%s' Not Found Link " % url)
			return
		out_path = path.join(save_path, file_name)

		partial = False
		conn = None
		try:
			conn = urllib2.urlopen(down_url)
			length = int(conn.info()['Content-Length'])
			# The same message is logged for files that are too large.
			if length < 1024 * 1024 * 100 or length > 1024 * 1024 * 900:
				print("* '%s' is small! Skip!!!'%s' *" % (url, length))
				conn.close()
				return

			if path.exists(out_path):
				size = path.getsize(out_path)
				if size >= length:
					print("'%s' == Downloaded '%s' ==" % (url, file_name))
					conn.close()
					return

				# Swap the full-body response for one that starts at the
				# first missing byte.
				conn.close()
				conn = None
				req = urllib2.Request(down_url, headers={"Range": "bytes=%s-" % size})
				conn = urllib2.urlopen(req)
				# Append only when the server answered 206 Partial Content;
				# any other status is treated as a full body that must
				# overwrite.
				partial = getattr(conn, "code", None) == 206
				if partial:
					print("'%s' == Resume!! '%s' ==" % (url, file_name))
					print("'%s' == File '%s' Size: %d/%d'" % (url, file_name, size, length))
		except Exception:
			# NOTE: the page is re-queued without a retry limit, so a
			# permanent failure here keeps being retried.
			import traceback
			print(traceback.format_exc())
			if conn is not None:
				conn.close()
			pool.spawn_n(download, url)
			return

		try:
			if partial:
				f = open(out_path, "rb+")
				f.seek(0, 2)  # continue after the bytes already on disk
			else:
				f = open(out_path, "wb")
			try:
				print("'%s' == Start '%s' ==" % (url, file_name))
				while True:
					data = conn.read(1024 * 512)
					if not data:
						break
					f.write(data)
			finally:
				# Close the file so buffered data reaches the disk.
				f.close()
		finally:
			conn.close()
		print("'%s' == Finish '%s' ==" % (url, file_name))

	def download(url):
		"""Resolve a detail page to its video and hand it to ``download_flv``.

		Pages on ``premium.empflix.com`` are ignored, as are download links
		that start with ``#`` and files whose name contains ``mosaic`` (any
		case).  When ``id_mode`` is set the file name is prefixed with
		``<id>_``, where ``<id>`` is the URL's query string minus its first
		three characters.  Pages that yield no download link or no file name
		are reported and skipped.  Returns ``None``.
		"""
		if not url or "premium.empflix.com" in url:
			return

		url, download_url, file_name = get_download_url(url)
		if not download_url or not file_name:
			# Nothing usable on the page; bail out before string methods are
			# called on a missing value.
			print("'%s' Not Found Link " % url)
			return

		if id_mode:
			# Assumes a query of the form "id=<value>" -- TODO confirm.
			video_id = urlparse(url).query[3:]
			file_name = video_id + "_" + file_name
		if download_url.startswith('#'):
			return
		if 'mosaic' in file_name.lower():
			return
		download_flv(url, download_url, file_name)

	# Queue of detail-page URLs waiting to be handed to the pool.
	q = []

	def start(url, min_page=66, max_page=70):
		"""Crawl listing pages ``min_page``..``max_page`` and spawn downloads.

		The links of every page are appended to the queue ``q``.  The queue
		is then reversed and emptied from its tail, so entries are scheduled
		in the order they were added, each as ``download(entry)`` on ``pool``.
		"""
		# An alternative range kept from an earlier run: min_page=14,
		# max_page=24.
		for i in xrange(min_page, max_page + 1):
			urls = get_pagelist(url, page=i)
			q.extend(urls)
		q.reverse()
		while q:
			url = q.pop()
			pool.spawn_n(download, url)

	def read_detail_urls(file='empflix.txt'):
		"""Append every non-blank line of ``file`` to ``detail_urls``.

		Each line is stripped of surrounding whitespace first; lines that are
		empty after stripping are ignored.  Returns ``None``.
		"""
		handle = open(file)
		try:
			for line in handle:
				href = line.strip()
				if href:
					detail_urls.append(href)
		finally:
			# Close the file even if reading fails part-way.
			handle.close()

	if __name__ == '__main__':
		# Optional: pre-load the queue from a saved list of detail pages.
		#read_detail_urls()
		#detail_urls.reverse()
		#q.extend(detail_urls)

		for url in search_urls:
			start(url=url)
		# Wait for the spawned downloads before exiting.
		pool.waitall()