cwchiu/bigstash.py

## bigstash.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, with_statement, print_function

import sys
reload(sys)
sys.setdefaultencoding('utf-8')


import requests
import io
from pyquery import PyQuery as pq
import re
import time
import httplib as http_client
# http_client.HTTPConnection.debuglevel= 1
import webbrowser
import sqlite3dbm
# Persistent key/value stores: scan_page() records archive page URLs in `db`,
# enable_fetch() queues ready download links in `link_db`.
db = sqlite3dbm.sshelve.open('bigstash.sqlite3')
link_db = sqlite3dbm.sshelve.open('link.sqlite3')

# One shared HTTP session so the headers and cookies below are sent with
# every request made by this script.
s = requests.session()
s.headers.update({
    'Origin': 'https://www.bigstash.co',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36',
})

# Cookie values copied from a browser session.
# NOTE(review): these look like live session credentials committed to source;
# consider loading them from a local, untracked config instead.
cookies = {
    'sessionid':'hlnpbv2kwosup3flebad1wtiih4jpsa2',
    'hz_amChecked':'1',
    '_gat':'1',
    'csrftoken':'KByAf9taCGzliJhvQIpdr7VteP1pFOVV',
}
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)

def enable_fetch(url, delete_action = False):
    '''
    Queue the download of one archive page, or delete the archive.

    With ``delete_action`` set to True, fetch ``url + 'delete'``, read the
    first single-quoted ``value='...'`` attribute from the page as the CSRF
    token and POST the deletion. Otherwise fetch ``url``: if the page has a
    ``.btn-outline-white`` link, probe it with a HEAD request and store it in
    ``link_db`` once it serves the file or redirects; if it has no such link,
    submit the page's ``form.form-inline`` form.

    :param url: archive page URL; callers in this file pass URLs that end
        with '/', which the key extraction below relies on.
    :param delete_action: pass True to delete instead of fetch.
    :return: None.
    '''
    s.headers['Referer'] = url

    print('#')
    if delete_action is True:
        delete_url = url + 'delete'
        # For a URL ending in '/', the archive key is the path segment
        # right before the appended 'delete'.
        key = delete_url.split('/')[-2]
        while True:
            try:
                r = s.get(delete_url)
                if r.status_code == 404:
                    # Nothing to delete.
                    return
                token = re.findall(r"value='(.*?)'", r.text)[0]
                break
            except Exception as ex:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must be able to break this endless retry loop.
                print(ex)
                time.sleep(5)
                print('error, wait for retry')

        data = {'reqkey': key, 'csrfmiddlewaretoken': token}
        try:
            s.post(delete_url, data=data, timeout=5)
        except Exception as ex:
            # Best effort: report the failure and carry on with the next item.
            print(ex)
        return

    while True:
        try:
            r = s.get(url)
            d = pq(r.text)
            break
        except Exception as ex:
            print(ex)
            time.sleep(5)
            print(url + ' error, wait for retry')

    result = d('.btn-outline-white')
    if len(result) > 0:
        zip_url = 'https://www.bigstash.co{0}'.format(result[0].attrib['href'])

        r_zip = s.head(zip_url)
        # Use .get(): a response without a Content-Type header must not
        # raise KeyError before the 'location' check gets a chance to run.
        content_type = r_zip.headers.get('content-type')
        if content_type == 'binary/octet-stream' or 'location' in r_zip.headers:
            print(zip_url)
            link_db[zip_url] = 1
        else:
            print('***')
            # NOTE(review): this re-checks immediately and without a depth
            # limit; a link that never becomes ready ends in a recursion
            # error. Consider a bounded retry with a delay.
            enable_fetch(url)
        return

    elem = d('form.form-inline')
    if len(elem) == 0:
        # Neither a download link nor the request form: nothing to submit.
        print(url + ' error, archive form not found')
        return
    inputs = elem[0].cssselect('input')
    if len(inputs) == 0:
        print(url + ' error, archive form has no input')
        return
    archive_url = 'https://www.bigstash.co{0}'.format(elem[0].attrib['action'])
    # The form's first input is sent as the CSRF token.
    data = {'csrfmiddlewaretoken': inputs[0].attrib['value']}
    s.post(archive_url, data=data)

def scan_page():
    '''
    Scan the archive listing pages.

    Requests listing pages 1 through 14 (``sortedby=-created``) and handles
    every table row: a row whose markup contains 'Creating' is removed from
    ``db`` and deleted through ``enable_fetch(url, True)``; any other row is
    recorded in ``db`` and handed to ``enable_fetch(url)``.
    '''
    from xml.etree import ElementTree

    for page_no in xrange(1, 15):
        page_url = 'https://www.bigstash.co/a/archives/?page={0}&sortedby=-created'.format(page_no)
        print(page_url)
        listing = pq(s.get(page_url).text)

        for row in listing('.table > tbody > tr'):
            # Serialise the row so its full markup can be searched below.
            row_xml = ElementTree.tostring(row, encoding='utf8', method='xml')
            first_link = row.cssselect('a')[0]
            archive_url = 'https://www.bigstash.co{0}'.format(first_link.attrib['href'])

            if 'Creating' not in row_xml:
                db[archive_url] = 1
                enable_fetch(archive_url)
                continue

            print('{0} => del'.format(archive_url))
            if archive_url in db:
                del db[archive_url]
            enable_fetch(archive_url, True)

def remove():
    '''
    Delete the archives that were already downloaded.

    For every zip file in the hard-coded local folder, take the second
    underscore-separated part of the file name as the archive key and call
    ``enable_fetch`` in delete mode on that archive's page.
    '''
    import glob

    for zip_path in glob.glob('c:\\a\\*.zip'):
        file_name = zip_path.rsplit('\\', 1)[-1]
        key = file_name.split('_')[1]
        print(key)
        archive_url = 'https://www.bigstash.co/a/' + key + '/'
        enable_fetch(archive_url, True)

def download(max):
    '''
    Open queued download links in the default browser.

    Takes up to ``max`` links from ``link_db``, opens each one in a new
    browser tab and removes it from the queue.

    :param max: maximum number of links to open; zero or a negative number
        opens nothing.
    :return: None.
    '''
    from itertools import islice

    if max <= 0:
        # The previous counter-based loop still opened one link in this case.
        return

    # Snapshot the keys before touching the store: entries are deleted
    # below, and deleting from a mapping while iterating over it is unsafe.
    for link in list(islice(link_db, max)):
        print(link)
        webbrowser.open_new_tab(link)
        del link_db[link]

def monitor_download():
    '''
    Keep downloads running, forever.

    Once every 60 seconds, count the '*.crdownload' files in the hard-coded
    Downloads folder (presumably the browser's in-progress downloads) and,
    while fewer than six exist, call ``download`` for the free slots.
    Never returns.
    '''
    import glob
    import time

    slot_limit = 6
    while True:
        in_progress = len(glob.glob('C:\\Users\\cwchiu\\Downloads\\*.crdownload'))
        free_slots = slot_limit - in_progress

        if free_slots <= 0:
            print('wait')
        else:
            print('run download')
            download(free_slots)
        time.sleep(60)

# Script entry: the alternative steps are kept commented out; enable the one
# to run.
# remove()       # delete archives whose zip files are already on disk
# scan_page()    # walk the listing pages and queue/delete archives
# download(6)    # open up to six queued links once
# Default action: loop forever, topping up the browser's downloads.
monitor_download()
	# -- coding: utf-8 --
	from __future__ import unicode_literals, division, with_statement, print_function

	import sys
	reload(sys)
	sys.setdefaultencoding('utf-8')


	import requests
	import io
	from pyquery import PyQuery as pq
	import re
	import time
	import httplib as http_client
	# http_client.HTTPConnection.debuglevel= 1
	import webbrowser
	import sqlite3dbm
	# Persistent key/value stores: scan_page() records archive page URLs in `db`,
	# enable_fetch() queues ready download links in `link_db`.
	db = sqlite3dbm.sshelve.open('bigstash.sqlite3')
	link_db = sqlite3dbm.sshelve.open('link.sqlite3')

	# One shared HTTP session so the headers and cookies below are sent with
	# every request made by this script.
	s = requests.session()
	s.headers.update({
	    'Origin': 'https://www.bigstash.co',
	    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36',
	})

	# Cookie values copied from a browser session.
	# NOTE(review): these look like live session credentials committed to source;
	# consider loading them from a local, untracked config instead.
	cookies = {
	    'sessionid':'hlnpbv2kwosup3flebad1wtiih4jpsa2',
	    'hz_amChecked':'1',
	    '_gat':'1',
	    'csrftoken':'KByAf9taCGzliJhvQIpdr7VteP1pFOVV',
	}
	requests.utils.add_dict_to_cookiejar(s.cookies, cookies)

	def enable_fetch(url, delete_action = False):
	    '''
	    Queue the download of one archive page, or delete the archive.

	    With ``delete_action`` set to True, fetch ``url + 'delete'``, read the
	    first single-quoted ``value='...'`` attribute from the page as the CSRF
	    token and POST the deletion. Otherwise fetch ``url``: if the page has a
	    ``.btn-outline-white`` link, probe it with a HEAD request and store it in
	    ``link_db`` once it serves the file or redirects; if it has no such link,
	    submit the page's ``form.form-inline`` form.

	    :param url: archive page URL; callers in this file pass URLs that end
	        with '/', which the key extraction below relies on.
	    :param delete_action: pass True to delete instead of fetch.
	    :return: None.
	    '''
	    s.headers['Referer'] = url

	    print('#')
	    if delete_action is True:
	        delete_url = url + 'delete'
	        # For a URL ending in '/', the archive key is the path segment
	        # right before the appended 'delete'.
	        key = delete_url.split('/')[-2]
	        while True:
	            try:
	                r = s.get(delete_url)
	                if r.status_code == 404:
	                    # Nothing to delete.
	                    return
	                token = re.findall(r"value='(.*?)'", r.text)[0]
	                break
	            except Exception as ex:
	                # Deliberately not a bare ``except``: KeyboardInterrupt and
	                # SystemExit must be able to break this endless retry loop.
	                print(ex)
	                time.sleep(5)
	                print('error, wait for retry')

	        data = {'reqkey': key, 'csrfmiddlewaretoken': token}
	        try:
	            s.post(delete_url, data=data, timeout=5)
	        except Exception as ex:
	            # Best effort: report the failure and carry on with the next item.
	            print(ex)
	        return

	    while True:
	        try:
	            r = s.get(url)
	            d = pq(r.text)
	            break
	        except Exception as ex:
	            print(ex)
	            time.sleep(5)
	            print(url + ' error, wait for retry')

	    result = d('.btn-outline-white')
	    if len(result) > 0:
	        zip_url = 'https://www.bigstash.co{0}'.format(result[0].attrib['href'])

	        r_zip = s.head(zip_url)
	        # Use .get(): a response without a Content-Type header must not
	        # raise KeyError before the 'location' check gets a chance to run.
	        content_type = r_zip.headers.get('content-type')
	        if content_type == 'binary/octet-stream' or 'location' in r_zip.headers:
	            print(zip_url)
	            link_db[zip_url] = 1
	        else:
	            print('***')
	            # NOTE(review): this re-checks immediately and without a depth
	            # limit; a link that never becomes ready ends in a recursion
	            # error. Consider a bounded retry with a delay.
	            enable_fetch(url)
	        return

	    elem = d('form.form-inline')
	    if len(elem) == 0:
	        # Neither a download link nor the request form: nothing to submit.
	        print(url + ' error, archive form not found')
	        return
	    inputs = elem[0].cssselect('input')
	    if len(inputs) == 0:
	        print(url + ' error, archive form has no input')
	        return
	    archive_url = 'https://www.bigstash.co{0}'.format(elem[0].attrib['action'])
	    # The form's first input is sent as the CSRF token.
	    data = {'csrfmiddlewaretoken': inputs[0].attrib['value']}
	    s.post(archive_url, data=data)

	def scan_page():
	    '''
	    Scan the archive listing pages.

	    Requests listing pages 1 through 14 (``sortedby=-created``) and handles
	    every table row: a row whose markup contains 'Creating' is removed from
	    ``db`` and deleted through ``enable_fetch(url, True)``; any other row is
	    recorded in ``db`` and handed to ``enable_fetch(url)``.
	    '''
	    from xml.etree import ElementTree

	    for page_no in xrange(1, 15):
	        page_url = 'https://www.bigstash.co/a/archives/?page={0}&sortedby=-created'.format(page_no)
	        print(page_url)
	        listing = pq(s.get(page_url).text)

	        for row in listing('.table > tbody > tr'):
	            # Serialise the row so its full markup can be searched below.
	            row_xml = ElementTree.tostring(row, encoding='utf8', method='xml')
	            first_link = row.cssselect('a')[0]
	            archive_url = 'https://www.bigstash.co{0}'.format(first_link.attrib['href'])

	            if 'Creating' not in row_xml:
	                db[archive_url] = 1
	                enable_fetch(archive_url)
	                continue

	            print('{0} => del'.format(archive_url))
	            if archive_url in db:
	                del db[archive_url]
	            enable_fetch(archive_url, True)

	def remove():
	    '''
	    Delete the archives that were already downloaded.

	    For every zip file in the hard-coded local folder, take the second
	    underscore-separated part of the file name as the archive key and call
	    ``enable_fetch`` in delete mode on that archive's page.
	    '''
	    import glob

	    for zip_path in glob.glob('c:\\a\\*.zip'):
	        file_name = zip_path.rsplit('\\', 1)[-1]
	        key = file_name.split('_')[1]
	        print(key)
	        archive_url = 'https://www.bigstash.co/a/' + key + '/'
	        enable_fetch(archive_url, True)

	def download(max):
	    '''
	    Open queued download links in the default browser.

	    Takes up to ``max`` links from ``link_db``, opens each one in a new
	    browser tab and removes it from the queue.

	    :param max: maximum number of links to open; zero or a negative number
	        opens nothing.
	    :return: None.
	    '''
	    from itertools import islice

	    if max <= 0:
	        # The previous counter-based loop still opened one link in this case.
	        return

	    # Snapshot the keys before touching the store: entries are deleted
	    # below, and deleting from a mapping while iterating over it is unsafe.
	    for link in list(islice(link_db, max)):
	        print(link)
	        webbrowser.open_new_tab(link)
	        del link_db[link]

	def monitor_download():
	    '''
	    Keep downloads running, forever.

	    Once every 60 seconds, count the '*.crdownload' files in the hard-coded
	    Downloads folder (presumably the browser's in-progress downloads) and,
	    while fewer than six exist, call ``download`` for the free slots.
	    Never returns.
	    '''
	    import glob
	    import time

	    slot_limit = 6
	    while True:
	        in_progress = len(glob.glob('C:\\Users\\cwchiu\\Downloads\\*.crdownload'))
	        free_slots = slot_limit - in_progress

	        if free_slots <= 0:
	            print('wait')
	        else:
	            print('run download')
	            download(free_slots)
	        time.sleep(60)

	# Script entry: the alternative steps are kept commented out; enable the one
	# to run.
	# remove()       # delete archives whose zip files are already on disk
	# scan_page()    # walk the listing pages and queue/delete archives
	# download(6)    # open up to six queued links once
	# Default action: loop forever, topping up the browser's downloads.
	monitor_download()