Skip to content

Instantly share code, notes, and snippets.

@cwchiu
Last active August 29, 2015 14:24
Show Gist options
  • Save cwchiu/34d6fab92822880a5b8b to your computer and use it in GitHub Desktop.
Save cwchiu/34d6fab92822880a5b8b to your computer and use it in GitHub Desktop.
bigstash
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, with_statement, print_function
import sys
# Force the process-wide default string encoding to UTF-8.
# NOTE(review): reload(sys) is called first, presumably because
# sys.setdefaultencoding is not reachable on the initially imported module
# under Python 2 -- confirm. reload() as a builtin exists only on Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')
import requests
import io
from pyquery import PyQuery as pq
import re
import time
import httplib as http_client
# http_client.HTTPConnection.debuglevel= 1
import webbrowser
import sqlite3dbm
# Persistent key/value stores: `db` records archive page URLs seen by
# scan_page(), `link_db` records zip download URLs found by enable_fetch().
db = sqlite3dbm.sshelve.open('bigstash.sqlite3')
link_db = sqlite3dbm.sshelve.open('link.sqlite3')

# One shared HTTP session; every request made by this script goes through it.
s = requests.session()
s.headers.update([
    ('Origin', 'https://www.bigstash.co'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36'),
])

# browser get
# NOTE(review): these look like credentials copied from a logged-in browser
# session; they are hard-coded here and will presumably expire -- confirm.
cookies = {
    'sessionid': 'hlnpbv2kwosup3flebad1wtiih4jpsa2',
    'hz_amChecked': '1',
    '_gat': '1',
    'csrftoken': 'KByAf9taCGzliJhvQIpdr7VteP1pFOVV',
}
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
def enable_fetch(url, delete_action = False):
    '''
    Request, record, or delete the downloadable zip of one archive page.

    The shared session's Referer header is set to `url` and a '#' progress
    marker is printed before anything else happens.

    url -- archive page URL; the delete endpoint is built as url + 'delete',
           so the URL is expected to end with '/'.
    delete_action -- when exactly True, run the delete flow instead of the
           fetch flow.

    Delete flow: GET the delete page (retrying every 5 seconds on any error,
    returning silently on HTTP 404), take the first value='...' match in the
    page as the CSRF token, then POST it together with the archive key.
    A failure of the POST is printed, not raised.

    Fetch flow: GET the archive page (retrying every 5 seconds on error).
    If a '.btn-outline-white' element exists, its href is probed with a HEAD
    request; when the response looks like a ready download, the zip URL is
    printed and stored in link_db, otherwise the whole fetch is retried.
    If no such element exists, the page's 'form.form-inline' form is
    submitted with its CSRF token.

    Returns None in every case.
    '''
    s.headers['Referer'] = url
    print('#')

    if delete_action is True:
        delete_url = url + 'delete'
        # With a trailing '/' on url, the second-to-last path piece is the
        # archive key.
        key = delete_url.split('/')[-2]
        while True:
            try:
                r = s.get(delete_url)
                if r.status_code == 404:
                    return
                token = re.findall('value=\'(.*?)\'', r.text)[0]
                break
            except Exception:
                # Was a bare `except:`, which also trapped KeyboardInterrupt
                # and SystemExit raised during the request.
                # NOTE(review): a page without any value='...' match lands
                # here too and is retried without limit.
                time.sleep(5)
                print('error, wait for retry')
        data = {'reqkey': key, 'csrfmiddlewaretoken': token}
        try:
            s.post(delete_url, data=data, timeout=5)
        except Exception as ex:
            # Best effort: report the failure and carry on.
            print(ex)
        return

    while True:
        try:
            d = pq(s.get(url).text)
            break
        except Exception as ex:
            print(ex)
            time.sleep(5)
            print(url + ' error, wait for retry')

    result = d('.btn-outline-white')
    if len(result) > 0:
        zip_url = 'https://www.bigstash.co{0}'.format(result[0].attrib['href'])
        r_zip = s.head(zip_url)
        # .get() instead of [...]: a response without a content-type header
        # used to raise KeyError before the 'location' check could run.
        if (r_zip.headers.get('content-type') == 'binary/octet-stream'
                or 'location' in r_zip.headers):
            print(zip_url)
            link_db[zip_url] = 1
        else:
            print('***')
            # NOTE(review): retries by recursing immediately, with no delay
            # and no depth limit.
            enable_fetch(url)
        return

    elem = d('form.form-inline')
    if len(elem) == 0:
        # Previously an IndexError; nothing can be submitted without the form.
        print(url + ' no archive form found, skipped')
        return
    form = elem[0]
    archive_url = 'https://www.bigstash.co{0}'.format(form.attrib['action'])
    data = {'csrfmiddlewaretoken': form.cssselect('input')[0].attrib['value']}
    s.post(archive_url, data=data)
def scan_page():
    '''
    Scan the archive listing.

    Walks listing pages 1 through 14 (sorted by newest first) and handles
    every table row: rows whose markup contains 'Creating' are dropped from
    db and sent through the delete flow of enable_fetch; every other row is
    recorded in db and sent through the normal fetch flow.
    '''
    from xml.etree import ElementTree

    listing_template = 'https://www.bigstash.co/a/archives/?page={0}&sortedby=-created'
    for page_no in xrange(1, 15):
        listing_url = listing_template.format(page_no)
        print(listing_url)
        listing = pq(s.get(listing_url).text)
        for row in listing('.table > tbody > tr'):
            markup = ElementTree.tostring(row, encoding='utf8', method='xml')
            # The first link in the row is the archive's own page.
            archive_url = 'https://www.bigstash.co{0}'.format(
                row.cssselect('a')[0].attrib['href'])
            # NOTE(review): nesting reconstructed from a flattened source --
            # the non-'Creating' branch is taken to pair with this test.
            if 'Creating' not in markup:
                db[archive_url] = 1
                enable_fetch(archive_url)
                continue
            print('{0} => del'.format(archive_url))
            if archive_url in db:
                del db[archive_url]
            enable_fetch(archive_url, True)
def remove():
    '''
    Remove already-downloaded archives.

    For every zip file matched by the glob pattern below, the archive key is
    taken from the second '_'-separated piece of the file name, printed, and
    the matching archive page is passed to enable_fetch in delete mode.
    '''
    import glob

    for zip_path in glob.glob('c:\\a\\*.zip'):
        file_name = zip_path.split('\\')[-1]
        key = file_name.split('_')[1]
        print(key)
        enable_fetch('https://www.bigstash.co/a/' + key + '/', True)
def download(max):
    '''
    Open stored download links in the default browser.

    max -- upper bound on how many links to open in this call; a value of
           zero or less opens nothing.

    Each link is printed, opened in a new browser tab, and then removed
    from link_db so it is not opened again. Returns None.
    '''
    if max <= 0:
        # The old counter-based loop still opened one link in this case.
        return

    opened = 0
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, and how link_db's own iterator copes with that is not visible
    # from this file.
    for link in list(link_db):
        print(link)
        webbrowser.open_new_tab(link)
        del link_db[link]
        opened += 1
        if opened >= max:
            break
def monitor_download():
    '''
    Keep the browser busy with at most six downloads at a time.

    Loops forever: counts the '.crdownload' files in the downloads folder,
    calls download() for the number of free slots when there are any,
    prints 'wait' otherwise, then sleeps for 60 seconds.
    '''
    import glob
    import time

    max_fn = 6
    partial_pattern = 'C:\\Users\\cwchiu\\Downloads\\*.crdownload'
    while True:
        in_progress = len(glob.glob(partial_pattern))
        free_slots = max_fn - in_progress
        if free_slots <= 0:
            print('wait')
        else:
            print('run download')
            download(free_slots)
        # NOTE(review): sleep placed at loop level (not only in the 'wait'
        # branch); reconstructed from a source that lost its indentation.
        time.sleep(60)
# Script entry: the workflow steps are toggled by (un)commenting the calls
# below; only monitor_download() is currently enabled, and it never returns.
# remove()
# scan_page()
# download(6)
monitor_download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment