Last active
August 29, 2015 14:24
-
-
Save cwchiu/34d6fab92822880a5b8b to your computer and use it in GitHub Desktop.
bigstash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals, division, with_statement, print_function | |
import sys | |
reload(sys) | |
sys.setdefaultencoding('utf-8') | |
import requests | |
import io | |
from pyquery import PyQuery as pq | |
import re | |
import time | |
import httplib as http_client | |
# http_client.HTTPConnection.debuglevel= 1 | |
import webbrowser | |
import sqlite3dbm | |
# Two persistent key/value stores kept in local SQLite files.  ``db`` is keyed
# by archive page URL and ``link_db`` by zip download URL (see their users
# further down in this file).
db = sqlite3dbm.sshelve.open('bigstash.sqlite3')
link_db = sqlite3dbm.sshelve.open('link.sqlite3')

# One shared HTTP session so that headers and cookies are reused by every
# request made in this script.
s = requests.session()
s.headers.update({
    'Origin': 'https://www.bigstash.co',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36',
})

# Cookie values, per the original note "browser get" presumably copied from a
# logged-in browser session.
# NOTE(review): these look like live credentials committed to source; consider
# loading them from the environment or a local, untracked file.
cookies = {
    'sessionid': 'hlnpbv2kwosup3flebad1wtiih4jpsa2',
    'hz_amChecked': '1',
    '_gat': '1',
    'csrftoken': 'KByAf9taCGzliJhvQIpdr7VteP1pFOVV',
}
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
# Seconds to pause before retrying a failed request or a not-yet-ready link.
_RETRY_DELAY = 5
# Seconds to wait for a GET response inside a retry loop before giving up on
# that attempt (the attempt is then retried).
_GET_TIMEOUT = 30


def _delete_archive(url):
    """Submit the delete form found at ``url + 'delete'``.

    The GET is retried until it yields a token; a 404 response ends the
    attempt silently.  A failure of the final POST is printed, not raised.
    """
    delete_url = url + 'delete'
    # When ``url`` ends with '/', the second-to-last path segment of
    # ``delete_url`` is the last segment of ``url`` (sent as 'reqkey').
    key = delete_url.split('/')[-2]
    while True:
        try:
            r = s.get(delete_url, timeout=_GET_TIMEOUT)
            if r.status_code == 404:
                return
            # First single-quoted value='...' attribute in the page is used
            # as the CSRF token; IndexError (no match) triggers a retry.
            token = re.findall(r"value='(.*?)'", r.text)[0]
            break
        except Exception as ex:
            # ``Exception`` (not a bare except) so Ctrl-C still stops the loop.
            print(ex)
            print('error, wait for retry')
            time.sleep(_RETRY_DELAY)
    data = {'reqkey': key, 'csrfmiddlewaretoken': token}
    try:
        s.post(delete_url, data=data, timeout=5)
    except Exception as ex:
        print(ex)


def _load_page(url):
    """GET ``url`` and return it parsed by PyQuery, retrying until it works."""
    while True:
        try:
            return pq(s.get(url, timeout=_GET_TIMEOUT).text)
        except Exception as ex:
            print(ex)
            print(url + ' error, wait for retry')
            time.sleep(_RETRY_DELAY)


def enable_fetch(url, delete_action=False):
    """Process one archive page: delete it, record its zip link, or submit its form.

    :param url: archive page URL; also installed as the session ``Referer``.
    :param delete_action: only the literal ``True`` selects deletion.

    Without ``delete_action`` the page is loaded and:

    * if an element matching ``.btn-outline-white`` exists, its ``href`` is
      taken as the zip URL and probed with a HEAD request.  When the response
      has content-type ``binary/octet-stream`` or a ``location`` header, the
      URL is stored in ``link_db``; otherwise the page is re-checked after a
      pause (iteratively, instead of the original unbounded recursion);
    * otherwise the first ``form.form-inline`` is POSTed with the value of
      its first ``input`` as ``csrfmiddlewaretoken``.
    """
    s.headers['Referer'] = url
    print('#')
    if delete_action is True:
        _delete_archive(url)
        return

    while True:
        d = _load_page(url)
        result = d('.btn-outline-white')
        if len(result) == 0:
            break
        zip_url = 'https://www.bigstash.co{0}'.format(result[0].attrib['href'])
        r_zip = s.head(zip_url)
        # ``.get`` avoids a KeyError when the response has no content-type.
        if (r_zip.headers.get('content-type') == 'binary/octet-stream'
                or 'location' in r_zip.headers):
            print(zip_url)
            link_db[zip_url] = 1
            return
        print('***')
        time.sleep(_RETRY_DELAY)

    elem = d('form.form-inline')
    if len(elem) == 0:
        # Neither a link nor a form: report and skip instead of IndexError.
        print(url + ' has no link and no form, skipped')
        return
    archive_url = 'https://www.bigstash.co{0}'.format(elem[0].attrib['action'])
    data = {'csrfmiddlewaretoken': elem[0].cssselect('input')[0].attrib['value']}
    s.post(archive_url, data=data)
def scan_page(first_page=1, last_page=14):
    """Scan the archive listing pages and process every table row.

    For each row of ``.table > tbody > tr`` the first link's ``href`` gives
    the archive page URL.  Rows whose markup contains ``Creating`` are removed
    from ``db`` and passed to ``enable_fetch(url, True)``; all other rows are
    recorded in ``db`` and passed to ``enable_fetch(url)``.

    :param first_page: first listing page number to request (default 1).
    :param last_page: last listing page number to request, inclusive
        (default 14, the range the script originally hard-coded).
    """
    from xml.etree import ElementTree

    for pn in range(first_page, last_page + 1):
        page_url = 'https://www.bigstash.co/a/archives/?page={0}&sortedby=-created'.format(pn)
        print(page_url)
        r = s.get(page_url)
        d = pq(r.text)
        for tr in d('.table > tbody > tr'):
            links = tr.cssselect('a')
            href = links[0].attrib.get('href') if links else None
            if href is None:
                # Row without a usable link: nothing to act on.
                continue
            url = 'https://www.bigstash.co{0}'.format(href)
            # Serialized as UTF-8 bytes, so compare against a bytes literal
            # rather than relying on implicit str/unicode coercion.
            xmlstr = ElementTree.tostring(tr, encoding='utf8', method='xml')
            if b'Creating' in xmlstr:
                print('{0} => del'.format(url))
                if url in db:
                    del db[url]
                # NOTE(review): the source's indentation was lost; this call is
                # assumed to run for every 'Creating' row, not only for rows
                # already present in ``db`` - confirm.
                enable_fetch(url, True)
            else:
                db[url] = 1
                enable_fetch(url)
def remove(pattern='c:\\a\\*.zip'):
    """Request deletion of the archive behind each already-downloaded zip.

    The archive key is the second ``_``-separated field of the file's base
    name; ``enable_fetch`` is called in delete mode on
    ``https://www.bigstash.co/a/<key>/``.

    :param pattern: glob pattern of the downloaded zip files (defaults to the
        location the script originally hard-coded).
    """
    import glob

    for fn in glob.glob(pattern):
        # Accept both '\\' and '/' separators when isolating the base name.
        base = fn.replace('\\', '/').rsplit('/', 1)[-1]
        fields = base.split('_')
        if len(fields) < 2:
            # No key in the name: skip it instead of raising IndexError.
            print('skip ' + fn)
            continue
        key = fields[1]
        print(key)
        url = 'https://www.bigstash.co/a/' + key + '/'
        enable_fetch(url, True)
def download(max):
    """Open stored download links in the default browser.

    At most ``max`` keys of ``link_db`` are opened, each in a new browser tab,
    and each is removed from ``link_db`` once opened.  Nothing happens when
    ``max`` is zero or negative.

    :param max: maximum number of links to open (the name shadows the builtin
        but is kept so existing callers keep working).
    """
    if max <= 0:
        return
    # Take a snapshot of the keys first so entries are not deleted from
    # ``link_db`` while it is being iterated.
    pending = list(link_db)[:max]
    for link in pending:
        print(link)
        webbrowser.open_new_tab(link)
        del link_db[link]
def monitor_download():
    """Keep topping up downloads, checking once a minute, forever.

    Each round counts the ``*.crdownload`` files in the Downloads folder and,
    while fewer than six exist, calls ``download`` with the number of missing
    ones.  This function never returns.
    """
    import glob
    import time

    slot_limit = 6
    while True:
        in_progress = len(glob.glob('C:\\Users\\cwchiu\\Downloads\\*.crdownload'))
        free_slots = slot_limit - in_progress
        if free_slots <= 0:
            print('wait')
        else:
            print('run download')
            download(free_slots)
        time.sleep(60)
# Script entry point.  The commented calls are the other tasks this script can
# run; enable one by hand when needed.  The guard keeps the endless monitor
# loop from starting when the module is merely imported.
if __name__ == '__main__':
    # remove()
    # scan_page()
    # download(6)
    monitor_download()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment