Skip to content

Instantly share code, notes, and snippets.

@tesu
Last active November 13, 2016 18:31
Show Gist options
  • Save tesu/3516618b582611daa6c2e3be0164a165 to your computer and use it in GitHub Desktop.
Save tesu/3516618b582611daa6c2e3be0164a165 to your computer and use it in GitHub Desktop.
Tallies up the tags of your sadpanda favorites.
import codecs
import csv
import json
import operator
import re
import requests
import time
# --- Credentials -----------------------------------------------------------
# Sent as the `ipb_member_id` / `ipb_pass_hash` cookies below. Both are blank
# in the source; presumably you fill in your own values before running.
MEMBER_ID = ''
PASS_HASH = ''

# --- Request settings ------------------------------------------------------
# Cookies attached to every request this script makes.
cookies = dict(
    ipb_member_id=MEMBER_ID,
    ipb_pass_hash=PASS_HASH,
    s='f3fefd0f1b529496b358ce7912b0da55cd7809984dbd4ed66ecaab18891240516ab5653bb516af8a23dd53c5121d328c37e173530aa8ff8af889403f4869db2f',
)

# Browser-like User-Agent sent with the favorites page requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36',
}

# Base delay in seconds: the API fetcher waits `sleep` between requests and
# the favorites scraper waits `sleep/5` between pages.
sleep = 5

# --- Mutable script state --------------------------------------------------
# Index of the favorites page requested via `?page=`.
page = 0

# [gallery id, second URL path segment] pairs scraped from the favorites pages.
d = []

# Gallery metadata objects collected from the API responses.
doujins = []

# Tag -> number of occurrences, keyed by the tag exactly as returned.
freq = {}

# Same tally, keyed by the tag with its leading "namespace:" prefix removed.
freqnn = {}
def fetch_api(d):
    """Fetch gallery metadata for the entries of ``d`` into ``doujins``.

    ``d`` is a sequence of gallery identifiers (the caller passes
    ``[gid, token]`` pairs). It is sent to the ``gdata`` API method in
    batches of 25, and every object in each response's ``gmetadata`` list
    is appended to the module-level ``doujins`` list.

    The batches are processed in a loop rather than by recursion, so the
    size of ``d`` is not limited by the interpreter's recursion depth and
    the list is not re-copied for every batch. An empty ``d`` performs no
    request at all.

    If a response contains the "temporarily banned" notice, the notice is
    printed (reduced to ASCII) and the function returns early; whatever was
    collected so far stays in ``doujins``.

    Returns:
        None.

    Raises:
        KeyError: if a response has no ``gmetadata`` key.
    """
    batch_size = 25
    total = len(d)
    for start in range(0, total, batch_size):
        # Named `payload` so the `json` module is not shadowed.
        payload = {
            'method': 'gdata',
            'gidlist': d[start:start + batch_size],
            'namespace': 1
        }
        r = requests.post('https://exhentai.org/api.php', json=payload, cookies=cookies)
        if 'Your IP address has been temporarily banned' in r.text:
            # Drop non-ASCII characters so printing cannot fail on a
            # console with a narrow encoding.
            print(r.text.encode('ascii', errors='ignore').decode('ascii'), flush=True)
            return
        doujins.extend(r.json()['gmetadata'])
        # Report what is actually still outstanding after this batch.
        remaining = max(total - start - batch_size, 0)
        print(str(remaining) + ' doujins left to fetch from api.', flush=True)
        if remaining:
            # Pause only between requests, not after the last one.
            time.sleep(sleep)
def _write_frequency_csv(path, counts):
    """Write ``counts`` (tag -> count) to ``path`` as CSV, most frequent first."""
    # newline='' is what the csv module asks for; without it every row is
    # followed by an empty line on Windows. The encoding is pinned so the
    # output does not depend on the platform default.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        w = csv.DictWriter(file, fieldnames=['tag', 'frequency'])
        w.writeheader()
        for tag, count in sorted(counts.items(), key=operator.itemgetter(1), reverse=True):
            w.writerow({'tag': tag, 'frequency': count})


# Scrape the favorites pages for gallery links, fetch their metadata through
# the API, then tally the tags. The tally and the output files live in the
# `finally` clause so that whatever was collected is still written when the
# fetching stops early (ban notice, network error, Ctrl-C, ...).
try:
    s = requests.Session()
    seen_gids = set()
    r = s.get('https://exhentai.org/favorites.php', cookies=cookies, headers=headers)
    while 'No hits found' not in r.text:
        if 'Your IP address has been temporarily banned' in r.text:
            print(r.text.encode('ascii',errors='ignore').decode('ascii'), flush=True)
            break
        new_on_page = 0
        for l in re.finditer(r'https://exhentai\.org/g/(\d+)/([^/]+)/', r.text):
            gid = int(l.group(1))
            # Count each gallery once even if its link occurs several times
            # in the HTML (NOTE(review): e.g. thumbnail + title link --
            # confirm against the actual page markup).
            if gid in seen_gids:
                continue
            seen_gids.add(gid)
            d.append([gid, l.group(2)])
            new_on_page += 1
        if not new_on_page:
            # A page with neither the "No hits found" marker nor any new
            # gallery link would otherwise be requested forever.
            print('No new doujins found on favorites page ' + str(page) + '; stopping.', flush=True)
            break
        page = page + 1
        time.sleep(sleep/5)
        r = s.get('https://exhentai.org/favorites.php?page='+str(page), cookies=cookies, headers=headers)
        print(str(len(d)) + ' doujins fetched from favorites.', flush=True)
    print('Stopped fetching doujins from favorites.', flush=True)
    fetch_api(d)
    print('Stopped fetching tags from api.', flush=True)
finally:
    for doujin in doujins:
        # An entry without 'tags' must not abort the output step.
        # NOTE(review): presumably the API can return such entries for
        # galleries it could not resolve -- confirm.
        for t in doujin.get('tags', ()):
            freq[t] = freq.get(t, 0) + 1
            # Same tag with its leading "namespace:" prefix removed.
            bare = re.sub(r'^[^:]+:','',t)
            freqnn[bare] = freqnn.get(bare, 0) + 1
    _write_frequency_csv('freq.csv', freq)
    _write_frequency_csv('freqnn.csv', freqnn)
    with codecs.open('doujins.json', 'w', 'utf-8') as file:
        json.dump(doujins, file, indent=4, sort_keys=True, ensure_ascii=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment