Skip to content

Instantly share code, notes, and snippets.

Created July 22, 2017 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/b9175556f05f31d46181661596a9d2c5 to your computer and use it in GitHub Desktop.
Save anonymous/b9175556f05f31d46181661596a9d2c5 to your computer and use it in GitHub Desktop.
from requests import Session
from psycopg2 import connect
from time import sleep
from hashlib import sha1
from os.path import exists
# Image downloader for booruio.post.
#
# Repeatedly selects up to 999 posts whose sha1 is still NULL, downloads each
# image from https://booru.io/data/<image_id>, stores it under
# /opt/storage/bulk/ii/images/<h[:2]>/<h[2]>/<h> (h = SHA-1 hex digest of the
# content) and records the digest in the post row.
#
# The print calls use a single pre-formatted argument so the output is the
# same under both Python 2 and Python 3.

# Extra names needed only by this loop (directory creation + atomic rename).
from os import makedirs, rename
from os.path import dirname, isdir

con = connect('dbname=ii')
s = Session()
# All HTTP(S) traffic is sent through a local SOCKS5 proxy.
s.proxies = {'http': "socks5://localhost:32256", 'https': 'socks5://localhost:32256'}
while True:
    c = con.cursor()
    c.execute('SELECT image_id FROM booruio.post WHERE sha1 IS NULL LIMIT 999')
    tsk = list(c)
    if not tsk:
        # Nothing pending right now; poll again later in case new posts
        # get inserted.
        print('Done?')
        sleep(10)
        continue
    for n, (image_id,) in enumerate(tsk):
        url = 'https://booru.io/data/' + image_id
        print('%03d/%03d %s' % (n, len(tsk), url))
        result = s.get(url)
        if result.status_code != 200:
            # Back off and leave sha1 NULL so the row is picked up again by
            # a later SELECT.
            print('BAD CODE %s' % result.status_code)
            sleep(30)
            continue
        h = sha1(result.content).hexdigest()
        fp = '/opt/storage/bulk/ii/images/' + h[:2] + '/' + h[2] + '/' + h
        if exists(fp):
            # Same content is already stored (content-addressed path).
            print('-E-> %s' % h)
        else:
            # The shard directory may not exist yet for this hash prefix.
            shard_dir = dirname(fp)
            if not isdir(shard_dir):
                makedirs(shard_dir)
            # Write in binary mode to a temporary name and rename into place,
            # so an interrupted run never leaves a truncated file that the
            # exists() check above would later accept as complete.
            tmp = fp + '.part'
            with open(tmp, 'wb') as out:
                out.write(result.content)
            rename(tmp, fp)
            print('---> %s' % h)
        c.execute('UPDATE booruio.post SET sha1 = %s WHERE image_id = %s', (h, image_id))
        # Commit per image so the database never lags far behind the files
        # already written to disk.
        con.commit()
-- Schema used by the booru.io crawler and image downloader scripts.
-- Everything is created in one transaction so a failure leaves nothing behind.
BEGIN;

CREATE SCHEMA booruio;

CREATE TABLE booruio.post (
    -- Post key; the crawler stores the API row's "key" value here.
    id       VARCHAR PRIMARY KEY,
    -- Identifier appended to https://booru.io/data/ to fetch the image.
    image_id VARCHAR NOT NULL,
    -- SHA-1 hex digest of the downloaded image; NULL until downloaded.
    sha1     VARCHAR,
    -- The API row, serialized as JSON by the crawler.
    data     JSONB NOT NULL
);

COMMIT;
from requests import Session
from psycopg2 import connect
from urllib.parse import urljoin
from time import sleep
import logging as L
from pprint import pprint
from json import dumps
# Crawler for the booru.io entity query API.
#
# For every query string in `qs`, pages through
# https://booru.io/api/query/entity (following the `cursor` value returned
# with each page) and inserts every post not yet present in booruio.post.
# Each page is committed on its own.
s = Session()
# All HTTP(S) traffic is sent through a local SOCKS5 proxy.
s.proxies = {'http': "socks5://localhost:32256", 'https': 'socks5://localhost:32256'}
con = connect('dbname=ii')
c = con.cursor()
# Load the ids already stored so known posts can be skipped without a
# round-trip per row.
c.execute('select id from booruio.post')
exists = {x for x, in c}
qs = []
qs += ['touhou#origin']
# for "download everything" mode
qs += ['2girls', '3girls', '4girls', '5girls', '6+girls']
url_base = 'https://booru.io/api/'
for q in qs:
    cursor = None
    while True:
        params = {'query': q}
        if cursor:
            params['cursor'] = cursor
        resp = s.get(url_base + 'query/entity', params=params)
        # Fail loudly on an HTTP error instead of trying to parse an error
        # page as a result set.
        resp.raise_for_status()
        data = resp.json()
        rows = data['data'] or []
        new = 0
        for row in rows:
            post_id = row['key']
            # Check for known posts first so they are skipped before any
            # further fields are inspected.
            if post_id in exists:
                continue
            transforms = row.get('transforms') or {}
            # Prefer the '' transform; fall back to the ':image/jpeg' one.
            if '' in transforms:
                image_id = transforms['']
            else:
                image_id = transforms.get(':image/jpeg')
            if image_id is None:
                # image_id is NOT NULL in the table, so such a row cannot be
                # stored; report it and keep crawling.
                print('no usable image transform for', post_id)
                continue
            new += 1
            c.execute('INSERT INTO booruio.post(id,image_id,data) VALUES (%s, %s, %s)', (post_id, image_id, dumps(row)))
            exists.add(post_id)
        con.commit()
        print(q, cursor, len(rows), new)
        # A missing *or empty* cursor means there is no next page. Treating an
        # empty cursor as valid would send the next request without a cursor
        # and restart the query from its first page forever.
        next_cursor = data.get('cursor')
        if not next_cursor:
            print('done')
            break
        cursor = next_cursor
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment