Created
October 13, 2012 16:19
-
-
Save anonymous/3885201 to your computer and use it in GitHub Desktop.
_booru scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/python3
# Just run it. Supports python2 and python3.
# Only tested on unix, but should work everywhere; nothing complicated going on.
# Saves files to 'current directory / out [site id] [tags searched for, in alphabetic order]'.
# You'll need an account for danbooru, and you can only search 2 tags at a go (with a basic account)
import os
import sys
import xml.etree.ElementTree as ET
from hashlib import sha1
from math import ceil

# Python 2/3 compatibility shims: one version check picks both the right
# urllib entry points and a unified name for the interactive-input builtin.
if sys.version_info[0] >= 3:
    from urllib.request import urlopen, urlretrieve
    minput = input
else:
    from urllib import urlopen, urlretrieve  # can we all move on from python2, please?
    minput = raw_input
# Interactive site selection.  Danbooru's legacy XML API authenticates with a
# salted SHA1 of the password ('choujin-steiner--<pw>--'); gelbooru's dapi
# endpoint needs no login.
# NOTE(review): everything goes out over plain http, so the password hash is
# as good as a password to anyone on the wire -- confirm whether these
# endpoints now require https.
site = minput('Enter \'d\' for danbooru, \'g\' for gelbooru: ')
if site == 'd':
    uname = minput('Username: ')
    passw = 'choujin-steiner--' + minput('Password (don\'t worry, it\'s not saved): ') + '--'
    hpassw = sha1(passw.encode('utf-8')).hexdigest()
    baseURL = 'http://danbooru.donmai.us/post/index.xml?login=' + uname + '&password_hash=' + hpassw
    pagelabel = 'page'  # danbooru page numbers are 1-based
    pageoff = 1
elif site == 'g':
    baseURL = 'http://gelbooru.com/index.php?page=dapi&s=post&q=index'
    pagelabel = 'pid'   # gelbooru page ids are 0-based
    pageoff = 0
else:
    print('I DON\'T UNDERSTAND')
    # fix: bare exit() is injected by the 'site' module and is not guaranteed
    # to exist in every interpreter; sys.exit() is the reliable spelling.
    sys.exit(1)
def sanitize(s):
    """Return *s* with every character outside a filename-safe whitelist replaced by '_'."""
    safe = set('-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
    cleaned = []
    for ch in s:
        cleaned.append(ch if ch in safe else '_')
    return ''.join(cleaned)
# Ask for the search tags; sorting makes the output directory name
# deterministic no matter what order the user typed the tags in.
tags = minput('Enter space-delimited tags to search for: ').split()
tags.sort()
perpage = 20    # posts requested per API call
barwidth = 40   # width in characters of the download progress bar
# NOTE(review): 'dir' shadows the builtin of the same name; kept because the
# download code below refers to it by this name.
dir = sanitize('out ' + site + ' ' + ' '.join(tags))
tags = '+'.join(tags)  # rebound: now the URL-ready '+'-joined query string
if not os.path.exists(dir):
    os.makedirs(dir)
# Just get one so we know the count.
req = urlopen(baseURL + '&tags={}&limit=1'.format(tags))
root = ET.parse(req).getroot()
# Both boorus report the total number of matches in the root element's
# 'count' attribute.
total = int(root.attrib['count'])
# Always precisely width 4, suffix width 2.
def humanize_bytes(nbytes):
    """Format a byte count as a (4-char number, factor, 2-char suffix) triple.

    Returns (text, factor, suffix): *text* is nbytes/factor truncated to four
    characters, and *factor* is exposed so the caller can render another value
    (e.g. bytes downloaded so far) on the same scale.
    """
    abbrevs = (
        (1 << 50, 'PB'),
        (1 << 40, 'TB'),
        (1 << 30, 'GB'),
        (1 << 20, 'MB'),
        (1 << 10, 'kB'),
        (1, ' B'),
    )
    for factor, suffix in abbrevs:
        if nbytes >= factor:
            # float() forces true division under python2 as well; without it
            # e.g. 1536/1024 formatted as '1.00' instead of '1.50'.
            k = '{:f}'.format(float(nbytes) / factor)[:4]
            return k, factor, suffix
    # fix: 0 (or a negative count) previously fell off the loop and returned
    # None, which crashed the caller's three-way tuple unpack.
    return '0.00', 1, ' B'
def dlProgress(count, blockSize, totalSize):
    """urlretrieve reporthook: repaint an in-place progress bar on stdout.

    *count*, *blockSize*, *totalSize* are the standard reporthook arguments.
    The caller has already printed barwidth+13 placeholder characters, which
    this backs over with '\\b' before redrawing.
    """
    if totalSize <= 0:
        # fix: when the server sends no Content-Length, urlretrieve reports
        # totalSize as -1 (or 0), which previously raised ZeroDivisionError.
        # Best effort: skip the bar rather than kill the download.
        return
    done = min(count * blockSize, totalSize)
    ticks = int(done * barwidth / totalSize)
    outstr = ('\b' * (barwidth + 13)) + ('-' * ticks) + (' ' * (barwidth - ticks)) + ']'
    t, factor, suffix = humanize_bytes(totalSize)
    # Reuse the total's factor so both numbers share one unit; float() keeps
    # this true division under python2.
    k = '{:f}'.format(float(done) / factor)[:4]
    outstr += ' {0}/{1}{2}'.format(k, t, suffix)
    sys.stdout.write(outstr)
    sys.stdout.flush()
def dopage(root):
    """Download every post element in one XML result page, oldest-first.

    Files are saved as '<id> <tags><ext>' (tags clipped to about 200 chars at
    a word boundary) inside the module-level output directory 'dir'; files
    that already exist are skipped, so interrupted runs are resumable.
    """
    kids = list(root)
    kids.reverse()  # the API returns newest-first; reverse to fetch oldest-first
    for child in kids:
        url = child.attrib['file_url']
        extn = url[url.rfind('.'):]
        mtags = child.attrib['tags'].lstrip()
        if len(mtags) > 200:
            # fix: clip first, then cut back to the last space WITHIN the
            # clipped text.  The old code ran rfind on the full string, whose
            # last space usually lies past index 200, so the word-boundary
            # cut was a no-op and 201-char names slipped through.
            clipped = mtags[:201]
            mtags = clipped[:clipped.rfind(' ')]
        filename = dir + '/' + sanitize(child.attrib['id'] + ' ' + mtags + extn)
        if os.path.exists(filename):
            print('Skipping ' + '{0:7s}'.format(child.attrib['id']) + ': already exists.')
        else:
            sys.stdout.write('Fetching ' + '{0:7s}'.format(child.attrib['id']) + ': [' + (' ' * (barwidth + 13)))
            sys.stdout.flush()
            urlretrieve(url, filename, reporthook=dlProgress)
            print('')
# Walk the result pages from last to first so the overall download order is
# oldest-first, matching dopage()'s within-page ordering.
pages = int(ceil(float(total) / perpage))
for pg in reversed(range(pages)):  # eg 3 pages: pg 2,1,0
    print('Page {} of {}: ({} images total)'.format(pages - pg, pages, total))
    pageurl = baseURL + ('&tags={}&limit={}&' + pagelabel + '={}').format(tags, perpage, pg + pageoff)
    response = urlopen(pageurl)
    dopage(ET.parse(response).getroot())
    print('')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment