# corposim/hydrusBatchSauce.py

## hydrusBatchSauce.py
#!/usr/bin/env python3

import requests
import time

# User settings -- fill these in before running.
# ROOT: path to the hydrus "client_files" folder. It is glued to the thumbnail
# sub-path by plain string concatenation, so it has to end with a path separator.
ROOT = "" # "/path/to/db/client_files/"
# USERNAME / API_KEY: e621 credentials, sent as HTTP basic auth with every search.
USERNAME = "" # e621 username
API_KEY = "" # e621 api key
# An API key only exists once API access is enabled: on e621 go to your API page
# (Account > Manage API Access) and generate a key.

# Collects the sha256 hashes to look up, one per prompt, until 'done' is submitted
# (or the input stream ends).
# To get the hashes: select the images in hydrus > share > copy > hashes > sha256 (default).
# (You must have help > advanced mode checked.)
# When copy+pasting this way, hit enter once to submit the last hash, then submit "done".
# The pasted hashes may look weird in the terminal, but it should still work.
hashes = []
hashInput = ""
while True:
    try:
        # strip stray spaces / carriage returns that pasting can leave behind;
        # they would otherwise end up inside the thumbnail file name
        hashInput = input("hashes: ").strip()
    except EOFError:
        # piped input ran out or Ctrl-D was pressed: treat it like "done"
        break
    if hashInput == "done":
        break
    if hashInput:
        hashes.append(hashInput)

# Turns every hash string into the full path of its thumbnail file.
# hydrus stores its thumbnails in subfolders named 't' followed by the first
# 2 characters of the image's hash, with the file itself named <hash>.thumbnail.
# A comprehension keeps the loop variable local, so the builtin hash() is no
# longer overwritten at module level.
paths = [ROOT + 't' + sha256[:2] + '/' + sha256 + ".thumbnail" for sha256 in hashes]

# Settings shared by every POST request to e621's similar-image search.
url = "https://e621.net/iqdb_queries.json"
headers = {'user-agent': 'hydrusBatchSauce/corposim'}
auth = requests.auth.HTTPBasicAuth(USERNAME, API_KEY)

# Results are gathered in lists while searching and joined into the
# newline-terminated strings `urls` / `notFound` once the loop is over.
hitUrls = []
missedHashes = []

for path in paths:
    # asks e621 for similar images to our thumbnail.
    # the with-block closes the file as soon as the upload is done, instead of
    # leaving one open handle behind per search.
    with open(path, 'rb') as thumbnail:
        r = requests.post(url, files={'file': thumbnail}, headers=headers, auth=auth)

    # stop searches if we get any code back other than 200 (OK);
    # whatever was collected so far is kept.
    if r.status_code != 200:
        print("ERR:", r.status_code)
        break

    # extracts our image's hash from the path: skip ROOT plus the 4-character
    # "tXX/" folder prefix, and drop the 10-character ".thumbnail" suffix
    imageHash = path[len(ROOT)+4:-10]

    # if e621 returns at least one hit from the current search, record its post url.
    # (e621 results are sorted by sameness, so it just needs to check the first one.)
    # if there are no results, reading the first entry's post_id fails and the
    # hash is recorded as not found. Only the errors a missing or malformed
    # result can cause are caught, so Ctrl-C and unrelated bugs are not hidden.
    try:
        postId = r.json()[0]['post_id']
    except (IndexError, KeyError, TypeError, ValueError):
        missedHashes.append(imageHash)
        print(imageHash, "no results.")
    else:
        hitUrls.append("https://e621.net/posts/" + str(postId))
        print(imageHash, "hit!")

    # supposedly e621's hard limit is 2 requests per second (1/s preferred), responding with 503 if you hit it
    # however i was getting 429 (too many requests) at 1 per second after a couple searches.
    # the amount you need to sleep seems to depend on how many searches you are doing.
    # i've done >200 searches with 2 second sleeps, ~50 searches on 1.5s sleep, and 2 searches on 1s sleep.
    time.sleep(1.5)

# one entry per line, each line newline-terminated
urls = "".join(postUrl + '\n' for postUrl in hitUrls)
notFound = "".join(missed + '\n' for missed in missedHashes)

# final report: the matched post urls first, then the hashes that had no match.
# sep='\n' puts every piece on its own line, exactly like four separate prints.
print('\nhits:', urls, 'not found:', notFound, sep='\n')
	#!/usr/bin/env python3

	import requests
	import time

	# User settings -- fill these in before running.
	# ROOT: path to the hydrus "client_files" folder. It is glued to the thumbnail
	# sub-path by plain string concatenation, so it has to end with a path separator.
	ROOT = "" # "/path/to/db/client_files/"
	# USERNAME / API_KEY: e621 credentials, sent as HTTP basic auth with every search.
	USERNAME = "" # e621 username
	API_KEY = "" # e621 api key
	# An API key only exists once API access is enabled: on e621 go to your API page
	# (Account > Manage API Access) and generate a key.

	# Collects the sha256 hashes to look up, one per prompt, until 'done' is submitted
	# (or the input stream ends).
	# To get the hashes: select the images in hydrus > share > copy > hashes > sha256 (default).
	# (You must have help > advanced mode checked.)
	# When copy+pasting this way, hit enter once to submit the last hash, then submit "done".
	# The pasted hashes may look weird in the terminal, but it should still work.
	hashes = []
	hashInput = ""
	while True:
	    try:
	        # strip stray spaces / carriage returns that pasting can leave behind;
	        # they would otherwise end up inside the thumbnail file name
	        hashInput = input("hashes: ").strip()
	    except EOFError:
	        # piped input ran out or Ctrl-D was pressed: treat it like "done"
	        break
	    if hashInput == "done":
	        break
	    if hashInput:
	        hashes.append(hashInput)

	# Turns every hash string into the full path of its thumbnail file.
	# hydrus stores its thumbnails in subfolders named 't' followed by the first
	# 2 characters of the image's hash, with the file itself named <hash>.thumbnail.
	# A comprehension keeps the loop variable local, so the builtin hash() is no
	# longer overwritten at module level.
	paths = [ROOT + 't' + sha256[:2] + '/' + sha256 + ".thumbnail" for sha256 in hashes]

	# Settings shared by every POST request to e621's similar-image search.
	url = "https://e621.net/iqdb_queries.json"
	headers = {'user-agent': 'hydrusBatchSauce/corposim'}
	auth = requests.auth.HTTPBasicAuth(USERNAME, API_KEY)

	# Results are gathered in lists while searching and joined into the
	# newline-terminated strings `urls` / `notFound` once the loop is over.
	hitUrls = []
	missedHashes = []

	for path in paths:
	    # asks e621 for similar images to our thumbnail.
	    # the with-block closes the file as soon as the upload is done, instead of
	    # leaving one open handle behind per search.
	    with open(path, 'rb') as thumbnail:
	        r = requests.post(url, files={'file': thumbnail}, headers=headers, auth=auth)

	    # stop searches if we get any code back other than 200 (OK);
	    # whatever was collected so far is kept.
	    if r.status_code != 200:
	        print("ERR:", r.status_code)
	        break

	    # extracts our image's hash from the path: skip ROOT plus the 4-character
	    # "tXX/" folder prefix, and drop the 10-character ".thumbnail" suffix
	    imageHash = path[len(ROOT)+4:-10]

	    # if e621 returns at least one hit from the current search, record its post url.
	    # (e621 results are sorted by sameness, so it just needs to check the first one.)
	    # if there are no results, reading the first entry's post_id fails and the
	    # hash is recorded as not found. Only the errors a missing or malformed
	    # result can cause are caught, so Ctrl-C and unrelated bugs are not hidden.
	    try:
	        postId = r.json()[0]['post_id']
	    except (IndexError, KeyError, TypeError, ValueError):
	        missedHashes.append(imageHash)
	        print(imageHash, "no results.")
	    else:
	        hitUrls.append("https://e621.net/posts/" + str(postId))
	        print(imageHash, "hit!")

	    # supposedly e621's hard limit is 2 requests per second (1/s preferred), responding with 503 if you hit it
	    # however i was getting 429 (too many requests) at 1 per second after a couple searches.
	    # the amount you need to sleep seems to depend on how many searches you are doing.
	    # i've done >200 searches with 2 second sleeps, ~50 searches on 1.5s sleep, and 2 searches on 1s sleep.
	    time.sleep(1.5)

	# one entry per line, each line newline-terminated
	urls = "".join(postUrl + '\n' for postUrl in hitUrls)
	notFound = "".join(missed + '\n' for missed in missedHashes)

	# final report: the matched post urls first, then the hashes that had no match.
	# sep='\n' puts every piece on its own line, exactly like four separate prints.
	print('\nhits:', urls, 'not found:', notFound, sep='\n')