Skip to content

Instantly share code, notes, and snippets.

@corposim
Created April 23, 2020 23:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save corposim/b7ccb6a2c8814032ddd65db91b371dc2 to your computer and use it in GitHub Desktop.
Save corposim/b7ccb6a2c8814032ddd65db91b371dc2 to your computer and use it in GitHub Desktop.
Uses e621's reverse image search API to find e621 posts using thumbnails from hydrus network.
#!/usr/bin/env python3
import requests
import time
# User configuration -- set these before running!
# ROOT is concatenated directly with "tXX/<hash>.thumbnail" when the thumbnail paths are built,
# so it must end with a path separator (as in the example below).
ROOT = "" # "/path/to/db/client_files/"
# USERNAME and API_KEY are sent to e621 as HTTP basic-auth credentials.
USERNAME = "" # e621 username
API_KEY = "" # e621 api key
# you need to enable API access to get a key.
# to enable API access, go to your e621 API page (Account > Manage API Access) and generate an API key.
# Prompt for sha256 hashes of images, one per line, until 'done' is submitted.
# In hydrus, select the images you want to search for, then
# share > copy > hashes > sha256 (default). (help > advanced mode must be checked.)
# When copy+pasting this way, hit enter once to submit the last hash, then submit "done".
# The pasted hashes may look odd in the terminal, but they are still read line by line.
hashes = []
while True:
    try:
        # Strip surrounding whitespace (including a stray '\r' from pasted text) so that
        # neither the "done" check nor the thumbnail path built from the hash is thrown off.
        entry = input("hashes: ").strip()
    except EOFError:
        # stdin was closed (Ctrl-D / end of piped input): treat it like "done".
        break
    if entry == "done":
        break
    if entry:
        # Blank lines are ignored.
        hashes.append(entry)
# Turn every hash string into the full path of its thumbnail file.
# hydrus keeps thumbnails in subfolders named 't' followed by the first 2 characters
# of the image's hash, with the file itself named "<hash>.thumbnail".
paths = [
    ROOT + 't' + sha256[:2] + '/' + sha256 + ".thumbnail"
    for sha256 in hashes
]
# Request setup for the POSTs to e621's reverse image search endpoint.
url = "https://e621.net/iqdb_queries.json"
headers = {'user-agent': 'hydrusBatchSauce/corposim'}
auth = requests.auth.HTTPBasicAuth(USERNAME, API_KEY)

# Result lines are collected in lists and joined once after the loop, so that
# `urls` and `notFound` end up as strings with one newline-terminated entry per image.
hit_lines = []
miss_lines = []

for path in paths:
    # Ask e621 for images similar to our thumbnail.
    # The `with` block makes sure the thumbnail's file handle is closed once the
    # upload has finished instead of leaking one open handle per search.
    with open(path, 'rb') as thumbnail:
        response = requests.post(url, files={'file': thumbnail}, headers=headers, auth=auth)

    # Stop searching if we get any code back other than 200 (OK).
    # Results gathered so far are kept.
    if response.status_code != 200:
        print("ERR:", response.status_code)
        break

    # Recover the image's hash from the path: skip ROOT plus the 4-character
    # "tXX/" subfolder prefix, and drop the trailing ".thumbnail".
    sha256 = path[len(ROOT) + 4:-len(".thumbnail")]

    # If e621 returned at least one hit, record the first one. (Per the original
    # author's note, e621 sorts results by similarity, so only the first is checked.)
    # A response with no usable first result -- not JSON (ValueError), an empty list or
    # missing key (LookupError), or an unexpected shape (TypeError) -- counts as "no results".
    # Only those errors are caught, so Ctrl-C and genuine bugs are no longer swallowed.
    try:
        post_id = response.json()[0]['post_id']
    except (ValueError, LookupError, TypeError):
        miss_lines.append(sha256 + '\n')
        print(sha256, "no results.")
    else:
        hit_lines.append("https://e621.net/posts/" + str(post_id) + '\n')
        print(sha256, "hit!")

    # Supposedly e621's hard limit is 2 requests per second (1/s preferred), responding
    # with 503 if you hit it. However, the original author was getting 429 (too many
    # requests) at 1 per second after a couple of searches; the sleep needed seems to
    # depend on how many searches you are doing. Reported experience: >200 searches with
    # 2 second sleeps, ~50 searches on 1.5s sleep, and 2 searches on 1s sleep.
    time.sleep(1.5)

urls = "".join(hit_lines)
notFound = "".join(miss_lines)
# Report the outcome: a blank line, the "hits:" heading with the post URLs,
# then the "not found:" heading with the hashes that had no match.
print('\nhits:', urls, 'not found:', notFound, sep='\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment