Created
April 23, 2020 23:09
-
-
Save corposim/b7ccb6a2c8814032ddd65db91b371dc2 to your computer and use it in GitHub Desktop.
Uses e621's reverse image search (IQDB) API to find the e621 posts that match thumbnails from a Hydrus Network client.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
import time | |
# Configuration: fill in all three values before running the script.
ROOT = "" # hydrus thumbnail root, e.g. "/path/to/db/client_files/" (it is prepended to paths as-is, so keep the trailing slash)
USERNAME = "" # e621 username
API_KEY = "" # e621 API key
# An API key only exists once API access has been enabled for the account.
# To enable it, open your e621 API page (Account > Manage API Access) and generate an API key.
# Collect the sha256 hashes of the images to look up, one per prompt, until the
# user submits "done" (or stdin reaches end-of-file, e.g. when hashes are piped in).
# In hydrus: select the images, then share > copy > hashes > sha256 (default).
# (help > advanced mode must be checked for that menu entry.)
# When copy+pasting several hashes, press enter once to submit the last hash and
# then submit "done". The pasted text may look odd in the terminal, but it is
# still read one line at a time.
hashes = []
hashInput = ""
while True:
    try:
        # strip stray whitespace so a pasted hash always yields a valid file name
        hashInput = input("hashes: ").strip()
    except EOFError:
        # no more input available: behave as if "done" had been submitted
        break
    if hashInput == "done":
        break
    # blank lines (such as the extra enter after a paste) are not hashes
    if hashInput:
        hashes.append(hashInput)
# Turn each hash string into the full path of its thumbnail file.
# hydrus stores its thumbnails in subfolders named 't' followed by the first
# 2 characters of the image's hash; the file itself is "<hash>.thumbnail".
# ROOT is prepended as-is, so it has to end with a path separator.
# (The loop variable is deliberately not called `hash`, which would shadow the builtin.)
paths = [
    ROOT + 't' + image_hash[:2] + '/' + image_hash + ".thumbnail"
    for image_hash in hashes
]
# Look every thumbnail up through e621's reverse image search and collect the results.
# Produces two newline-terminated strings:
#   urls     - one e621 post URL per thumbnail that got a hit
#   notFound - one hash per thumbnail that got no hit (or could not be read)
url = "https://e621.net/iqdb_queries.json"
headers = {'user-agent': 'hydrusBatchSauce/corposim'}
auth = requests.auth.HTTPBasicAuth(USERNAME, API_KEY)
# seconds to wait on e621 before giving up on a request, so that a stalled
# connection cannot hang the whole batch
request_timeout = 60
hit_urls = []
missed_hashes = []
for path in paths:
    # recover the image's hash from the path: skip ROOT plus the 4-character
    # "tXX/" subfolder prefix, and drop the 10-character ".thumbnail" suffix
    image_hash = path[len(ROOT) + 4:-10]

    try:
        thumbnail = open(path, 'rb')
    except OSError as err:
        # a missing/unreadable thumbnail must not throw away the results gathered so far
        missed_hashes.append(image_hash)
        print(image_hash, "could not be read:", err)
        continue

    # asks e621 for similar images to our thumbnail;
    # the with-block closes the file as soon as the upload is finished
    with thumbnail:
        try:
            response = requests.post(
                url,
                files={'file': thumbnail},
                headers=headers,
                auth=auth,
                timeout=request_timeout,
            )
        except requests.exceptions.RequestException as err:
            # network failure or timeout: stop searching but keep what we have
            print("ERR:", err)
            break

    # stop searches if we get any code back other than 200 (OK)
    if response.status_code != 200:
        print("ERR:", response.status_code)
        break

    # if e621 returns at least one hit from the current search, record its post URL.
    # (e621 results are sorted by sameness, so only the first one needs checking.)
    # without results there is no first entry carrying a 'post_id', and the lookup
    # fails; only those lookup/decoding failures are treated as "no results".
    try:
        post_id = response.json()[0]['post_id']
    except (ValueError, LookupError, TypeError):
        missed_hashes.append(image_hash)
        print(image_hash, "no results.")
    else:
        hit_urls.append("https://e621.net/posts/" + str(post_id))
        print(image_hash, "hit!")

    # supposedly e621's hard limit is 2 requests per second (1/s preferred), responding with 503 if you hit it.
    # however the author was getting 429 (too many requests) at 1 per second after a couple searches.
    # the amount of sleep needed seems to depend on how many searches are being done:
    # >200 searches worked with 2 second sleeps, ~50 searches with 1.5s sleeps, and 2 searches with 1s sleeps.
    time.sleep(1.5)

# one entry per line, each terminated by a newline
urls = "".join(hit + '\n' for hit in hit_urls)
notFound = "".join(missed + '\n' for missed in missed_hashes)
# Report the outcome: a blank line, then the post URLs of all hits under
# "hits:", followed by the hashes without a match under "not found:".
print('\nhits:', urls, 'not found:', notFound, sep='\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment