Last active
June 22, 2021 02:44
-
-
Save elijahr/2db4b0669b940777853493868b072d3e to your computer and use it in GitHub Desktop.
Use OCR to search for text in your library of downloaded memes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
try: | |
from PIL import Image | |
except ImportError: | |
import Image | |
import os | |
import mimetypes | |
import sys | |
import subprocess | |
import pytesseract | |
def get_img_paths(meme_dirs):
    """Yield the path of every image file found beneath the given directories.

    Each directory in *meme_dirs* is walked recursively with ``os.walk``.
    A file counts as an image when ``mimetypes.guess_type`` maps its name
    to a ``image/*`` MIME type; the file contents are never inspected.

    Args:
        meme_dirs: Iterable of directory paths to search.

    Yields:
        The walked directory joined with the file name, for each matching
        file, in the order ``os.walk`` reports them.
    """
    for meme_dir in meme_dirs:
        for root, _, files in os.walk(meme_dir):
            for file in files:
                # guess_type returns (type, encoding); type is None when the
                # name is not recognised, hence the truthiness check.
                mtype = mimetypes.guess_type(file)[0]
                if mtype and mtype.startswith("image/"):
                    yield os.path.join(root, file)
# Directories searched when none are given on the command line.
DEFAULT_MEME_DIRS = [
    # Put paths here where you keep your memes
]
if __name__ == "__main__":
    # Usage: <script> QUERY [MEME_DIR ...]
    # Fail with a usage message rather than a bare IndexError when the
    # query argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} QUERY [MEME_DIR ...]")

    # The OCR text is lower-cased below, so lower-case the query as well;
    # otherwise a query containing any upper-case letter can never match.
    query = sys.argv[1].lower()
    meme_dirs = sys.argv[2:] or DEFAULT_MEME_DIRS

    for img_path in get_img_paths(meme_dirs):
        try:
            # The context manager closes the underlying file once OCR is
            # done, so a large library does not accumulate open handles.
            with Image.open(img_path) as img:
                text = pytesseract.image_to_string(
                    img, config="--oem 3 --psm 6"
                ).lower()
        except OSError as err:
            # The path filter is based on file names only, so it can let
            # through files the imaging library cannot read. Skip those
            # instead of aborting the whole search.
            print(f"skipping {img_path}: {err}", file=sys.stderr)
            continue

        if query not in text:
            continue

        print(f"===== {img_path} =====")
        print(text)
        # on macOS, this opens the image in Preview
        subprocess.run(["open", img_path])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment