Skip to content

Instantly share code, notes, and snippets.

@elijahr
Last active June 22, 2021 02:44
Show Gist options
  • Save elijahr/2db4b0669b940777853493868b072d3e to your computer and use it in GitHub Desktop.
Save elijahr/2db4b0669b940777853493868b072d3e to your computer and use it in GitHub Desktop.
Use OCR to search for text in your library of downloaded memes
#!/usr/bin/env python3
try:
from PIL import Image
except ImportError:
import Image
import os
import mimetypes
import sys
import subprocess
import pytesseract
def get_img_paths(meme_dirs):
    """Yield the path of every image file found under the given directories.

    Each directory in *meme_dirs* is walked recursively with ``os.walk``. A
    file counts as an image when ``mimetypes.guess_type`` maps its name to an
    ``image/*`` MIME type, so detection is based on the file name only; the
    file contents are never read here.

    Args:
        meme_dirs: Iterable of directory paths to search.

    Yields:
        The path of each matching file, built by joining the directory it was
        found in with its file name.
    """
    for meme_dir in meme_dirs:
        for root, _dirs, filenames in os.walk(meme_dir):
            for filename in filenames:
                mime_type, _encoding = mimetypes.guess_type(filename)
                # guess_type returns None as the type for unknown extensions,
                # hence the truthiness check before startswith().
                if mime_type and mime_type.startswith("image/"):
                    yield os.path.join(root, filename)
# Directories searched when no directories are passed on the command line.
# Add the paths where you keep your memes as strings inside the brackets.
DEFAULT_MEME_DIRS = []
if __name__ == "__main__":
    # Usage: <script> QUERY [MEME_DIR ...]
    # Exit with a usage message instead of an IndexError traceback when the
    # query argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} QUERY [MEME_DIR ...]")

    # The OCR output is lower-cased before matching, so the query has to be
    # lower-cased as well; otherwise a query containing any capital letter
    # could never match.
    query = sys.argv[1].lower()
    meme_dirs = sys.argv[2:] or DEFAULT_MEME_DIRS

    for img_path in get_img_paths(meme_dirs):
        try:
            # The context manager closes the underlying file handle once OCR
            # is done, so a large library does not leak open files.
            with Image.open(img_path) as img:
                text = pytesseract.image_to_string(
                    img, config="--oem 3 --psm 6"
                ).lower()
        except OSError as err:
            # Files are selected by name only, so some may be unreadable or in
            # a format Pillow cannot decode; skip them rather than aborting
            # the whole search.
            print(f"skipping {img_path}: {err}", file=sys.stderr)
            continue

        if query in text:
            print(f"===== {img_path} =====")
            print(text)
            # on macOS, this opens the image in Preview
            subprocess.run(["open", img_path])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment