Skip to content

Instantly share code, notes, and snippets.

@aonemd
Created January 13, 2019 15:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aonemd/a4895dbdbdf9b7a0bc47bcb051aae5b8 to your computer and use it in GitHub Desktop.
Save aonemd/a4895dbdbdf9b7a0bc47bcb051aae5b8 to your computer and use it in GitHub Desktop.
import os
from PIL import Image # pip install Pillow
import pytesseract # pip install pytesseract
# Walk SOURCE_DIR, write a thresholded black-and-white copy of every file
# into TMP_DIR, run Tesseract OCR on that copy, and report how many images
# yielded text.
#
# NOTE(review): despite the wording of the summary line, nothing in this
# script renames a file; the OCR text is only printed as a candidate name.

# Directory tree that is scanned, and the scratch directory that receives
# the thresholded copies handed to Tesseract.
SOURCE_DIR = './memes'
TMP_DIR = './tmp/'

# Tallies of images for which OCR produced text vs. produced nothing.
newly_renamed = 0
not_renamed = 0

# Image.save() does not create missing parent directories, so make sure the
# scratch directory exists before the first thresholded copy is written.
os.makedirs(TMP_DIR, exist_ok=True)

for subdir, _dirs, files in os.walk(SOURCE_DIR):
    for filename in files:
        src_path = os.path.join(subdir, filename)
        # NOTE(review): files sharing a basename in different sub-directories
        # map to the same scratch path and overwrite each other's copy.
        tmp_path = os.path.join(TMP_DIR, filename)

        # Convert to 8-bit grayscale inside a context manager so the source
        # file handle is released as soon as the pixel data has been copied.
        with Image.open(src_path) as original:
            gray = original.convert('L')

        # Hard threshold: gray levels below 249 become black, everything else
        # white (presumably to isolate near-white caption text -- confirm).
        bw = gray.point(lambda x: 0 if x < 249 else 250, '1')
        bw.save(tmp_path)

        # OCR runs on the copy re-read from disk, exactly as it was saved.
        with Image.open(tmp_path) as img:
            print("************************************")
            print(src_path)
            new_text = pytesseract.image_to_string(img).strip()

        if new_text:
            newly_renamed += 1
        else:
            # No text recognised: fall back to the original path.
            not_renamed += 1
            new_text = src_path
        print(new_text)

print("**********************************************")
print(f"{newly_renamed} files newly renamed vs. {not_renamed} still have the same old name.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment