Skip to content

Instantly share code, notes, and snippets.

@Tobi-De
Created May 7, 2024 14:17
Show Gist options
  • Save Tobi-De/4174aa0bf2e7e1393bb005c996fca6f7 to your computer and use it in GitHub Desktop.
Save Tobi-De/4174aa0bf2e7e1393bb005c996fca6f7 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "pytesseract",
# "pillow",
# "rich"
# ]
# ///
import pytesseract
from PIL import Image
import sys
from pathlib import Path
from rich.progress import track
def img_to_text(file: Path) -> str:
    """Run OCR on a single image file and return the recognized text.

    Args:
        file: Path to an image that Pillow can open.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Pillow keeps the underlying file handle open until the image is closed;
    # the context manager releases it as soon as OCR is done, so processing a
    # large folder does not accumulate open file descriptors.
    with Image.open(file) as image:
        return pytesseract.image_to_string(image)
def main(folder: Path, results: Path) -> None:
    """OCR every ``*.png`` directly inside *folder* into text files.

    For each PNG, the recognized text is written to
    ``results/<image stem>.txt``.

    Args:
        folder: Directory that is searched (non-recursively) for ``*.png``.
        results: Directory that receives the ``.txt`` files; it is created
            if it does not exist yet.
    """
    # Make the function usable on its own, not only via the script entry point.
    results.mkdir(parents=True, exist_ok=True)

    # A generator has no length, so the progress bar could not show a total.
    # Materializing the (sorted) list gives a real total and a stable order.
    images = sorted(folder.glob("*.png"))

    for file in track(images, description="Processing..."):
        text = img_to_text(file)
        # OCR output may contain non-ASCII characters; an explicit encoding
        # avoids depending on the platform's locale default.
        (results / f"{file.stem}.txt").write_text(text, encoding="utf-8")
if __name__ == "__main__":
    # Usage: script.py <folder-with-pngs> [results-folder]
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <folder> [results]")

    folder = Path(sys.argv[1])
    if not folder.is_dir():
        # Without this check a wrong path would silently process nothing.
        sys.exit(f"error: {folder} is not a directory")

    # The output directory is optional and defaults to ./results.
    results = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("results")
    # parents=True so a nested output path such as out/ocr/run1 also works.
    results.mkdir(parents=True, exist_ok=True)
    main(folder, results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment