Created
July 6, 2021 05:44
-
-
Save TeddyDD/d9df6a7a5edec86af24e05934aa4acf0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Grab text from a user-selected screen region with OCR, copy it to the
# clipboard and show it in a desktop notification.
# Source https://github.com/sdushantha/dotfiles/blob/master/bin/bin/utils/ocr
#
# Requires: tesseract-ocr (or the binary named by OCR_BIN), maim,
#           notify-send, xclip.
# Environment:
#   OCR_BIN  name of the OCR executable to run (default: tesseract-ocr).
# Exit status: 0 when text was copied, 1 on any failure or when no text was
#              detected, 130/143 when interrupted by INT/TERM.

set -u

OCR_BIN="${OCR_BIN:-tesseract-ocr}"

# Check if the needed dependencies are installed
for dependency in "$OCR_BIN" maim notify-send xclip
do
    # Silence only the lookup; the failure report itself must stay visible.
    command -v "$dependency" >/dev/null 2>&1 || {
        printf "ocr: could not find '%s', is it installed?\n" "$dependency" >&2
        # notify-send may be the missing tool, so its own failure is ignored.
        notify-send "ocr" "Could not find '${dependency}', is it installed?" >/dev/null 2>&1
        exit 1
    }
done

# Work inside a private temporary directory rather than fixed, predictable
# file names in /tmp.
WORK_DIR=$(mktemp -d "${TMPDIR:-/tmp}/ocr.XXXXXX") || {
    notify-send "ocr" "could not create a temporary directory"
    exit 1
}
IMAGE_FILE="$WORK_DIR/ocr.png"
RAW_FILE="$WORK_DIR/ocr.txt"
TEXT_FILE="$WORK_DIR/clean.txt"

# Remove every temporary file created by this script.
cleanup() {
    rm -rf -- "$WORK_DIR"
}
trap cleanup EXIT
# Turn signals into a real exit so the script stops and the EXIT trap cleans up.
trap 'exit 130' INT
trap 'exit 143' TERM

# Take screenshot by selecting the area
maim -s "$IMAGE_FILE" || exit 1

# Do the magic (∩^o^)⊃━☆゚.*・。゚
# Notice that the .txt extension is removed from the output path. This is
# because the OCR tool adds .txt to the given path anyway, so passing
# .../ocr.txt would make it write the text to .../ocr.txt.txt
if ! "$OCR_BIN" "$IMAGE_FILE" "${RAW_FILE%.txt}" 2>/dev/null
then
    notify-send "ocr" "text recognition failed"
    exit 1
fi

# Remove the new page (form feed) character.
# Source: https://askubuntu.com/a/1276441/782646
tr -d '\f' <"$RAW_FILE" >"$TEXT_FILE" || exit 1

# Check if any text was detected: at least one non-whitespace character must
# be present (a line count would miss text without a trailing newline).
if ! grep -q '[^[:space:]]' "$TEXT_FILE"
then
    notify-send "ocr" "no text was detected"
    exit 1
fi

# Copy text to clipboard
xclip -selection clip <"$TEXT_FILE"

# Send a notification with the text that was grabbed using OCR.
# "--" keeps text that starts with "-" from being parsed as an option.
notify-send -- "ocr" "$(cat "$TEXT_FILE")"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment