-
-
Save scrapehero/b85a280dc0d993f665c91e0332cf618f to your computer and use it in GitHub Desktop.
import pytesseract | |
import sys | |
import argparse | |
try: | |
import Image | |
except ImportError: | |
from PIL import Image | |
from subprocess import check_output | |
def resolve(path):
    """Resample the image at *path* with ImageMagick and OCR it with Tesseract.

    The image file is rewritten in place: ImageMagick is invoked with *path*
    as both input and output and ``-resample 600``.

    Args:
        path: Filesystem path of the captcha image. The file is overwritten
            by the resampled version.

    Returns:
        The text returned by ``pytesseract.image_to_string`` for the
        resampled image.

    Raises:
        subprocess.CalledProcessError: If the ImageMagick command exits with
            a non-zero status.
        OSError: If no ImageMagick executable can be launched.
    """
    # Function-scope import so the module's import block needs no change.
    try:
        from shutil import which
    except ImportError:  # Python 2 has no shutil.which
        from distutils.spawn import find_executable as which

    # ImageMagick 7 installs a single `magick` front end. On Windows the bare
    # name `convert` resolves to the system disk-conversion tool, which rejects
    # the arguments ("Invalid Parameter - -resample"). Prefer `magick` when it
    # is on PATH and keep `convert` as the fallback for ImageMagick 6 installs.
    executable = which('magick') or 'convert'

    print("Resampling the Image")
    check_output([executable, path, '-resample', '600', path])
    return pytesseract.image_to_string(Image.open(path))
if __name__ == "__main__":
    # Script entry point: one positional argument, the captcha image path.
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='Captcha file path')
    captcha_path = parser.parse_args().path

    print('Resolving Captcha')
    extracted = resolve(captcha_path)
    print('Extracted Text', extracted)
Can you please explain how you installed ImageMagick and how to manually convert the image?
I've installed but it doesn't work for captcha :(
It does not work, the same problem
magick convert "1.png" -resample 600 "2.png"
on windows?
FIX: change the path at check_output so the first argument is the full path to magick.exe.
for example,
check_output([r"C:\Program Files\ImageMagick-7.0.8-Q16\magick.exe", path, '-resample', '600', path])
Resolving Captcha
Resampling the Image
Traceback (most recent call last):
File "CaptchaSolver.py", line 22, in <module>
captcha_text = resolve(path)
File "CaptchaSolver.py", line 13, in resolve
check_output(["C:\Program Files\ImageMagick-7.0.8-Q16", path, '-resample', '600', path])
File "C:\Python27\lib\subprocess.py", line 216, in check_output
process = Popen(stdout=PIPE, *popenargs, **kwargs)
File "C:\Python27\lib\subprocess.py", line 394, in __init__
errread, errwrite)
File "C:\Python27\lib\subprocess.py", line 644, in _execute_child
startupinfo)
WindowsError: [Error 5] Access is denied
Hi,
I got below error.
Please help.
Resolving Captcha
Resampling the Image
Traceback (most recent call last):
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytesseract\pytesseract.py", line 226, in run_tesseract
proc = subprocess.Popen(cmd_args, **subprocess_args())
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\subprocess.py", line 1307, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "OCR_image_processing.py", line 22, in <module>
captcha_text = resolve(path)
File "OCR_image_processing.py", line 14, in resolve
return pytesseract.image_to_string(Image.open(path))
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytesseract\pytesseract.py", line 344, in image_to_string
return {
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytesseract\pytesseract.py", line 347, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytesseract\pytesseract.py", line 258, in run_and_get_output
run_tesseract(**kwargs)
File "C:\Users\yenag\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytesseract\pytesseract.py", line 230, in run_tesseract
raise TesseractNotFoundError()
pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your path
sudo apt-get install tesseract-ocr
Hello,
I am experiencing the same error
Invalid Parameter - -resample
I do have ImageMagick installed and added to PATH. When I manually enter
convert cap.jpg -resample 600 cap_resampled.jpg
it does the job perfectly