Skip to content

Instantly share code, notes, and snippets.

@mertcangokgoz
Last active November 12, 2022 09:37
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save mertcangokgoz/4f90999d570dca49bc12efe74fab3c50 to your computer and use it in GitHub Desktop.
Taxpayer checker via tesseract-ocr
import os
import shutil
import uuid
import pytesseract # pip install pytesseract and https://tesseract-ocr.github.io/tessdoc/Home.html
import requests # pip install requests
from PIL import Image # pip install pillow
from bs4 import BeautifulSoup # pip install beautifulsoup4
# One shared HTTP session for the whole module. taxpayer_check() uses it for
# both the captcha download and the form submission, so cookies set by the
# first request are sent with the second (presumably how the server ties a
# captcha image to the submitted answer -- confirm against the service).
session = requests.Session()

# Browser-like request headers sent with the lookup form POST.
headers = {
    "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
    "Content-Type": "application/x-www-form-urlencoded",
    "Pragma": "no-cache",
    "Origin": "https://sorgu.efatura.gov.tr",
    "Referer": "https://sorgu.efatura.gov.tr/kullanicilar/xliste.php",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
    ),
}
def taxpayer_check(tax_no: str, *, timeout: float = 10.0) -> bool:
    """Return whether *tax_no* is reported as a registered e-invoice taxpayer.

    Downloads a captcha image from sorgu.efatura.gov.tr, reads it with
    Tesseract OCR, submits the lookup form with the tax number and the
    recognised captcha text, and looks for the confirmation message
    "Mükellef kayıtlıdır." in the returned HTML.

    Args:
        tax_no: Tax number to look up. Must be a non-empty string of digits.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        True if the confirmation message is found in the result page.
        False if *tax_no* is not a digit-only string, or if the message is
        absent. NOTE(review): a misread captcha presumably also yields a
        page without the message, so False does not prove the number is
        unregistered -- callers may want to retry.

    Raises:
        requests.RequestException: On network failure or timeout.
        Exception: Whatever Pillow / pytesseract raise when the downloaded
            data is not a readable image or Tesseract is unavailable.
    """
    # Reject anything that is not a digit-only string before doing any I/O.
    if not isinstance(tax_no, str) or not tax_no.isdigit():
        return False

    # Unique file name so concurrent calls do not clobber each other's image.
    captcha_path = f'{uuid.uuid4()}.jpg'
    try:
        # Stream the captcha straight to disk; the context manager releases
        # the connection back to the session pool when done.
        with session.get(
            url='https://sorgu.efatura.gov.tr/kullanicilar/img.php',
            stream=True,
            timeout=timeout,
        ) as image_response, open(captcha_path, 'wb') as out_file:
            shutil.copyfileobj(image_response.raw, out_file)

        # Close the image handle before the file is deleted below
        # (an open handle blocks deletion on Windows).
        with Image.open(captcha_path) as captcha_image:
            raw_text = pytesseract.image_to_string(
                captcha_image, config='--oem 3 --psm 6'
            )
        # Tesseract output carries newlines and may end with a form-feed
        # page separator; drop both so only the captcha characters remain.
        # https://stackoverflow.com/questions/67857988/removing-newline-n-from-tesseract-return-values
        captcha = raw_text.replace('\n', '').strip()

        # TLS verification stays at the session default (enabled): the
        # captcha GET against this same host already depends on it.
        result_response = session.post(
            url='https://sorgu.efatura.gov.tr/kullanicilar/xliste.php',
            headers=headers,
            data={'search_string': tax_no, 'captcha_code': captcha},
            timeout=timeout,
        )
        page = BeautifulSoup(result_response.text, features='html.parser')
        status_div = page.find('div', {'style': 'font-weight:bold;'})
        return status_div is not None and status_div.text == "Mükellef kayıtlıdır."
    finally:
        # Always remove the temporary captcha image, on success, on a
        # negative result and on errors alike. The file may not exist yet
        # if the download itself failed.
        try:
            os.remove(captcha_path)
        except FileNotFoundError:
            pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment