# mertcangokgoz/taxpayer_check.py

## taxpayer_check.py
import os
import shutil
import uuid

import pytesseract  # pip install pytesseract and https://tesseract-ocr.github.io/tessdoc/Home.html
import requests  # pip install requests
from PIL import Image  # pip install pillow
from bs4 import BeautifulSoup  # pip install beautifulsoup4

# One shared HTTP session for every request this module makes.
session = requests.Session()

# Browser-like request headers that taxpayer_check passes to its form POST.
headers = {
    "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
    "Content-Type": "application/x-www-form-urlencoded",
    "Pragma": "no-cache",
    "Origin": "https://sorgu.efatura.gov.tr",
    "Referer": "https://sorgu.efatura.gov.tr/kullanicilar/xliste.php",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
    ),
}


def taxpayer_check(tax_no: str, *, timeout: float = 30.0) -> bool:
    """Check whether a tax number is a registered e-invoice taxpayer.

    Downloads the CAPTCHA image from sorgu.efatura.gov.tr, reads it with
    Tesseract OCR, and submits the lookup form for ``tax_no`` through the
    module-level ``session``.

    Args:
        tax_no: Tax number to look up; must be a string made of digits only.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        ``True`` only when the result page contains the bold message
        "Mükellef kayıtlıdır." ("The taxpayer is registered."). ``False`` for
        invalid input and for every other response; presumably a CAPTCHA that
        OCR misread also ends up here, so ``False`` is not proof that the
        number is unregistered.
    """
    # Reject anything that is not a digit-only string before touching the network.
    if not isinstance(tax_no, str) or not tax_no.isdigit():
        return False

    captcha_image_path = f'{uuid.uuid4()}.jpg'
    try:
        # Stream the CAPTCHA into a uniquely named file. The context managers
        # close the HTTP response and the file before OCR reads the image.
        with session.get(
            url='https://sorgu.efatura.gov.tr/kullanicilar/img.php',
            stream=True,
            timeout=timeout,
        ) as response, open(captcha_image_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)

        # Close the image handle before the file is deleted; an open handle
        # prevents removal on Windows.
        with Image.open(captcha_image_path) as captcha_image:
            # Drop the newlines Tesseract includes in its output.
            captcha = pytesseract.image_to_string(
                captcha_image, config='--oem 3 --psm 6'
            ).replace('\n', '')
    finally:
        # Remove the temporary image on every path, including errors. The file
        # does not exist yet if the download itself failed.
        try:
            os.remove(captcha_image_path)
        except FileNotFoundError:
            pass

    # TLS verification stays at its default (enabled), exactly like the
    # CAPTCHA request above, which targets the same host.
    response = session.post(
        url='https://sorgu.efatura.gov.tr/kullanicilar/xliste.php',
        headers=headers,
        data={'search_string': tax_no, 'captcha_code': captcha},
        timeout=timeout,
    )
    get_html_content = BeautifulSoup(response.text, features='html.parser')
    mukellef = get_html_content.find('div', {'style': 'font-weight:bold;'})
    # Always return a real bool rather than falling through with None.
    return mukellef is not None and mukellef.text == "Mükellef kayıtlıdır."
	import os
	import shutil
	import uuid

	import pytesseract # pip install pytesseract and https://tesseract-ocr.github.io/tessdoc/Home.html
	import requests # pip install requests
	from PIL import Image # pip install pillow
	from bs4 import BeautifulSoup # pip install beautifulsoup4

	# One shared HTTP session for every request this module makes.
	session = requests.Session()

	# Browser-like request headers that taxpayer_check passes to its form POST.
	headers = {
		"Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
		"Content-Type": "application/x-www-form-urlencoded",
		"Pragma": "no-cache",
		"Origin": "https://sorgu.efatura.gov.tr",
		"Referer": "https://sorgu.efatura.gov.tr/kullanicilar/xliste.php",
		"User-Agent": (
			"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
			"AppleWebKit/537.36 (KHTML, like Gecko) "
			"Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
		),
	}


	def taxpayer_check(tax_no: str, *, timeout: float = 30.0) -> bool:
		"""Check whether a tax number is a registered e-invoice taxpayer.

		Downloads the CAPTCHA image from sorgu.efatura.gov.tr, reads it with
		Tesseract OCR, and submits the lookup form for ``tax_no`` through the
		module-level ``session``.

		Args:
			tax_no: Tax number to look up; must be a string made of digits only.
			timeout: Seconds to wait on each HTTP request before ``requests``
				raises a timeout error instead of blocking indefinitely.

		Returns:
			``True`` only when the result page contains the bold message
			"Mükellef kayıtlıdır." ("The taxpayer is registered."). ``False``
			for invalid input and for every other response; presumably a
			CAPTCHA that OCR misread also ends up here, so ``False`` is not
			proof that the number is unregistered.
		"""
		# Reject anything that is not a digit-only string before touching the network.
		if not isinstance(tax_no, str) or not tax_no.isdigit():
			return False

		captcha_image_path = f'{uuid.uuid4()}.jpg'
		try:
			# Stream the CAPTCHA into a uniquely named file. The context managers
			# close the HTTP response and the file before OCR reads the image.
			with session.get(
				url='https://sorgu.efatura.gov.tr/kullanicilar/img.php',
				stream=True,
				timeout=timeout,
			) as response, open(captcha_image_path, 'wb') as out_file:
				shutil.copyfileobj(response.raw, out_file)

			# Close the image handle before the file is deleted; an open handle
			# prevents removal on Windows.
			with Image.open(captcha_image_path) as captcha_image:
				# Drop the newlines Tesseract includes in its output.
				captcha = pytesseract.image_to_string(
					captcha_image, config='--oem 3 --psm 6'
				).replace('\n', '')
		finally:
			# Remove the temporary image on every path, including errors. The
			# file does not exist yet if the download itself failed.
			try:
				os.remove(captcha_image_path)
			except FileNotFoundError:
				pass

		# TLS verification stays at its default (enabled), exactly like the
		# CAPTCHA request above, which targets the same host.
		response = session.post(
			url='https://sorgu.efatura.gov.tr/kullanicilar/xliste.php',
			headers=headers,
			data={'search_string': tax_no, 'captcha_code': captcha},
			timeout=timeout,
		)
		get_html_content = BeautifulSoup(response.text, features='html.parser')
		mukellef = get_html_content.find('div', {'style': 'font-weight:bold;'})
		# Always return a real bool rather than falling through with None.
		return mukellef is not None and mukellef.text == "Mükellef kayıtlıdır."