|
import pytesseract |
|
# import time |
|
import requests |
|
import os |
|
from bs4 import BeautifulSoup |
|
from PIL import Image, ImageEnhance, ImageFilter |
|
#import qrtools |
|
from pyzbar.pyzbar import decode |
|
# import cv2 |
|
import textract |
|
import io |
|
import easyocr |
|
|
|
# Keyword fragments that flag a message as a likely phishing attempt.
phrases = ['etsy', 'support', 'billing', 'account']

# Etsy usernames whose profile avatars we want to inspect.
profiles = [
    'Aquarius0174',        # still an active account 18/3/24
    '4rxehd0d9sx9cpjz',
    'ygxanpmoi9bll6dm',
    'dpiqol46u7qv8s6h',
]

# Demo of checking qrcodes sent to users.
qrcodes = [
    'pics/qrcode.jpg',
    'pics/qrcode2.jpg',
]
|
def read_qrcode_data(image):
    """Decode any barcodes/QR codes present in an image.

    Parameters:
        image: path (or file object) accepted by PIL.Image.open.

    Returns:
        list: pyzbar Decoded results; empty list when no code is found.
    """
    # Context manager closes the underlying file handle promptly instead
    # of leaking it until garbage collection.
    with Image.open(image) as img:
        return decode(img)
|
# Scan each demo QR-code image and warn when its decoded payload
# mentions a flagged phrase (fake support/billing links etc.).
for i in qrcodes:
    data = read_qrcode_data(i)
    if not data:
        # decode() returns an empty list when no code is detected;
        # indexing data[0] unguarded would raise IndexError here.
        print(f'INFO: no QR code detected in {i}')
        continue
    text = data[0].data.decode('utf-8')
    if any(phrase in text.lower() for phrase in phrases):
        print(f'WARN: {i} contains phrase: {text.lower()}')
|
|
|
# extract text from image |
|
# extract text from image
def read_image_text(image):
    """Extract text from an image, trying several OCR backends in turn.

    Backends are ordered cheapest-first; a failing backend falls through
    to the next one.  (Previously each backend's except-clause returned
    "" immediately, silently skipping the remaining backends.)

    Parameters:
        image: path to the image file.

    Returns:
        str: extracted text, or "" when every backend fails.
    """
    # textract (hit and miss, but fast)
    try:
        text = textract.process(image).decode('utf-8')
        # Only trust textract output when it already contains a flagged
        # phrase; otherwise let the slower backends have a go.
        if check_text(text):
            return text
    except Exception:
        pass  # fall through to easyocr

    # easy ocr (gets them all tbh, but slow)
    try:
        # I don't have a proper GPU...
        reader = easyocr.Reader(['en'], gpu=False, verbose=False)  # specify the language(s)
        result = reader.readtext(image)
        text = ' '.join(item[1] for item in result)
        if text:
            return text
    except Exception:
        pass  # fall through to pytesseract

    # pytesseract (reasonably fast, but misses some easy ones)
    for psm in range(14):  # tesseract page segmentation modes are 0-13
        try:
            print(f'INFO: Running psm {psm} {image}')
            # BUG FIX: previously passed the module-level loop leftover
            # `i` instead of this function's `image` parameter.
            text = pytesseract.image_to_string(image, config=f'--psm {psm}')
            if text:
                return text
        except Exception:
            continue  # try the next segmentation mode

    # Empty string (not False) so callers can safely run string
    # operations such as check_text() on the result.
    return ""
|
|
|
|
|
# slurp up images in pics directory |
|
# slurp up images in pics directory
def get_local_images(dir):
    """List image filenames found directly inside *dir* (non-recursive).

    Parameters:
        dir: directory path to scan.

    Returns:
        list[str]: bare filenames (no directory prefix) whose extension
        looks like a known image format.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    # Match case-insensitively and require the dot, so 'PIC.JPG' is
    # found and a file literally named 'notajpg' is not.
    return [f for f in os.listdir(dir)
            if f.lower().endswith(image_extensions)]
|
|
|
def generate_test_images(image):
    """Write four preprocessed variants of *image* into tests/.

    Produces tests/original.jpg, tests/grayscale.jpg, tests/enhanced.jpg
    and tests/filtered.jpg for the OCR pass to chew on.

    Parameters:
        image: path to the source image.

    Returns:
        False when the image is a PNG (skipped), otherwise None.
    """
    # Make sure the output directory exists before any save() call.
    os.makedirs('tests', exist_ok=True)

    with Image.open(image) as src:
        img = src

        # convert webp to jpeg
        if img.format == 'WEBP':
            print('INFO: Convert webp to JPEG')
            # Round-trip through an in-memory buffer; force RGB because
            # JPEG cannot store an alpha channel.
            image_io = io.BytesIO()
            img.convert('RGB').save(image_io, format='JPEG')
            image_io.seek(0)
            img = Image.open(image_io)

        if img.format == "PNG":
            return False

        # Palette/alpha modes (e.g. GIF 'P', 'RGBA') cannot be saved as
        # JPEG directly.
        if img.mode not in ('RGB', 'L'):
            img = img.convert('RGB')

        # Save the original image
        img.save('tests/original.jpg')

        # Convert the image to grayscale and save it
        img.convert('L').save('tests/grayscale.jpg')

        # Enhance the contrast of the image and save it
        ImageEnhance.Contrast(img).enhance(2).save('tests/enhanced.jpg')

        # Apply a median filter to the image and save it
        img.filter(ImageFilter.MedianFilter()).save('tests/filtered.jpg')
|
|
|
|
|
def check_text(text):
    """Return True when *text* contains any flagged phrase.

    Newlines are folded into spaces and the comparison is
    case-insensitive.
    """
    normalised = text.replace('\n', ' ').lower()
    return any(phrase in normalised for phrase in phrases)
|
|
|
# Scan every image in pics/: build preprocessed variants, OCR each
# variant, and warn as soon as flagged phrasing shows up.
images = get_local_images("pics")
for i in images:
    # generate_test_images returns False for PNGs it skips; without this
    # guard we would OCR stale tests/*.jpg left over from the previous
    # image and misattribute its text to this file.
    if generate_test_images(os.path.join('pics', i)) is False:
        continue
    image_types = ['original', 'grayscale', 'enhanced', 'filtered']
    for t in (f'tests/{img_type}.jpg' for img_type in image_types):
        text = read_image_text(t)
        # read_image_text may yield a falsy result (e.g. "" or False);
        # guard before running string checks on it.
        if text and check_text(text):
            print(f'WARN: {i} is suspicious.')
            break
|
|
|
|
|
|
|
|
|
# # get profile images from etsy page |
|
# def get_profile_image(profile): |
|
# # set user agent to that of curl, which WAF appears to allow |
|
# headers = { |
|
# 'User-Agent': 'curl/7.64.1' |
|
# } |
|
# resp = requests.get(f'https://www.etsy.com/people/{profile}', headers=headers) |
|
# if resp.status_code != 200: |
|
# print(f'ERROR: Got a {resp.status_code} response') |
|
# return |
|
# html = resp.text |
|
|
|
# soup = BeautifulSoup(html, features="html.parser") |
|
|
|
# # get the src from <img> called data-user-avatar-img |
|
# img = soup.find('img', {'data-user-avatar-img': True}) |
|
# if img: |
|
# img = img['src'] |
|
# print("INFO: Downloading image from", img) |
|
# image = requests.get(img) |
|
# with open(f'{profile}.jpg', 'wb') as f: |
|
# f.write(image.content) |
|
# return f'{profile}.jpg' |
|
# else: |
|
# print('ERROR: Profile pic not found') |
|
# return |