Skip to content

Instantly share code, notes, and snippets.

@itrobotics
Last active November 19, 2023 14:35
Show Gist options
  • Save itrobotics/571c86d03d8849bcf6076d958178057a to your computer and use it in GitHub Desktop.
Save itrobotics/571c86d03d8849bcf6076d958178057a to your computer and use it in GitHub Desktop.
def ocr_by_goole_vision(input_file='captcha.png', draw_text=False):
    """Run Cloud Vision text detection on an image and return the last word found.

    The image file is read as bytes and sent through ``client.text_detection``.
    Every annotation after the first one is printed; the first annotation is
    skipped (presumably because it holds the whole detected text rather than a
    single word -- confirm against the Vision API documentation).

    ``client``, ``vision`` and ``draw_text_on_image`` are not defined in this
    block; they must be provided by the surrounding module.

    Args:
        input_file: Path of the image to analyse.
        draw_text: When true, ``draw_text_on_image`` is called for every
            printed annotation with its text and bounding polygon.

    Returns:
        The description of the last annotation that was processed, or an
        empty string when the response contains no annotation past the first.

    Raises:
        Exception: If the API response carries an error message.
    """
    # The builtin open() is the same function as io.open in Python 3, so no
    # extra module import is needed to read the image bytes.
    with open(input_file, 'rb') as image_file:
        content = image_file.read()

    response = client.text_detection(image=vision.Image(content=content))
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    print('chptcha decode:')
    # Start from an empty result so that an image with no detected words
    # returns '' instead of failing with UnboundLocalError at the return.
    ocr_text = ''
    for text in response.text_annotations[1:]:
        ocr_text = text.description
        print(ocr_text)
        if draw_text:
            draw_text_on_image(input_file, ocr_text, text.bounding_poly)
    return ocr_text
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 18 20:30:29 2023
@author: joseph@艾鍗學院
(1) Mimic the behavior of a browser.
(2) Use session = requests.Session() so that every request sends the cookie
containing the session identifier. The Flask-Session extension will then
associate the request with the correct session.
"""
import random
import string
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def generate_captcha_text(length=5):
    """Return a random string of ``length`` decimal digits.

    Characters are drawn independently (with replacement) from
    ``string.digits``.
    """
    # Population is digits only; an earlier variant also drew from
    # string.ascii_uppercase.
    picked = random.choices(string.digits, k=length)
    return ''.join(picked)
# Script: fetch the CAPTCHA page, download the CAPTCHA image to 'captcha.png',
# then POST up to five random digit guesses to the page's form. One
# requests.Session is used for every request so cookies set by the server are
# sent back on the follow-up requests.

# Seconds to wait for the server on each request; without a timeout a stalled
# server would block the script forever.
REQUEST_TIMEOUT = 10

server = 'http://localhost:5000/'  # end with '/'
url = server + "test2"

session = requests.Session()
response = session.get(url, timeout=REQUEST_TIMEOUT)

if response.status_code == 200:
    # Relative links found in the page are resolved against the URL the page
    # was actually served from, the way a browser does. Captured here because
    # `response` is reassigned by the POST requests below.
    page_url = response.url
    soup = BeautifulSoup(response.text, 'html.parser')

    # The CAPTCHA image is located through its alt text; adjust 'CAPTCHA' if
    # the page uses a different alt attribute.
    img_tag = soup.find('img', attrs={'alt': 'CAPTCHA'})
    if img_tag and img_tag.get('src'):
        captcha_image_url = img_tag['src']
        print("CAPTCHA Image URL:", captcha_image_url)
        # urljoin handles root-relative ('/x'), relative ('x') and absolute
        # URLs; plain concatenation with `server` produced a double slash or
        # an invalid URL for anything but a bare relative path.
        captcha_image_url = urljoin(page_url, captcha_image_url)

        captcha_image_response = session.get(captcha_image_url,
                                             timeout=REQUEST_TIMEOUT)
        if captcha_image_response.status_code == 200:
            with open('captcha.png', 'wb') as f:
                f.write(captcha_image_response.content)
            print("CAPTCHA Image downloaded successfully as 'captcha.png'")

            form = soup.find('form')
            if form is None:
                # soup.find returns None when the page has no <form>;
                # report it instead of failing on form['action'].
                print("Form not found on the page")
            else:
                # A form without an action attribute submits to the page's
                # own URL, which is what urljoin(page_url, '') yields.
                form_action = urljoin(page_url, form.get('action', ''))
                # Only the 'captcha' field is submitted; the form's other
                # <input> elements are not copied into the payload.
                form_fields = {}
                for _ in range(5):
                    form_fields['captcha'] = generate_captcha_text()
                    print(form_fields)
                    response = session.post(form_action, data=form_fields,
                                            timeout=REQUEST_TIMEOUT)
                    if response.status_code == 200:
                        print("Form submitted successfully")
                        print(response.text)
                    else:
                        print("Failed to submit the form")
                        # Stop guessing once the server rejects a request.
                        break
        else:
            print("Failed to download CAPTCHA image")
    else:
        print("CAPTCHA image not found on the page")
else:
    print("Failed to retrieve the webpage")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment