Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A script to solve Maths CAPTCHAs
# coding=utf-8
import sys
import argparse
import urllib2
import re
import digify # pip install https://pypi.python.org/packages/source/D/Digify/Digify-0.2.tar.gz
from bs4 import BeautifulSoup # pip install https://pypi.python.org/packages/source/b/beautifulsoup4/beautifulsoup4-4.3.2.tar.gz
#
# A far from elegant script to show how to automatically solve Maths CAPTCHAs, as discussed in my blog post
# http://flyingtophat.co.uk/blog/2015/03/30/cracking-maths-captchas.html
#
# Module metadata: author, project homepage, script version and release date.
__author__ = 'Lucas'
__homepage__ = 'http://www.flyingtophat.co.uk/'
__version__ = '1.0'
__date__ = '2015/03/30'
# Command-line interface: the page to fetch and the class attribute of the
# element that wraps the CAPTCHA text.
parser = argparse.ArgumentParser()
parser.add_argument("url", help="The URL containing the maths CAPTCHA")
parser.add_argument("container", help="The class attribute of the CAPTCHA's container")
args = parser.parse_args()

# The request is sent with a desktop-browser User-Agent string
# (presumably so the site serves the same markup it would to a browser).
user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/11.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30"
request = urllib2.Request(args.url, headers={'User-Agent': user_agent})

# Always release the connection, even if reading the body fails.
response = urllib2.urlopen(request)
try:
    page = response.read()
finally:
    response.close()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed alongside Beautiful Soup.
soup = BeautifulSoup(page, "html.parser")
container = soup.find(attrs={'class': args.container})
if container is None:
    print("CAPTCHA container not found")
    sys.exit(1)

captcha = unicode(container.text.strip())
# Build one string: under Python 2, print('Extracted', captcha) prints the
# repr of a tuple. Encoding to UTF-8 avoids a UnicodeEncodeError when stdout
# has no encoding (e.g. when piped), since the text may contain "×" or "−".
print("Extracted: " + captcha.encode('utf-8'))
def plus_with_unknown_component(first_comp, second_comp, result):
    """Solve ``first_comp + second_comp = result`` for the missing value.

    The missing value is passed as ``None``. When ``result`` is missing the
    sum of the two components is returned; otherwise the known component
    (``first_comp`` if ``second_comp`` is ``None``, else ``second_comp``) is
    subtracted from ``result``.
    """
    if result is not None:
        known = second_comp if second_comp is not None else first_comp
        return result - known
    return first_comp + second_comp
def subtract_with_unknown_component(first_comp, second_comp, result):
    """Solve ``first_comp - second_comp = result`` for the missing value.

    The missing value is passed as ``None``:

    * ``result`` missing      -> ``first_comp - second_comp``
    * ``second_comp`` missing -> ``first_comp - result``
    * otherwise               -> ``result + second_comp`` (the first component)
    """
    if result is None:
        return first_comp - second_comp
    if second_comp is None:
        return first_comp - result
    return result + second_comp
def multiply_with_unknown_component(first_comp, second_comp, result):
    """Solve ``first_comp * second_comp = result`` for the missing value.

    The missing value is passed as ``None``. When ``result`` is missing the
    product of the two components is returned; otherwise ``result`` is
    divided by the known component (``first_comp`` if ``second_comp`` is
    ``None``, else ``second_comp``).

    Raises:
        ZeroDivisionError: if ``result`` is given and the known component
            is 0, in which case the equation has no unique solution.
    """
    if result is None:
        return first_comp * second_comp
    component = first_comp if second_comp is None else second_comp
    # Explicit floor division: identical to "/" on Python 2 ints, but still
    # yields an integer answer (not e.g. 4.0) where "/" is true division.
    return result // component
# Operator symbols are written as escapes so they cannot be silently lost
# when the file is copied or re-encoded.
MULTIPLY_SIGN = u'\u00d7'  # multiplication sign
MINUS_SIGN = u'\u2212'     # typographic minus sign (not the ASCII hyphen)


def _int_or_none(text):
    """Convert a matched digit string to int, keeping None for a missing group."""
    return int(text) if text is not None else None


# Turn spelled-out numbers into digits before matching.
calculation = digify.replace_spelled_numbers(captcha)

# Extract pattern matches: "<first> <operator> <second> = <result>", where
# any of the three numbers may be absent (that is the value to solve for).
pattern = re.compile(u'(?P<first_component>[0-9]+)?'
                     + u'\\s*(?P<operator>[+' + MULTIPLY_SIGN + MINUS_SIGN + u'])\\s*'
                     + u'(?P<second_component>[0-9]+)?'
                     + u'\\s*(=)\\s*'
                     + u'(?P<result>[0-9]+)?', re.UNICODE)
calculation_parts = pattern.search(calculation)
if calculation_parts is None:
    print("CAPTCHA's calculation could not be found in the container")
    sys.exit(1)

operator = calculation_parts.group('operator')
result = _int_or_none(calculation_parts.group('result'))
component_one = _int_or_none(calculation_parts.group('first_component'))
component_two = _int_or_none(calculation_parts.group('second_component'))

# The solvers can only recover a single missing value; with more than one
# they would fail with a TypeError on None arithmetic.
if (component_one, component_two, result).count(None) > 1:
    print("CAPTCHA's calculation has more than one unknown value")
    sys.exit(1)

# Determine the answer by dispatching on the matched operator symbol.
solvers = {
    u'+': plus_with_unknown_component,
    MULTIPLY_SIGN: multiply_with_unknown_component,
    MINUS_SIGN: subtract_with_unknown_component,
}
solver = solvers.get(operator)
if solver is None:
    print("Unknown operator")
    sys.exit(1)

try:
    answer = solver(component_one, component_two, result)
except ZeroDivisionError:
    # e.g. "0 × _ = 0": dividing by the known factor is impossible.
    print("CAPTCHA has no unique solution (the known factor is zero)")
    sys.exit(1)

print("Solution: " + str(answer))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment