Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Simple test for CAPTCHA recognition from tum-vision/captcha-recognition
# Standard library.
import os
import shutil  # for copy
import sys
import time
import warnings
from math import log
from random import shuffle

# Third-party.
import numpy as np
import matplotlib
# Force matplotlib to not use any Xwindows backend.
# This must run before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Root of the local Caffe checkout; its "python" sub-directory holds pycaffe.
caffe_root = '/home/user/libs/caffe'
# We need to define how many digits our CAPTCHAs can have at maximum.
# For simplicity we only have CAPTCHAs of fixed length 6 in this version!
maxNumberOfDigits = 6
model = 'data_iter_25000.caffemodel'

# Show whole arrays in outputs. sys.maxsize is the documented "never
# summarize" threshold; NaN is rejected by current NumPy releases.
np.set_printoptions(threshold=sys.maxsize)

# Join with os.path.join so the path is correct whether or not caffe_root
# ends with a slash (plain string concatenation produced ".../caffepython").
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe

from sklearn import svm, datasets
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix

warnings.filterwarnings("ignore", category=DeprecationWarning)
def convertCharacterToClass(ascii_value):
    """Map the ASCII code of an alphanumeric character to its class index.

    Mapping:
        '0'..'9' -> 0..9
        'A'..'Z' -> 10..35
        'a'..'z' -> 37..62

    There is a small mistake in this scheme: class 36 is never assigned.
    It does not matter in practice, and the offsets must stay as they are
    so that the indices keep matching the inverse function
    convertClassToCharacter.

    Args:
        ascii_value: integer ASCII code (e.g. ``ord('A')``).

    Returns:
        The integer class index. Inputs that are not ASCII letters or
        digits are not validated and simply fall into one of the three
        ranges below.
    """
    if ascii_value > 90:
        # Above 'Z' (90): a small letter.
        return ascii_value - 60
    if ascii_value > 57:
        # Above '9' (57): a big letter.
        return ascii_value - 55
    # Otherwise: a digit.
    return ascii_value - 48
def convertClassToCharacter(predictedClass):
    """Map a class index back to its character.

    This is the inverse of convertCharacterToClass:
        0..9   -> '0'..'9'
        10..35 -> 'A'..'Z'
        37..62 -> 'a'..'z'

    Class 36 has no corresponding character in the forward mapping; it is
    handled by the big-letter branch and therefore yields ``'['``.

    Args:
        predictedClass: integer class index.

    Returns:
        A one-character string.
    """
    if predictedClass < 10:
        # Digit.
        return chr(predictedClass + 48)
    if predictedClass <= 36:
        # Big letter (and the unused class 36).
        return chr(predictedClass + 55)
    # Small letter.
    return chr(predictedClass + 60)
network = "network_captchas_with_3_convolutional_layers.prototxt"

# Make classifier.
classifier = caffe.Classifier(network, model, mean=None)

start = time.time()

IMAGE_FILE = 'aaavbz.png'
#print(IMAGE_FILE)
# The expected CAPTCHA text is the image's file name without its extension.
# (The original passed the builtin `file` here instead of IMAGE_FILE.)
correctString = os.path.splitext(os.path.basename(IMAGE_FILE))[0]
# convert the string into a list of chars
correctChars = list(correctString)

input_image = caffe.io.load_image(IMAGE_FILE, color=False)
# convert image to grayscale with 1 channel if it is saved with 3 channels
# We assume that all three channels are identical and thus just take the
# second channel and ignore the others.
# The ndim guard avoids an IndexError should the loader return a 2-D array.
if input_image.ndim == 3 and input_image.shape[2] > 1:
    input_image = input_image[:, :, 1]
input_image = np.reshape(input_image, (50, 180, 1))

inputs = [input_image]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(input_image.shape)
print(inputs)

# Classify.
prediction = classifier.predict(inputs, oversample=False)

predictedString = ""
numberOfDigits = 6
classesPerDigit = 63
numberOfCorrectChars = 0
# Accumulated over all digits below; must exist before the first "+=".
uncertainty = 0.0

for x in range(numberOfDigits):
    # Scores belonging to digit x within the flat prediction vector.
    digitScores = prediction[0][classesPerDigit * x:classesPerDigit * (x + 1)]
    # normalize to a sum of 1
    # (the division creates a new array, so zeroing an entry further down
    # does not write into `prediction`)
    predictedChar = digitScores / sum(digitScores)

    # first guess
    predictedClass = predictedChar.argmax()
    probabilityFirst = predictedChar.max()
    predictedCharacter = convertClassToCharacter(predictedClass)
    predictedString += predictedCharacter

    # second guess: mask out the winner and take the best remaining class
    predictedChar[predictedClass] = 0
    predictedClassSecond = predictedChar.argmax()
    probabilitySecond = predictedChar.max()
    predictedCharacterSecond = convertClassToCharacter(predictedClassSecond)

    # uncertainty: 0: absolutely certain, 1: absolutely uncertain
    uncertainty = uncertainty + probabilitySecond / probabilityFirst
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.