# cyberdecker/simpletest.py

## simpletest.py
import os
import sys

# Root of the local Caffe checkout; its "python" sub-directory holds pycaffe.
caffe_root = '/home/user/libs/caffe'

# We need to define how many digits our CAPTCHAs can have at maximum.
# For simplicity we only have CAPTCHAs of fixed length 6 in this version!
maxNumberOfDigits = 6

# Trained weights that are loaded into the classifier.
model = 'data_iter_25000.caffemodel'

import numpy as np
# Show whole arrays in outputs. Current NumPy rejects NaN as a threshold;
# sys.maxsize is the documented way to disable summarization.
np.set_printoptions(threshold=sys.maxsize)
import matplotlib
# Force matplotlib to not use any Xwindows backend.
# This has to happen before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# caffe_root has no trailing slash, so join the parts instead of
# concatenating them (which produced ".../caffepython").
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
import time

from math import log
from sklearn import svm, datasets
try:
    # Location of train_test_split since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from random import shuffle

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# for copy
import shutil


# This function maps the ASCII value of a character to a class number:
# 0 -> 0, 1 -> 1, ... 9 -> 9, A -> 10, B -> 11, ... Z -> 35,
# a -> 37, b -> 38, ... z -> 62
# There is a small mistake: class 36 is never assigned, but it doesn't matter ;)
def convertCharacterToClass(ascii_value):
    """Translate a character's ASCII code into its class index.

    Codes up to 57 are shifted by 48 (digits '0'-'9' become 0-9), codes
    from 58 to 90 are shifted by 55 (letters 'A'-'Z' become 10-35) and all
    larger codes are shifted by 60 (letters 'a'-'z' become 37-62).
    Index 36 is therefore never produced for an alphanumeric character.

    Args:
        ascii_value: Integer character code, e.g. the result of ord().

    Returns:
        The integer class index for that code.
    """
    # Digit range (and anything below it).
    if ascii_value <= 57:
        return ascii_value - 48
    # Upper-case range.
    if ascii_value <= 90:
        return ascii_value - 55
    # Everything above 'Z' is treated as lower-case.
    return ascii_value - 60

# This function is the inverse function of convertCharacterToClass
def convertClassToCharacter(predictedClass):
    """Translate a class index back into the character it stands for.

    Indices below 10 are shifted by 48 (giving '0'-'9'), indices from 10
    to 36 are shifted by 55 (10-35 give 'A'-'Z'; 36 gives '[') and larger
    indices are shifted by 60 (37-62 give 'a'-'z').

    Args:
        predictedClass: Integer class index.

    Returns:
        A one-character string.
    """
    # Select the ASCII offset of the range the index falls into.
    if predictedClass > 36:
        asciiOffset = 60
    elif predictedClass >= 10:
        asciiOffset = 55
    else:
        asciiOffset = 48
    return chr(predictedClass + asciiOffset)

# Network definition that matches the trained weights in `model`.
network = "network_captchas_with_3_convolutional_layers.prototxt"

# Make classifier.
classifier = caffe.Classifier(network, model, mean=None)


start = time.time()

IMAGE_FILE = 'aaavbz.png'
#print(IMAGE_FILE)

# The expected solution is the image's file name without directory and
# extension (previously the builtin `file` was passed here by mistake).
correctString = os.path.splitext(os.path.basename(IMAGE_FILE))[0]
# convert the string into a list of chars
correctChars = list(correctString)

input_image = caffe.io.load_image(IMAGE_FILE, color=False)

# convert image to grayscale with 1 channel if it is saved with 3 channels
# We assume that all three channels are identical and thus just take the second channel and ignore the others
if input_image.shape[2] > 1:
    input_image = input_image[:, :, 1]
    # Restore the trailing channel axis; the image is expected to be 50x180.
    input_image = np.reshape(input_image, (50, 180, 1))

# print input_image
inputs = [input_image]
print(input_image.shape)
print(inputs)


# Classify.
prediction = classifier.predict(inputs, oversample=False)

predictedString = ""
numberOfDigits = maxNumberOfDigits
classesPerDigit = 63
numberOfCorrectChars = 0
# Sum of the per-character uncertainties; it has to exist before the loop
# below accumulates into it.
uncertainty = 0

for x in range(numberOfDigits):
    # The output vector holds one group of classesPerDigit scores per
    # character position; pick the group for position x.
    predictedChar = prediction[0][classesPerDigit * x:classesPerDigit * (x + 1)]
    # normalize to a sum of 1 (this also yields a copy, so zeroing an entry
    # below does not modify `prediction`)
    predictedChar = predictedChar / predictedChar.sum()

    # first guess
    predictedClass = predictedChar.argmax()
    probabilityFirst = predictedChar.max()
    predictedCharacter = convertClassToCharacter(predictedClass)
    predictedString += predictedCharacter

    # second guess: mask the winner and take the best remaining class
    predictedChar[predictedClass] = 0
    predictedClassSecond = predictedChar.argmax()
    probabilitySecond = predictedChar.max()
    predictedCharacterSecond = convertClassToCharacter(predictedClassSecond)

    # uncertainty per character: 0: absolutely certain, 1: absolutely uncertain
    # (ratio of second-best to best score); summed over all positions.
    uncertainty = uncertainty + probabilitySecond / probabilityFirst