Created
May 15, 2012 02:53
-
-
Save christianroman/2698751 to your computer and use it in GitHub Desktop.
Captcha OCR + Tesseract Complex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Cleans up a CAPTCHA image and hands the result to the Tesseract command-line tool.
 *
 * <p>The pipeline run by {@link #main(String...)} is: convert to grayscale, binarize against
 * 80% of the mean gray level, paint over very short white runs (first along rows, then along
 * columns), paint over small or one-pixel-thin white regions (two passes), turn dark pixels
 * with few dark neighbours white, write the image to {@link #OUTPUT}, run Tesseract on it and
 * print the first line of the text file Tesseract produced.
 */
public class OCR {

    /** Image that is read and cleaned up. */
    private static final String INPUT = "C:/captcha/ex.png";

    /** Where the cleaned-up image is written before Tesseract is run on it. */
    private static final String OUTPUT = "C:/captcha/captcha-out.png";

    /** Tesseract executable. The path contains a space, so it must be passed as one argument. */
    private static final String TESSERACT_BIN = "C:/Program Files/Tesseract-OCR/tesseract.exe";

    /** Output base handed to Tesseract; the result is read back from this path plus ".txt". */
    private static final String TESSERACT_OUTPUT = "C:/captcha/out.txt";

    /** RGB value (alpha bits cleared) of a white pixel; also used as a mask for the RGB bits. */
    private static final int WHITE = 0x00FFFFFF;

    /** RGB value (alpha bits cleared) of a black pixel. */
    private static final int BLACK = 0x00000000;

    /**
     * Runs the whole clean-up and recognition pipeline on {@link #INPUT}.
     *
     * @param args ignored
     * @throws Exception if the image cannot be read or written, or Tesseract fails
     */
    public static void main(String... args) throws Exception {
        // ImageIO.read(File) manages its own stream, so nothing is left open here.
        BufferedImage raw = ImageIO.read(new File(INPUT));
        if (raw == null) {
            // ImageIO returns null (instead of throwing) when no reader understands the format.
            throw new IllegalStateException("No image reader could decode " + INPUT);
        }
        BufferedImage image = createGrayscalePic(raw);

        binarize(image);
        fillShortRowRuns(image, 3);
        fillShortColumnRuns(image, 2);
        // The region filter edits the image in place, so a second pass can catch regions
        // that only became small after the first pass.
        removeSmallWhiteRegions(image);
        removeSmallWhiteRegions(image);
        whitenIsolatedDarkPixels(image);

        ImageIO.write(image, "png", new File(OUTPUT));
        System.out.println("CAPTCHA: " + runTesseract());
    }

    /**
     * Replaces every pixel by pure black or pure white. A pixel becomes black when its gray
     * level (low byte of the RGB value) is at most 80% of the integer mean gray level of the
     * whole image, and white otherwise.
     */
    private static void binarize(BufferedImage image) {
        int width = image.getWidth();
        int height = image.getHeight();

        long sum = 0; // long: 255 * width * height can exceed the int range on large images
        for (int row = 0; row < height; row++) {
            for (int column = 0; column < width; column++) {
                sum += image.getRGB(column, row) & 0x000000FF;
            }
        }
        int average = (int) (sum / ((long) width * height));
        double threshold = average * .80;

        for (int row = 0; row < height; row++) {
            for (int column = 0; column < width; column++) {
                int gray = image.getRGB(column, row) & 0x000000FF;
                image.setRGB(column, row, gray <= threshold ? BLACK : WHITE);
            }
        }
    }

    /**
     * Paints black every horizontal white run shorter than {@code minRun} pixels that is
     * followed by a non-white pixel in the same row. Runs reaching the right edge are kept.
     */
    private static void fillShortRowRuns(BufferedImage image, int minRun) {
        for (int row = 0; row < image.getHeight(); row++) {
            int run = 0; // restarted per row so a run never continues from the previous row
            for (int column = 0; column < image.getWidth(); column++) {
                if (isWhite(image, column, row)) {
                    run++;
                    continue;
                }
                if (run < minRun) {
                    for (int col = column - run; col < column; col++) {
                        image.setRGB(col, row, BLACK);
                    }
                }
                run = 0;
            }
        }
    }

    /**
     * Paints black every vertical white run shorter than {@code minRun} pixels that is
     * followed by a non-white pixel in the same column. Runs reaching the bottom edge are kept.
     */
    private static void fillShortColumnRuns(BufferedImage image, int minRun) {
        for (int column = 0; column < image.getWidth(); column++) {
            int run = 0; // restarted per column so a run never continues from the previous one
            for (int row = 0; row < image.getHeight(); row++) {
                if (isWhite(image, column, row)) {
                    run++;
                    continue;
                }
                if (run < minRun) {
                    for (int r = row - run; r < row; r++) {
                        image.setRGB(column, r, BLACK);
                    }
                }
                run = 0;
            }
        }
    }

    /**
     * Paints black every white pixel whose horizontal and vertical white extents multiply to
     * at most 10, or whose extent in either direction is a single pixel. The image is edited
     * in place while it is scanned row by row.
     */
    private static void removeSmallWhiteRegions(BufferedImage image) {
        for (int row = 0; row < image.getHeight(); row++) {
            for (int column = 0; column < image.getWidth(); column++) {
                if (!isWhite(image, column, row)) {
                    continue;
                }
                int extentY = countVerticalWhite(image, column, row);
                int extentX = countHorizontalWhite(image, column, row);
                if (extentX * extentY <= 10 || extentX == 1 || extentY == 1) {
                    image.setRGB(column, row, BLACK);
                }
            }
        }
    }

    /**
     * Turns white every non-white pixel that has at most three non-white neighbours among the
     * eight surrounding pixels. The image is edited in place while it is scanned.
     */
    private static void whitenIsolatedDarkPixels(BufferedImage image) {
        for (int row = 0; row < image.getHeight(); row++) {
            for (int column = 0; column < image.getWidth(); column++) {
                if (!isWhite(image, column, row) && countBlackNeighbors(image, column, row) <= 3) {
                    image.setRGB(column, row, WHITE);
                }
            }
        }
    }

    /**
     * Runs Tesseract on {@link #OUTPUT} and returns the first line of the text file it wrote.
     *
     * @return the first line of {@code TESSERACT_OUTPUT + ".txt"}, or {@code null} if that
     *         file is empty
     * @throws java.io.IOException if the process cannot be started or the result cannot be read
     * @throws InterruptedException if interrupted while waiting for Tesseract
     * @throws IllegalStateException if Tesseract exits with a non-zero status
     */
    private static String runTesseract() throws java.io.IOException, InterruptedException {
        // An argument list keeps the space in TESSERACT_BIN intact; Runtime.exec(String)
        // would split the command line on whitespace.
        ProcessBuilder builder =
                new ProcessBuilder(TESSERACT_BIN, OUTPUT, TESSERACT_OUTPUT, "nobatch", "letters");
        builder.redirectErrorStream(true);
        Process tesseractProc = builder.start();

        // Drain the merged stdout/stderr so the child cannot block on a full pipe; the text
        // is kept only to make a failure message useful.
        StringBuilder console = new StringBuilder();
        BufferedReader processOutput =
                new BufferedReader(new InputStreamReader(tesseractProc.getInputStream()));
        try {
            String line;
            while ((line = processOutput.readLine()) != null) {
                console.append(line).append('\n');
            }
        } finally {
            processOutput.close();
        }

        int exitCode = tesseractProc.waitFor();
        if (exitCode != 0) {
            throw new IllegalStateException(
                    "Tesseract exited with code " + exitCode + ": " + console.toString().trim());
        }

        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(TESSERACT_OUTPUT + ".txt")));
        try {
            return reader.readLine();
        } finally {
            reader.close();
        }
    }

    /** Returns whether all RGB bits of the pixel at ({@code x}, {@code y}) are set. */
    private static boolean isWhite(BufferedImage image, int x, int y) {
        return (image.getRGB(x, y) & WHITE) == WHITE;
    }

    /**
     * Returns the length of the unbroken vertical white run through ({@code x}, {@code y}):
     * the white pixels directly above, plus those directly below, plus one for the pixel
     * itself (which is not checked).
     */
    private static int countVerticalWhite(BufferedImage image, int x, int y) {
        return (countAboveWhite(image, x, y) + countBelowWhite(image, x, y)) + 1;
    }

    /**
     * Counts consecutive white pixels going up from ({@code x}, {@code y - 1}) until a
     * non-white pixel or the top edge is reached.
     */
    private static int countAboveWhite(BufferedImage image, int x, int y) {
        int aboveWhite = 0;
        for (int row = y - 1; row >= 0 && isWhite(image, x, row); row--) {
            aboveWhite++;
        }
        return aboveWhite;
    }

    /**
     * Counts consecutive white pixels going down from ({@code x}, {@code y + 1}) until a
     * non-white pixel or the bottom edge is reached.
     */
    private static int countBelowWhite(BufferedImage image, int x, int y) {
        int belowWhite = 0;
        for (int row = y + 1; row < image.getHeight() && isWhite(image, x, row); row++) {
            belowWhite++;
        }
        return belowWhite;
    }

    /**
     * Returns the length of the unbroken horizontal white run through ({@code x}, {@code y}):
     * the white pixels directly to the left, plus those directly to the right, plus one for
     * the pixel itself (which is not checked).
     */
    private static int countHorizontalWhite(BufferedImage image, int x, int y) {
        return (countLeftWhite(image, x, y) + countRightWhite(image, x, y)) + 1;
    }

    /**
     * Counts consecutive white pixels going left from ({@code x - 1}, {@code y}) until a
     * non-white pixel or the left edge is reached.
     */
    private static int countLeftWhite(BufferedImage image, int x, int y) {
        int leftWhite = 0;
        for (int column = x - 1; column >= 0 && isWhite(image, column, y); column--) {
            leftWhite++;
        }
        return leftWhite;
    }

    /**
     * Counts consecutive white pixels going right from ({@code x + 1}, {@code y}) until a
     * non-white pixel or the right edge is reached.
     */
    private static int countRightWhite(BufferedImage image, int x, int y) {
        int rightWhite = 0;
        for (int column = x + 1; column < image.getWidth() && isWhite(image, column, y); column++) {
            rightWhite++;
        }
        return rightWhite;
    }

    /**
     * Counts how many of the eight pixels surrounding ({@code x}, {@code y}) are not white.
     * Positions outside the image count as white.
     */
    private static int countBlackNeighbors(BufferedImage image, int x, int y) {
        int numBlacks = 0;
        for (int dx = -1; dx <= 1; dx++) {
            for (int dy = -1; dy <= 1; dy++) {
                if (dx == 0 && dy == 0) {
                    continue; // the pixel itself is not a neighbour
                }
                if (pixelColor(image, x + dx, y + dy) != WHITE) {
                    numBlacks++;
                }
            }
        }
        return numBlacks;
    }

    /**
     * Returns the RGB bits of the pixel at ({@code x}, {@code y}), or {@link #WHITE} when the
     * position lies outside the image.
     */
    private static int pixelColor(BufferedImage image, int x, int y) {
        if (x >= image.getWidth() || x < 0 || y < 0 || y >= image.getHeight()) {
            return WHITE;
        }
        return image.getRGB(x, y) & WHITE;
    }

    /**
     * Returns a new {@code TYPE_BYTE_GRAY} image of the same size with {@code raw} drawn
     * onto it.
     */
    private static BufferedImage createGrayscalePic(BufferedImage raw) {
        BufferedImage temp =
                new BufferedImage(raw.getWidth(), raw.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
        Graphics g = temp.getGraphics();
        g.drawImage(raw, 0, 0, null);
        g.dispose();
        return temp;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment