Last active
January 2, 2016 17:09
-
-
Save vivahiraj/8334779 to your computer and use it in GitHub Desktop.
tesseract-ocr を使ったサンプルです。http://kingyo-bachi.blogspot.jp/2014/01/rubyocr.html にある画像を読み込ませてみることを前提にした内容にしています。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# Sample script for the tesseract-ocr gem.
#
# Reads the image whose path is given as the first command-line argument,
# with the engine language set to :jpn, and prints:
#   1. the text recognized in the whole image,
#   2. the text recognized in several fixed rectangular regions,
#   3. the text and bounding box of every recognized symbol, block,
#      paragraph, line and word.
#
# Usage: ruby <this script> IMAGE_FILE
require 'rubygems'
require 'tesseract-ocr'

# Stop early with a usage hint instead of handing nil to the OCR engine.
image_path = ARGV[0]
abort "usage: ruby #{$PROGRAM_NAME} IMAGE_FILE" if image_path.nil? || image_path.empty?

engine = Tesseract::Engine.new { |e| e.language = :jpn }

puts "==== 画像全体から文字を取得します"
puts engine.text_for(image_path)
puts ""

# Recognition can be restricted to a part of the image:
#   text_for(file_name, left_x, top_y, width, height)
#
# Each entry below is [heading, left, top, right, bottom]; width and height
# are derived from the edges when the region is passed to text_for.
regions = [
  ["画像のメイリオの部分のみ",             0,   0,   200, 100], # Meiryo text
  ["画像のゴシックの部分のみ",             370, 70,  540, 130], # Gothic text
  ["画像の明朝の部分のみ",                 100, 120, 230, 180], # Mincho text
  ["画像の赤いメイリオの部分のみ",         280, 180, 420, 250], # red Meiryo text
  ["画像の縦書きメイリオの部分のみ",       490, 160, 570, 290], # vertical Meiryo text
  ["画像の富士山の上のメイリオの部分のみ", 50,  270, 580, 460]  # Meiryo text over Mt. Fuji
]

regions.each do |heading, left, top, right, bottom|
  puts "==== #{heading}"
  puts engine.text_for(image_path, left, top, right - left, bottom - top)
  puts ""
end

# Prints one section: a heading, then one line per recognized element with
# its index, text and bounding box, then a blank separator line.
report_elements = lambda do |heading, elements|
  puts "==== #{heading}"
  elements.each_with_index do |element, index|
    box = element.bounding_box
    printf("%d | text=%s pos=(x:%d y:%d w:%d h:%d)\n",
           index, element.text, box.x, box.y, box.width, box.height)
  end
  puts ""
end

# Per-symbol (single character) results.
report_elements.call("認識した一文字ずつの情報", engine.symbols_for(image_path))
# Per-block results.
report_elements.call("認識した文字ブロックの情報", engine.blocks_for(image_path))
# Per-paragraph results (the original author notes that the difference from
# blocks is unclear).
report_elements.call("認識した文字段落(ブロックとの違いがよくわからない)の情報",
                     engine.paragraphs_for(image_path))
# Per-line results.
report_elements.call("認識した文字の行ごとの情報", engine.lines_for(image_path))
# Per-word results.
report_elements.call("認識した単語のごとの情報", engine.words_for(image_path))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment