Skip to content

Instantly share code, notes, and snippets.

@brbrr
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brbrr/7a97e1b9b8394421e34f to your computer and use it in GitHub Desktop.
Save brbrr/7a97e1b9b8394421e34f to your computer and use it in GitHub Desktop.
OCR engine module
require 'rtesseract'
require 'RMagick'
require 'tesseract'
# OCR helpers for UI tests: take screenshots through the global `$driver`,
# pre-process them with RMagick and recognize text with Tesseract.
#
# The methods rely on two names that are not defined in this module and must
# be provided by the surrounding project: the global `$driver` (must respond
# to `screenshot(path)`) and a `debug(message)` logging method.
module OCRHelper
  # Expose every helper both as a mixin method (`include OCRHelper`) and as a
  # module-level method (`OCRHelper.text_position(...)`).
  extend self

  # Raised by #text_position when no recognized word contains the text.
  class TextNotFoundError < StandardError; end

  # Scale factor applied to a screenshot by #process_image before OCR.
  # Bounding boxes reported by the OCR engine are in the scaled image, so
  # #text_position divides by the same factor to get back to the coordinates
  # of the original screenshot.
  IMAGE_SCALE = 3

  # Checks whether a short-lived message (e.g. a toast) showed up on screen.
  # All screenshots are taken first, then each one is run through OCR.
  #
  # @param error_msg [String] text to look for in the recognized output
  # @param screens [Integer] number of screenshots to take and inspect
  # @param delay [Numeric, nil] seconds to sleep after each screenshot
  # @param pre_pause [Numeric, nil] seconds to sleep before the first one
  # @return [Boolean] true if any screenshot's recognized text includes error_msg
  def toast?(error_msg, screens: 5, delay: nil, pre_pause: nil)
    # Thread.new{take_multiple_screens(6, 0.3, 0.5)}
    take_multiple_screens(screens, delay, pre_pause)
    recognize_multiple_screens(screens, error_msg)
  end

  # Finds the first recognized word containing +text+ on the current screen
  # and returns the centre of its bounding box.
  #
  # @param text [#to_s] text to search for (matched per recognized word)
  # @return [Hash] `{x:, y:}` centre of the word, scaled back by IMAGE_SCALE
  # @raise [TextNotFoundError] if no recognized word contains the text
  def text_position(text)
    text = text.to_s unless text.is_a?(String)
    pic_location = ocr_screenshot_path("txt_#{text.gsub(' ', '_')}.png")
    $driver.screenshot(pic_location)

    # The value returned by `write` is what gets handed to the OCR engine.
    processed = process_image(pic_location).write(ocr_screenshot_path('changed.png'))
    engine = ocr_proc(processed)

    seen = String.new # every word inspected so far, for the debug log
    box = nil
    engine.each_word do |word|
      word_text = word.to_s
      seen << ' ' << word_text
      next unless word_text.include?(text)

      box = word.bounding_box
      break
    end
    debug seen

    if box.nil?
      raise TextNotFoundError,
            "text #{text.inspect} was not found on screen; recognized words:#{seen}"
    end

    {
      x: (box.x + box.width / 2) / IMAGE_SCALE,
      y: (box.y + box.height / 2) / IMAGE_SCALE
    }
  end

  # Takes several screenshots in a row in order to catch the error on the
  # screen. Files are written as gen/screenshots/rec_<index>.png under the
  # current working directory.
  #
  # @param circles [Integer] number of screenshots to take
  # @param delay [Numeric, nil] seconds to sleep after each screenshot
  # @param pre_pause [Numeric, nil] seconds to sleep before starting
  def take_multiple_screens(circles, delay, pre_pause)
    sleep(pre_pause) unless pre_pause.nil?
    circles.times do |index|
      debug "Taking screenshot # #{index + 1}"
      $driver.screenshot(ocr_screenshot_path("rec_#{index}.png"))
      sleep(delay) unless delay.nil?
    end
  end

  # Runs OCR over the screenshots written by #take_multiple_screens and stops
  # at the first one whose recognized text includes +text+.
  #
  # @param circles [Integer] number of rec_<index>.png files to inspect
  # @param text [String] text to look for
  # @return [Boolean] true if any screenshot contains the text
  def recognize_multiple_screens(circles, text)
    circles.times.any? do |index|
      pic_location = ocr_screenshot_path("rec_#{index}.png")
      debug "Checking: #{pic_location}"
      recognize_text_on_pic(pic_location).include?(text)
    end
  end

  # Recognizes the text on an image file. By default the RTesseract gem is
  # used here; with $DEBUG set, the tesseract-ocr result is logged as well.
  #
  # @param pic_location [String, nil] path of the image to read
  # @return [String] the recognized text
  def recognize_text_on_pic(pic_location = nil)
    img = process_image(pic_location)
    tess = RTesseract.new(img, lang: 'eng')
    debug ocr_proc(img).text if $DEBUG
    rec_text = tess.to_s # recognize
    # Log on a single line: real newlines become the two characters "\n".
    debug rec_text.gsub(/\n/, '\n')
    rec_text
  end

  # Reads an image and applies the pre-processing chain used before OCR:
  # contrast, normalize, negate, posterize to 3 levels, then scale up by
  # IMAGE_SCALE.
  #
  # @param pic_location [String] path of the image to read
  # @return [Magick::Image] the processed image
  def process_image(pic_location)
    img = Magick::Image.read(pic_location).first
    # img = img.quantize(256,Magick::GRAYColorspace)
    img.contrast.normalize.negate.posterize(3).adaptive_resize(IMAGE_SCALE)
  end

  # Builds an engine from the tesseract-ocr gem (could get better results
  # than RTesseract), configured for English and loaded with +img+.
  #
  # @param img [Object] image handed to the engine's `image=` setter
  # @return [Tesseract::Engine]
  def ocr_proc(img)
    Tesseract::Engine.new do |e|
      e.language = :eng
      # e.blacklist = '|'
      e.image = img
    end
  end

  private

  # Absolute path of +file_name+ inside gen/screenshots under the current
  # working directory (evaluated at call time).
  def ocr_screenshot_path(file_name)
    "#{Dir.pwd}/gen/screenshots/#{file_name}"
  end
end
# Page-level tap helpers, exposed as module methods.
module BasePage
  # Asks OCRHelper.text_position where +text+ is and taps at that position.
  #
  # @param text [Object] text passed through to OCRHelper.text_position
  # @return whatever #tap_on returns
  def self.tap_on_text(text)
    position = OCRHelper.text_position(text) # pos[:x], pos[:y]
    tap_on(position)
  end

  # Builds an Appium touch action that taps with the given options, releases
  # and performs it.
  #
  # @param hash [Hash] options handed to Appium::TouchAction#tap (x_cord, y_cord)
  # @return the result of `perform`
  def self.tap_on(hash)
    action = Appium::TouchAction.new
    action.tap(hash).release.perform # x: x_cord, y: y_cord
  end
end
# Example usage, executed when this file is loaded: BasePage.tap_on_text looks
# up the position of 'some text' via OCRHelper.text_position and taps there.
BasePage.tap_on_text('some text')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment