Skip to content

Instantly share code, notes, and snippets.

@chsh
Last active August 29, 2015 14:15
Show Gist options
  • Save chsh/85fd0a961a33a80a95c0 to your computer and use it in GitHub Desktop.
Save chsh/85fd0a961a33a80a95c0 to your computer and use it in GitHub Desktop.
Extract text from image using Google Drive API.
# gem 'google-api-client'
# gem 'mime-types'
#
# prepare config/ocr.yml
# default: &default
#   google:
#     auth_passphrase: <passphrase>
#     auth_email: <email>
#     permission: e.g. https://www.googleapis.com/auth/drive
#
# usage:
# te = TextExtractor.new
# id = te.insert_file <file-path>
# content = te.get_content id
# te.delete_file id
require 'google/api_client'
# Extracts text from images by uploading them to Google Drive with OCR
# conversion enabled and then downloading the converted document's
# plain-text export.
#
# Uses the Drive v2 API through Google::APIClient and authorizes with a
# JWT asserter built from a PKCS#12 key and the values in the `google`
# section of the Rails `ocr` config.
class TextExtractor
  # HTTP statuses treated as a successful delete. 200 is kept for backward
  # compatibility; 204 is the empty-body success response.
  DELETE_SUCCESS_STATUSES = [200, 204].freeze

  # @return [Google::APIClient] the lazily built, authorized API client
  def client
    @client ||= build_client
  end

  # @return [Object] the discovered Drive v2 API description, memoized
  def drive
    @drive ||= client.discovered_api('drive', 'v2')
  end

  # Lists the files whose parent is the Drive root folder, following
  # pagination until the API stops returning a next-page token.
  #
  # On a non-200 response the API error message is printed and listing
  # stops; whatever was collected so far is returned.
  #
  # @return [Array] the file items gathered from every fetched page
  def list_files
    files = []
    page_token = nil

    loop do
      parameters = { 'q' => "'root' in parents" }
      # Without the token every request would return the first page again.
      parameters['pageToken'] = page_token unless page_token.to_s.empty?

      api_result = client.execute(api_method: drive.files.list,
                                  parameters: parameters)
      if api_result.status == 200
        page = api_result.data
        files.concat(page.items)
        page_token = page.next_page_token
      else
        # The error payload lives on the API response, not on the accumulator.
        puts "An error occurred: #{api_result.data['error']['message']}"
        page_token = nil
      end

      break if page_token.to_s.empty?
    end

    files
  end

  # Uploads a local file to the Drive root folder, requesting conversion
  # with OCR (ocrLanguage is fixed to 'ja').
  #
  # @param file_name [String] path of the file to upload
  # @return [Object] the new file's id when the response status is 200,
  #   otherwise the raw API result so the caller can inspect the failure
  def insert_file(file_name)
    mime_type = mime_type_from_file_name(file_name)
    file = drive.files.insert.request_schema.new(
      title: File.basename(file_name),
      mimeType: mime_type,
      parents: [{ id: 'root' }]
    )
    media = Google::APIClient::UploadIO.new(file_name, mime_type)
    result = client.execute(
      api_method: drive.files.insert,
      body_object: file,
      media: media,
      parameters: {
        uploadType: 'multipart',
        convert: true, ocr: true, ocrLanguage: 'ja',
        alt: 'json'
      }
    )
    return result unless result.status == 200

    result.data.id
  end

  # Fetches the metadata of one file.
  #
  # @param file_id [String] Drive file id
  # @return [Object, nil] the API result, or nil unless the status is 200
  def get_file(file_id)
    result = client.execute(api_method: drive.files.get,
                            parameters: { fileId: file_id })
    return nil unless result.status == 200

    result
  end

  # Downloads the plain-text export of a file.
  #
  # @param file_id [String] Drive file id
  # @return [String, nil] the exported text; nil when the metadata lookup
  #   fails, the file has no 'text/plain' export link, or the download
  #   does not answer with status 200
  def get_content(file_id)
    file = get_file(file_id)
    return nil unless file

    # exportLinks may be absent; treat that like any other failure here
    # instead of raising NoMethodError on nil.
    export_links = file.data['exportLinks']
    url = export_links && export_links['text/plain']
    return nil unless url

    download = client.execute(uri: url)
    return nil unless download.status == 200

    download.body
  end

  # Deletes a file.
  #
  # @param file_id [String] Drive file id
  # @return [Object] file_id when the delete succeeded (status 200 or 204),
  #   otherwise the raw API result
  def delete_file(file_id)
    result = client.execute(api_method: drive.files.delete,
                            parameters: { fileId: file_id })
    return result unless DELETE_SUCCESS_STATUSES.include?(result.status)

    file_id
  end

  private

  # JWT asserter built from the configured e-mail, permission scope and key.
  def asserter
    @asserter ||= Google::APIClient::JWTAsserter.new(google_config['auth_email'],
                                                     google_config['permission'],
                                                     pkcs12_key)
  end

  # Loads the private key from the PKCS#12 file using the configured passphrase.
  def pkcs12_key
    Google::APIClient::KeyUtils.load_from_pkcs12(pkcs12_key_path, passphrase)
  end

  # Placeholder: returns nil as written. Replace the body with the path to
  # the *.p12 key file.
  def pkcs12_key_path
    # any path to *.p12 file.
  end

  def passphrase
    google_config['auth_passphrase']
  end

  # Creates the API client and attaches the authorization obtained from
  # the asserter.
  def build_client
    client = Google::APIClient.new(application_name: 'your-app-name',
                                   application_version: '0.0.1')
    client.authorization = asserter.authorize
    client
  end

  # Rails `ocr` config, cached in a class variable so every instance
  # shares one copy.
  def ocr_config
    @@ocr_config ||= Rails.application.config_for(:ocr)
  end

  # The `google` section of the ocr config, cached class-wide.
  def google_config
    @@google_config ||= ocr_config['google']
  end

  # First MIME type registered for the file name, as a string (an empty
  # string when MIME::Types finds no match).
  def mime_type_from_file_name(file_name)
    MIME::Types.of(file_name).first.to_s
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment