Last active
August 29, 2015 14:15
-
-
Save chsh/85fd0a961a33a80a95c0 to your computer and use it in GitHub Desktop.
Extract text from image using Google Drive API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gem 'google-api-client'
# gem 'mime-types'
#
# prepare config/ocr.yml
#   default: &default
#     google:
#       auth_passphrase: <passphrase>
#       auth_email: <email>
#       permission: e.g. https://www.googleapis.com/auth/drive
#
# usage:
#   te = TextExtractor.new
#   id = te.insert_file <file-path>
#   content = te.get_content id
#   te.delete_file id
require 'google/api_client' | |
# Extracts text from an image by uploading it to Google Drive (API v2) with
# OCR conversion enabled and then downloading the plain-text export of the
# converted document.
class TextExtractor
  # Memoized, authorized Google API client.
  def client
    @client ||= build_client
  end

  # Memoized handle on the discovered Drive v2 API.
  def drive
    @drive ||= client.discovered_api('drive', 'v2')
  end

  # Lists the files whose parent is the Drive root, following pagination.
  #
  # On a non-200 response the API error message is printed and listing stops;
  # items collected from earlier pages are still returned.
  #
  # @return [Array] the items of every page that was fetched successfully
  def list_files
    files = []
    page_token = nil

    loop do
      parameters = { 'q' => "'root' in parents" }
      # Without forwarding the token every request would return the first page
      # again and the loop would never terminate.
      parameters['pageToken'] = page_token unless page_token.to_s.empty?

      api_result = client.execute(api_method: drive.files.list,
                                  parameters: parameters)

      if api_result.status == 200
        page = api_result.data
        files.concat(page.items)
        page_token = page.next_page_token
      else
        puts "An error occurred: #{api_result.data['error']['message']}"
        page_token = nil
      end

      break if page_token.to_s.empty?
    end

    files
  end

  # Uploads a local file into the Drive root, asking Drive to convert it and
  # run OCR on it.
  #
  # @param file_name [String] path of the local file to upload
  # @param ocr_language [String] value sent as the `ocrLanguage` parameter
  # @return the id of the created file on HTTP 200, otherwise the raw API
  #   result so the caller can inspect the failure
  def insert_file(file_name, ocr_language: 'ja')
    mime_type = mime_type_from_file_name(file_name)
    file = drive.files.insert.request_schema.new(title: File.basename(file_name),
                                                 mimeType: mime_type,
                                                 parents: [{ id: 'root' }])
    media = Google::APIClient::UploadIO.new(file_name, mime_type)
    result = client.execute(api_method: drive.files.insert,
                            body_object: file,
                            media: media,
                            parameters: {
                              uploadType: 'multipart',
                              convert: true, ocr: true, ocrLanguage: ocr_language,
                              alt: 'json'
                            })
    return result unless result.status == 200

    result.data.id
  end

  # Fetches the metadata of a file.
  #
  # @param file_id [String] id of the Drive file
  # @return the API result on HTTP 200, nil otherwise
  def get_file(file_id)
    result = client.execute(api_method: drive.files.get,
                            parameters: { fileId: file_id })
    return nil unless result.status == 200

    result
  end

  # Downloads the `text/plain` export of a file.
  #
  # @param file_id [String] id of the Drive file
  # @return [String, nil] the response body, or nil when the metadata lookup
  #   fails, the file has no `text/plain` export link, or the download does
  #   not answer with HTTP 200
  def get_content(file_id)
    meta = get_file(file_id)
    return nil unless meta

    # Not every file exposes export links; treat a missing link as "no content"
    # instead of failing with NoMethodError on nil.
    export_links = meta.data['exportLinks']
    url = export_links && export_links['text/plain']
    return nil unless url

    download = client.execute(uri: url)
    return nil unless download.status == 200

    download.body
  end

  # Deletes a file.
  #
  # @param file_id [String] id of the Drive file
  # @return the given file_id when the API answers with a 2xx status, otherwise
  #   the raw API result
  def delete_file(file_id)
    result = client.execute(api_method: drive.files.delete,
                            parameters: { fileId: file_id })
    # A successful delete carries an empty body (documented as 204 No Content),
    # so any 2xx status counts as success rather than only 200.
    return result unless (200..299).cover?(result.status)

    file_id
  end

  private

  # Memoized JWT asserter built from the configured e-mail, scope
  # (the `permission` config entry) and the PKCS#12 key.
  def asserter
    @asserter ||= Google::APIClient::JWTAsserter.new(google_config['auth_email'],
                                                     google_config['permission'],
                                                     pkcs12_key)
  end

  # Loads the key from the PKCS#12 file using the configured passphrase.
  def pkcs12_key
    Google::APIClient::KeyUtils.load_from_pkcs12(pkcs12_key_path, passphrase)
  end

  # NOTE: placeholder. As written this returns nil; it has to be filled in with
  # the location of the *.p12 key file before authorization can work.
  def pkcs12_key_path
    # any path to *.p12 file.
  end

  # Passphrase protecting the PKCS#12 key, taken from the `auth_passphrase`
  # config entry.
  def passphrase
    google_config['auth_passphrase']
  end

  # Creates a new API client and authorizes it through the JWT asserter.
  def build_client
    client = Google::APIClient.new(application_name: 'your-app-name',
                                   application_version: '0.0.1')
    client.authorization = asserter.authorize
    client
  end

  # Contents of config/ocr.yml as loaded by Rails, cached in a class variable
  # and therefore shared by all instances.
  def ocr_config
    @@ocr_config ||= Rails.application.config_for(:ocr)
  end

  # The `google` section of the OCR configuration (cached like ocr_config).
  def google_config
    @@google_config ||= ocr_config['google']
  end

  # First MIME type registered for the file name, as a String; an empty String
  # when no type is found (nil.to_s).
  def mime_type_from_file_name(file_name)
    MIME::Types.of(file_name).first.to_s
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment