Skip to content

Instantly share code, notes, and snippets.

@ilyasahsan123
Created January 14, 2019 15:01
Show Gist options
  • Save ilyasahsan123/d32bdb274aa63d49a36a667806d4983e to your computer and use it in GitHub Desktop.
Save ilyasahsan123/d32bdb274aa63d49a36a667806d4983e to your computer and use it in GitHub Desktop.
def _parse_transfer_details(text):
    """Extract transfer records from the OCR text of an m-Transfer receipt.

    Args:
        text: Full text returned by the OCR step; newlines are flattened to
            spaces before matching.

    Returns:
        A list with one dict per regex match, keyed by ``transfer_status``,
        ``transfer_date``, ``account_number``, ``name`` and ``nominal``.
        The list is empty when the text does not match the receipt layout.
    """
    import re

    # Capture groups, in order: status, date, account number, name, amount.
    # NOTE(review): the "." after "Rp" is unescaped and therefore matches any
    # character; left as-is so tolerant matching of OCR output is unchanged.
    regex = r"m-Transfer m-Transfer\s([a-zA-Z]*?)\s([\s\S]*?)Ke\s(\d*)\s([a-zA-Z][\s\S]*?)Rp.\s(\d{1,3}[\s\S]*?)\s"

    flattened = text.replace('\n', ' ')
    # re.MULTILINE was dropped: the pattern has no ^ or $ anchors, so the
    # flag had no effect.
    return [
        dict(
            transfer_status=match.group(1),
            transfer_date=match.group(2),
            account_number=match.group(3),
            name=match.group(4),
            # Remove thousands separators, then drop the last three
            # characters (presumably a ".00"-style decimal suffix — confirm
            # against real receipts).
            nominal=match.group(5).replace(',', '')[:-3],
        )
        for match in re.finditer(regex, flattened)
    ]


def detect_text(bucket, filename, dataset_name='DATASET_NAME', table_name='TABLE_NAME'):
    """OCR a receipt image stored in Cloud Storage and save it to BigQuery.

    Runs Cloud Vision text detection on ``gs://<bucket>/<filename>``, parses
    the transfer status, date, account number, name and nominal out of the
    detected text, and streams the resulting rows into a BigQuery table.

    Args:
        bucket: Name of the Cloud Storage bucket holding the image.
        filename: Object name of the image inside the bucket.
        dataset_name: BigQuery dataset to write to.
        table_name: BigQuery table to write to; it must already exist because
            its schema is fetched before inserting.

    Raises:
        RuntimeError: If Cloud Vision reports an error for the image, or if
            BigQuery reports errors for any inserted row.
    """
    # Use the Cloud Vision client to extract text from the stored image.
    from google.cloud import vision
    vision_client = vision.ImageAnnotatorClient()
    response = vision_client.text_detection({
        'source': {'image_uri': "gs://{}/{}".format(bucket, filename)}
    })
    if response.error.message:
        raise RuntimeError(
            "Cloud Vision text detection failed: {}".format(response.error.message)
        )

    # Full text of the extraction, parsed into zero or more transfer records.
    rows = _parse_transfer_details(response.full_text_annotation.text)
    if not rows:
        # The image does not look like a transfer receipt; nothing to store.
        return

    # Use the BigQuery client to store the extracted records.
    from google.cloud import bigquery
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_name)
    table_ref = dataset_ref.table(table_name)
    table = bigquery_client.get_table(table_ref)

    # insert_rows expects a sequence of rows, not a single mapping, and
    # returns a list of per-row error mappings (empty on success).
    errors = bigquery_client.insert_rows(table, rows)
    if errors:
        raise RuntimeError("BigQuery insert failed: {}".format(errors))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment