Skip to content

Instantly share code, notes, and snippets.

View bleso-a's full-sized avatar

Blessing Adetoye Adesiji bleso-a

  • Ibadan, Nigeria
View GitHub Profile
@bleso-a
bleso-a / entities.py
Last active September 12, 2022 17:22
# Build a masked copy of page_string in which every detected entity span is
# replaced by the entity's type label.
# Each entry of piilist["Entities"] is read for its "BeginOffset", "EndOffset"
# and "Type" keys.
# NOTE(review): assumes the entities are ordered by offset and do not
# overlap -- confirm against the service response.
substrings = []
start = 0
for entity in piilist["Entities"]:
    # Unmasked text between the previous entity and this one.
    substrings.append(page_string[start:entity["BeginOffset"]])
    # The entity's type label stands in for the sensitive text.
    substrings.append(entity["Type"])
    start = entity["EndOffset"]

# Keep whatever follows the last entity; without this the tail of the
# document (or the whole document when no entity was found) is lost.
remaining_text = page_string[start:]
if remaining_text:
    substrings.append(remaining_text)

masked_text = " ".join(substrings)
print(masked_text)
# Create a Comprehend Medical client and run entity detection (v2 API) over
# the extracted page text, then show the raw response.
comprehendmedical_client = boto3.client("comprehendmedical")
entities = comprehendmedical_client.detect_entities_v2(
    Text=page_string,
)
print(entities)
# Load the saved text file into a list of whitespace-stripped lines.
# The encoding is given explicitly so that decoding does not depend on the
# platform default (the same file is written with encoding='utf-8').
with open(text_data, "r", encoding="utf-8") as fi:
    # Iterating the file object yields lines lazily; no intermediate list.
    raw_texts = [line.strip() for line in fi]
# Upload the local text file to the bucket under the "phi-masking/" key prefix.
s3 = boto3.resource("s3")
s3.Bucket(bucket).upload_file(
    Filename="health_notes.txt",
    Key="phi-masking/health_notes.txt",
)
@bleso-a
bleso-a / save_file.py
Last active September 12, 2022 17:20
# Write the text of every page in the parsed response to a UTF-8 text file,
# one output line per page.
text_data = 'health_notes.txt'
doc = Document(response)
with open(text_data, 'w', encoding='utf-8') as f:
    for page in doc.pages:
        # Every line is prefixed with a single space, so a page's text starts
        # with a leading space (same output as the incremental += build).
        page_string = "".join(" " + str(line.text) for line in page.lines)
        # write() is the right call for a single string; writelines() would
        # iterate the string and emit it one character at a time.
        f.write(page_string + "\n")
@bleso-a
bleso-a / parse.py
Last active September 12, 2022 17:18
from trp import Document
# Flatten every line of every page in the parsed response into one string.
# Each line's text is preceded by a single space, so the result begins with
# a space whenever the document has at least one line.
doc = Document(response)
page_string = "".join(
    " " + str(line.text)
    for page in doc.pages
    for line in page.lines
)
print(page_string)
# Pass the raw document bytes to the Textract client's text-detection call
# and show the raw response.
response = textract_client.detect_document_text(
    Document={'Bytes': bytes_arr},
)
print(response)
@bleso-a
bleso-a / app_image.py
Last active September 12, 2022 17:16
# Read the image file from disk into a bytearray.
documentName = "health_notes.png"
# The with-block closes the file on exit, so no explicit close() is needed.
with open(documentName, 'rb') as file:
    img_file = file.read()
bytes_arr = bytearray(img_file)
print('Image file is loaded', documentName)
# Create the Textract client for the configured region and endpoint URL.
textract_client = boto3.client(
    service_name='textract',
    region_name=region,
    endpoint_url=endpoint_url,
)
# Resolve the AWS context: the boto3 session's region, the execution role,
# and the SageMaker session's default bucket.
boto_session = boto3.Session()
region = boto_session.region_name
role = get_execution_role()
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

# Key prefix and service URLs derived from the region and bucket.
prefix = "phi-masking"
bucket_path = "https://s3-{}.amazonaws.com/{}".format(region, bucket)
endpoint_url = "https://textract.{}.amazonaws.com".format(region)
@bleso-a
bleso-a / app.py
Last active August 26, 2022 16:16
# Import libraries
import json
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.s3 import S3Uploader, S3Downloader