Created
July 27, 2018 12:04
-
-
Save ghandic/a451e8b99b54906146e43260fed8a30a to your computer and use it in GitHub Desktop.
Load pdf from S3 directly into memory as list of PIL images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This module requires: | |
- poppler-utils [apt-get update && apt-get install poppler-utils]
- pdf2image [pip install pdf2image] | |
- pillow [pip install pillow] | |
- boto3 [pip install boto3] | |
""" | |
import boto3 | |
from pdf2image import convert_from_bytes | |
class S3Base(object):
    """Base class that owns a boto3 S3 client for S3-backed helpers.

    Args:
        aws_access_key_id: AWS access key ID forwarded to ``boto3.client``.
        aws_secret_access_key: AWS secret access key forwarded to
            ``boto3.client``.
        region_name: AWS region name forwarded to ``boto3.client``.

    Every argument defaults to ``None``. boto3 treats ``None`` the same as
    an omitted argument and falls back to its standard resolution chain
    (environment variables, shared config/credentials files, instance or
    task role), so credentials no longer have to be hard-coded by callers.

    Attributes:
        s3: The client object returned by ``boto3.client('s3', ...)``.
    """

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 region_name=None):
        self.s3 = boto3.client(
            's3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )
class S3Pdfs(S3Base):
    """Load PDF objects from S3 directly into memory as PIL images.

    Usage:
        pdfs = S3Pdfs(aws_access_key_id='fjrn4uun-my-access-key-589gnmrn90',
                      aws_secret_access_key='4f4nvu5tvnd-my-secret-access-key-rjfjnubu34un4tu4',
                      region_name='eu-west-1')
        pdf = pdfs.from_s3('my-example-bucket-9933668', 'Folium.pdf')
        pdf[0]  # first image in the returned list
    """

    def from_s3(self, bucket, key, fmt='png'):
        """Download one PDF object and convert it to images in memory.

        Args:
            bucket: Name of the S3 bucket, passed as ``Bucket`` to
                ``get_object``.
            key: Key of the PDF object, passed as ``Key`` to ``get_object``.
            fmt: Image format handed to ``convert_from_bytes``; defaults to
                ``'png'``, which is what this method always used before the
                parameter existed.

        Returns:
            Whatever ``convert_from_bytes`` returns for the downloaded
            bytes (pdf2image documents this as a list of PIL images).

        Raises:
            Any exception raised by ``get_object``, by reading the body, or
            by ``convert_from_bytes`` propagates unchanged.
        """
        body = self.s3.get_object(Bucket=bucket, Key=key)['Body']
        try:
            # The whole object is pulled into memory; nothing touches disk.
            pdf_bytes = body.read()
        finally:
            # Close the response stream even if the read fails part-way.
            body.close()
        return convert_from_bytes(pdf_bytes, fmt=fmt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@ghandic How do I convert the PIL images back to a PDF and upload it to S3 again?