Skip to content

Instantly share code, notes, and snippets.

@jspinella
Last active September 2, 2021 18:21
Show Gist options
  • Save jspinella/cf653d0b124850fed51b6f528c77dd3d to your computer and use it in GitHub Desktop.
Save jspinella/cf653d0b124850fed51b6f528c77dd3d to your computer and use it in GitHub Desktop.
AWS Lambda container image to convert Access files to CSV files with Python 3
# convert a table from an accdb file to CSV and upload to an AWS S3 bucket
import os, subprocess, urllib.request, requests, zipfile, boto3
from bs4 import BeautifulSoup
from lxml import etree
def handler(event, context):  # we aren't using event or context here, but you probably will in your real-world implementation
    """Export one table of an Access database to CSV and upload it to S3.

    Runs the ``mdb-export`` command-line tool on ``DATABASE`` for ``TABLE``,
    writes its output to a CSV file in ``/tmp`` and uploads that file to
    ``S3_BUCKET`` under the key ``S3_FILE_NAME``.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context object (unused).

    Raises:
        subprocess.CalledProcessError: if ``mdb-export`` exits with a
            non-zero status, so a failed export is never uploaded.
    """
    # cd into Lambda's writable directory (allows up to 512MB of files)
    os.chdir('/tmp')

    # todo: download the accdb file from S3 or the Internet

    # convert the accdb table to CSV
    DATABASE = "yourFile.accdb"  # yourFile.mdb should work as well
    TABLE = "tableInAccdbToConvert"  # e.g. "MyAccessTable"

    # based on code here: http://okfnlabs.org/handbook/data/patterns/liberating-access-databases/ which loops through all tables in accdb file
    # here I am just converting a single table to CSV as I only needed one table
    filename = TABLE.replace(' ', '_') + '.csv'
    print(f'Converting {TABLE} to CSV format...')
    with open(filename, 'wb') as f:
        # check=True turns a failed export into an exception instead of
        # silently leaving an empty/partial CSV that would then be uploaded.
        subprocess.run(['mdb-export', DATABASE, TABLE], stdout=f, check=True)

    # upload CSV file to S3
    # NOTE(review): credentials are hard-coded here; consider dropping the two
    # key arguments and granting the function's execution role access to the
    # bucket instead, so no secret is baked into the image.
    s3 = boto3.client(
        's3',
        region_name='us-east-1',
        aws_access_key_id='yourAccessKeyId',
        aws_secret_access_key='yourAccessKeyValue',
    )
    S3_BUCKET = "yourS3BucketName"
    S3_FILE_NAME = "export.csv"  # override file name of CSV in S3 here

    print(f"Uploading {S3_FILE_NAME} to S3 bucket {S3_BUCKET}")
    # Upload the file that was actually written above: `filename` has spaces
    # replaced by underscores, so f"{TABLE}.csv" would point at a file that
    # does not exist whenever the table name contains a space.
    # upload_file returns None and raises on failure, so the value printed
    # below is always None; the line is kept only for log compatibility.
    response = s3.upload_file(filename, S3_BUCKET, S3_FILE_NAME)
    print(f"S3 response: {response}")
    print("Done!")
FROM public.ecr.aws/lambda/python:3.8

# install system dependencies
# mdbtools depends on unixODBC-devel and gcc-c++
# we start by enabling the EPEL package repository, which hosts the mdbtools package
# the yum cache is removed in the same layer so it does not end up in the image
RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
    yum update -y && \
    yum install -y mdbtools gcc-c++ unixODBC-devel && \
    yum clean all && \
    rm -rf /var/cache/yum

# install python dependencies
# done before copying app.py so this layer stays cached when only the code changes
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# add the python code to the Docker image
COPY app.py ${LAMBDA_TASK_ROOT}

# trigger Lambda handler
CMD [ "app.handler" ]
beautifulsoup4==4.9.3
boto3==1.18.30
botocore==1.21.30
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.4
idna==3.2
jmespath==0.10.0
lxml==4.6.3
numpy==1.21.2
pandas==1.3.2
pyodbc==4.0.32
python-dateutil==2.8.2
pytz==2021.1
requests==2.26.0
s3transfer==0.5.0
six==1.16.0
soupsieve==2.2.1
urllib3==1.26.6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment