jspinella/Dockerfile

## app.py
# convert a table from an accdb file to CSV and upload to an AWS S3 bucket
import os, subprocess, urllib.request, requests, zipfile, boto3
from bs4 import BeautifulSoup
from lxml import etree

def handler(event, context):
    """Export one table of an Access database to CSV and upload it to S3.

    Lambda entry point. Changes into ``/tmp``, runs ``mdb-export`` for
    ``DATABASE``/``TABLE`` with its stdout redirected into a CSV file, then
    uploads that file to ``S3_BUCKET`` under the key ``S3_FILE_NAME``.

    Args:
        event: Lambda invocation payload. Not used here, but a real-world
            implementation probably will use it.
        context: Lambda runtime context. Not used here.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If ``mdb-export`` exits with a
            non-zero status, so a failed export is never uploaded.
    """
    # cd into Lambda's writable directory (allows up to 512MB of files)
    os.chdir('/tmp')

    # TODO: download the accdb file from S3 or the Internet

    # convert the accdb table to CSV
    DATABASE = "yourFile.accdb"  # yourFile.mdb should work as well
    TABLE = "tableInAccdbToConvert"  # e.g. "MyAccessTable"

    # based on code here: http://okfnlabs.org/handbook/data/patterns/liberating-access-databases/ which loops through all tables in accdb file
    # here I am just converting a single table to CSV as I only needed one table
    csv_path = TABLE.replace(' ', '_') + '.csv'
    print(f'Converting {TABLE} to CSV format...')
    with open(csv_path, 'wb') as f:
        # check=True turns a failing mdb-export into an exception instead of
        # silently leaving an empty or partial CSV behind for the upload step.
        subprocess.run(['mdb-export', DATABASE, TABLE], stdout=f, check=True)

    # upload CSV file to S3
    # NOTE(review): credentials are hard-coded placeholders; inside Lambda the
    # function's execution role is normally a safer source — confirm before
    # putting real keys in source control.
    s3 = boto3.client(
        's3',
        region_name='us-east-1',
        aws_access_key_id='yourAccessKeyId',
        aws_secret_access_key='yourAccessKeyValue',
    )

    S3_BUCKET = "yourS3BucketName"
    S3_FILE_NAME = "export.csv"  # override file name of CSV in S3 here

    print(f"Uploading {S3_FILE_NAME} to S3 bucket {S3_BUCKET}")
    # Upload the exact file written above; rebuilding the name from TABLE
    # would miss the space-to-underscore replacement and point at a file
    # that does not exist.
    response = s3.upload_file(csv_path, S3_BUCKET, S3_FILE_NAME)
    # boto3's upload_file returns None; failures surface as exceptions.
    print(f"S3 response: {response}")
    print("Done!")

## Dockerfile
FROM public.ecr.aws/lambda/python:3.8

# Install system dependencies.
# mdbtools depends on unixODBC-devel and gcc-c++.
# Enable the EPEL package repository first, because it hosts the mdbtools package.
# The yum caches are removed in the same layer so they do not end up in the image.
RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
    yum update -y && \
    yum install -y mdbtools gcc-c++ unixODBC-devel && \
    yum clean all && \
    rm -rf /var/cache/yum

# Install python dependencies before copying the application code, so that
# editing app.py does not invalidate the cached dependency layer.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# Add the python code to the Docker image.
COPY app.py ${LAMBDA_TASK_ROOT}

# Lambda handler: function `handler` in module `app`.
CMD [ "app.handler" ]

## requirements.txt
# Pinned Python packages installed into the Lambda image by
# `pip3 install -r requirements.txt`.
# app.py imports boto3, requests, bs4 (BeautifulSoup) and lxml directly.
# NOTE(review): numpy, pandas and pyodbc are not imported by the visible
# app.py — confirm they are needed before shipping them in the image.
beautifulsoup4==4.9.3
boto3==1.18.30
botocore==1.21.30
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.4
idna==3.2
jmespath==0.10.0
lxml==4.6.3
numpy==1.21.2
pandas==1.3.2
pyodbc==4.0.32
python-dateutil==2.8.2
pytz==2021.1
requests==2.26.0
s3transfer==0.5.0
six==1.16.0
soupsieve==2.2.1
urllib3==1.26.6
	# convert a table from an accdb file to CSV and upload to an AWS S3 bucket
	import os, subprocess, urllib.request, requests, zipfile, boto3
	from bs4 import BeautifulSoup
	from lxml import etree

	def handler(event, context):
	    """Export one table of an Access database to CSV and upload it to S3.

	    Lambda entry point. Changes into ``/tmp``, runs ``mdb-export`` for
	    ``DATABASE``/``TABLE`` with its stdout redirected into a CSV file, then
	    uploads that file to ``S3_BUCKET`` under the key ``S3_FILE_NAME``.

	    Args:
	        event: Lambda invocation payload. Not used here, but a real-world
	            implementation probably will use it.
	        context: Lambda runtime context. Not used here.

	    Returns:
	        None.

	    Raises:
	        subprocess.CalledProcessError: If ``mdb-export`` exits with a
	            non-zero status, so a failed export is never uploaded.
	    """
	    # cd into Lambda's writable directory (allows up to 512MB of files)
	    os.chdir('/tmp')

	    # TODO: download the accdb file from S3 or the Internet

	    # convert the accdb table to CSV
	    DATABASE = "yourFile.accdb"  # yourFile.mdb should work as well
	    TABLE = "tableInAccdbToConvert"  # e.g. "MyAccessTable"

	    # based on code here: http://okfnlabs.org/handbook/data/patterns/liberating-access-databases/ which loops through all tables in accdb file
	    # here I am just converting a single table to CSV as I only needed one table
	    csv_path = TABLE.replace(' ', '_') + '.csv'
	    print(f'Converting {TABLE} to CSV format...')
	    with open(csv_path, 'wb') as f:
	        # check=True turns a failing mdb-export into an exception instead of
	        # silently leaving an empty or partial CSV behind for the upload step.
	        subprocess.run(['mdb-export', DATABASE, TABLE], stdout=f, check=True)

	    # upload CSV file to S3
	    # NOTE(review): credentials are hard-coded placeholders; inside Lambda the
	    # function's execution role is normally a safer source — confirm before
	    # putting real keys in source control.
	    s3 = boto3.client(
	        's3',
	        region_name='us-east-1',
	        aws_access_key_id='yourAccessKeyId',
	        aws_secret_access_key='yourAccessKeyValue',
	    )

	    S3_BUCKET = "yourS3BucketName"
	    S3_FILE_NAME = "export.csv"  # override file name of CSV in S3 here

	    print(f"Uploading {S3_FILE_NAME} to S3 bucket {S3_BUCKET}")
	    # Upload the exact file written above; rebuilding the name from TABLE
	    # would miss the space-to-underscore replacement and point at a file
	    # that does not exist.
	    response = s3.upload_file(csv_path, S3_BUCKET, S3_FILE_NAME)
	    # boto3's upload_file returns None; failures surface as exceptions.
	    print(f"S3 response: {response}")
	    print("Done!")
	FROM public.ecr.aws/lambda/python:3.8

	# Install system dependencies.
	# mdbtools depends on unixODBC-devel and gcc-c++.
	# Enable the EPEL package repository first, because it hosts the mdbtools package.
	# The yum caches are removed in the same layer so they do not end up in the image.
	RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
	    yum update -y && \
	    yum install -y mdbtools gcc-c++ unixODBC-devel && \
	    yum clean all && \
	    rm -rf /var/cache/yum

	# Install python dependencies before copying the application code, so that
	# editing app.py does not invalidate the cached dependency layer.
	COPY requirements.txt .
	RUN pip3 install --no-cache-dir -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

	# Add the python code to the Docker image.
	COPY app.py ${LAMBDA_TASK_ROOT}

	# Lambda handler: function `handler` in module `app`.
	CMD [ "app.handler" ]
	# Pinned Python packages installed into the Lambda image by
	# `pip3 install -r requirements.txt`.
	# app.py imports boto3, requests, bs4 (BeautifulSoup) and lxml directly.
	# NOTE(review): numpy, pandas and pyodbc are not imported by the visible
	# app.py — confirm they are needed before shipping them in the image.
	beautifulsoup4==4.9.3
	boto3==1.18.30
	botocore==1.21.30
	bs4==0.0.1
	certifi==2021.5.30
	charset-normalizer==2.0.4
	idna==3.2
	jmespath==0.10.0
	lxml==4.6.3
	numpy==1.21.2
	pandas==1.3.2
	pyodbc==4.0.32
	python-dateutil==2.8.2
	pytz==2021.1
	requests==2.26.0
	s3transfer==0.5.0
	six==1.16.0
	soupsieve==2.2.1
	urllib3==1.26.6