# Mlawrence95/read_csv_from_aws_s3_targz.python

## read_csv_from_aws_s3_targz.python
# checked against python 3.7.3, pandas 0.24.2, s3fs 0.4.2
import tarfile
import io
import s3fs

import pandas as pd

# Read one CSV member out of a gzipped tarball stored in S3 into a DataFrame.
tar_path = "s3://my-bucket/debug.tar.gz"  # path in s3
metadata_path = "debug/metadata.csv"  # path inside of the tar file

s3 = s3fs.S3FileSystem()

# The S3 object is opened as a binary file object and handed to tarfile via
# fileobj=, which decompresses it as gzip ('r:gz').
# NOTE(review): the original remark here read "this is in my experience, but
# it does work!" -- a word (presumably "slow") seems to be missing; confirm.
with s3.open(tar_path, 'rb') as debug_tar:
    with tarfile.open(mode='r:gz', fileobj=debug_tar) as tar:
        # extractfile() raises KeyError when the member does not exist, but
        # returns None when it exists and is not a regular file or link.
        member = tar.extractfile(metadata_path)
        if member is None:
            raise ValueError(
                f"{metadata_path!r} in {tar_path} is not a regular file"
            )
        with member:
            csv_contents = member.read()

# The member's bytes are fully in memory at this point, so parsing happens
# after the tar and S3 handles have been released.
df = pd.read_csv(io.BytesIO(csv_contents), encoding='utf8')
	# checked against python 3.7.3, pandas 0.24.2, s3fs 0.4.2
# NOTE(review): this section is a second copy of the script earlier in this
# file; its indentation had been flattened and is restored here.
import tarfile
import io
import s3fs

import pandas as pd

# Read one CSV member out of a gzipped tarball stored in S3 into a DataFrame.
tar_path = "s3://my-bucket/debug.tar.gz"  # path in s3
metadata_path = "debug/metadata.csv"  # path inside of the tar file

s3 = s3fs.S3FileSystem()

# The S3 object is opened as a binary file object and handed to tarfile via
# fileobj=, which decompresses it as gzip ('r:gz').
# NOTE(review): the original remark here read "this is in my experience, but
# it does work!" -- a word (presumably "slow") seems to be missing; confirm.
with s3.open(tar_path, 'rb') as debug_tar:
    with tarfile.open(mode='r:gz', fileobj=debug_tar) as tar:
        # extractfile() raises KeyError when the member does not exist, but
        # returns None when it exists and is not a regular file or link.
        member = tar.extractfile(metadata_path)
        if member is None:
            raise ValueError(
                f"{metadata_path!r} in {tar_path} is not a regular file"
            )
        with member:
            csv_contents = member.read()

# The member's bytes are fully in memory at this point, so parsing happens
# after the tar and S3 handles have been released.
df = pd.read_csv(io.BytesIO(csv_contents), encoding='utf8')