Skip to content

Instantly share code, notes, and snippets.

@yassineAlouini
Last active July 17, 2017 13:13
Show Gist options
  • Save yassineAlouini/2225de648c8337815418e287e306dfe5 to your computer and use it in GitHub Desktop.
Save yassineAlouini/2225de648c8337815418e287e306dfe5 to your computer and use it in GitHub Desktop.
Download a tar.gz archive from S3
import json
import re
import tarfile

import pandas as pd
import s3fs
# Shared S3 filesystem handle used by the download helpers in this file.
s3 = s3fs.S3FileSystem()
# Name of the S3 bucket holding the archives (placeholder: fill in before use).
S3_BUCKET = ''
# Key template. It is formatted with two positional arguments: a category
# ('data' or 'models') and the archive file name -- e.g. '{}/{}'.
# While it is left empty, the formatted key is empty as well.
BASE_KEY = ''
def get_data_from_s3(file_name):
    """Download a gzipped tarball from S3 and load its CSV members.

    The archive is fetched from ``s3://<S3_BUCKET>/<key>``, where ``<key>``
    is ``BASE_KEY.format('data', file_name)``, and stored locally as
    ``/tmp/<file_name>``. Every member whose name contains ``.csv`` is parsed
    with pandas and gets a ``floor`` column holding the first (optionally
    negative) integer found in the member name, kept as a string.

    Args:
        file_name: Archive name; used to build the S3 key and as the local
            file name under ``/tmp``.

    Returns:
        A single DataFrame: the concatenation of the per-member frames, in
        the order returned by ``tar.getmembers()``.

    Raises:
        ValueError: If the archive contains no readable CSV member.
        IndexError: If a CSV member name contains no integer.
    """
    key = BASE_KEY.format('data', file_name)
    local_path = '/tmp/{}'.format(file_name)
    s3.get('s3://{}/{}'.format(S3_BUCKET, key), local_path)

    dfs = []
    # Read the tarball into Pandas
    with tarfile.open(local_path, "r:gz") as tar:
        for member in tar.getmembers():
            if '.csv' not in member.name:
                continue
            f = tar.extractfile(member)
            # extractfile() yields None for members with no file content
            # (e.g. a directory whose name happens to contain '.csv').
            if f is None:
                continue
            # NOTE(review): the regex scans the whole member path, so digits
            # in a directory component would win -- confirm naming scheme.
            floor = re.findall(r'(-?\d+)', member.name)[0]
            dfs.append(pd.read_csv(f).assign(floor=floor))

    if not dfs:
        # pd.concat([]) would raise ValueError anyway; keep the exception
        # type but say what actually went wrong.
        raise ValueError(
            'No CSV members found in archive {}'.format(local_path))
    return pd.concat(dfs)
def get_features_from_s3(file_name):
    """Download a gzipped tarball from S3 and load its JSON members.

    The archive is fetched from ``s3://<S3_BUCKET>/<key>``, where ``<key>``
    is ``BASE_KEY.format('models', file_name)``, and stored locally as
    ``/tmp/<file_name>``. Every member whose name contains ``.json`` is
    decoded with ``json.load``.

    Args:
        file_name: Archive name; used to build the S3 key and as the local
            file name under ``/tmp``.

    Returns:
        A list with one decoded JSON document per matching member, in the
        order returned by ``tar.getmembers()``. Empty if nothing matched.
    """
    key = BASE_KEY.format('models', file_name)
    local_path = '/tmp/{}'.format(file_name)
    s3.get('s3://{}/{}'.format(S3_BUCKET, key), local_path)

    features = []
    with tarfile.open(local_path, "r:gz") as tar:
        for member in tar.getmembers():
            if '.json' not in member.name:
                continue
            f = tar.extractfile(member)
            # extractfile() yields None for members with no file content
            # (e.g. a directory whose name happens to contain '.json').
            if f is None:
                continue
            features.append(json.load(f))
    return features
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment