Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save djouallah/998571cf7560fb697ed174d1ef65b7fe to your computer and use it in GitHub Desktop.
Save djouallah/998571cf7560fb697ed174d1ef65b7fe to your computer and use it in GitHub Desktop.
Load the latest Iceberg metadata file from S3
import boto3
import pandas as pd
# Find the most recently modified ``.json`` object under an S3 prefix
# (the Iceberg table's metadata folder) and print its ``s3://`` URI.
s3_client = boto3.client('s3')
bucket = 'xxxxxxx'
prefix = 'zzzz/yyyyyy/metadata'

# Use a paginator because the listing may span several response pages.
paginator = s3_client.get_paginator('list_objects_v2')
response_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)

# Gather all matching rows first and build the DataFrame once; growing a
# DataFrame with pd.concat inside the loop re-copies it on every iteration.
rows = [
    {'file': object_data['Key'], 'date': object_data['LastModified']}
    for response in response_iterator
    # A page has no 'Contents' key when nothing matches the prefix.
    for object_data in response.get('Contents', [])
    if object_data['Key'].endswith('.json')
]
file_names = pd.DataFrame(rows, columns=['file', 'date'])

# Fail with an explicit message instead of an opaque IndexError from .iat[0].
if file_names.empty:
    raise FileNotFoundError(
        f"No .json objects found under s3://{bucket}/{prefix}"
    )

# Newest first, so the latest metadata file ends up in position 0.
file_names.sort_values(by=['date'], ascending=False, inplace=True)
latest_key = file_names['file'].iat[0]
Latest_metadata = 's3://' + bucket + '/' + latest_key
print(Latest_metadata)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment