Skip to content

Instantly share code, notes, and snippets.

@ebongzzang
Last active November 5, 2019 09:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ebongzzang/17ffb5c4fbe7e0bdf8e1c5a8e3d4b490 to your computer and use it in GitHub Desktop.
Save ebongzzang/17ffb5c4fbe7e0bdf8e1c5a8e3d4b490 to your computer and use it in GitHub Desktop.
get_object_list_from_bucket using boto3
import json
import re
from io import BytesIO, StringIO
from typing import List
import requests
import bs4
import boto3
# Module-level S3 client used by the list_objects_v2 calls in this file.
s3 = boto3.client('s3')
# Key passed to list_objects_v2 as StartAfter.
# NOTE(review): despite the name this is used as a start-after marker, not as
# a Prefix filter -- confirm that is the intent.
prefix = '/asdf/123'

# Matches keys that start with exactly three word-character path segments
# followed by "data.json" (e.g. "a/b/c/data.json"). Written as a raw string so
# "\w" is not an invalid string escape, and with the dot escaped so it only
# matches a literal "." rather than any character.
REG = re.compile(r'^(\w+/){3}data\.json')

# Module-level accumulator that get_datajson_files appends matching keys to.
res_lis = []
def get_datajson_files(response) -> List[str]:
    """Collect the keys matching ``REG`` from a listing and all following pages.

    Starting from ``response`` (a ``list_objects_v2`` result), every page is
    scanned -- including the final, non-truncated one -- and each key that
    matches ``REG`` is recorded. Further pages are fetched with the
    ``NextContinuationToken`` of the current page while ``IsTruncated`` is set.

    Args:
        response: A ``list_objects_v2`` response dict. ``Contents`` may be
            absent (empty listing); ``Name`` (the bucket name), when present,
            selects the bucket used for the follow-up requests.

    Returns:
        The matching keys found by this call, in listing order. The same keys
        are also appended to the module-level ``res_lis``.
    """
    matched: List[str] = []
    # Iterate instead of recursing so that a bucket with many pages cannot
    # exhaust the interpreter's recursion limit.
    while True:
        # "Contents" is omitted by S3 when a page holds no keys.
        page_keys = [
            obj['Key']
            for obj in response.get('Contents', [])
            if REG.search(obj['Key'])
        ]
        print(page_keys)
        if page_keys:
            res_lis.extend(page_keys)
            matched.extend(page_keys)

        # Check truncation only after the page was scanned; otherwise the last
        # (or only) page would be dropped.
        if not response.get('IsTruncated'):
            return matched

        response = s3.list_objects_v2(
            # A continuation token is only valid for the bucket that issued
            # it, so keep listing the bucket the response came from and fall
            # back to the previous hard-coded name when it is not reported.
            Bucket=response.get('Name', 'bucket.asdf.zxcv'),
            StartAfter=prefix,
            ContinuationToken=response['NextContinuationToken'],
        )
# Request the first listing page of the bucket, starting after the key `prefix`.
# NOTE(review): this runs at import time and performs a network call.
objs = s3.list_objects_v2(Bucket='cdn.zipview.kr', StartAfter=prefix)
# Disabled invocation: would process the listing above with get_datajson_files.
# get_datajson_files(objs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment