Skip to content

Instantly share code, notes, and snippets.

@Parassharmaa
Created December 31, 2023 16:46
Show Gist options
  • Save Parassharmaa/c3ffde58d07bedf0038009040976d934 to your computer and use it in GitHub Desktop.
Save Parassharmaa/c3ffde58d07bedf0038009040976d934 to your computer and use it in GitHub Desktop.
Iterate Public S3 Bucket and Save Keys
import requests
import xml.etree.ElementTree as ET
def get_bucket_contents(url, marker=None, timeout=30):
    """Request one page of the bucket listing and return the response body.

    Args:
        url: URL the GET request is sent to (the bucket endpoint).
        marker: Value sent as the ``marker`` query parameter. When falsy,
            the parameter is left out of the request entirely.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {"marker": marker} if marker else {}
    # requests applies no timeout unless one is passed, so a stalled
    # connection would otherwise block the script forever.
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on an error status instead of handing a non-listing body
    # to the XML parser, where it would look like an empty page.
    response.raise_for_status()
    return response.text
def parse_xml(xml_data):
    """Extract the object keys and the next pagination marker from a listing.

    Args:
        xml_data: XML text of a bucket listing in the
            ``http://s3.amazonaws.com/doc/2006-03-01/`` namespace.

    Returns:
        A ``(keys, next_marker)`` tuple. ``keys`` holds the text of every
        ``Key`` element in document order. ``next_marker`` is the
        ``NextMarker`` text when that element is present and non-empty;
        otherwise the last key when ``IsTruncated`` is "true"; otherwise
        ``None`` (meaning there is nothing more to fetch).

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_data`` is not
            well-formed XML.
    """
    namespace = {"ns": "http://s3.amazonaws.com/doc/2006-03-01/"}
    root = ET.fromstring(xml_data)
    keys = [child.text for child in root.findall(".//ns:Key", namespace)]

    next_marker = root.findtext(".//ns:NextMarker", namespaces=namespace)
    if not next_marker:
        # The service only includes NextMarker when the request used a
        # delimiter. For a plain listing, a truncated page must be resumed
        # from the last key it returned, or pagination stops after page one.
        truncated_text = root.findtext(
            ".//ns:IsTruncated", default="", namespaces=namespace
        )
        is_truncated = truncated_text.strip().lower() == "true"
        next_marker = keys[-1] if is_truncated and keys else None
    return keys, next_marker
def save_keys_to_file(keys, filename="bucket_keys.txt", base_url=None):
    """Append one URL per key to ``filename``.

    Each line written is the base URL, a single "/", and the
    percent-encoded key, followed by a newline.

    Args:
        keys: Iterable of key strings.
        filename: Path of the file to append to; created if missing.
        base_url: URL prefix for every line. When ``None``, the
            module-level ``bucket_url`` is used.
    """
    # Same function that ``requests.utils.quote`` re-exports; importing it
    # from the standard library avoids relying on that re-export.
    from urllib.parse import quote

    if base_url is None:
        base_url = bucket_url
    # Drop any trailing slash so the join never produces "//" before the key.
    prefix = base_url.rstrip("/") + "/"
    with open(filename, "a", encoding="utf-8") as file:
        # Percent-encode each key so special characters survive in the URL.
        file.writelines(prefix + quote(key) + "\n" for key in keys)
# Base URL of the bucket to enumerate; fill this in before running.
bucket_url = ""

# Fetch the listing one page at a time, feeding the marker from each page
# into the next request, until a page comes back without a marker.
marker = None
more_pages = True
while more_pages:
    xml_data = get_bucket_contents(bucket_url, marker)
    keys, marker = parse_xml(xml_data)
    print("Saving {} keys to file.".format(len(keys)))
    save_keys_to_file(keys)
    more_pages = bool(marker)

print("All keys have been saved to the file.")
import requests
import xml.etree.ElementTree as ET
def get_bucket_contents(url, marker=None, timeout=30):
    """Request one page of the bucket listing and return the response body.

    Args:
        url: URL the GET request is sent to (the bucket endpoint).
        marker: Value sent as the ``marker`` query parameter. When falsy,
            the parameter is left out of the request entirely.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {"marker": marker} if marker else {}
    # requests applies no timeout unless one is passed, so a stalled
    # connection would otherwise block the script forever.
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on an error status instead of handing a non-listing body
    # to the XML parser.
    response.raise_for_status()
    return response.text
def parse_xml(xml_data):
    """Extract keys, the next marker and the truncation flag from a listing.

    Args:
        xml_data: XML text of a bucket listing in the
            ``http://s3.amazonaws.com/doc/2006-03-01/`` namespace.

    Returns:
        A ``(keys, next_marker, is_truncated)`` tuple. ``keys`` holds the
        text of every ``Key`` element in document order. ``next_marker`` is
        the ``NextMarker`` text when that element is present and non-empty,
        otherwise the last key, or ``None`` when the page has no keys.
        ``is_truncated`` is the ``IsTruncated`` text, stripped and
        lower-cased, or ``"false"`` when the element is missing or empty.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_data`` is not
            well-formed XML.
    """
    namespace = {"ns": "http://s3.amazonaws.com/doc/2006-03-01/"}
    root = ET.fromstring(xml_data)
    keys = [child.text for child in root.findall(".//ns:Key", namespace)]

    # Prefer the marker the service supplies; fall back to the last key.
    # An empty page has no last key, so report None rather than raising
    # IndexError.
    next_marker = root.findtext(".//ns:NextMarker", namespaces=namespace)
    if not next_marker:
        next_marker = keys[-1] if keys else None

    # A document without IsTruncated (for example an error body) is treated
    # as a final page instead of raising AttributeError on a missing element.
    truncated_text = root.findtext(
        ".//ns:IsTruncated", default="", namespaces=namespace
    )
    is_truncated = truncated_text.strip().lower() or "false"
    return keys, next_marker, is_truncated
def save_keys_to_file(keys, filename="bucket_keys.txt", base_url=None):
    """Append one URL per key to ``filename``.

    Each line written is the base URL, a single "/", and the
    percent-encoded key, followed by a newline.

    Args:
        keys: Iterable of key strings.
        filename: Path of the file to append to; created if missing.
        base_url: URL prefix for every line. When ``None``, the
            module-level ``bucket_url`` is used.
    """
    # Same function that ``requests.utils.quote`` re-exports; importing it
    # from the standard library avoids relying on that re-export.
    from urllib.parse import quote

    if base_url is None:
        base_url = bucket_url
    # Drop any trailing slash so the join never produces "//" before the key.
    prefix = base_url.rstrip("/") + "/"
    with open(filename, "a", encoding="utf-8") as file:
        # Percent-encode each key so special characters survive in the URL.
        file.writelines(prefix + quote(key) + "\n" for key in keys)
# Base URL of the bucket to enumerate; fill this in before running.
bucket_url = ""

# Fetch the listing one page at a time. Each page yields the marker for the
# next request and a truncation flag; stop once the flag reads "false" or no
# marker is available.
marker = None
more_pages = True
while more_pages:
    xml_data = get_bucket_contents(bucket_url, marker)
    keys, marker, is_truncated = parse_xml(xml_data)
    print("Saving {} keys to file.".format(len(keys)))
    save_keys_to_file(keys, "bucket_keys.txt")
    more_pages = is_truncated != "false" and marker is not None

print("All keys have been saved to the file.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment