Skip to content

Instantly share code, notes, and snippets.

@tweedge
Last active January 10, 2021 09:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tweedge/906ffb64eb60885ceea6bc699e6c3527 to your computer and use it in GitHub Desktop.
Save tweedge/906ffb64eb60885ceea6bc699e6c3527 to your computer and use it in GitHub Desktop.
Quick HTML Index Maker for S3 HTTPS Endpoints
# quick-s3-index-maker.py
# compatibility: python3
# originally from: https://pythonadventures.wordpress.com/2011/03/26/static-html-filelist-generator/
# features:
# - takes directory and S3 endpoint as input
# - inserts header.txt prior to listing if one is present in the directory
# - removes emojis from filenames only (not compatible with Wasabi)
# - the least production-ready thing I've written since college
# - hacky as hell
import html
import math
import os
import re
import shutil
import sys
from urllib.parse import quote
# thank you James from StackOverflow
# https://stackoverflow.com/questions/5194057/better-way-to-convert-file-sizes-in-python
def convert_size(size_bytes):
    """Format a byte count as a human-readable string such as "1.5 KB".

    Units are binary multiples (1 KB = 1024 B). The value is scaled to the
    largest unit that fits, rounded to two decimals, and followed by the
    unit name. Values too large for the biggest unit are still expressed
    in that unit (e.g. "1024.0 YB") instead of raising IndexError.

    Args:
        size_bytes: Non-negative number of bytes.

    Returns:
        The formatted size; exactly "0 B" when size_bytes is 0.

    Raises:
        ValueError: If size_bytes is negative.
    """
    if size_bytes == 0:
        return "0 B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Pick the unit by comparing against powers of 1024 rather than taking
    # floor(log(size, 1024)): the comparison is exact for integers, and the
    # cap keeps the index inside size_name for enormous values.
    i = 0
    threshold = 1024
    while size_bytes >= threshold and i < len(size_name) - 1:
        threshold *= 1024
        i += 1
    s = round(size_bytes / 1024 ** i, 2)
    return "%s %s" % (s, size_name[i])
# and thank you asst. GitHub contributors
# https://gist.github.com/slowkow/7a7f61f495e3dbb7e3d767f97bd7304b
# Compiled once at import time. Every range below is an explicit emoji
# block or code point; there is deliberately no catch-all range, so
# ordinary letters in any script (CJK, Hangul, ...) are left untouched.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"  # dingbats
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002B05-\U00002B07"  # left/up/down arrows
    "\U00002B1B-\U00002B1C"  # large black/white squares
    "\U00002B50\U00002B55"  # star, heavy large circle
    "\U000024C2"  # circled M
    "\U0000FE0F"  # variation selector-16 (emoji presentation)
    "\U0001F004\U0001F0CF"  # mahjong red dragon, playing card joker
    "\U0001F170-\U0001F251"  # enclosed alphanumerics / ideographs
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(string):
    """Return `string` with every emoji character removed.

    Only code points in the emoji ranges of _EMOJI_PATTERN are stripped.
    An earlier version of the character class contained the single range
    U+24C2..U+1F251, which also matched CJK, Hangul and many other
    non-emoji characters; that range is now split into the specific emoji
    blocks it was meant to cover.

    Args:
        string: Text to clean (e.g. a filename).

    Returns:
        The text without emoji; unchanged if it contained none.
    """
    return _EMOJI_PATTERN.sub("", string)
def print_html_header():
    """Print the opening HTML tags, then header.txt if one exists.

    header.txt is looked up inside the module-level `base_dir`. Each of
    its lines is printed with trailing whitespace stripped; the content
    is emitted as-is (it is not HTML-escaped).
    """
    print("<html><body><code>")
    header_file_name = os.path.join(base_dir, "header.txt")
    if os.path.exists(header_file_name):
        # Context manager so the handle is closed once the header has been
        # echoed (the file used to be left open).
        with open(header_file_name) as header_file:
            for line in header_file:
                print(line.rstrip())
def print_html_footer():
    """Print the tags that close the elements opened by the HTML header."""
    closing_tags = "</code></body></html>"
    print(closing_tags)
# Script body: walk the directory given as the first argument and print an
# HTML index to stdout whose links point at https://<endpoint>/<relative path>.
# - the directory may be given with or without a trailing slash
# - the endpoint should not have a protocol or trailing slash


def _recursive_dir_sizes(top):
    """Return a dict mapping each directory under `top` to its total size.

    The size of a directory is the sum of os.path.getsize() over every file
    beneath it, subdirectories included. The tree is walked once, bottom-up,
    so each directory reuses the totals already computed for its children.
    Keys are the same path strings os.walk(top) yields as roots.
    """
    sizes = {}
    for walk_root, walk_dirs, walk_files in os.walk(top, topdown=False):
        total = sum(
            os.path.getsize(os.path.join(walk_root, name)) for name in walk_files
        )
        # Directories os.walk did not descend into (e.g. symlinks) have no
        # entry and contribute nothing.
        total += sum(
            sizes.get(os.path.join(walk_root, name), 0) for name in walk_dirs
        )
        sizes[walk_root] = total
    return sizes


if len(sys.argv) != 3:
    # Usage goes to stderr so it never ends up inside the generated HTML.
    print("Needs two arguments:", file=sys.stderr)
    print(
        "The directory to build a tree for (e.g. ~/MEGA/2021-01-06/)",
        file=sys.stderr,
    )
    print(
        "And the S3 endpoint base URL (e.g. capitol-hill-riots.s3.us-east-1.wasabisys.com)",
        file=sys.stderr,
    )
    sys.exit(1)
# base_dir stays a module-level name: print_html_header() reads it.
base_dir = sys.argv[1]
s3_base_url = sys.argv[2]
dir_sizes = _recursive_dir_sizes(base_dir)
print_html_header()
for root, dirs, files in os.walk(base_dir):
    dirs.sort()
    if not files:
        # Only directories that directly contain files get a heading.
        continue
    # Derive the relative folder from the walk root rather than by string
    # replacement, which broke when base_dir or the filename occurred
    # elsewhere in the path. Always use "/" since this feeds a URL.
    rel_dir = os.path.relpath(root, base_dir)
    if rel_dir == os.curdir:
        rel_folder = ""
    else:
        rel_folder = rel_dir.replace(os.sep, "/") + "/"
    print(
        "<strong>./"
        + html.escape(rel_folder)
        + " ("
        + convert_size(dir_sizes[root])
        + ")</strong><br>"
    )
    for filename in sorted(files):
        abs_path = os.path.join(root, filename)
        if not os.path.isfile(abs_path):
            continue
        # Strip emoji from the file's own name only - directories are never
        # renamed, so the directory part of the path must stay untouched.
        stored_name = filename
        clean_name = remove_emoji(filename)
        if clean_name != filename:
            clean_path = os.path.join(root, clean_name)
            if clean_name and not os.path.exists(clean_path):
                shutil.move(abs_path, clean_path)
                abs_path = clean_path
                stored_name = clean_name
            else:
                # Renaming would give an empty name or overwrite another
                # file, so leave this one as it is.
                print(
                    "warning: not renaming " + abs_path + " (target unusable)",
                    file=sys.stderr,
                )
        rel_path = rel_folder + stored_name
        # The link text keeps the original (pre-rename) filename; it is
        # escaped so characters like & or < cannot break the markup.
        print(
            '&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://'
            + s3_base_url
            + "/"
            + quote(rel_path)
            + '">'
            + html.escape(filename)
            + "</a> ("
            + convert_size(os.path.getsize(abs_path))
            + ")<br>"
        )
print_html_footer()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment