Last active
January 10, 2021 09:06
-
-
Save tweedge/906ffb64eb60885ceea6bc699e6c3527 to your computer and use it in GitHub Desktop.
Quick HTML Index Maker for S3 HTTPS Endpoints
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# quick-s3-index-maker.py
# compatibility: python3
# originally from: https://pythonadventures.wordpress.com/2011/03/26/static-html-filelist-generator/
# features:
# - takes a directory and an S3 endpoint as input
# - inserts header.txt before the listing if one is present in the directory
# - removes emojis from filenames only (emoji are not compatible with Wasabi)
# - the least production-ready thing I've written since college
# - hacky as hell
import os, sys, math, re, shutil | |
from urllib.parse import quote | |
# Thank you, James from Stack Overflow:
# https://stackoverflow.com/questions/5194057/better-way-to-convert-file-sizes-in-python
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes.

    Returns:
        A string such as "0 B", "1.5 KB" or "2.0 GB". The numeric part is
        rounded to two decimal places. Values beyond the largest unit are
        expressed in that unit (e.g. "1024.0 YB").

    Raises:
        ValueError: If size_bytes is negative.
    """
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")
    if size_bytes == 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    last = len(size_name) - 1
    value = float(size_bytes)
    i = 0
    # Repeated division by 1024 is exact in binary floating point, so the
    # unit is picked without relying on a logarithm, and the index can never
    # run past the end of size_name.
    while value >= 1024 and i < last:
        value /= 1024
        i += 1
    s = round(value, 2)
    # Rounding can push e.g. 1023.999 up to 1024.0; roll over to the next unit
    # so the result reads "1.0 MB" rather than "1024.0 KB".
    if s >= 1024 and i < last:
        s = round(s / 1024, 2)
        i += 1
    return "%s %s" % (s, size_name[i])
# and thank you, assorted GitHub contributors:
# https://gist.github.com/slowkow/7a7f61f495e3dbb7e3d767f97bd7304b
# Compiled once at import time instead of on every call.
# Only symbol/pictograph ranges are listed. A single span from U+24C2 to
# U+1F251 would also match CJK ideographs, kana, Hangul and other ordinary
# letters, so the BMP symbols are enumerated individually instead.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F000-\U0001F251"  # tiles, cards, enclosed symbols, flags (iOS)
    "\u2600-\u27BF"  # miscellaneous symbols & dingbats
    "\u24C2"  # circled M
    "\u25AA-\u25FE"  # geometric shapes
    "\u2934-\u2935"  # curved arrows
    "\u2B05-\u2B55"  # arrows, squares, star, circle
    "\u3030\u303D\u3297\u3299"  # wavy dash, part alternation mark, circled ideographs
    "\uFE0F"  # variation selector-16 left behind by emoji presentation sequences
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(string):
    """Return *string* with emoji and pictographic symbols removed.

    Characters outside the symbol ranges listed in ``_EMOJI_PATTERN`` (for
    example CJK, kana or Hangul text) are left untouched.

    Args:
        string: The text to clean, e.g. a filename.

    Returns:
        The text with every run of matching characters deleted.
    """
    return _EMOJI_PATTERN.sub("", string)
def print_html_header():
    """Print the opening HTML tags, then the contents of header.txt if present.

    Reads the module-level ``base_dir``. When ``header.txt`` exists inside that
    directory, each of its lines is printed with trailing whitespace removed.
    """
    print("<html><body><code>")
    header_file_name = os.path.join(base_dir, "header.txt")
    if os.path.exists(header_file_name):
        # The context manager closes the handle even if printing fails.
        with open(header_file_name) as header_file:
            for line in header_file:
                print(line.rstrip())
def print_html_footer():
    """Print the closing tags for the elements opened by print_html_header()."""
    closing_markup = "</code></body></html>"
    print(closing_markup)
# The directory may be given with or without a trailing slash.
# The endpoint should not include a protocol; a trailing slash is tolerated.
def _folder_sizes(top):
    """Map each directory under *top* (keyed by its os.walk root) to its recursive size in bytes."""
    sizes = {}
    # Bottom-up, so every child directory is totalled before its parent and
    # the tree is read once instead of once per listed folder.
    for root, dirs, files in os.walk(top, topdown=False):
        own = 0
        for name in files:
            path = os.path.join(root, name)
            # Skip entries such as broken symlinks that have no size to read.
            if os.path.isfile(path):
                own += os.path.getsize(path)
        nested = sum(sizes.get(os.path.join(root, d), 0) for d in dirs)
        sizes[root] = own + nested
    return sizes


def _strip_emoji_from_file(folder, filename):
    """Rename folder/filename to its emoji-free name; return the name now on disk."""
    clean_name = remove_emoji(filename)
    if clean_name == filename:
        return filename
    source = os.path.join(folder, filename)
    target = os.path.join(folder, clean_name)
    # Never rename to an empty name or on top of an existing entry.
    if not clean_name or os.path.lexists(target):
        print("Cannot remove emoji from " + source + "; keeping name", file=sys.stderr)
        return filename
    shutil.move(source, target)
    return clean_name


def main(argv=None):
    """Print an HTML index of a directory tree whose links point at an S3 endpoint.

    For every directory that directly contains files, a bold heading with the
    directory's relative path and recursive size is printed, followed by one
    link per regular file. Files whose names contain emoji are renamed on disk
    to the emoji-free name before being linked; directories are never renamed.

    Args:
        argv: Two-item sequence ``[directory, s3_endpoint]``. Defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the wrong number of arguments was given.
    """
    global base_dir  # print_html_header() looks this name up at module level

    import html  # local import keeps this block self-contained

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        # stdout carries the generated HTML, so usage help goes to stderr.
        print("Needs two arguments:", file=sys.stderr)
        print(
            "The directory to build a tree for (e.g. ~/MEGA/2021-01-06/)",
            file=sys.stderr,
        )
        print(
            "And the S3 endpoint base URL (e.g. capitol-hill-riots.s3.us-east-1.wasabisys.com)",
            file=sys.stderr,
        )
        return 1

    base_dir = args[0]
    s3_base_url = args[1].rstrip("/")
    folder_sizes = _folder_sizes(base_dir)

    print_html_header()
    for root, dirs, files in os.walk(base_dir):
        dirs.sort()  # in-place sort makes os.walk descend in alphabetical order
        if not files:
            continue

        # Derive the relative folder from the walk root rather than by string
        # replacement, which misfires when base_dir or the filename recurs
        # elsewhere in the path.
        rel_dir = os.path.relpath(root, base_dir)
        if rel_dir == os.curdir:
            rel_folder = ""
        else:
            rel_folder = rel_dir.replace(os.sep, "/") + "/"

        print(
            "<strong>./"
            + html.escape(rel_folder)
            + " ("
            + convert_size(folder_sizes.get(root, 0))
            + ")</strong><br>"
        )

        for filename in sorted(files):
            if not os.path.isfile(os.path.join(root, filename)):
                continue
            # Only the file's own name is cleaned; the directory part is left
            # as it is on disk so the rename target always exists.
            stored_name = _strip_emoji_from_file(root, filename)
            file_size = os.path.getsize(os.path.join(root, stored_name))
            print(
                ' <a href="https://'
                + s3_base_url
                + "/"
                + quote(rel_folder + stored_name)
                + '">'
                + html.escape(filename)
                + "</a> ("
                + convert_size(file_size)
                + ")<br>"
            )
    print_html_footer()
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment