Skip to content

Instantly share code, notes, and snippets.

@venky-wandb
Last active December 20, 2023 16:48
Show Gist options
  • Save venky-wandb/b95542f107b377fb9e1ad2c811201bbe to your computer and use it in GitHub Desktop.
Save venky-wandb/b95542f107b377fb9e1ad2c811201bbe to your computer and use it in GitHub Desktop.
Samsung - Deletes run files from MinIO object storage and hard-deletes the corresponding run rows from the MySQL database
# Requires the MySQL Connector/Python package, which provides `mysql.connector`:
#     pip install mysql-connector-python
import mysql.connector
from mysql.connector import errorcode
import os
import argparse
import shutil
def _delete_run_files(dir_path):
    """Remove the contents of one run directory, sparing artifact paths.

    Regular files are removed, sub-directories are removed recursively
    unless their full path contains '/artifacts', and the run directory
    itself is removed only if nothing is left in it afterwards. One status
    line is printed per entry.
    """
    for filename in os.listdir(dir_path):
        f = os.path.join(dir_path, filename)
        if os.path.isfile(f):
            os.remove(f)
            print("{} deleted from minio storage".format(f))
        # NOTE(review): the '/artifacts' test is applied to the full path, so
        # an entity/project/run name starting with "artifacts" also protects
        # every sub-directory. Kept as-is because loosening it would delete
        # more data; confirm the intended rule.
        elif os.path.isdir(f) and '/artifacts' not in f:
            shutil.rmtree(f)
            print("{} deleted from minio storage".format(f))
        elif '/artifacts' in f:
            print("{} cannot be deleted".format(f))
        else:
            # Neither a regular file nor a directory (e.g. a dangling link).
            print("{} does not exist or has already been deleted".format(f))
    # Only drop the run directory once nothing (e.g. artifacts) remains in it.
    if not os.listdir(dir_path):
        print("{} deleted from minio storage".format(dir_path))
        shutil.rmtree(dir_path)


def data_deletion(delete_days):
    """Purge runs that were soft-deleted more than ``delete_days`` days ago.

    For every row in ``runs`` whose ``deleted_at`` is older than the cutoff,
    the run's directory under /vol/minio/local-files/<entity>/<project>/<run>
    is emptied (artifact paths are kept) and the row is hard-deleted from the
    database.

    Args:
        delete_days: Age threshold in days, bound into the SQL
            ``interval ... DAY`` expression.

    MySQL errors are reported on stdout and not re-raised. Filesystem errors
    propagate to the caller. In every case the cursors and the connection are
    closed before returning.
    """
    # Setup database config for connection
    config = {
        'user': 'wandb_local',
        'password': 'wandb_local',
        'host': '127.0.0.1',
        'database': 'wandb_local',
        'raise_on_warnings': True,
    }
    # Values are passed as driver parameters instead of being formatted into
    # the SQL text. project_id is selected so the delete below can target the
    # exact row that was matched.
    select_query = (
        "SELECT r.name AS run_id, r.project_id AS project_id, "
        "p.storage_key AS project_name, e.name AS entity_name "
        "FROM runs r JOIN projects p ON p.id=r.project_id "
        "JOIN entities e ON p.entity_id = e.id "
        "WHERE r.deleted_at < now() - interval %s DAY"
    )
    # Scoped to the project: deleting by name alone would also remove runs
    # with the same name that belong to other projects.
    delete_query = "DELETE FROM runs WHERE name = %s AND project_id = %s"

    cnx = None
    select_cursor = None
    delete_cursor = None
    try:
        cnx = mysql.connector.connect(**config)
        # Buffered: the whole result set is fetched up front, so the second
        # cursor can issue deletes on the same connection while we iterate.
        select_cursor = cnx.cursor(buffered=True)
        delete_cursor = cnx.cursor(buffered=True)
        select_cursor.execute(select_query, (delete_days,))
        for run_id, project_id, project_name, entity_name in select_cursor:
            # Deletes files from object storage
            dir_path = "/vol/minio/local-files/{}/{}/{}".format(
                entity_name, project_name, run_id)
            if os.path.isdir(dir_path):
                # An empty run_id collapses the path onto the project
                # directory; refuse to touch it (and leave the row alone).
                # NOTE(review): this also skips a run whose name equals the
                # project's storage key.
                if os.path.basename(os.path.normpath(dir_path)) == project_name:
                    print("WARNING: {} skipped, unusual directory path".format(dir_path))
                    continue
                _delete_run_files(dir_path)
            # Hard deletes rows from database
            delete_cursor.execute(delete_query, (run_id, project_id))
            # Commit per run so the database stays in step with the files
            # already removed if a later run fails.
            cnx.commit()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name and password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    finally:
        # Release whatever was opened, on success and on failure alike.
        for resource in (select_cursor, delete_cursor, cnx):
            if resource is not None:
                resource.close()
def non_negative_int(value):
    """Parse a command-line value as an integer number of days >= 0.

    Used as the argparse ``type`` for --delete-days. A negative value would
    push the SQL cutoff into the future and match every soft-deleted run, so
    it is rejected here.

    Raises:
        ValueError: if ``value`` is not an integer literal (argparse reports
            this as an invalid argument value).
        argparse.ArgumentTypeError: if the parsed integer is negative.
    """
    days = int(value)
    if days < 0:
        raise argparse.ArgumentTypeError(
            "delete-days must be >= 0, got {}".format(days))
    return days


# Only purge when executed as a script, never as a side effect of an import.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--delete-days", type=non_negative_int, default=15)
    args = parser.parse_args()
    delete_days = args.delete_days
    data_deletion(delete_days)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment