Skip to content

Instantly share code, notes, and snippets.

@kdaily
Last active September 19, 2019 21:37
Show Gist options
  • Save kdaily/8d40a3acd717d71071ddda0761af915e to your computer and use it in GitHub Desktop.
Save kdaily/8d40a3acd717d71071ddda0761af915e to your computer and use it in GitHub Desktop.
Remove submissions for a center in the VEOIBD data submission project.
"""Script to remove all artifacts of submissions for a center.
"""
import logging
import synapseclient
import genie.config
# Install the default logging handler and let DEBUG-and-above records from
# this module's logger through.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# NOTE(review): silent=True presumably suppresses the login banner; how
# credentials are located is up to synapseclient -- confirm for your setup.
syn = synapseclient.login(silent=True)

# Center whose submissions are being removed.
center = 'SAGE'

# Synapse IDs of the views queried further down in the script.
project_fileview_id = 'syn18818782'
input_and_staging_view_id = 'syn18583501'

# Map each 'Database' value to its 'Id' value using the rows of the
# database mapping table.
# orient must be spelled 'records': the abbreviated 'record' was deprecated
# by pandas and raises ValueError from pandas 2.0 onwards.
db_table_id = 'syn18582675'
db_tables = (
    syn.tableQuery(f'select * from {db_table_id}')
    .asDataFrame()
    .to_dict(orient='records')
)
db_lookup = {row['Database']: row['Id'] for row in db_tables}

# Map each 'center' value to its 'inputSynId' value using the rows of the
# center mapping table, then pick out the entry for the chosen center.
# A center missing from the table raises KeyError here, before anything is
# queued for deletion.
center_mapping_table_id = 'syn18582666'
center_mapping_tables = (
    syn.tableQuery(f'select * from {center_mapping_table_id}')
    .asDataFrame()
    .to_dict(orient='records')
)
center_mapping_lookup = {
    row['center']: row['inputSynId'] for row in center_mapping_tables
}
center_input_id = center_mapping_lookup[center]

# File type names to process; these are the keys of whatever
# collect_format_types returns for the "veoibddatasubmission" format set.
filetype_formats = genie.config.collect_format_types(["veoibddatasubmission"]).keys()
# Queue of things to delete: table query results (rows belonging to the
# center) and Synapse IDs (files belonging to the center).
to_delete = []

for filetype in filetype_formats:
    # File types without an entry in the database lookup are skipped.
    try:
        db_id = db_lookup[filetype]
    except KeyError:
        logger.info(f"No database table for file type {filetype}.")
        continue

    # Best effort: an entity that cannot be fetched is logged and skipped.
    try:
        db = syn.get(db_id)
    except Exception:
        logger.info(f"Could not get database table {db_id} for file type {filetype}.")
        continue

    concrete_type = db.properties.concreteType

    if concrete_type == 'org.sagebionetworks.repo.model.table.TableEntity':
        # The "database" is a table: queue the center's rows as one result.
        center_rows = syn.tableQuery(f"select * from {db_id} WHERE center='{center}'")
        if len(center_rows.asRowSet()['rows']) == 0:
            continue
        logger.info(f"Queueing deletion of rows in table {db_id} for file type {filetype}.")
        to_delete.append(center_rows)
    elif concrete_type == 'org.sagebionetworks.repo.model.Folder':
        # The "database" is a folder: queue each matching file ID found
        # through the project file view.
        center_files = syn.tableQuery(f"select id from {project_fileview_id} WHERE center='{center}' AND fileType='{filetype}' and parentId='{db_id}'")
        if len(center_files.asRowSet()['rows']) == 0:
            continue
        logger.info(f"Queueing deletion of files in folder {db_id} for file type {filetype}.")
        to_delete.extend(center_files.asDataFrame().id.tolist())
# Queue the center's rows in the validation status and error tracker tables.
# Both names must be present in db_lookup; a missing one raises KeyError.
for tracking_table in ('validationStatus', 'errorTracker'):
    tracked_rows = syn.tableQuery(f"select * from {db_lookup[tracking_table]} WHERE center='{center}'")
    if len(tracked_rows.asRowSet()['rows']) == 0:
        # Nothing recorded for this center in this table.
        continue
    to_delete.append(tracked_rows)
# Queue every entity that the input/staging view lists directly under the
# center's input folder.
res = syn.tableQuery(f"select id from {input_and_staging_view_id} WHERE parentId='{center_input_id}'")
if len(res.asRowSet()['rows']) > 0:
    # Message spelling fixed ("deletiion" -> "deletion") so it matches the
    # other "Queueing deletion" log lines.
    logger.info(f"Queueing deletion of files in {center} input folder {center_input_id}.")
    to_delete.extend(res.asDataFrame().id.tolist())
# Report the size of the queue, then delete each entry in queue order.
# The queue mixes table query results and Synapse ID strings; each one is
# handed to syn.delete unchanged.
logger.info(f"There are {len(to_delete)} things to delete.")
for queued in to_delete:
    syn.delete(queued)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment