Last active
April 22, 2019 23:53
-
-
Save philerooski/56c193d63129c83775124ad95bb448eb to your computer and use it in GitHub Desktop.
Update Healthcode to MD5 hash table for JourneyPro, Presence, ElevateMS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import getpass
import hashlib

import bridgeclient
import pandas as pd
import synapseclient as sc
# Maps each study identifier accepted on the command line to the Synapse
# table that this script queries for known health codes and appends to.
SYNAPSE_TABLES = {
    'journey-pro': 'syn11439373',
    'elevate-ms': 'syn11439398',
    'lilly-presence': 'syn11445782',
}
def read_args():
    """Parse the command-line arguments for this script.

    Returns:
        argparse.Namespace with string attributes ``study``, ``email`` and
        ``password``.

    The password may still be given as the third positional argument, but it
    can now be omitted, in which case it is read interactively without echo.
    Passing a secret on the command line leaves it in shell history and the
    process list, so the prompt is the safer option.
    """
    parser = argparse.ArgumentParser(
        description="Export MD5 hashes of participant's email addresses "
                    "coupled with their healthcode identifier.")
    parser.add_argument(
        "study",
        help="Study identifier; selects which Synapse table is updated.")
    parser.add_argument(
        "email",
        help="Email address of the account used to sign in to Bridge.")
    parser.add_argument(
        "password",
        nargs="?",
        default=None,
        help="Password for the Bridge account. If omitted, you are "
             "prompted for it.")
    args = parser.parse_args()
    if args.password is None:
        args.password = getpass.getpass("Bridge password: ")
    return args
def getmd5(s):
    """Return the upper-case hexadecimal MD5 digest of a string.

    Args:
        s: Text to hash. It is encoded as UTF-8 before hashing and is not
            otherwise normalised (no trimming or case folding).

    Returns:
        A 32-character upper-case hexadecimal string.
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest().upper()
def main(study, email, password):
    """Append participants not yet recorded to the study's Synapse table.

    Fetches the participant list and per-participant metadata from Bridge,
    keeps only participants whose health code is absent from the Synapse
    table for ``study``, and stores one row per such participant holding the
    MD5 hash of their email (column ``externalId``) and their ``healthCode``.

    Args:
        study: Key of ``SYNAPSE_TABLES``; also passed to the Bridge connector.
        email: Email address used to sign in to Bridge.
        password: Password used to sign in to Bridge.

    Raises:
        KeyError: If ``study`` is not a key of ``SYNAPSE_TABLES``.
    """
    # Resolve the table up front so an unknown study fails before any login
    # or network traffic.
    table_id = SYNAPSE_TABLES[study]
    syn = sc.login()
    bc = bridgeclient.bridgeConnector(
        email=email,
        password=password,
        study=study)
    # The participant listing is used below through its `id` and `email`
    # columns.
    bridge_data = bc.getParticipants()
    synapse_data = syn.tableQuery(
        "select healthCode from {}".format(table_id)).asDataFrame()
    print("Downloading participant metadata...")
    # One metadata request per participant id.
    bridge_metadata = pd.DataFrame(
        [bc.getParticipantMetaData(i) for i in bridge_data.id.values])
    metadata_rel = bridge_metadata[['id', 'healthCode']]
    # Vectorised membership test instead of scanning the known health codes
    # once per participant.
    is_new = ~metadata_rel.healthCode.isin(synapse_data.healthCode)
    if not is_new.any():
        # Nothing to add. Returning here guarantees that already-recorded
        # participants are never uploaded a second time.
        print("No new health codes to store.")
        return
    metadata_rel = metadata_rel.loc[is_new, :]
    bridge_merged = bridge_data.merge(metadata_rel, on='id', how="inner")
    bridge_merged['externalId'] = [
        getmd5(address) for address in bridge_merged.email.values]
    table = sc.Table(
        table_id,
        bridge_merged.loc[:, ['externalId', 'healthCode']])
    syn.store(
        table,
        executed=["https://gist.github.com/philerooski/56c193d63129c83775124ad95bb448eb"])
# Script entry point: parse the command line, then run the update.
if __name__ == "__main__":
    args = read_args()
    main(args.study, args.email, args.password)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment