Skip to content

Instantly share code, notes, and snippets.

@philerooski
Last active April 22, 2019 23:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philerooski/56c193d63129c83775124ad95bb448eb to your computer and use it in GitHub Desktop.
Save philerooski/56c193d63129c83775124ad95bb448eb to your computer and use it in GitHub Desktop.
Update the healthCode-to-MD5 hash tables for JourneyPro, Presence, and ElevateMS
import bridgeclient
import hashlib
import pandas as pd
import synapseclient as sc
import argparse
# Synapse table ID for each supported study; looked up by the `study`
# command-line argument when querying and storing rows.
SYNAPSE_TABLES = {
    'journey-pro': 'syn11439373',
    'elevate-ms': 'syn11439398',
    'lilly-presence': 'syn11445782',
}
def read_args():
    """Parse the command-line arguments.

    Three positional arguments are required, in this order: ``study``,
    ``email`` and ``password``.

    Returns:
        argparse.Namespace: The parsed arguments, exposed as the attributes
        ``study``, ``email`` and ``password``.
    """
    cli = argparse.ArgumentParser(
        description="Export MD5 hashes of participant's email addresses "
                    "coupled with their healthcode identifier.")
    for positional in ("study", "email", "password"):
        cli.add_argument(positional)
    return cli.parse_args()
def getmd5(s):
    """Return the uppercase hexadecimal MD5 digest of a string.

    Args:
        s: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        str: The MD5 hex digest of ``s``, converted to uppercase.
    """
    digest = hashlib.md5(s.encode('utf-8')).hexdigest()
    return digest.upper()
def main(study, email, password):
    """Append rows for participants whose healthCode is not yet in Synapse.

    Logs in to Synapse and Bridge, downloads the Bridge participant list and
    each participant's metadata, keeps only participants whose healthCode is
    absent from the study's Synapse table, and stores one row per such
    participant containing the healthCode and the MD5 hash of their email
    address (column ``externalId``).

    Nothing is stored when there are no participants or when every
    healthCode is already present in the Synapse table.

    Args:
        study: Study identifier; must be a key of ``SYNAPSE_TABLES``.
        email: Email passed to the Bridge connector for sign-in.
        password: Password passed to the Bridge connector for sign-in.

    Raises:
        KeyError: If ``study`` is not a key of ``SYNAPSE_TABLES``.
    """
    # Resolve the table first so an unknown study fails before any
    # network calls are made.
    table_id = SYNAPSE_TABLES[study]

    syn = sc.login()
    bc = bridgeclient.bridgeConnector(
        email=email,
        password=password,
        study=study)
    bridge_data = bc.getParticipants()
    synapse_data = syn.tableQuery(
        "select healthCode from {}".format(table_id)).asDataFrame()

    print("Downloading participant metadata...")
    bridge_metadata = pd.DataFrame(
        [bc.getParticipantMetaData(i) for i in bridge_data.id.values])
    if bridge_metadata.empty:
        # No participants: the frame has no columns to select from.
        print("No new health codes to upload.")
        return
    metadata_rel = bridge_metadata[['id', 'healthCode']]

    # Vectorized membership test instead of scanning the Synapse column
    # once per participant.
    is_new = ~metadata_rel.healthCode.isin(synapse_data.healthCode)
    if not is_new.any():
        # Stop here rather than re-uploading rows that already exist.
        print("No new health codes to upload.")
        return

    metadata_rel = metadata_rel.loc[is_new, :]
    bridge_merged = bridge_data.merge(metadata_rel, on='id', how="inner")
    bridge_merged['externalId'] = [
        getmd5(address) for address in bridge_merged.email.values]
    table = sc.Table(
        table_id,
        bridge_merged.loc[:, ['externalId', 'healthCode']])
    syn.store(
        table,
        executed=["https://gist.github.com/philerooski/56c193d63129c83775124ad95bb448eb"])
# Script entry point: parse the CLI arguments and run the update.
if __name__ == "__main__":
    cli_args = read_args()
    main(study=cli_args.study,
         email=cli_args.email,
         password=cli_args.password)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment