Created
October 12, 2015 06:41
-
-
Save squirrelo/914178385508650af222 to your computer and use it in GitHub Desktop.
Scrubber for AG database pulldowns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg2 | |
from passlib.hash import bcrypt | |
from random import choice | |
from sys import argv | |
def _new_kit_id(): | |
seen = set() | |
alpha = "abcdefghijklmnopqrstuvwxyz" | |
alpha += alpha.upper() | |
while True: | |
suffix = ''.join([choice(alpha) for i in range(5)]) | |
if suffix in seen: | |
continue | |
seen.add(suffix) | |
yield "tst_%s" % suffix | |
def ag_settings(cur): | |
cur.execute("UPDATE ag.settings SET test_environment = 'true';") | |
def ag_labadmin(cur): | |
"""Wipes out all current labadmin users and replaces with | |
username: test | |
password: password | |
""" | |
cur.execute("DELETE FROM ag.labadmin_users") | |
cur.execute("""INSERT INTO ag.labadmin_users (email, password) | |
VALUES ('test', '$2a$10$2.6Y9HmBqUFmSvKCjWmBte70WF.zd3h4VqbhLMQK1xP67Aj3rei86')""") | |
def survey_question_ids(cur): | |
"""Clean the data stored for survey answers""" | |
FREE_IDs = (98, 99, 101, 103, 104, 105, 106, 115, 116, 117, 118, 119, 120, | |
122, 124, 126, 127, 142, 143, 144) | |
# take special care for birth month: Set all to January so FK & pulldown doesnt break | |
cur.execute("""UPDATE ag.survey_answers | |
SET response = %s | |
WHERE survey_question_id = 111""", ["January"]) | |
# Wipe out answers | |
cur.execute("""UPDATE ag.survey_answers_other | |
SET response = %s | |
WHERE survey_question_id IN %s""", ["REMOVED", FREE_IDs]) | |
def ag_kit(cur): | |
"""Clean the data stored in ag_kit | |
The specific fields scrubbed are 'supplied_kit_id', and 'kit_password'. | |
All 'kit_password' fields will be set to a valid password hash where the | |
password is "test". All kit IDs will be reset to "tst_<something>" | |
where <something> is a random string of characters. | |
""" | |
pass_hash = bcrypt.encrypt('test') | |
kit_id_gen = _new_kit_id() | |
cur.execute("UPDATE ag.ag_kit SET kit_password = %s", [pass_hash]) | |
cur.execute("SELECT supplied_kit_id FROM ag.ag_kit") | |
for kit_id in cur.fetchall(): | |
new_kit_id = kit_id_gen.next() | |
cur.execute("""UPDATE ag.ag_kit | |
SET supplied_kit_id = %s | |
WHERE supplied_kit_id = %s""", [new_kit_id, kit_id]) | |
def ag_login(cur): | |
"""Clean the data stored in ag_login | |
Basically remove all information from this table. Email, name, address, | |
city, state, zip, country are set to REMOVED while latitude, longitude, | |
and elevation are set to 0.0 | |
""" | |
varchar_fields = ['email', 'name', 'address', 'city', 'state', 'zip', | |
'country'] | |
float_fields = ['latitude', 'longitude', 'elevation'] | |
for field in varchar_fields: | |
cur.execute("UPDATE ag.ag_login SET %s = 'REMOVED'" % field) | |
for field in float_fields: | |
cur.execute("UPDATE ag.ag_login SET %s = 0.0" % field) | |
def ag_handout_kits(cur): | |
"""Clean handout kits""" | |
pass_hash = bcrypt.encrypt('test') | |
kit_id_gen = _new_kit_id() | |
cur.execute("UPDATE ag.ag_handout_kits SET password = %s", [pass_hash]) | |
cur.execute("SELECT kit_id FROM ag.ag_handout_kits") | |
for kit_id in cur.fetchall(): | |
new_kit_id = kit_id_gen.next() | |
cur.execute("""UPDATE ag.ag_handout_kits | |
SET kit_id = %s | |
WHERE kit_id = %s""", [new_kit_id, kit_id]) | |
def ag_kit_barcodes(cur): | |
"""Clean the data in ag_kit_barcodes""" | |
fields = ['participant_name', 'notes', 'other_text'] | |
cur.execute("""UPDATE ag.ag_kit_barcodes | |
SET (participant_name,notes,other_text) = | |
(%s, %s, %s)""", ["REMOVED", "REMOVED", "REMOVED"]) | |
def ag_login_surveys(cur): | |
"""Scrub the ag_login_surveys table""" | |
cur.execute("ALTER TABLE ag.ag_login_surveys DROP CONSTRAINT fk_ag_login_surveys0") | |
cur.execute("ALTER TABLE ag.ag_consent DROP CONSTRAINT pk_american_gut_consent") | |
id_name_map = {} | |
cur.execute("""SELECT ag_login_id, array_agg(participant_name) | |
FROM ag.ag_consent | |
GROUP BY ag_login_id""") | |
for login_id, pnames in cur.fetchall(): | |
for idx, old_name in enumerate(pnames): | |
name = 'REMOVED-%d' % idx | |
id_name_map[(login_id, old_name)] = name | |
cur.execute("""UPDATE ag.ag_consent | |
SET participant_name = %s | |
WHERE ag_login_id = %s AND participant_name = %s""", | |
[name, login_id, old_name]) | |
cur.execute("""SELECT ag_login_id, array_agg(survey_id) as survey_id, | |
array_agg(participant_name) as participant_name | |
FROM ag.ag_login_surveys | |
GROUP BY ag_login_id""") | |
for login_id, sids, pnames in cur.fetchall(): | |
for idx, (survey, old_name) in enumerate(zip(sids, pnames)): | |
name = id_name_map[(login_id, old_name)] | |
cur.execute("""UPDATE ag.ag_login_surveys | |
SET participant_name = %s | |
WHERE survey_id = %s""", [name, survey]) | |
cur.execute("""ALTER TABLE ag.ag_consent | |
ADD CONSTRAINT pk_american_gut_consent | |
PRIMARY KEY (ag_login_id, participant_name)""") | |
cur.execute("""ALTER TABLE ag.ag_login_surveys | |
ADD CONSTRAINT fk_ag_login_surveys0 | |
FOREIGN KEY (ag_login_id, participant_name) | |
REFERENCES ag.ag_consent(ag_login_id, participant_name)""") | |
def ag_consent(cur): | |
"""Clean the ag_consent table""" | |
fields = ['participant_email', 'parent_1_name', | |
'parent_2_name', 'parent_1_code', 'parent_2_code'] | |
for f in fields: | |
cur.execute("""UPDATE ag.ag_consent | |
SET (participant_email, parent_1_name, | |
parent_2_name, parent_1_code, parent_2_code) = | |
(%s, %s, %s, %s, %s)""", ["REMOVED"] * 5) | |
# (ag_login_id, participant_name) is not unique now due to "REMOVED" | |
def ag_zipcodes(cur): | |
cur.execute('DELETE FROM ag.zipcodes') | |
def ag_surveys(cur): | |
'''Wipe out answers but leave tables so promotion patches work''' | |
cur.execute('DELETE FROM ag.ag_human_survey;') | |
cur.execute('DELETE FROM ag.ag_animal_survey;') | |
def barcodes_schema(cur): | |
"""Rename the projects in the database and remove barcodes not in AG""" | |
# project_barcode MUST BE LAST | |
tables = ['plate_barcode', 'barcode_exceptions', 'project_barcode'] | |
sql = '''DELETE FROM barcodes.{} | |
WHERE barcode NOT IN ( | |
SELECT DISTINCT barcode FROM barcodes.project_barcode | |
JOIN barcodes.project USING (project_id) | |
WHERE project = 'American Gut Project')''' | |
for table in tables: | |
cur.execute(sql.format(table)) | |
sql = """UPDATE barcodes.project | |
SET project = 'PROJECT' || project_id | |
WHERE project != 'American Gut Project'""" | |
cur.execute(sql) | |
def drop_tables(cur): | |
sql = '''DROP TABLE ag.ag_participant_exceptions; | |
DROP TABLE ag.ag_survey_multiples_backup; | |
DROP TABLE ag.ag_survey_answer; | |
DROP TABLE ag.ag_survey_multiples; | |
DROP TABLE ag.controlled_vocab_values; | |
DROP TABLE ag.controlled_vocabs; | |
DROP TABLE ag.ag_import_stats_tmp; | |
''' | |
cur.execute(sql) | |
def drop_procedures(cur): | |
sql = '''DROP FUNCTION ag.ag_add_animal_participant(uuid, text); | |
DROP FUNCTION ag.ag_add_participant(uuid, text); | |
DROP FUNCTION ag.ag_authenticate_user(text, text, refcursor); | |
DROP FUNCTION ag.ag_available_barcodes(uuid, refcursor); | |
DROP FUNCTION ag.ag_check_barcode_status(text, refcursor); | |
DROP FUNCTION ag.ag_delete_participant(uuid, text); | |
DROP FUNCTION ag.ag_delete_sample(text, uuid); | |
DROP FUNCTION ag.ag_delete_survey_answer(uuid, text); | |
DROP FUNCTION ag.ag_get_animal_participants(uuid, refcursor); | |
DROP FUNCTION ag.ag_get_barcode_details(text, refcursor); | |
DROP FUNCTION ag.ag_get_barcode_md_animal(text, refcursor); | |
DROP FUNCTION ag.ag_get_barcode_metadata(text, refcursor); | |
DROP FUNCTION ag.ag_get_barcodes(refcursor); | |
DROP FUNCTION ag.ag_get_barcodes_by_kit(text, refcursor); | |
DROP FUNCTION ag.ag_get_barcodes_by_login(uuid, refcursor); | |
DROP FUNCTION ag.ag_get_environmental_samples(uuid, refcursor); | |
DROP FUNCTION ag.ag_get_human_participants(uuid, refcursor); | |
DROP FUNCTION ag.ag_get_kit_details(text, refcursor); | |
DROP FUNCTION ag.ag_get_kit_id_by_email(text, refcursor); | |
DROP FUNCTION ag.ag_get_kits_by_login(refcursor); | |
DROP FUNCTION ag.ag_get_logins(refcursor); | |
DROP FUNCTION ag.ag_get_map_markers(refcursor); | |
DROP FUNCTION ag.ag_get_next_barcode(refcursor); | |
DROP FUNCTION ag.ag_get_participant_exceptions(uuid, refcursor); | |
DROP FUNCTION ag.ag_get_participant_samples(uuid, text, refcursor); | |
DROP FUNCTION ag.ag_get_print_results(text, refcursor); | |
DROP FUNCTION ag.ag_get_survey_details(uuid, text, refcursor); | |
DROP FUNCTION ag.ag_insert_barcode(uuid, text); | |
DROP FUNCTION ag.ag_insert_kit(uuid, text, text, bigint, text, text); | |
DROP FUNCTION ag.ag_insert_login(text, text, text, text, text, text, text); | |
DROP FUNCTION ag.ag_insert_participant_exception(uuid, text); | |
DROP FUNCTION ag.ag_insert_survey_answer(uuid, text, text, text); | |
DROP FUNCTION ag.ag_is_handout(text, text); | |
DROP FUNCTION ag.ag_log_participant_sample(text, text, text, text, text, text, text); | |
DROP FUNCTION ag.ag_reassign_barcode(uuid, text); | |
DROP FUNCTION ag.ag_set_pass_change_code(text, text, text); | |
DROP FUNCTION ag.ag_stats(refcursor); | |
DROP FUNCTION ag.ag_update_barcode(text, uuid, text, text, text, text, text, text, text, text); | |
DROP FUNCTION ag.ag_update_geo_info(uuid, double precision, double precision, double precision, character); | |
DROP FUNCTION ag.ag_update_kit(uuid, text, text, bigint, text); | |
DROP FUNCTION ag.ag_update_kit_password(text, text); | |
DROP FUNCTION ag.ag_update_login(uuid, text, text, text, text, text, text, text); | |
DROP FUNCTION ag.ag_verify_kit_status(text); | |
DROP FUNCTION ag.ag_verify_password_change_code(text, text, text); | |
DROP FUNCTION ag.american_gut_consent_submit(text, text, text, text, text, text, text, text); | |
DROP FUNCTION ag.get_barcode_proj_type(text, refcursor); | |
DROP FUNCTION ag.get_project_names(refcursor); | |
DROP FUNCTION ag.set_barcode_proj_type(text, text); | |
DROP FUNCTION ag.update_akb(text, character, character, character, text, text); | |
DROP FUNCTION ag.update_barcode_status(text, text, text, text);''' | |
cur.execute(sql) | |
if __name__ == '__main__': | |
conn = psycopg2.connect(host='localhost', dbname=argv[1]) | |
cur = conn.cursor() | |
ag_settings(cur) | |
drop_tables(cur) | |
ag_surveys(cur) | |
drop_procedures(cur) | |
survey_question_ids(cur) | |
ag_zipcodes(cur) | |
ag_kit(cur) | |
ag_login(cur) | |
ag_handout_kits(cur) | |
ag_kit_barcodes(cur) | |
ag_login_surveys(cur) | |
ag_consent(cur) | |
ag_labadmin(cur) | |
barcodes_schema(cur) | |
conn.commit() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment