Skip to content

Instantly share code, notes, and snippets.

@Dyrcona
Created April 27, 2023 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dyrcona/07cc49070ecd07aa91dde6576c59e08b to your computer and use it in GitHub Desktop.
Save Dyrcona/07cc49070ecd07aa91dde6576c59e08b to your computer and use it in GitHub Desktop.
A SQL to Anonymize Patron Data for an Evergreen Test Database
-- Evergreen Test Data Anonymizaiton Query
-- Copyright (C) 2023 CW MARS, Inc.
-- Jason Stepenson <jstephenson@cwmars.org>
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
-- You should have received a copy of the GNU General Public License along
-- with this program; if not, write to the Free Software Foundation, Inc.,
-- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\pset pager off
-- Set up: Create some tables and views.
CREATE VIEW staging.user_anons AS
SELECT u.id
FROM actor.usr u
JOIN permission.grp_descendants(2) t ON u.profile = t.id
WHERE NOT u.deleted;
CREATE UNLOGGED TABLE staging.first_names (
first_name char(20),
letter char(1)
);
CREATE UNLOGGED TABLE staging.street_names (
name char(20),
letter char(1)
);
-- Add some data for mangling.
INSERT INTO staging.first_names (first_name, letter) VALUES
('BERT', 'a'),
('CAL', 'b'),
('DEL', 'c'),
('ELLEN', 'd'),
('FRED', 'e'),
('GRETA', 'f'),
('HELEN', 'g'),
('ISABELLA', 'h'),
('JANET', 'i'),
('KELLY', 'j'),
('LARRY', 'k'),
('MAX', 'l'),
('NANCY', 'm'),
('OLLIE', 'n'),
('PENNY', 'o'),
('QUINN', 'p'),
('RON', 'q'),
('SARA', 'r'),
('TERRY', 's'),
('URSULA', 't'),
('VIVIAN', 'u'),
('WENDELL', 'v'),
('XAVIER', 'w'),
('YURI', 'x'),
('ZOE', 'y'),
('ALEX', 'z');
INSERT INTO staging.street_names (name, letter) VALUES
(' APPLE STREET', 'a'),
(' BEECH STREET', 'b'),
(' CANTALOUPE STREET', 'c'),
(' DOGWOOD STREET', 'd'),
(' ELM STREET', 'e'),
(' FRUIT STREET', 'f'),
(' GREEN STREET', 'g'),
(' HIGH STREET', 'h'),
(' INMAN STREET', 'i'),
(' JUNIPER STREET', 'j'),
(' KELLY STREET', 'k'),
(' LOCUST STREET', 'l'),
(' MARIGOLD STREET', 'm'),
(' NEW STREET', 'n'),
(' ORANGE STREET', 'o'),
(' PEAR STREET', 'p'),
(' QUINOX STREET', 'q'),
(' RUST STREET', 'r'),
(' SAND STREET', 's'),
(' TURNIP STREET', 't'),
(' UNDER STREET', 'u'),
(' WELCH STREET', 'v'),
(' XAVIER STREET', 'w'),
(' YELLOW STREET', 'x'),
(' VELVET STREET', 'y'),
(' SPRUCE STREET', 'z');
-- Mangle patron first names.
UPDATE actor.usr
SET first_given_name = n.first_name,
usrname = 'ANON-' || CAST(usr.id AS TEXT)
FROM staging.first_names n, staging.user_anons a
WHERE UPPER(n.letter) = UPPER(SUBSTRING(usr.first_given_name FROM 1 FOR 1))
AND usr.id = a.id
AND usr.profile NOT IN (152,158,253)
AND first_given_name NOT ILIKE '%LIBRARY%'
AND family_name NOT ILIKE '%LIBRARY%';
-- Empty preferred name fields.
UPDATE actor.usr
SET pref_prefix = NULL,
pref_first_given_name = NULL,
pref_second_given_name = NULL,
pref_family_name = NULL,
pref_suffix = NULL,
name_keywords = NULL
FROM staging.user_anons
WHERE usr.id = user_anons.id;
--Mangle addresses.
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+a'
AND n.letter = 'a';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+b'
AND n.letter = 'b';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+c'
AND n.letter = 'c';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+d'
AND n.letter = 'd';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+e'
AND n.letter = 'e';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+f'
AND n.letter = 'f';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+g'
AND n.letter = 'g';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+h'
AND n.letter = 'h';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+i'
AND n.letter = 'i';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+j'
AND n.letter = 'j';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+k'
AND n.letter = 'k';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+l'
AND n.letter = 'l';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+m'
AND n.letter = 'm';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+n'
AND n.letter = 'n';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+o'
AND n.letter = 'o';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+p'
AND n.letter = 'p';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+q'
AND n.letter = 'q';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+r'
AND n.letter = 'r';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+s'
AND n.letter = 's';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+t'
AND n.letter = 't';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+u'
AND n.letter = 'u';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+v'
AND n.letter = 'v';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+w'
AND n.letter = 'w';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+x'
AND n.letter = 'x';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+y'
AND n.letter = 'y';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street1 ~* '[0-9]\s+z'
AND n.letter = 'z';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+a'
AND n.letter = 'a';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+b'
AND n.letter = 'b';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+c'
AND n.letter = 'c';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+d'
AND n.letter = 'd';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+e'
AND n.letter = 'e';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+f'
AND n.letter = 'f';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+g'
AND n.letter = 'g';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+h'
AND n.letter = 'h';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+i'
AND n.letter = 'i';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+j'
AND n.letter = 'j';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+k'
AND n.letter = 'k';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+l'
AND n.letter = 'l';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+m'
AND n.letter = 'm';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+n'
AND n.letter = 'n';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+o'
AND n.letter = 'o';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+p'
AND n.letter = 'p';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+q'
AND n.letter = 'q';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+r'
AND n.letter = 'r';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+s'
AND n.letter = 's';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+t'
AND n.letter = 't';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+u'
AND n.letter = 'u';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+v'
AND n.letter = 'v';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+w'
AND n.letter = 'w';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+x'
AND n.letter = 'x';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+y'
AND n.letter = 'y';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || n.name
FROM staging.street_names n
WHERE usr_address.street2 ~* '[0-9]\s+z'
AND n.letter = 'z';
UPDATE actor.usr_address
SET street1 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || ' MAIN ST'
WHERE usr_address.street1 !~* '[0-9]\s+[a-z]'
AND usr_address.street1 !~* 'library';
UPDATE actor.usr_address
SET street2 = SUBSTRING(CAST(id AS TEXT) FROM 1 FOR 4) || ' MAIN ST'
WHERE usr_address.street1 !~* '[0-9]\s+[a-z]'
AND usr_address.street2 !~* 'library';
--Mangle phone numbers.
UPDATE actor.usr
SET day_phone = '508-711-1234'
FROM staging.user_anons
WHERE day_phone ~ '[0-9]'
AND usr.id = user_anons.id;
UPDATE actor.usr
SET evening_phone = '508-722-3456'
FROM staging.user_anons
WHERE evening_phone ~ '[0-9]'
AND usr.id = user_anons.id;
UPDATE actor.usr
SET other_phone = '508-733-4567'
FROM staging.user_anons
WHERE other_phone ~ '[0-9]'
AND usr.id = user_anons.id;
-- Mangle email addresses.
UPDATE actor.usr
SET email = NULL
WHERE email IS NOT NULL;
-- Change patron passwords.
-- SELECT actor.change_password(id, 'mypassword')
-- FROM staging.user_anons;
-- Mangle dates of birth.
UPDATE actor.usr
SET dob = dob + '7 years'::INTERVAL
WHERE id <= 200000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob + '2 years'::INTERVAL
WHERE id > 200000
AND id <= 400000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob - '2 years'::INTERVAL
WHERE id > 400000
AND id <= 600000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob - '3 years'::INTERVAL
WHERE id > 600000
AND id <= 800000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob + '3 years'::INTERVAL
WHERE id > 800000
AND id <= 1000000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob - '1 years'::INTERVAL
WHERE id > 1000000
AND id <= 1200000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = dob + '1 years'::INTERVAL
WHERE id > 1200000
AND dob IS NOT NULL;
UPDATE actor.usr
SET dob = NULL
WHERE dob::date > NOW()
AND dob IS NOT NULL;
-- Mangle ident value.
UPDATE actor.usr
SET ident_value = id
WHERE ident_value ~ '[0-9]';
-- Anonymize hold requests
UPDATE action.hold_request
SET sms_notify = NULL,
sms_carrier = NULL,
phone_notify = usr.day_phone
FROM actor.usr
WHERE hold_request.usr = usr.id;
-- Anonymize phone and sms settings.
UPDATE actor.usr_setting
SET value = '"' ||
CASE name
WHEN 'opac.default_phone'
THEN COALESCE(usr.day_phone, '508-711-1234')
WHEN 'opac.default_sms_notify'
THEN COALESCE(usr.other_phone, '508-733-4567')
END || '"'
FROM actor.usr
WHERE usr_setting.usr = usr.id
AND (usr_setting.name = 'opac.default_phone'
OR usr_setting.name = 'opac.default_sms_notify');
-- Truncate some tables.
TRUNCATE action_trigger.event_output CASCADE;
TRUNCATE auditor.acq_invoice_entry_history;
TRUNCATE auditor.acq_invoice_history;
TRUNCATE auditor.acq_invoice_item_history;
TRUNCATE auditor.actor_org_unit_history;
TRUNCATE auditor.actor_usr_address_history;
TRUNCATE auditor.actor_usr_history;
TRUNCATE auditor.asset_call_number_history;
TRUNCATE auditor.asset_copy_history;
TRUNCATE auditor.biblio_record_entry_history;
TRUNCATE auditor.serial_caption_and_pattern_history;
TRUNCATE auditor.serial_issuance_history;
TRUNCATE auditor.serial_unit_history;
-- Drop our staging views and tables.
DROP VIEW IF EXISTS staging.user_anons;
DROP TABLE IF EXISTS staging.first_names;
DROP TABLE IF EXISTS staging.street_names;
-- Stop recurring reports
TRUNCATE reporter.schedule;
UPDATE reporter.report SET recur = FALSE WHERE recur = TRUE;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment