Skip to content

Instantly share code, notes, and snippets.

@robcowie
Created July 19, 2018 15:25
Show Gist options
  • Save robcowie/cd501187ad7fc1159bf8fbf269f8fb24 to your computer and use it in GitHub Desktop.
Save robcowie/cd501187ad7fc1159bf8fbf269f8fb24 to your computer and use it in GitHub Desktop.
Investigating IP anonymisation in Bigquery
#standardSQL
CREATE TEMPORARY FUNCTION anonIPToBytes(ip string) AS (
-- remove the last 8 bits of an IPv4 address (32 - 8 = 24)
NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), 24)
-- TODO: how to distinguish v4 and v6?
-- remove the last 80 bits of an IPv6 address (128 - 80 = 48)
-- NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), 48)
);
INSERT INTO temp.ip_anon_testing (ip_string, ip_byt, ip_anon_byt, ip_anon_str)
VALUES
('141.0.144.226', NET.SAFE_IP_FROM_STRING('141.0.144.226'), anonIPToBytes('141.0.144.226'), NET.IP_TO_STRING(anonIPToBytes('141.0.144.226')));
(null, NET.SAFE_IP_FROM_STRING(null), anonIPToBytes(null), NET.IP_TO_STRING(anonIPToBytes(null)));
# DELETE FROM temp.ip_anon_testing WHERE true;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment