Instantly share code, notes, and snippets.
Created
July 26, 2023 18:23
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save northwestcoder/2dc5cb856ac71a62cf13fff5ef426004 to your computer and use it in GitHub Desktop.
Import Snowflake tags into Satori's Data Inventory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "b460cfb7", | |
"metadata": {}, | |
"source": [ | |
"### Fill out the next cell with required values\n", | |
"\n", | |
"**This example expects a CSV file from Snowflake with the following format (this example assumes a database name of \"TYA_DATA\", schema \"PUBLIC\", and table \"PEOPLE\")**\n", | |
"\n", | |
"```\n", | |
"TAG_DATABASE,TAG_SCHEMA,OBJECT_NAME,COLUMN_NAME,TAG_VALUE\n", | |
"TYA_DATA,PUBLIC,PEOPLE,NAME_LAST,PERSONNAME\n", | |
"TYA_DATA,PUBLIC,PEOPLE,ADDRESS,ADDRESS\n", | |
"\n", | |
"```\n", | |
"\n", | |
"**In Snowflake, we ran the following SQL and then waited two hours (a standard Snowflake timing/cache item):**\n", | |
"\n", | |
"```\n", | |
"create or replace tag satori_c12n_pii allowed_values \n", | |
"'ADDRESS' , 'EMAIL', 'PERSONNAME', 'PHONE', 'SSN';\n", | |
"\n", | |
"create or replace tag satori_c12n_piilinkable allowed_values \n", | |
"'GENDER', 'USERNAME', 'CITY', 'STATE', 'ZIPCODE', 'STATE';\n", | |
"\n", | |
"ALTER TABLE tya_data.public.people\n", | |
" MODIFY COLUMN address\n", | |
" SET TAG satori_c12n_pii ='ADDRESS';\n", | |
"\n", | |
"ALTER TABLE tya_data.public.people\n", | |
" MODIFY COLUMN email\n", | |
" SET TAG satori_c12n_pii ='EMAIL';\n", | |
"\n", | |
"ALTER TABLE tya_data.public.people\n", | |
" MODIFY COLUMN name_last\n", | |
" SET TAG satori_c12n_pii ='PERSONNAME';\n", | |
"```\n", | |
"\n", | |
"**After two hours, we ran the following sql and then downloaded the results as a CSV file**\n", | |
"```\n", | |
"SELECT tag_database, tag_schema, object_name, column_name, tag_value FROM\n", | |
"SNOWFLAKE.ACCOUNT_USAGE.TAG_REFERENCES\n", | |
"WHERE TAG_NAME = 'SATORI_C12N_PII';\n", | |
"```\n", | |
"\n", | |
"**This file is what this Jupyter Notebook expects to see**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "10465e9e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# your account info, following must be filled in\n", | |
"satori_account_id = \"\"\n", | |
"satori_serviceaccount_id = \"\"\n", | |
"satori_serviceaccount_key = \"\"\n", | |
"\n", | |
"# leave this host value as is for Satori production accounts\n", | |
"apihost = \"app.satoricyber.com\"\n", | |
"\n", | |
"# location of our csv input file\n", | |
"csv_input_file = \"tags_for_locations_from_snowflake.csv\"\n", | |
"\n", | |
"# action to take, the choices are 'add' or 'remove' - this allows for a full UNDO of the csv file\n", | |
"action = \"add\"\n", | |
"\n", | |
"# this message will appear in the location history for each field that is updated\n", | |
"location_history_message = \"Location Updated by Snowflake CSV Import Process, action: \" + action\n", | |
"\n", | |
"# The Snowflake database \"Display Name\" connection name from Satori Datastores\n", | |
"snowflake_datastore_name = \"New YCB Snowflake Connection\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "22678bfa", | |
"metadata": {}, | |
"source": [ | |
"#### for demonstration purposes, no changes needed below this line" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "09e8ed72", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import requests\n", | |
"import csv\n", | |
"import io" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bbd49cfd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# our auth function to get a bearer token\n", | |
"\n", | |
"def satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost):\n", | |
" auth_headers = {'content-type': 'application/json','accept': 'application/json'}\n", | |
" auth_url = \"https://{}/api/authentication/token\".format(apihost)\n", | |
" auth_body = json.dumps(\n", | |
" {\n", | |
" \"serviceAccountId\": satori_serviceaccount_id,\n", | |
" \"serviceAccountKey\": satori_serviceaccount_key\n", | |
" })\n", | |
" try:\n", | |
" r = requests.post(auth_url, headers=auth_headers, data=auth_body)\n", | |
" response = r.json()\n", | |
" satori_token = response[\"token\"]\n", | |
" except Exception as err:\n", | |
" print(\"Bearer Token Failure: :\", err)\n", | |
" print(\"Exception TYPE:\", type(err))\n", | |
" else:\n", | |
" return satori_token" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "77fab60d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get the token and also create request header\n", | |
"\n", | |
"session_token = satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost)\n", | |
"#example\n", | |
"#print(session_token)\n", | |
"\n", | |
"headers = {'Authorization': 'Bearer {}'.format(session_token), 'Content-Type': 'application/json', 'Accept': 'application/json'}\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "56c6beb4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get a Datastore ID by searching for its name\n", | |
"\n", | |
"def get_datastore_id(datastore_name):\n", | |
"\n", | |
" url = \"https://{}/api/v1/datastore?accountId={}&search={}\".format(apihost, \n", | |
" satori_account_id, \n", | |
" datastore_name)\n", | |
" try:\n", | |
" response = requests.get(url, headers=headers)\n", | |
" response.raise_for_status()\n", | |
" except requests.exceptions.RequestException as err:\n", | |
" print(\"EXCEPTION: \", type(err))\n", | |
" else:\n", | |
" return response.json()['records'][0]['id']\n", | |
" \n", | |
"#example\n", | |
"#print(get_datastore_id(\"AWS Postgres Data\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "522b61e7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get a Location ID by searching within datastore_id for database.schema.table.field.semiPath\n", | |
"\n", | |
"def get_location_id(datastore_id, location_prefix):\n", | |
"\n", | |
" url = \"https://{}/api/locations/{}/query?pageSize=100&dataStoreId={}&locationPrefix={}\".format(\n", | |
" apihost, \n", | |
" satori_account_id, \n", | |
" datastore_id,\n", | |
" location_prefix)\n", | |
" try:\n", | |
" response = requests.get(url, headers=headers)\n", | |
" response.raise_for_status()\n", | |
" except requests.exceptions.RequestException as err:\n", | |
" print(\"EXCEPTION: \", type(err))\n", | |
" else:\n", | |
" return response.json()['records'][0]['id']\n", | |
" \n", | |
"#example\n", | |
"#print(get_location_id('b35585f4-08b4-415c-82a1-d6c1af17fcbb', 'postgres.public.v2_people.blood_type'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2ceaf7f7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# load all of the custom taxonomy as a dictionary\n", | |
"\n", | |
"def load_all_custom_taxonomy():\n", | |
" \n", | |
" url = \"https://{}/api/v1/taxonomy/custom?accountId={}\".format(apihost,satori_account_id)\n", | |
" try:\n", | |
" response = requests.get(url, headers=headers)\n", | |
" response.raise_for_status()\n", | |
" except requests.exceptions.RequestException as err:\n", | |
" print(\"EXCEPTION: \", type(err))\n", | |
" else:\n", | |
" custom_taxonomy = {}\n", | |
" for item in response.json()['records']:\n", | |
" custom_taxonomy[item['tag']] = item['id']\n", | |
" return custom_taxonomy\n", | |
"\n", | |
"#example \n", | |
"#print(load_all_custom_taxonomy())\n", | |
"\n", | |
"#now actually do it\n", | |
"custom_taxonomy = load_all_custom_taxonomy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d9fe3f57", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# build our PUT payload, takes into account whether we are adding or removing the tags\n", | |
"\n", | |
"def build_update_location_payload(action, taxonomy_id):\n", | |
" payload =''\n", | |
" if action == 'add':\n", | |
" payload = json.dumps(\n", | |
" {\n", | |
" \"addTags\": [\n", | |
" taxonomy_id\n", | |
" ],\n", | |
" \"removeTags\": [\n", | |
" ],\n", | |
" \"notes\": location_history_message\n", | |
" } \n", | |
" )\n", | |
" elif action == 'remove':\n", | |
" payload = json.dumps(\n", | |
" {\n", | |
" \"addTags\": [\n", | |
" ],\n", | |
" \"removeTags\": [\n", | |
" taxonomy_id\n", | |
" ],\n", | |
" \"notes\": location_history_message\n", | |
" } \n", | |
" )\n", | |
" return payload\n", | |
" \n", | |
"#example\n", | |
"#print(build_update_location_payload(\"51d3d892-26f6-41b8-ab0c-c5638c386379\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "785e00d6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# update one location with one tag (aka taxonomy id)\n", | |
"\n", | |
"def update_one_location(action, location_id, custom_taxonomy_id):\n", | |
" url = \"https://{}/api/locations/{}\".format(apihost,location_id)\n", | |
" \n", | |
" payload = build_update_location_payload(action, custom_taxonomy_id)\n", | |
" \n", | |
" try:\n", | |
" response = requests.put(url=url, headers=headers, data=payload)\n", | |
" response.raise_for_status()\n", | |
" except requests.exceptions.RequestException as err:\n", | |
" print(str(err))\n", | |
" else:\n", | |
" return response.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "72409700", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# load our input from the csv file\n", | |
"\n", | |
"tags_for_locations_from_csv_file = []\n", | |
"\n", | |
"with open(csv_input_file, 'r', newline='\\n') as csvfile:\n", | |
" next(csvfile)\n", | |
" csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n", | |
" for row in csvreader:\n", | |
" tags_for_locations_from_csv_file.append(row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "51dff242", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for item in tags_for_locations_from_csv_file:\n", | |
" \n", | |
" location_prefix = item[0] + \".\" + item[1] + \".\" + item[2] + \".\" + item[3]\n", | |
" taxonomy_id = item[4]\n", | |
" \n", | |
" datastore_id = get_datastore_id(snowflake_datastore_name)\n", | |
" location_id = get_location_id(datastore_id, location_prefix)\n", | |
" response = update_one_location(action, location_id, taxonomy_id)\n", | |
" print(\"ACTION TAKEN: \" + str(response) + \"\\n\")\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment