Import Snowflake tags into Satori's Data Inventory
{
"cells": [
{
"cell_type": "markdown",
"id": "b460cfb7",
"metadata": {},
"source": [
"### Fill out the next cell with required values\n",
"\n",
"**This example expects a CSV file from Snowflake with the following format (this example assumes a database name of \"TYA_DATA\", schema \"PUBLIC\", and table \"PEOPLE\")**\n",
"\n",
"```\n",
"TAG_DATABASE,TAG_SCHEMA,OBJECT_NAME,COLUMN_NAME,TAG_VALUE\n",
"TYA_DATA,PUBLIC,PEOPLE,NAME_LAST,PERSONNAME\n",
"TYA_DATA,PUBLIC,PEOPLE,ADDRESS,ADDRESS\n",
"\n",
"```\n",
"\n",
"**In Snowflake, we ran the following SQL and then waited two hours (a standard Snowflake timing/cache item):**\n",
"\n",
"```\n",
"create or replace tag satori_c12n_pii allowed_values \n",
"'ADDRESS' , 'EMAIL', 'PERSONNAME', 'PHONE', 'SSN';\n",
"\n",
"create or replace tag satori_c12n_piilinkable allowed_values \n",
"'GENDER', 'USERNAME', 'CITY', 'STATE', 'ZIPCODE', 'STATE';\n",
"\n",
"ALTER TABLE tya_data.public.people\n",
" MODIFY COLUMN address\n",
" SET TAG satori_c12n_pii ='ADDRESS';\n",
"\n",
"ALTER TABLE tya_data.public.people\n",
" MODIFY COLUMN email\n",
" SET TAG satori_c12n_pii ='EMAIL';\n",
"\n",
"ALTER TABLE tya_data.public.people\n",
" MODIFY COLUMN name_last\n",
" SET TAG satori_c12n_pii ='PERSONNAME';\n",
"```\n",
"\n",
"**After two hours, we ran the following sql and then downloaded the results as a CSV file**\n",
"```\n",
"SELECT tag_database, tag_schema, object_name, column_name, tag_value FROM\n",
"SNOWFLAKE.ACCOUNT_USAGE.TAG_REFERENCES\n",
"WHERE TAG_NAME = 'SATORI_C12N_PII';\n",
"```\n",
"\n",
"**This file is what this Jupyter Notebook expects to see**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10465e9e",
"metadata": {},
"outputs": [],
"source": [
"# your account info, following must be filled in\n",
"satori_account_id = \"\"\n",
"satori_serviceaccount_id = \"\"\n",
"satori_serviceaccount_key = \"\"\n",
"\n",
"# leave this host value as is for Satori production accounts\n",
"apihost = \"app.satoricyber.com\"\n",
"\n",
"# location of our csv input file\n",
"csv_input_file = \"tags_for_locations_from_snowflake.csv\"\n",
"\n",
"# action to take, the choices are 'add' or 'remove' - this allows for a full UNDO of the csv file\n",
"action = \"add\"\n",
"\n",
"# this message will appear in the location history for each field that is updated\n",
"location_history_message = \"Location Updated by Snowflake CSV Import Process, action: \" + action\n",
"\n",
"# The Snowflake database \"Display Name\" connection name from Satori Datastores\n",
"snowflake_datastore_name = \"New YCB Snowflake Connection\""
]
},
{
"cell_type": "markdown",
"id": "22678bfa",
"metadata": {},
"source": [
"#### for demonstration purposes, no changes needed below this line"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09e8ed72",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import requests\n",
"import csv\n",
"import io"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbd49cfd",
"metadata": {},
"outputs": [],
"source": [
"# our auth function to get a bearer token\n",
"\n",
"def satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost):\n",
" auth_headers = {'content-type': 'application/json','accept': 'application/json'}\n",
" auth_url = \"https://{}/api/authentication/token\".format(apihost)\n",
" auth_body = json.dumps(\n",
" {\n",
" \"serviceAccountId\": satori_serviceaccount_id,\n",
" \"serviceAccountKey\": satori_serviceaccount_key\n",
" })\n",
" try:\n",
" r = requests.post(auth_url, headers=auth_headers, data=auth_body)\n",
" response = r.json()\n",
" satori_token = response[\"token\"]\n",
" except Exception as err:\n",
" print(\"Bearer Token Failure: :\", err)\n",
" print(\"Exception TYPE:\", type(err))\n",
" else:\n",
" return satori_token"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77fab60d",
"metadata": {},
"outputs": [],
"source": [
"# get the token and also create request header\n",
"\n",
"session_token = satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost)\n",
"#example\n",
"#print(session_token)\n",
"\n",
"headers = {'Authorization': 'Bearer {}'.format(session_token), 'Content-Type': 'application/json', 'Accept': 'application/json'}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56c6beb4",
"metadata": {},
"outputs": [],
"source": [
"# get a Datastore ID by searching for its name\n",
"\n",
"def get_datastore_id(datastore_name):\n",
"\n",
" url = \"https://{}/api/v1/datastore?accountId={}&search={}\".format(apihost, \n",
" satori_account_id, \n",
" datastore_name)\n",
" try:\n",
" response = requests.get(url, headers=headers)\n",
" response.raise_for_status()\n",
" except requests.exceptions.RequestException as err:\n",
" print(\"EXCEPTION: \", type(err))\n",
" else:\n",
" return response.json()['records'][0]['id']\n",
" \n",
"#example\n",
"#print(get_datastore_id(\"AWS Postgres Data\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "522b61e7",
"metadata": {},
"outputs": [],
"source": [
"# get a Location ID by searching within datastore_id for database.schema.table.field.semiPath\n",
"\n",
"def get_location_id(datastore_id, location_prefix):\n",
"\n",
" url = \"https://{}/api/locations/{}/query?pageSize=100&dataStoreId={}&locationPrefix={}\".format(\n",
" apihost, \n",
" satori_account_id, \n",
" datastore_id,\n",
" location_prefix)\n",
" try:\n",
" response = requests.get(url, headers=headers)\n",
" response.raise_for_status()\n",
" except requests.exceptions.RequestException as err:\n",
" print(\"EXCEPTION: \", type(err))\n",
" else:\n",
" return response.json()['records'][0]['id']\n",
" \n",
"#example\n",
"#print(get_location_id('b35585f4-08b4-415c-82a1-d6c1af17fcbb', 'postgres.public.v2_people.blood_type'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ceaf7f7",
"metadata": {},
"outputs": [],
"source": [
"# load all of the custom taxonomy as a dictionary\n",
"\n",
"def load_all_custom_taxonomy():\n",
" \n",
" url = \"https://{}/api/v1/taxonomy/custom?accountId={}\".format(apihost,satori_account_id)\n",
" try:\n",
" response = requests.get(url, headers=headers)\n",
" response.raise_for_status()\n",
" except requests.exceptions.RequestException as err:\n",
" print(\"EXCEPTION: \", type(err))\n",
" else:\n",
" custom_taxonomy = {}\n",
" for item in response.json()['records']:\n",
" custom_taxonomy[item['tag']] = item['id']\n",
" return custom_taxonomy\n",
"\n",
"#example \n",
"#print(load_all_custom_taxonomy())\n",
"\n",
"#now actually do it\n",
"custom_taxonomy = load_all_custom_taxonomy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9fe3f57",
"metadata": {},
"outputs": [],
"source": [
"# build our PUT payload, takes into account whether we are adding or removing the tags\n",
"\n",
"def build_update_location_payload(action, taxonomy_id):\n",
" payload =''\n",
" if action == 'add':\n",
" payload = json.dumps(\n",
" {\n",
" \"addTags\": [\n",
" taxonomy_id\n",
" ],\n",
" \"removeTags\": [\n",
" ],\n",
" \"notes\": location_history_message\n",
" } \n",
" )\n",
" elif action == 'remove':\n",
" payload = json.dumps(\n",
" {\n",
" \"addTags\": [\n",
" ],\n",
" \"removeTags\": [\n",
" taxonomy_id\n",
" ],\n",
" \"notes\": location_history_message\n",
" } \n",
" )\n",
" return payload\n",
" \n",
"#example\n",
"#print(build_update_location_payload(\"51d3d892-26f6-41b8-ab0c-c5638c386379\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785e00d6",
"metadata": {},
"outputs": [],
"source": [
"# update one location with one tag (aka taxonomy id)\n",
"\n",
"def update_one_location(action, location_id, custom_taxonomy_id):\n",
" url = \"https://{}/api/locations/{}\".format(apihost,location_id)\n",
" \n",
" payload = build_update_location_payload(action, custom_taxonomy_id)\n",
" \n",
" try:\n",
" response = requests.put(url=url, headers=headers, data=payload)\n",
" response.raise_for_status()\n",
" except requests.exceptions.RequestException as err:\n",
" print(str(err))\n",
" else:\n",
" return response.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72409700",
"metadata": {},
"outputs": [],
"source": [
"# load our input from the csv file\n",
"\n",
"tags_for_locations_from_csv_file = []\n",
"\n",
"with open(csv_input_file, 'r', newline='\\n') as csvfile:\n",
" next(csvfile)\n",
" csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n",
" for row in csvreader:\n",
" tags_for_locations_from_csv_file.append(row)"
]
},
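{
"cell_type": "code",
"execution_count": null,
"id": "0ab3c7e1",
"metadata": {},
"outputs": [],
"source": [
"# optional sanity check: warn about any Snowflake TAG_VALUE that has no matching\n",
"# Satori custom tag name, so missing taxonomy entries are visible before any\n",
"# locations are updated (assumes Satori custom tag names mirror the Snowflake tag values)\n",
"\n",
"for item in tags_for_locations_from_csv_file:\n",
"    if item[4] not in custom_taxonomy:\n",
"        print(\"WARNING: no Satori custom tag named '{}' was found\".format(item[4]))"
]
},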
{
"cell_type": "code",
"execution_count": null,
"id": "51dff242",
"metadata": {},
"outputs": [],
"source": [
"for item in tags_for_locations_from_csv_file:\n",
" \n",
" location_prefix = item[0] + \".\" + item[1] + \".\" + item[2] + \".\" + item[3]\n",
" taxonomy_id = item[4]\n",
" \n",
" datastore_id = get_datastore_id(snowflake_datastore_name)\n",
" location_id = get_location_id(datastore_id, location_prefix)\n",
" response = update_one_location(action, location_id, taxonomy_id)\n",
" print(\"ACTION TAKEN: \" + str(response) + \"\\n\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}