northwestcoder/import_satori_tags_from_snowflake.ipynb

## import_satori_tags_from_snowflake.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b460cfb7",
   "metadata": {},
   "source": [
    "### Fill out the next cell with required values\n",
    "\n",
    "**This example expects a CSV file from Snowflake with the following format (this example assumes a database name of \"TYA_DATA\", schema \"PUBLIC\", and table \"PEOPLE\")**\n",
    "\n",
    "```\n",
    "TAG_DATABASE,TAG_SCHEMA,OBJECT_NAME,COLUMN_NAME,TAG_VALUE\n",
    "TYA_DATA,PUBLIC,PEOPLE,NAME_LAST,PERSONNAME\n",
    "TYA_DATA,PUBLIC,PEOPLE,ADDRESS,ADDRESS\n",
    "\n",
    "```\n",
    "\n",
    "**In Snowflake, we ran the following SQL and then waited two hours (a standard Snowflake timing/cache item):**\n",
    "\n",
    "```\n",
    "create or replace tag satori_c12n_pii allowed_values \n",
    "'ADDRESS' , 'EMAIL', 'PERSONNAME', 'PHONE', 'SSN';\n",
    "\n",
    "create or replace tag satori_c12n_piilinkable allowed_values \n",
    "'GENDER', 'USERNAME', 'CITY', 'STATE', 'ZIPCODE', 'STATE';\n",
    "\n",
    "ALTER TABLE tya_data.public.people\n",
    "  MODIFY COLUMN address\n",
    "   SET TAG satori_c12n_pii ='ADDRESS';\n",
    "\n",
    "ALTER TABLE tya_data.public.people\n",
    "  MODIFY COLUMN email\n",
    "   SET TAG satori_c12n_pii ='EMAIL';\n",
    "\n",
    "ALTER TABLE tya_data.public.people\n",
    "  MODIFY COLUMN name_last\n",
    "   SET TAG satori_c12n_pii ='PERSONNAME';\n",
    "```\n",
    "\n",
    "**After two hours, we ran the following sql and then downloaded the results as a CSV file**\n",
    "```\n",
    "SELECT tag_database, tag_schema, object_name, column_name, tag_value FROM\n",
    "SNOWFLAKE.ACCOUNT_USAGE.TAG_REFERENCES\n",
    "WHERE TAG_NAME = 'SATORI_C12N_PII';\n",
    "```\n",
    "\n",
    "**This file is what this Jupyter Notebook expects to see**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10465e9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# your account info, following must be filled in\n",
    "satori_account_id = \"\"\n",
    "satori_serviceaccount_id = \"\"\n",
    "satori_serviceaccount_key = \"\"\n",
    "\n",
    "# leave this host value as is for Satori production accounts\n",
    "apihost = \"app.satoricyber.com\"\n",
    "\n",
    "# location of our csv input file\n",
    "csv_input_file = \"tags_for_locations_from_snowflake.csv\"\n",
    "\n",
    "# action to take, the choices are 'add' or 'remove' - this allows for a full UNDO of the csv file\n",
    "action = \"add\"\n",
    "\n",
    "# this message will appear in the location history for each field that is updated\n",
    "location_history_message = \"Location Updated by Snowflake CSV Import Process, action: \" + action\n",
    "\n",
    "# The Snowflake database \"Display Name\" connection name from Satori Datastores\n",
    "snowflake_datastore_name = \"New YCB Snowflake Connection\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "22678bfa",
   "metadata": {},
   "source": [
    "#### for demonstration purposes, no changes needed below this line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09e8ed72",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import requests\n",
    "import csv\n",
    "import io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bbd49cfd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# our auth function to get a bearer token\n",
    "\n",
    "def satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost):\n",
    "    auth_headers = {'content-type': 'application/json','accept': 'application/json'}\n",
    "    auth_url = \"https://{}/api/authentication/token\".format(apihost)\n",
    "    auth_body = json.dumps(\n",
    "    {\n",
    "        \"serviceAccountId\": satori_serviceaccount_id,\n",
    "        \"serviceAccountKey\": satori_serviceaccount_key\n",
    "    })\n",
    "    try:\n",
    "        r = requests.post(auth_url, headers=auth_headers, data=auth_body)\n",
    "        response = r.json()\n",
    "        satori_token = response[\"token\"]\n",
    "    except Exception as err:\n",
    "        print(\"Bearer Token Failure: :\", err)\n",
    "        print(\"Exception TYPE:\", type(err))\n",
    "    else:\n",
    "        return satori_token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77fab60d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the token and also create request header\n",
    "\n",
    "session_token = satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost)\n",
    "#example\n",
    "#print(session_token)\n",
    "\n",
    "headers = {'Authorization': 'Bearer {}'.format(session_token), 'Content-Type': 'application/json', 'Accept': 'application/json'}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56c6beb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get a Datastore ID by searching for its name\n",
    "\n",
    "def get_datastore_id(datastore_name):\n",
    "\n",
    "    url = \"https://{}/api/v1/datastore?accountId={}&search={}\".format(apihost, \n",
    "                                                                    satori_account_id, \n",
    "                                                                    datastore_name)\n",
    "    try:\n",
    "        response = requests.get(url, headers=headers)\n",
    "        response.raise_for_status()\n",
    "    except requests.exceptions.RequestException as err:\n",
    "        print(\"EXCEPTION: \", type(err))\n",
    "    else:\n",
    "        return response.json()['records'][0]['id']\n",
    "        \n",
    "#example\n",
    "#print(get_datastore_id(\"AWS Postgres Data\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "522b61e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get a Location ID by searching within datastore_id for database.schema.table.field.semiPath\n",
    "\n",
    "def get_location_id(datastore_id, location_prefix):\n",
    "\n",
    "    url = \"https://{}/api/locations/{}/query?pageSize=100&dataStoreId={}&locationPrefix={}\".format(\n",
    "                                                                    apihost, \n",
    "                                                                    satori_account_id, \n",
    "                                                                    datastore_id,\n",
    "                                                                    location_prefix)\n",
    "    try:\n",
    "        response = requests.get(url, headers=headers)\n",
    "        response.raise_for_status()\n",
    "    except requests.exceptions.RequestException as err:\n",
    "        print(\"EXCEPTION: \", type(err))\n",
    "    else:\n",
    "        return response.json()['records'][0]['id']\n",
    "        \n",
    "#example\n",
    "#print(get_location_id('b35585f4-08b4-415c-82a1-d6c1af17fcbb', 'postgres.public.v2_people.blood_type'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ceaf7f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load all of the custom taxonomy as a dictionary\n",
    "\n",
    "def load_all_custom_taxonomy():\n",
    "                \n",
    "    url = \"https://{}/api/v1/taxonomy/custom?accountId={}\".format(apihost,satori_account_id)\n",
    "    try:\n",
    "        response = requests.get(url, headers=headers)\n",
    "        response.raise_for_status()\n",
    "    except requests.exceptions.RequestException as err:\n",
    "        print(\"EXCEPTION: \", type(err))\n",
    "    else:\n",
    "        custom_taxonomy = {}\n",
    "        for item in response.json()['records']:\n",
    "            custom_taxonomy[item['tag']] = item['id']\n",
    "        return custom_taxonomy\n",
    "\n",
    "#example    \n",
    "#print(load_all_custom_taxonomy())\n",
    "\n",
    "#now actually do it\n",
    "custom_taxonomy = load_all_custom_taxonomy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9fe3f57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# build our PUT payload, takes into account whether we are adding or removing the tags\n",
    "\n",
    "def build_update_location_payload(action, taxonomy_id):\n",
    "    payload =''\n",
    "    if action == 'add':\n",
    "        payload = json.dumps(\n",
    "            {\n",
    "              \"addTags\": [\n",
    "                taxonomy_id\n",
    "              ],\n",
    "              \"removeTags\": [\n",
    "              ],\n",
    "              \"notes\": location_history_message\n",
    "            }    \n",
    "        )\n",
    "    elif action == 'remove':\n",
    "        payload = json.dumps(\n",
    "            {\n",
    "              \"addTags\": [\n",
    "              ],\n",
    "              \"removeTags\": [\n",
    "                taxonomy_id\n",
    "              ],\n",
    "              \"notes\": location_history_message\n",
    "            }    \n",
    "        )\n",
    "    return payload\n",
    "    \n",
    "#example\n",
    "#print(build_update_location_payload(\"51d3d892-26f6-41b8-ab0c-c5638c386379\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "785e00d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# update one location with one tag (aka taxonomy id)\n",
    "\n",
    "def update_one_location(action, location_id, custom_taxonomy_id):\n",
    "    url = \"https://{}/api/locations/{}\".format(apihost,location_id)\n",
    "    \n",
    "    payload = build_update_location_payload(action, custom_taxonomy_id)\n",
    "    \n",
    "    try:\n",
    "        response = requests.put(url=url, headers=headers, data=payload)\n",
    "        response.raise_for_status()\n",
    "    except requests.exceptions.RequestException as err:\n",
    "        print(str(err))\n",
    "    else:\n",
    "        return response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72409700",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load our input from the csv file\n",
    "\n",
    "tags_for_locations_from_csv_file = []\n",
    "\n",
    "with open(csv_input_file, 'r', newline='\\n') as csvfile:\n",
    "    next(csvfile)\n",
    "    csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n",
    "    for row in csvreader:\n",
    "        tags_for_locations_from_csv_file.append(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51dff242",
   "metadata": {},
   "outputs": [],
   "source": [
    "for item in tags_for_locations_from_csv_file:\n",
    "    \n",
    "    location_prefix = item[0] + \".\" + item[1] + \".\" + item[2] + \".\" + item[3]\n",
    "    taxonomy_id = item[4]\n",
    "    \n",
    "    datastore_id = get_datastore_id(snowflake_datastore_name)\n",
    "    location_id = get_location_id(datastore_id, location_prefix)\n",
    "    response = update_one_location(action, location_id, taxonomy_id)\n",
    "    print(\"ACTION TAKEN: \" + str(response) + \"\\n\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"id": "b460cfb7",
	"metadata": {},
	"source": [
	"### Fill out the next cell with required values\n",
	"\n",
	"This example expects a CSV file from Snowflake with the following format (this example assumes a database name of \"TYA_DATA\", schema \"PUBLIC\", and table \"PEOPLE\")\n",
	"\n",
	"```\n",
	"TAG_DATABASE,TAG_SCHEMA,OBJECT_NAME,COLUMN_NAME,TAG_VALUE\n",
	"TYA_DATA,PUBLIC,PEOPLE,NAME_LAST,PERSONNAME\n",
	"TYA_DATA,PUBLIC,PEOPLE,ADDRESS,ADDRESS\n",
	"\n",
	"```\n",
	"\n",
	"In Snowflake, we ran the following SQL and then waited two hours (a standard Snowflake timing/cache item):\n",
	"\n",
	"```\n",
	"create or replace tag satori_c12n_pii allowed_values \n",
	"'ADDRESS' , 'EMAIL', 'PERSONNAME', 'PHONE', 'SSN';\n",
	"\n",
	"create or replace tag satori_c12n_piilinkable allowed_values \n",
	"'GENDER', 'USERNAME', 'CITY', 'STATE', 'ZIPCODE', 'STATE';\n",
	"\n",
	"ALTER TABLE tya_data.public.people\n",
	" MODIFY COLUMN address\n",
	" SET TAG satori_c12n_pii ='ADDRESS';\n",
	"\n",
	"ALTER TABLE tya_data.public.people\n",
	" MODIFY COLUMN email\n",
	" SET TAG satori_c12n_pii ='EMAIL';\n",
	"\n",
	"ALTER TABLE tya_data.public.people\n",
	" MODIFY COLUMN name_last\n",
	" SET TAG satori_c12n_pii ='PERSONNAME';\n",
	"```\n",
	"\n",
	"After two hours, we ran the following sql and then downloaded the results as a CSV file\n",
	"```\n",
	"SELECT tag_database, tag_schema, object_name, column_name, tag_value FROM\n",
	"SNOWFLAKE.ACCOUNT_USAGE.TAG_REFERENCES\n",
	"WHERE TAG_NAME = 'SATORI_C12N_PII';\n",
	"```\n",
	"\n",
	"This file is what this Jupyter Notebook expects to see"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "10465e9e",
	"metadata": {},
	"outputs": [],
	"source": [
	"# your account info, following must be filled in\n",
	"satori_account_id = \"\"\n",
	"satori_serviceaccount_id = \"\"\n",
	"satori_serviceaccount_key = \"\"\n",
	"\n",
	"# leave this host value as is for Satori production accounts\n",
	"apihost = \"app.satoricyber.com\"\n",
	"\n",
	"# location of our csv input file\n",
	"csv_input_file = \"tags_for_locations_from_snowflake.csv\"\n",
	"\n",
	"# action to take, the choices are 'add' or 'remove' - this allows for a full UNDO of the csv file\n",
	"action = \"add\"\n",
	"\n",
	"# this message will appear in the location history for each field that is updated\n",
	"location_history_message = \"Location Updated by Snowflake CSV Import Process, action: \" + action\n",
	"\n",
	"# The Snowflake database \"Display Name\" connection name from Satori Datastores\n",
	"snowflake_datastore_name = \"New YCB Snowflake Connection\""
	]
	},
	{
	"cell_type": "markdown",
	"id": "22678bfa",
	"metadata": {},
	"source": [
	"#### for demonstration purposes, no changes needed below this line"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "09e8ed72",
	"metadata": {},
	"outputs": [],
	"source": [
	"import json\n",
	"import requests\n",
	"import csv\n",
	"import io"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "bbd49cfd",
	"metadata": {},
	"outputs": [],
	"source": [
	"# our auth function to get a bearer token\n",
	"\n",
	"def satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost):\n",
	" auth_headers = {'content-type': 'application/json','accept': 'application/json'}\n",
	" auth_url = \"https://{}/api/authentication/token\".format(apihost)\n",
	" auth_body = json.dumps(\n",
	" {\n",
	" \"serviceAccountId\": satori_serviceaccount_id,\n",
	" \"serviceAccountKey\": satori_serviceaccount_key\n",
	" })\n",
	" try:\n",
	" r = requests.post(auth_url, headers=auth_headers, data=auth_body)\n",
	" response = r.json()\n",
	" satori_token = response[\"token\"]\n",
	" except Exception as err:\n",
	" print(\"Bearer Token Failure: :\", err)\n",
	" print(\"Exception TYPE:\", type(err))\n",
	" else:\n",
	" return satori_token"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "77fab60d",
	"metadata": {},
	"outputs": [],
	"source": [
	"# get the token and also create request header\n",
	"\n",
	"session_token = satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost)\n",
	"#example\n",
	"#print(session_token)\n",
	"\n",
	"headers = {'Authorization': 'Bearer {}'.format(session_token), 'Content-Type': 'application/json', 'Accept': 'application/json'}\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "56c6beb4",
	"metadata": {},
	"outputs": [],
	"source": [
	"# get a Datastore ID by searching for its name\n",
	"\n",
	"def get_datastore_id(datastore_name):\n",
	"\n",
	" url = \"https://{}/api/v1/datastore?accountId={}&search={}\".format(apihost, \n",
	" satori_account_id, \n",
	" datastore_name)\n",
	" try:\n",
	" response = requests.get(url, headers=headers)\n",
	" response.raise_for_status()\n",
	" except requests.exceptions.RequestException as err:\n",
	" print(\"EXCEPTION: \", type(err))\n",
	" else:\n",
	" return response.json()['records'][0]['id']\n",
	" \n",
	"#example\n",
	"#print(get_datastore_id(\"AWS Postgres Data\"))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "522b61e7",
	"metadata": {},
	"outputs": [],
	"source": [
	"# get a Location ID by searching within datastore_id for database.schema.table.field.semiPath\n",
	"\n",
	"def get_location_id(datastore_id, location_prefix):\n",
	"\n",
	" url = \"https://{}/api/locations/{}/query?pageSize=100&dataStoreId={}&locationPrefix={}\".format(\n",
	" apihost, \n",
	" satori_account_id, \n",
	" datastore_id,\n",
	" location_prefix)\n",
	" try:\n",
	" response = requests.get(url, headers=headers)\n",
	" response.raise_for_status()\n",
	" except requests.exceptions.RequestException as err:\n",
	" print(\"EXCEPTION: \", type(err))\n",
	" else:\n",
	" return response.json()['records'][0]['id']\n",
	" \n",
	"#example\n",
	"#print(get_location_id('b35585f4-08b4-415c-82a1-d6c1af17fcbb', 'postgres.public.v2_people.blood_type'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "2ceaf7f7",
	"metadata": {},
	"outputs": [],
	"source": [
	"# load all of the custom taxonomy as a dictionary\n",
	"\n",
	"def load_all_custom_taxonomy():\n",
	" \n",
	" url = \"https://{}/api/v1/taxonomy/custom?accountId={}\".format(apihost,satori_account_id)\n",
	" try:\n",
	" response = requests.get(url, headers=headers)\n",
	" response.raise_for_status()\n",
	" except requests.exceptions.RequestException as err:\n",
	" print(\"EXCEPTION: \", type(err))\n",
	" else:\n",
	" custom_taxonomy = {}\n",
	" for item in response.json()['records']:\n",
	" custom_taxonomy[item['tag']] = item['id']\n",
	" return custom_taxonomy\n",
	"\n",
	"#example \n",
	"#print(load_all_custom_taxonomy())\n",
	"\n",
	"#now actually do it\n",
	"custom_taxonomy = load_all_custom_taxonomy()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "d9fe3f57",
	"metadata": {},
	"outputs": [],
	"source": [
	"# build our PUT payload, takes into account whether we are adding or removing the tags\n",
	"\n",
	"def build_update_location_payload(action, taxonomy_id):\n",
	" payload =''\n",
	" if action == 'add':\n",
	" payload = json.dumps(\n",
	" {\n",
	" \"addTags\": [\n",
	" taxonomy_id\n",
	" ],\n",
	" \"removeTags\": [\n",
	" ],\n",
	" \"notes\": location_history_message\n",
	" } \n",
	" )\n",
	" elif action == 'remove':\n",
	" payload = json.dumps(\n",
	" {\n",
	" \"addTags\": [\n",
	" ],\n",
	" \"removeTags\": [\n",
	" taxonomy_id\n",
	" ],\n",
	" \"notes\": location_history_message\n",
	" } \n",
	" )\n",
	" return payload\n",
	" \n",
	"#example\n",
	"#print(build_update_location_payload(\"51d3d892-26f6-41b8-ab0c-c5638c386379\"))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "785e00d6",
	"metadata": {},
	"outputs": [],
	"source": [
	"# update one location with one tag (aka taxonomy id)\n",
	"\n",
	"def update_one_location(action, location_id, custom_taxonomy_id):\n",
	" url = \"https://{}/api/locations/{}\".format(apihost,location_id)\n",
	" \n",
	" payload = build_update_location_payload(action, custom_taxonomy_id)\n",
	" \n",
	" try:\n",
	" response = requests.put(url=url, headers=headers, data=payload)\n",
	" response.raise_for_status()\n",
	" except requests.exceptions.RequestException as err:\n",
	" print(str(err))\n",
	" else:\n",
	" return response.json()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "72409700",
	"metadata": {},
	"outputs": [],
	"source": [
	"# load our input from the csv file\n",
	"\n",
	"tags_for_locations_from_csv_file = []\n",
	"\n",
	"with open(csv_input_file, 'r', newline='\\n') as csvfile:\n",
	" next(csvfile)\n",
	" csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n",
	" for row in csvreader:\n",
	" tags_for_locations_from_csv_file.append(row)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "51dff242",
	"metadata": {},
	"outputs": [],
	"source": [
	"for item in tags_for_locations_from_csv_file:\n",
	" \n",
	" location_prefix = item[0] + \".\" + item[1] + \".\" + item[2] + \".\" + item[3]\n",
	" taxonomy_id = item[4]\n",
	" \n",
	" datastore_id = get_datastore_id(snowflake_datastore_name)\n",
	" location_id = get_location_id(datastore_id, location_prefix)\n",
	" response = update_one_location(action, location_id, taxonomy_id)\n",
	" print(\"ACTION TAKEN: \" + str(response) + \"\\n\")\n"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}