Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save northwestcoder/32cf7815b681a47f88a368f1253d9c0d to your computer and use it in GitHub Desktop.
Save northwestcoder/32cf7815b681a47f88a368f1253d9c0d to your computer and use it in GitHub Desktop.
Import Satori classifications for Datastore locations from a CSV file
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "b460cfb7",
"metadata": {},
"source": [
"#### Fill out the next cell with required values\n",
"\n",
"#### This example expects a CSV file with a format like the attached csv file (scroll down)\n",
"\n",
"- This script expects the full path name of the Satori TAG, e.g. category::subcategory::tagname\n",
"- DatastoreType is not used by this example, and provided for information only\n",
"- if you do not have semi-structured data, you can leave 'semiPath' empty\n",
"- The first three rows of the example CSV (attached below) show how to assign custom classifiers to your data.\n",
"- The last two rows show how to assign Satori's built-in classifiers to your data, as well as a semiPath example.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10465e9e",
"metadata": {},
"outputs": [],
"source": [
"#your account info, following must be filled in\n",
"satori_account_id = \"\"\n",
"satori_serviceaccount_id = \"\"\n",
"satori_serviceaccount_key = \"\"\n",
"\n",
"#leave this host value as is for Satori production accounts\n",
"apihost = \"app.satoricyber.com\"\n",
"\n",
"#location of our csv input file\n",
"csv_input_file = \"tags_for_locations.csv\"\n",
"\n",
"#action to take, the choices are 'add' or 'remove' - this allows for a full UNDO of the csv file\n",
"action = \"remove\"\n",
"\n",
"#this message will appear in the location history for each field that is updated\n",
"location_history_message = \"Location Updated by CSV Import Process, action: \" + action"
]
},
{
"cell_type": "markdown",
"id": "22678bfa",
"metadata": {},
"source": [
"#### for demonstration purposes, no changes needed below this line"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0fa19b7",
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import io\n",
"import json\n",
"import urllib.parse\n",
"\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbd49cfd",
"metadata": {},
"outputs": [],
"source": [
"#our auth function to get a bearer token\n",
"\n",
"def satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost, timeout=30):\n",
"    \"\"\"Authenticate against the Satori API and return a bearer token.\n",
"\n",
"    Prints the error and returns None when authentication fails.\n",
"    timeout (seconds) keeps the notebook from hanging forever on network issues.\n",
"    \"\"\"\n",
"    auth_headers = {'content-type': 'application/json', 'accept': 'application/json'}\n",
"    auth_url = \"https://{}/api/authentication/token\".format(apihost)\n",
"    auth_body = json.dumps(\n",
"        {\n",
"            \"serviceAccountId\": satori_serviceaccount_id,\n",
"            \"serviceAccountKey\": satori_serviceaccount_key\n",
"        })\n",
"    try:\n",
"        r = requests.post(auth_url, headers=auth_headers, data=auth_body, timeout=timeout)\n",
"        #fail fast on HTTP errors (e.g. 401 for bad credentials) instead of a\n",
"        #confusing KeyError on the missing 'token' field below\n",
"        r.raise_for_status()\n",
"        return r.json()[\"token\"]\n",
"    except (requests.exceptions.RequestException, KeyError) as err:\n",
"        print(\"Bearer Token Failure:\", err)\n",
"        print(\"Exception TYPE:\", type(err))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72f7579d",
"metadata": {},
"outputs": [],
"source": [
"#get the token and also create request header\n",
"\n",
"session_token = satori_auth(satori_serviceaccount_id, satori_serviceaccount_key, apihost)\n",
"#example\n",
"#print(session_token)\n",
"\n",
"headers = {'Authorization': 'Bearer {}'.format(session_token), 'Content-Type': 'application/json', 'Accept': 'application/json'}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56c6beb4",
"metadata": {},
"outputs": [],
"source": [
"#get a Datastore ID by searching for its name\n",
"\n",
"def get_datastore_id(datastore_name):\n",
"    \"\"\"Return the id of the first datastore whose name matches datastore_name.\n",
"\n",
"    Returns None when the request fails or no datastore matches.\n",
"    \"\"\"\n",
"    #quote() the search term so names with spaces (e.g. 'AWS Postgres Data')\n",
"    #form a valid URL query string\n",
"    url = \"https://{}/api/v1/datastore?accountId={}&search={}\".format(\n",
"        apihost,\n",
"        satori_account_id,\n",
"        urllib.parse.quote(datastore_name))\n",
"    try:\n",
"        response = requests.get(url, headers=headers)\n",
"        response.raise_for_status()\n",
"    except requests.exceptions.RequestException as err:\n",
"        print(\"EXCEPTION: \", type(err))\n",
"    else:\n",
"        records = response.json()['records']\n",
"        if not records:\n",
"            #report a miss instead of raising IndexError on records[0]\n",
"            print(\"No datastore found matching: \" + datastore_name)\n",
"            return None\n",
"        return records[0]['id']\n",
"\n",
"#example\n",
"#print(get_datastore_id(\"AWS Postgres Data\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "522b61e7",
"metadata": {},
"outputs": [],
"source": [
"#get a Location ID by searching within datastore_id for database.schema.table.field.semiPath\n",
"\n",
"def get_location_id(datastore_id, location_prefix):\n",
"    \"\"\"Return the id of the first location under datastore_id matching location_prefix.\n",
"\n",
"    location_prefix is database.schema.table.field[.semiPath].\n",
"    Returns None when the request fails or no location matches.\n",
"    \"\"\"\n",
"    #quote() the prefix so semiPath characters such as '$' are URL-safe\n",
"    url = \"https://{}/api/locations/{}/query?pageSize=100&dataStoreId={}&locationPrefix={}\".format(\n",
"        apihost,\n",
"        satori_account_id,\n",
"        datastore_id,\n",
"        urllib.parse.quote(location_prefix))\n",
"    try:\n",
"        response = requests.get(url, headers=headers)\n",
"        response.raise_for_status()\n",
"    except requests.exceptions.RequestException as err:\n",
"        print(\"EXCEPTION: \", type(err))\n",
"    else:\n",
"        records = response.json()['records']\n",
"        if not records:\n",
"            #report a miss instead of raising IndexError on records[0]\n",
"            print(\"No location found matching: \" + location_prefix)\n",
"            return None\n",
"        return records[0]['id']\n",
"\n",
"#example\n",
"#print(get_location_id('b35585f4-08b4-415c-82a1-d6c1af17fcbb', 'postgres.public.v2_people.blood_type'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ceaf7f7",
"metadata": {},
"outputs": [],
"source": [
"#load all of the custom taxonomy as a dictionary\n",
"\n",
"def load_all_custom_taxonomy():\n",
"    \"\"\"Fetch every custom classifier for the account and return a dict of tag -> taxonomy id.\n",
"\n",
"    Returns None (after printing the error) when the request fails.\n",
"    \"\"\"\n",
"    url = \"https://{}/api/v1/taxonomy/custom?accountId={}\".format(apihost,satori_account_id)\n",
"    try:\n",
"        response = requests.get(url, headers=headers)\n",
"        response.raise_for_status()\n",
"    except requests.exceptions.RequestException as err:\n",
"        print(\"EXCEPTION: \", type(err))\n",
"    else:\n",
"        #map each custom tag's full path name to its taxonomy id\n",
"        return {record['tag']: record['id'] for record in response.json()['records']}\n",
"\n",
"#example \n",
"#print(load_all_custom_taxonomy())\n",
"\n",
"#now actually do it\n",
"custom_taxonomy = load_all_custom_taxonomy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9fe3f57",
"metadata": {},
"outputs": [],
"source": [
"#build our PUT payload, takes into account whether we are adding or removing the tags\n",
"\n",
"def build_update_location_payload(action, taxonomy_id):\n",
"    \"\"\"Build the JSON body for the location PUT, adding or removing taxonomy_id.\n",
"\n",
"    action must be 'add' or 'remove'; anything else raises ValueError so a\n",
"    misconfigured action cannot silently send an empty payload to the API.\n",
"    \"\"\"\n",
"    if action == 'add':\n",
"        add_tags, remove_tags = [taxonomy_id], []\n",
"    elif action == 'remove':\n",
"        add_tags, remove_tags = [], [taxonomy_id]\n",
"    else:\n",
"        raise ValueError(\"action must be 'add' or 'remove', got: \" + str(action))\n",
"    return json.dumps(\n",
"        {\n",
"            \"addTags\": add_tags,\n",
"            \"removeTags\": remove_tags,\n",
"            \"notes\": location_history_message\n",
"        }\n",
"    )\n",
"\n",
"#example\n",
"#print(build_update_location_payload(\"add\", \"51d3d892-26f6-41b8-ab0c-c5638c386379\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785e00d6",
"metadata": {},
"outputs": [],
"source": [
"#update one location with one tag (aka taxonomy id)\n",
"\n",
"def update_one_location(action, location_id, custom_taxonomy_id):\n",
"    \"\"\"PUT one tag add/remove against a single location and return the API response.\n",
"\n",
"    Returns None (after printing the error) when the request fails.\n",
"    \"\"\"\n",
"    url = \"https://{}/api/locations/{}\".format(apihost,location_id)\n",
"    payload = build_update_location_payload(action, custom_taxonomy_id)\n",
"\n",
"    try:\n",
"        response = requests.put(url=url, headers=headers, data=payload)\n",
"        response.raise_for_status()\n",
"    except requests.exceptions.RequestException as err:\n",
"        print(str(err))\n",
"        return None\n",
"    return response.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72409700",
"metadata": {},
"outputs": [],
"source": [
"#load our input from the csv file\n",
"\n",
"tags_for_locations_from_csv_file = []\n",
"\n",
"#newline='' is the csv module's documented requirement for open();\n",
"#it lets the reader handle embedded newlines inside quoted fields correctly\n",
"with open(csv_input_file, 'r', newline='') as csvfile:\n",
"    csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n",
"    #skip the header row through the csv reader (not the raw file handle),\n",
"    #so a quoted header spanning lines is still consumed as one record\n",
"    next(csvreader)\n",
"    for row in csvreader:\n",
"        tags_for_locations_from_csv_file.append(row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51dff242",
"metadata": {},
"outputs": [],
"source": [
"#MAIN WORK: iterate through the rows we loaded from the csv file and update each location\n",
"\n",
"for item in tags_for_locations_from_csv_file:\n",
"\n",
"    #skip blank lines in the csv\n",
"    if not item:\n",
"        continue\n",
"\n",
"    #columns: 0=datastore name, 1=datastore type (unused), 2=db, 3=schema,\n",
"    #         4=table, 5=column, 6=semiPath (may be empty), 7=tag\n",
"    #whether or not we are dealing with a built-in classifier or a custom classifier\n",
"    if item[7].startswith('c12n_custom'):\n",
"        taxonomy_id = custom_taxonomy[item[7]]\n",
"    else:\n",
"        taxonomy_id = item[7]\n",
"\n",
"    #build database.schema.table.field, appending the semiPath (which already\n",
"    #begins with a '.') only when one is present\n",
"    location_prefix = item[2] + \".\" + item[3] + \".\" + item[4] + \".\" + item[5]\n",
"    if item[6] != '':\n",
"        location_prefix += item[6]\n",
"\n",
"    #the lookup helpers return None on a failed request; skip the row with a\n",
"    #message rather than formatting None into the next API call's URL\n",
"    datastore_id = get_datastore_id(item[0])\n",
"    if datastore_id is None:\n",
"        print(\"SKIPPED (datastore not found): \" + str(item) + \"\\n\")\n",
"        continue\n",
"\n",
"    location_id = get_location_id(datastore_id, location_prefix)\n",
"    if location_id is None:\n",
"        print(\"SKIPPED (location not found): \" + str(item) + \"\\n\")\n",
"        continue\n",
"\n",
"    response = update_one_location(action, location_id, taxonomy_id)\n",
"    print(\"ACTION TAKEN: \" + str(response) + \"\\n\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Datastore Name Datastore Type db schema table column semiPath tags
AWS Postgres Data POSTGRES postgres public v2_people gender c12n_custom.Acme Specific Data or Column::custom_classifier2
AWS Postgres Data POSTGRES postgres public v2_people blood_type c12n_custom.Acme Specific Data or Column::custom_classifier3
AWS Postgres Data POSTGRES postgres public v2_people phone_number c12n_custom.Acme Specific Data or Column::BLOCK_THIS_TABLE
AWS Postgres Data POSTGRES postgres public json_data info .$.phone PHONE
AWS Postgres Data POSTGRES postgres public json_data info .$.email EMAIL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment