mzfr/Tanner-data-analysis.ipynb

## Tanner-data-analysis.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "This is just sample code which I wrote to see what I can do with the data.\n",
    "I am sure a lot of other information can be extracted from it.\n",
    "\n",
    "You'll notice a lot of comments in the code; those sections are for\n",
    "different graphs. Some lines would display a line graph with the location\n",
    "data and some would show a bar graph of the same data.\n",
    "\"\"\"\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import json\n",
    "import pandas as pd\n",
    "from collections import Counter\n",
    "from itertools import dropwhile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = \"tanner_9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e.json\"\n",
    "\n",
    "# JSON text is expected to be UTF-8; pass the encoding explicitly so the\n",
    "# result does not depend on the platform's default locale encoding.\n",
    "with open(filename, \"r\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "def cleanup(dictionary, threshold=25):\n",
    "    \"\"\"Remove every key whose count is below ``threshold``.\n",
    "\n",
    "    The mapping is modified in place and the same object is returned, so\n",
    "    the call can be used inside a larger expression.\n",
    "\n",
    "    Args:\n",
    "        dictionary: Mapping of key -> count, e.g. a ``collections.Counter``.\n",
    "        threshold: Minimum count a key needs in order to be kept.\n",
    "            Defaults to 25.\n",
    "\n",
    "    Returns:\n",
    "        The same mapping object, with the low-count keys deleted.\n",
    "    \"\"\"\n",
    "    # Collect the keys first: deleting entries while iterating over the\n",
    "    # mapping itself would raise a RuntimeError.\n",
    "    rare_keys = [key for key, count in dictionary.items() if count < threshold]\n",
    "    for key in rare_keys:\n",
    "        del dictionary[key]\n",
    "\n",
    "    return dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count requests per country code and per country name\n",
    "country_code, country = [], []\n",
    "for p in data:\n",
    "    location = p[\"location\"]\n",
    "    # Entries without location data carry the string \"NA\" instead of a dict\n",
    "    if location != \"NA\":\n",
    "        country_code.append(location[\"country_code\"])\n",
    "        country.append(location[\"country\"])\n",
    "\n",
    "ccode = dict(Counter(country_code))\n",
    "coun = Counter(country)\n",
    "\n",
    "\n",
    "# Sort by ascending count so the graph is drawn in increasing order.\n",
    "# cleanup() prunes `coun` in place before the sort.\n",
    "sorted_coun = dict(sorted(cleanup(coun).items(), key=lambda pair: pair[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one two-column DataFrame per counter so it can be plotted\n",
    "\n",
    "ccode_df = pd.DataFrame(\n",
    "    data=list(ccode.items()),\n",
    "    columns=[\"Country Code\", \"No. of Requests\"],\n",
    ")\n",
    "country_df = pd.DataFrame(\n",
    "    data=list(sorted_coun.items()),\n",
    "    columns=[\"Country\", \"No. of Requests\"],\n",
    ")\n",
    "\n",
    "# Plot for Number of requests per country with country code\n",
    "# ccode_df.plot(x=0,y=1,color=\"red\")\n",
    "\n",
    "# Plot for Number of requests per country with Country name\n",
    "# country_df.plot(kind=\"bar\",x=0, y=1)\n",
    "\n",
    "# Plot a better bar graph\n",
    "\n",
    "# plt.title('Tanner requests per country')\n",
    "# plt.xlabel('Requests Counts')\n",
    "# plt.ylabel('Country')\n",
    "# plt.grid(axis='y', alpha=0.75)\n",
    "# plt.scatter(country_df[\"No. of Requests\"], country_df[\"Country\"])\n",
    "\n",
    "# NOTE: the next line would overwrite the `data` loaded from the JSON file;\n",
    "# rename the variable before enabling it.\n",
    "# data = list(country_df[\"No. of Requests\"])\n",
    "# plt.pie(data, labels=country_df[\"Country\"])\n",
    "\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This cell is for displaying graphs related to attack types\n",
    "# and paths. The paths graph might not be very beautiful :)\n",
    "\n",
    "all_paths, types = [], []\n",
    "\n",
    "for record in data:\n",
    "    for entry in record[\"paths\"]:\n",
    "        all_paths.append(entry[\"path\"])\n",
    "        # attack_type is optional, so only collect it when present\n",
    "        if \"attack_type\" not in entry:\n",
    "            continue\n",
    "        types.append(entry[\"attack_type\"])\n",
    "\n",
    "# Only the paths counter is pruned by cleanup(); attack types keep every count\n",
    "paths = cleanup(Counter(all_paths))\n",
    "atypes = Counter(types)\n",
    "df = pd.DataFrame(data=list(paths.items()), columns=[\"Paths\", \"Count\"])\n",
    "\n",
    "# Plot the path vs count graph\n",
    "# plt.title('Most accessed paths')\n",
    "# plt.xlabel('Counts')\n",
    "# plt.ylabel('Paths')\n",
    "# plt.grid(axis='y', alpha=0.75)\n",
    "# plt.scatter(df[\"Count\"], df[\"Paths\"])\n",
    "\n",
    "# Plot the attack type vs count graph\n",
    "# df = pd.DataFrame(list(atypes.items()), columns=[\"Type\", \"Count\"])\n",
    "# plt.title('Attack types')\n",
    "# plt.ylabel('Counts')\n",
    "# plt.xlabel('Attack Type')\n",
    "# plt.grid(axis='y', alpha=0.75)\n",
    "# plt.bar(df[\"Type\"], df[\"Count\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 169,
	"metadata": {},
	"outputs": [],
	"source": [
	"\"\"\"\n",
	"This is just sample code which I wrote to see what I can do with the data.\n",
	"I am sure a lot of other information can be extracted from it.\n",
	"\n",
	"You'll notice a lot of comments in the code; those sections are for\n",
	"different graphs. Some lines would display a line graph with the location\n",
	"data and some would show a bar graph of the same data.\n",
	"\"\"\"\n",
	"\n",
	"import matplotlib.pyplot as plt\n",
	"import json\n",
	"import pandas as pd\n",
	"from collections import Counter\n",
	"from itertools import dropwhile"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 140,
	"metadata": {},
	"outputs": [],
	"source": [
	"filename = \"tanner_9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e.json\"\n",
	"\n",
	"# JSON text is expected to be UTF-8; pass the encoding explicitly so the\n",
	"# result does not depend on the platform's default locale encoding.\n",
	"with open(filename, \"r\", encoding=\"utf-8\") as f:\n",
	"    data = json.load(f)\n",
	"\n",
	"def cleanup(dictionary, threshold=25):\n",
	"    \"\"\"Remove every key whose count is below ``threshold``.\n",
	"\n",
	"    The mapping is modified in place and the same object is returned, so\n",
	"    the call can be used inside a larger expression.\n",
	"\n",
	"    Args:\n",
	"        dictionary: Mapping of key -> count, e.g. a ``collections.Counter``.\n",
	"        threshold: Minimum count a key needs in order to be kept.\n",
	"            Defaults to 25.\n",
	"\n",
	"    Returns:\n",
	"        The same mapping object, with the low-count keys deleted.\n",
	"    \"\"\"\n",
	"    # Collect the keys first: deleting entries while iterating over the\n",
	"    # mapping itself would raise a RuntimeError.\n",
	"    rare_keys = [key for key, count in dictionary.items() if count < threshold]\n",
	"    for key in rare_keys:\n",
	"        del dictionary[key]\n",
	"\n",
	"    return dictionary"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 141,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Count requests per country code and per country name\n",
	"country_code, country = [], []\n",
	"for p in data:\n",
	"    location = p[\"location\"]\n",
	"    # Entries without location data carry the string \"NA\" instead of a dict\n",
	"    if location != \"NA\":\n",
	"        country_code.append(location[\"country_code\"])\n",
	"        country.append(location[\"country\"])\n",
	"\n",
	"ccode = dict(Counter(country_code))\n",
	"coun = Counter(country)\n",
	"\n",
	"\n",
	"# Sort by ascending count so the graph is drawn in increasing order.\n",
	"# cleanup() prunes `coun` in place before the sort.\n",
	"sorted_coun = dict(sorted(cleanup(coun).items(), key=lambda pair: pair[1]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 171,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Build one two-column DataFrame per counter so it can be plotted\n",
	"\n",
	"ccode_df = pd.DataFrame(\n",
	"    data=list(ccode.items()),\n",
	"    columns=[\"Country Code\", \"No. of Requests\"],\n",
	")\n",
	"country_df = pd.DataFrame(\n",
	"    data=list(sorted_coun.items()),\n",
	"    columns=[\"Country\", \"No. of Requests\"],\n",
	")\n",
	"\n",
	"# Plot for Number of requests per country with country code\n",
	"# ccode_df.plot(x=0,y=1,color=\"red\")\n",
	"\n",
	"# Plot for Number of requests per country with Country name\n",
	"# country_df.plot(kind=\"bar\",x=0, y=1)\n",
	"\n",
	"# Plot a better bar graph\n",
	"\n",
	"# plt.title('Tanner requests per country')\n",
	"# plt.xlabel('Requests Counts')\n",
	"# plt.ylabel('Country')\n",
	"# plt.grid(axis='y', alpha=0.75)\n",
	"# plt.scatter(country_df[\"No. of Requests\"], country_df[\"Country\"])\n",
	"\n",
	"# NOTE: the next line would overwrite the `data` loaded from the JSON file;\n",
	"# rename the variable before enabling it.\n",
	"# data = list(country_df[\"No. of Requests\"])\n",
	"# plt.pie(data, labels=country_df[\"Country\"])\n",
	"\n",
	"# plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 166,
	"metadata": {},
	"outputs": [],
	"source": [
	"# This cell is for displaying graphs related to attack types\n",
	"# and paths. The paths graph might not be very beautiful :)\n",
	"\n",
	"all_paths, types = [], []\n",
	"\n",
	"for record in data:\n",
	"    for entry in record[\"paths\"]:\n",
	"        all_paths.append(entry[\"path\"])\n",
	"        # attack_type is optional, so only collect it when present\n",
	"        if \"attack_type\" not in entry:\n",
	"            continue\n",
	"        types.append(entry[\"attack_type\"])\n",
	"\n",
	"# Only the paths counter is pruned by cleanup(); attack types keep every count\n",
	"paths = cleanup(Counter(all_paths))\n",
	"atypes = Counter(types)\n",
	"df = pd.DataFrame(data=list(paths.items()), columns=[\"Paths\", \"Count\"])\n",
	"\n",
	"# Plot the path vs count graph\n",
	"# plt.title('Most accessed paths')\n",
	"# plt.xlabel('Counts')\n",
	"# plt.ylabel('Paths')\n",
	"# plt.grid(axis='y', alpha=0.75)\n",
	"# plt.scatter(df[\"Count\"], df[\"Paths\"])\n",
	"\n",
	"# Plot the attack type vs count graph\n",
	"# df = pd.DataFrame(list(atypes.items()), columns=[\"Type\", \"Count\"])\n",
	"# plt.title('Attack types')\n",
	"# plt.ylabel('Counts')\n",
	"# plt.xlabel('Attack Type')\n",
	"# plt.grid(axis='y', alpha=0.75)\n",
	"# plt.bar(df[\"Type\"], df[\"Count\"])"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}