Skip to content

Instantly share code, notes, and snippets.

@mzfr
Created May 31, 2020 11:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mzfr/8b97889bb7893320b57579c70d7fa08e to your computer and use it in GitHub Desktop.
Save mzfr/8b97889bb7893320b57579c70d7fa08e to your computer and use it in GitHub Desktop.
Some sample code for analyzing data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"This is just a sample code which I wrote to see what all I can do.\n",
"I am sure lot of other information can be extracted from the data.\n",
"\n",
"You'll notice lot of comments in the code, those sections are for\n",
"different graphs. So lines would display line graph with location\n",
"data and some will show bar graph on the same data.\n",
"\"\"\"\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import json\n",
"import pandas as pd\n",
"from collections import Counter\n",
"from itertools import dropwhile"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [],
"source": [
"filename = \"tanner_9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e.json\"\n",
"\n",
"with open(filename, \"r\") as f:\n",
" data = json.load(f)\n",
"\n",
"def cleanup(dictionary):\n",
" \"\"\"Remove the all the keys which have count less than 25\"\"\"\n",
" for key, count in dropwhile(lambda k: k[1] >= 25, dictionary.most_common()):\n",
" del dictionary[key]\n",
"\n",
" return dictionary"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [],
"source": [
"# For location\n",
"country_code = []\n",
"country = []\n",
"for p in data:\n",
" if p[\"location\"] == \"NA\":\n",
" continue\n",
" else:\n",
" country_code.append(p[\"location\"][\"country_code\"])\n",
" country.append(p[\"location\"][\"country\"])\n",
" \n",
"ccode = dict(Counter(country_code))\n",
"coun = Counter(country)\n",
"\n",
"\n",
"# This line makes sure we get graph in increasing order\n",
"sorted_coun = {k: v for k, v in sorted(cleanup(coun).items(), key=lambda item: item[1])}"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [],
"source": [
"# Setting the dataframe\n",
"\n",
"ccode_df = pd.DataFrame(list(ccode.items()), columns=[\"Country Code\", \"No. of Requests\"])\n",
"country_df = pd.DataFrame(list(sorted_coun.items()), columns=[\"Country\", \"No. of Requests\"])\n",
"\n",
"# Plot for Number of requests per country with country code\n",
"# ccode_df.plot(x=0,y=1,color=\"red\")\n",
"\n",
"# Plot for Number of requests per country with Country name\n",
"# country_df.plot(kind=\"bar\",x=0, y=1)\n",
"\n",
"#plot better bar graph\n",
"\n",
"# plt.title('Tanner requests per country')\n",
"# plt.xlabel('Requests Counts')\n",
"# plt.ylabel('Country')\n",
"# plt.grid(axis='y', alpha=0.75)\n",
"# plt.scatter(country_df[\"No. of Requests\"], country_df[\"Country\"])\n",
"\n",
"# data = list(country_df[\"No. of Requests\"])\n",
"# plt.pie(data, labels=country_df[\"Country\"])\n",
"\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [],
"source": [
"# This cell is for displaying graphs related to attack types\n",
"# and paths. The paths graph might not be very beautiful :)\n",
"\n",
"all_paths = []\n",
"types = []\n",
"\n",
"for path in data:\n",
" for p in path[\"paths\"]:\n",
" all_paths.append(p[\"path\"])\n",
" if \"attack_type\" in p:\n",
" types.append(p[\"attack_type\"])\n",
"\n",
"paths, atypes = cleanup(Counter(all_paths)), Counter(types)\n",
"df = pd.DataFrame(list(paths.items()), columns=[\"Paths\", \"Count\"])\n",
"\n",
"# plt.title('Most accessed paths')\n",
"# plt.xlabel('Counts')\n",
"# plt.ylabel('Paths')\n",
"# plt.grid(axis='y', alpha=0.75)\n",
"# plt.scatter(df[\"Count\"], df[\"Paths\"])\n",
"\n",
"# Plot the path vs count graph\n",
"# df = pd.DataFrame(list(atypes.items()), columns=[\"Type\", \"Count\"])\n",
"# plt.title('Attack types')\n",
"# plt.ylabel('Counts')\n",
"# plt.xlabel('Attack Type')\n",
"# plt.grid(axis='y', alpha=0.75)\n",
"# plt.bar(df[\"Type\"], df[\"Count\"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment