dtrizna/eda.ipynb

## eda.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading XML "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from lxml import etree\n",
    "\n",
    "FILE = 'logs.xml'\n",
    "\n",
    "# need to use lxml's XMLParser with recover=True\n",
    "# as from manual analysis we see that file is truncated\n",
    "# (i.e. does not have correct XML structure)\n",
    "parser = etree.XMLParser(recover=True)\n",
    "\n",
    "with open(FILE) as file:\n",
    "    data = file.readlines()\n",
    "\n",
    "# skip the XML declaration (1st line), so take only data[1]\n",
    "raw = etree.fromstring(data[1], parser=parser)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# we can see that the recovering parser appended the missing closing XML tags (in the parsed tree, not in the file)\n",
    "print(etree.tostring(raw)[:100])\n",
    "print(etree.tostring(raw)[-100:])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### `.iter()` iterates over every element in the XML tree, without grouping them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [],
   "source": [
    "# keep only the first 21 elements (positions 0..20) of the document-order walk\n",
    "elements = [node for _, node in zip(range(21), raw.iter())]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# type: https://lxml.de/api/lxml.etree._Element-class.html\n",
    "print(elements[0])\n",
    "print(elements[0].tag)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# explore the collected elements (the first 21) and their tag syntax\n",
    "for node in elements:\n",
    "    print(node.tag, ':', node.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# every tag carries the same namespace prefix, so define it once\n",
    "tag = '{http://schemas.microsoft.com/win/2004/08/events/event}'\n",
    "\n",
    "# materialize all Event elements of the tree into a list\n",
    "events = list(raw.iter(tag + 'Event'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# .iterchildren() gives us the direct child elements\n",
    "for i in events[0].iterchildren():\n",
    "    print(i.tag)\n",
    "    print(i.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# .iterdescendants() gives us all descendant elements of the event\n",
    "for i in events[0].iterdescendants():\n",
    "    print(i.tag, ':', i.text, ':', i.items())\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating DataFrame\n",
    "\n",
    "First, we build a dictionary for every event by iterating over its XML elements and collecting their data.  \n",
    "Then we combine these dictionaries into a single pandas DataFrame with one row per event."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [],
   "source": [
    "# namespace prefix carried by every tag; stripped to get short column names\n",
    "tag = '{http://schemas.microsoft.com/win/2004/08/events/event}'\n",
    "\n",
    "# One dictionary per event, collected first and turned into a DataFrame once:\n",
    "# growing a frame with DataFrame.append inside the loop copies it on every\n",
    "# iteration, and DataFrame.append was removed in pandas 2.0.\n",
    "records = []\n",
    "for event in events:\n",
    "    edict = {}\n",
    "    for element in event.iterdescendants():\n",
    "        if any(x in element.tag for x in ['TimeCreated', 'Execution', 'Security']):\n",
    "            # these elements are read through their attributes: name -> value\n",
    "            edict.update(element.items())\n",
    "        # filter out empty fields\n",
    "        elif any(x in element.tag for x in ['Provider', 'System', 'Correlation']):\n",
    "            pass\n",
    "        elif 'Data' in element.tag:\n",
    "            # each attribute value of a Data element becomes a column holding the element's text\n",
    "            for attr_name, attr_value in element.items():\n",
    "                edict[attr_value] = element.text\n",
    "        else:\n",
    "            # any other element: tag without the namespace prefix -> element text\n",
    "            edict[element.tag.replace(tag, '')] = element.text\n",
    "\n",
    "    # keep the raw XML of the event so its full content is always accessible\n",
    "    edict['raw'] = etree.tostring(event, pretty_print=True).decode()\n",
    "    records.append(edict)\n",
    "\n",
    "# row labels are the positions of the events in `events` (0, 1, 2, ...)\n",
    "df = pd.DataFrame(records)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# take a look on created dataset\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# what data was collected\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# now we can work with data much easier than through XML parser, e.g. filter events with eventID == 20\n",
    "df[df['EventID'] == '20']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query dataframe for suspicious events (manual heuristics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# see all unique events - all come from sysmon\n",
    "df.EventID.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# see how often each event appears in the dataset\n",
    "df.EventID.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### WMI Event activity (Sysmon EventID: 19-21)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A bare expression inside a for loop is never displayed, so select the\n",
    "# WMI events (Sysmon EventID 19-21) with a single mask and let the cell\n",
    "# display the resulting frame as its last expression.\n",
    "df[df.EventID.isin([str(i) for i in range(19, 22)])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# E.g. persistence attempts via WMI Event Subscriptions\n",
    "print(df[df.EventID == '20'].raw.iloc[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Suspicious Process creation activity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sysmon EventID 1 has information about created Processes\n",
    "# Lets see what CommandLines were used during Process creation\n",
    "for cmd in df[df.EventID == '1'].CommandLine.unique():\n",
    "    print(cmd)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Lots of suspicious processes are seen.\n",
    "Finding heuristics are implemented in `detections.py` and are not described here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(df[df.CommandLine.str.contains('wScript', na=False)].iloc[0].raw)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Analyse powershell network activity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# keep only the events whose raw XML contains 'powershell'\n",
    "psdf = df[df.raw.str.contains('powershell', na=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# see where PS connected\n",
    "psdf.DestinationIp.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# only one IP address\n",
    "# what ports?\n",
    "psdf.DestinationPort.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# all connections to '192.168.124.135:8080'\n",
    "# statistical analysis of these connections\n",
    "pd.to_datetime(psdf[psdf.DestinationIp == '192.168.124.135'].UtcTime).\\\n",
    "    sort_values().diff().iloc[1:].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.to_datetime(psdf[psdf.DestinationIp == '192.168.124.135'].UtcTime).\\\n",
    "    sort_values().diff()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# we may visualize these connections to see distribution across time\n",
    "# - it's obviously an automated process with almost the same time delays\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "plt.figure(figsize=(15,10))\n",
    "sns.countplot(x='UtcTime', data=psdf[psdf.DestinationPort == '8080'], palette=sns.color_palette(\"Blues\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We see that almost all connections have the same time delay of ~5 seconds, which may indicate Command and Control (C&C) communication with a C&C server at 192.168.124.135:8080.  \n",
    "We may assume that the C&C delay setting is ~5 seconds and the jitter is ~10%."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Other events"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[df.EventID == '15'].index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plain loop instead of a list comprehension used only for its side effect:\n",
    "# the comprehension made the cell display a list of None values.\n",
    "for idx, row in df[df.EventID == '15'].iterrows():\n",
    "    print('Event nr in file:', idx, '\\n', row.Image, 'created', row.TargetFilename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(df[df.EventID == '2'].iloc[2].raw)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Persistence attempts via registry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print the raw XML of every EventID 13 event that mentions powershell (case-insensitive)\n",
    "for idx, event_row in df[df.EventID == '13'].iterrows():\n",
    "    raw_event = event_row.raw\n",
    "    if 'powershell' in raw_event.lower():\n",
    "        print(raw_event)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  DNS activity\n",
    "\n",
    "We see that only valid services like F-Secure's sensor, Cortana or Sysmon are using DNS.  \n",
    "No attempts to establish C&C via DNS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plain loop instead of a list comprehension used only for its side effect:\n",
    "# the comprehension made the cell display a list of None values.\n",
    "for idx, row in df[df.EventID == '22'].iterrows():\n",
    "    print(row.Image, ':', row.QueryName)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.EventID.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## File creations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [],
   "source": [
    "# Plain loop instead of a list comprehension used only for its side effect:\n",
    "# the comprehension made the cell display a list of None values.\n",
    "for idx, row in df[df.EventID == '11'].iterrows():\n",
    "    print(row.Image, ':', row.TargetFilename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One combined mask instead of filtering the frame twice; na=False keeps rows\n",
    "# without an Image value from turning the mask into a non-boolean one.\n",
    "df[(df.EventID == '11') & df.Image.str.contains('suspicious_binary', na=False)].index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Files created (EventID 11) by images whose path contains 'suspicious_binary'.\n",
    "# One combined mask replaces the double filter, and a plain loop replaces the\n",
    "# list comprehension that made the cell display a list of None values.\n",
    "created_by_suspicious = df[(df.EventID == '11') & df.Image.str.contains('suspicious_binary', na=False)]\n",
    "for target_filename in created_by_suspicious.TargetFilename:\n",
    "    print(target_filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python37564bita866c8c9cf084ebd97979ec2adc3c734",
   "display_name": "Python 3.7.5 64-bit"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Loading XML "
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"from lxml import etree\n",
	"\n",
	"FILE = 'logs.xml'\n",
	"\n",
	"# need to use lxml's XMLParser with recover=True\n",
	"# as from manual analysis we see that file is truncated\n",
	"# (i.e. does not have correct XML structure)\n",
	"parser = etree.XMLParser(recover=True)\n",
	"\n",
	"with open(FILE) as file:\n",
	" data = file.readlines()\n",
	"\n",
	"# ignore XML documentation's tag (1st line), so taking only data[1]\n",
	"raw = etree.fromstring(data[1], parser=parser)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# we can see that parser appended xml tokens to file\n",
	"print(etree.tostring(raw)[:100])\n",
	"print(etree.tostring(raw)[-100:])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### .iter() performs iteration over every element in XML, not grouping them"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"tags": [
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend"
	]
	},
	"outputs": [],
	"source": [
	"elements = []\n",
	"for i,element in enumerate(raw.iter()):\n",
	" elements.append(element)\n",
	" if i == 20:\n",
	" break"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# type: https://lxml.de/api/lxml.etree._Element-class.html\n",
	"print(elements[0])\n",
	"print(elements[0].tag)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# exploring first 20 elements and their tag syntaxis\n",
	"for i in range(len(elements)):\n",
	" print(elements[i].tag, ':', elements[i].text)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# get all events in list\n",
	"events = []\n",
	"\n",
	"# we see prefix on every tag, define that\n",
	"tag = '{http://schemas.microsoft.com/win/2004/08/events/event}'\n",
	"for element in raw.iter(tag+'Event'):\n",
	" events.append(element)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# .iterchildren() gives us direct subtokens\n",
	"for i in events[0].iterchildren():\n",
	" print(i.tag)\n",
	" print(i.text)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# .iterdescendants() give us all tokens in event\n",
	"for i in events[0].iterdescendants():\n",
	" print(i.tag, ':', i.text, ':', i.items())\n",
	" print()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Creating DataFrame\n",
	"\n",
	"First of all we make dictionary for every event, and by iterating over XML Element object collect data into that dictionary \n",
	"Then, we create pandas DataFrame out of this dictionary and adding it to larger dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"tags": [
	"outputPrepend",
	"outputPrepend",
	"outputPrepend",
	"outputPrepend"
	]
	},
	"outputs": [],
	"source": [
	"df = pd.DataFrame()\n",
	"tag = '{http://schemas.microsoft.com/win/2004/08/events/event}'\n",
	"for idx, event in enumerate(events):\n",
	" edict = {}\n",
	" for element in event.iterdescendants():\n",
	" if any(x in element.tag for x in ['TimeCreated', 'Execution', 'Security']):\n",
	" for item in element.items():\n",
	" edict[item[0]] = item[1]\n",
	" # filter out empty fields\n",
	" elif any(x in element.tag for x in ['Provider', 'System', 'Correlation']):\n",
	" pass\n",
	" elif 'Data' in element.tag:\n",
	" for item in element.items():\n",
	" edict[item[1]] = element.text\n",
	" else:\n",
	" edict[element.tag.replace(tag,'')] = element.text\n",
	" \n",
	" # add raw text event to have ability always access full value of eventlog\n",
	" edict['raw'] = etree.tostring(event, pretty_print=True).decode()\n",
	"\n",
	" edf = pd.DataFrame(edict, index=[idx])\n",
	" df = df.append(edf)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# take a look on created dataset\n",
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# what data was collected\n",
	"df.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# now we can work with data much easier than through XML parser, e.g. filter events with eventID == 20\n",
	"df[df['EventID'] == '20']"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Query dataframe for suspicious events (manual heuristics)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# see all unique events - all come from sysmon\n",
	"df.EventID.unique()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# see event apearance in dataset\n",
	"df.EventID.value_counts()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### WMI Event activity (Sysmon EventID: 19-21)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"for i in range(19,22):\n",
	" df[df.EventID == str(i)]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# E.g. persistence attempts via WMI Event Subscriptions\n",
	"print(df[df.EventID == '20'].raw.iloc[0])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Suspicious Process creation activity"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Sysmon EventID 1 has information about created Processes\n",
	"# Lets see what CommandLines were used during Process creation\n",
	"for cmd in df[df.EventID == '1'].CommandLine.unique():\n",
	" print(cmd)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Lots of suspicious processes are seen.\n",
	"Finding heuristics are implemented in `detections.py` and are not described here."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"print(df[df.CommandLine.str.contains('wScript', na=False)].iloc[0].raw)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Analyse powershell network activity"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# filtering logs, where powershell is figuring in\n",
	"psdf = df[df.raw.str.contains('powershell', na=False)]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# see where PS connected\n",
	"psdf.DestinationIp.value_counts()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# only one IP address\n",
	"# what ports?\n",
	"psdf.DestinationPort.value_counts()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# all connections to '192.168.124.135:8080'\n",
	"# statistical analysis of these connections\n",
	"pd.to_datetime(psdf[psdf.DestinationIp == '192.168.124.135'].UtcTime).\\\n",
	" sort_values().diff().iloc[1:].describe()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.to_datetime(psdf[psdf.DestinationIp == '192.168.124.135'].UtcTime).\\\n",
	" sort_values().diff()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# we may visualize these connections to see distribution across time\n",
	"# - it's obviously an automotive process with almost same time delays\n",
	"import seaborn as sns\n",
	"import matplotlib.pyplot as plt\n",
	"plt.figure(figsize=(15,10))\n",
	"sns.countplot(x='UtcTime', data=psdf[psdf.DestinationPort == '8080'], palette=sns.color_palette(\"Blues\"))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We see that almost all connections have same time delay of ~5 seconds, which may indicate that there's Command and Control communication happening with C&C server at 192.168.124.135:8080 \n",
	"We may assume that C&C delay settings are ~5 seconds and jitter is ~10%."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Other events"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"df[df.EventID == '15'].index"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"[print('Event nr in file:', x[0], '\\n', x[1].Image, 'created', x[1].TargetFilename) for x in df[df.EventID == '15'].iterrows()]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"print(df[df.EventID == '2'].iloc[2].raw)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Persistence attempts via registry"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"for row in df[df.EventID == '13'].iterrows():\n",
	" if 'powershell' in row[1].raw.lower():\n",
	" print(row[1].raw)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## DNS activity\n",
	"\n",
	"We see that only valid services like F-Secure's sensor, Cortana or Sysmon using DNS. \n",
	"No attempts to establish C&C via DNS"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"[print(x[1].Image, ':', x[1].QueryName) for x in df[df.EventID == '22'].iterrows()]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"df.EventID.value_counts()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## File creations"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"tags": [
	"outputPrepend"
	]
	},
	"outputs": [],
	"source": [
	"[print(x[1].Image, ':', x[1].TargetFilename) for x in df[df.EventID == '11'].iterrows()]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"df[df.EventID == '11'][df[df.EventID == '11'].Image.str.contains('suspicious_binary')].index"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"[print(x[1].TargetFilename) for x in df[df.EventID == '11'][df[df.EventID == '11'].\\\n",
	" Image.str.contains('suspicious_binary')].iterrows()]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.5-final"
	},
	"orig_nbformat": 2,
	"kernelspec": {
	"name": "python37564bita866c8c9cf084ebd97979ec2adc3c734",
	"display_name": "Python 3.7.5 64-bit"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}