Skip to content

Instantly share code, notes, and snippets.

@maheshsenni
Created February 1, 2019 02:52
Show Gist options
  • Save maheshsenni/2efbc8820c3563e8b4b2acf3c24b4a81 to your computer and use it in GitHub Desktop.
Save maheshsenni/2efbc8820c3563e8b4b2acf3c24b4a81 to your computer and use it in GitHub Desktop.
Isolation Forest Anomaly detection - Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7100354, 32)"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn.neighbors import LocalOutlierFactor\n",
"import pandas as pd \n",
"from sklearn.ensemble import IsolationForest\n",
"from sklearn import preprocessing\n",
"from collections import defaultdict\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"df = pd.read_csv(\"/Users/ohk304/Developer/scratchpad/fstechsuperhack19/notebooks/data/FS_TECH_HACK_2019_FULL_DATASET.csv\")\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>me_dt</th>\n",
" <th>loan_acct_sur_id</th>\n",
" <th>mthend_origtn_cd</th>\n",
" <th>current_month_repo</th>\n",
" <th>num_of_extnsns_cnt</th>\n",
" <th>day_pastdue_cnt</th>\n",
" <th>days_30_past_in_l12m_cd</th>\n",
" <th>days_60_past_in_l12m_cd</th>\n",
" <th>chrgof_me_dt</th>\n",
" <th>...</th>\n",
" <th>dlr_legl_nm</th>\n",
" <th>dlr_phy_adr_city_nm</th>\n",
" <th>dlr_phy_adr_st_cd</th>\n",
" <th>dlr_phy_adr_zip_cd</th>\n",
" <th>dlr_phy_adr_zip_4_cd</th>\n",
" <th>days_60_past_in_l12m_cd_label</th>\n",
" <th>days_30_past_in_l12m_cd_label</th>\n",
" <th>day_pastdue_cnt_label</th>\n",
" <th>dlr_phy_adr_st_cd_label</th>\n",
" <th>avg_fico_score_val_label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>532246</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>10589</td>\n",
" <td>2190</td>\n",
" <td>11</td>\n",
" <td>1784</td>\n",
" <td>237403</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>125500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>475798</td>\n",
" <td>...</td>\n",
" <td>8352</td>\n",
" <td>268</td>\n",
" <td>14</td>\n",
" <td>4495</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>638340</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>475799</td>\n",
" <td>...</td>\n",
" <td>13177</td>\n",
" <td>638</td>\n",
" <td>45</td>\n",
" <td>3989</td>\n",
" <td>340227</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>589717</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>475800</td>\n",
" <td>...</td>\n",
" <td>9604</td>\n",
" <td>1824</td>\n",
" <td>11</td>\n",
" <td>1834</td>\n",
" <td>340228</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>481463</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>475801</td>\n",
" <td>...</td>\n",
" <td>8898</td>\n",
" <td>253</td>\n",
" <td>25</td>\n",
" <td>3160</td>\n",
" <td>195</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 32 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 me_dt loan_acct_sur_id mthend_origtn_cd current_month_repo \\\n",
"7 0 0 532246 0 0 \n",
"10 1 0 125500 0 0 \n",
"32 2 0 638340 0 0 \n",
"44 3 0 589717 0 0 \n",
"52 4 0 481463 0 0 \n",
"\n",
" num_of_extnsns_cnt day_pastdue_cnt days_30_past_in_l12m_cd \\\n",
"7 0 0 0 \n",
"10 0 0 0 \n",
"32 0 0 0 \n",
"44 0 0 0 \n",
"52 0 0 0 \n",
"\n",
" days_60_past_in_l12m_cd chrgof_me_dt ... dlr_legl_nm \\\n",
"7 0 0 ... 10589 \n",
"10 0 475798 ... 8352 \n",
"32 0 475799 ... 13177 \n",
"44 0 475800 ... 9604 \n",
"52 0 475801 ... 8898 \n",
"\n",
" dlr_phy_adr_city_nm dlr_phy_adr_st_cd dlr_phy_adr_zip_cd \\\n",
"7 2190 11 1784 \n",
"10 268 14 4495 \n",
"32 638 45 3989 \n",
"44 1824 11 1834 \n",
"52 253 25 3160 \n",
"\n",
" dlr_phy_adr_zip_4_cd days_60_past_in_l12m_cd_label \\\n",
"7 237403 0 \n",
"10 20 0 \n",
"32 340227 0 \n",
"44 340228 0 \n",
"52 195 0 \n",
"\n",
" days_30_past_in_l12m_cd_label day_pastdue_cnt_label \\\n",
"7 0 0 \n",
"10 0 0 \n",
"32 0 0 \n",
"44 0 0 \n",
"52 0 0 \n",
"\n",
" dlr_phy_adr_st_cd_label avg_fico_score_val_label \n",
"7 0 0 \n",
"10 0 0 \n",
"32 0 0 \n",
"44 0 0 \n",
"52 0 0 \n",
"\n",
"[5 rows x 32 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfna = df.dropna(subset=['days_60_past_in_l12m_cd_label', 'days_30_past_in_l12m_cd_label',\n",
" 'day_pastdue_cnt_label',\n",
" 'dlr_phy_adr_st_cd_label', 'avg_fico_score_val_label'])\n",
"# print row count\n",
"dfna.shape\n",
"# encoding for categories\n",
"le = preprocessing.LabelEncoder()\n",
"d = defaultdict(preprocessing.LabelEncoder)\n",
"dfna_enc_df = dfna.apply(lambda x: d[x.name].fit_transform(x))\n",
"dfna_enc_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(535281, 178428)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dlr_phy_adr_city_nm</th>\n",
" <th>dlr_phy_adr_st_cd</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2190</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>268</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>638</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>1824</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>253</td>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dlr_phy_adr_city_nm dlr_phy_adr_st_cd\n",
"7 2190 11\n",
"10 268 14\n",
"32 638 45\n",
"44 1824 11\n",
"52 253 25"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"traindf_whole, testdf_whole = train_test_split(dfna_enc_df, test_size=0.25, shuffle=False)\n",
"# select features\n",
"# traindf_whole_valid = traindf_whole.query('dlr_phy_adr_st_cd_label == 0')\n",
"traindf = traindf_whole[['dlr_phy_adr_city_nm', 'dlr_phy_adr_st_cd']]\n",
"testdf = testdf_whole[['dlr_phy_adr_city_nm', 'dlr_phy_adr_st_cd']]\n",
"\n",
"print(traindf.shape[0], testdf.shape[0])\n",
"traindf.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"IsolationForest(behaviour='new', bootstrap=False, contamination=0.0004,\n",
" max_features=1.0, max_samples=535281, n_estimators=100,\n",
" n_jobs=None, random_state=None, verbose=0)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf = IsolationForest(contamination=0.0004, max_samples=traindf.shape[0], behaviour=\"new\")\n",
"clf.fit(traindf)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"testdf_preds = clf.predict(testdf)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"preds_df = pd.DataFrame(testdf_preds, columns=['preds'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(178342, 1)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preds_df.query('preds > 0').shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(86, 1)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preds_df.query('preds == -1').shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"traindf_whole.query('dlr_phy_adr_st_cd_label>0').shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"testdf_clone_enc = testdf_whole.copy()\n",
"testdf_clone = testdf_clone_enc.apply(lambda x: d[x.name].inverse_transform(x))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"testdf_clone['predictions'] = testdf_preds"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(178292, 33)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.query('dlr_phy_adr_st_cd_label == 0 and predictions == 1').shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9, 33)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.query('dlr_phy_adr_st_cd_label == 1 and predictions == -1').shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(77, 33)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.query('dlr_phy_adr_st_cd_label == 0 and predictions == -1').shape"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50, 33)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.query('dlr_phy_adr_st_cd_label == 1 and predictions == 1').shape"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(178428, 33)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.shape"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>me_dt</th>\n",
" <th>loan_acct_sur_id</th>\n",
" <th>mthend_origtn_cd</th>\n",
" <th>current_month_repo</th>\n",
" <th>num_of_extnsns_cnt</th>\n",
" <th>day_pastdue_cnt</th>\n",
" <th>days_30_past_in_l12m_cd</th>\n",
" <th>days_60_past_in_l12m_cd</th>\n",
" <th>chrgof_me_dt</th>\n",
" <th>...</th>\n",
" <th>dlr_phy_adr_city_nm</th>\n",
" <th>dlr_phy_adr_st_cd</th>\n",
" <th>dlr_phy_adr_zip_cd</th>\n",
" <th>dlr_phy_adr_zip_4_cd</th>\n",
" <th>days_60_past_in_l12m_cd_label</th>\n",
" <th>days_30_past_in_l12m_cd_label</th>\n",
" <th>day_pastdue_cnt_label</th>\n",
" <th>dlr_phy_adr_st_cd_label</th>\n",
" <th>avg_fico_score_val_label</th>\n",
" <th>predictions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5324364</th>\n",
" <td>5324364</td>\n",
" <td>2018-07-31</td>\n",
" <td>a100e65b0c34b7aa86f62776059ec027</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>GOLDEN VALLEY</td>\n",
" <td>MN</td>\n",
" <td>55426</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5324380</th>\n",
" <td>5324380</td>\n",
" <td>2018-07-31</td>\n",
" <td>bd205cebcf4920a9a39643ff7c07f341</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>DALLAS</td>\n",
" <td>TX</td>\n",
" <td>75237</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5324388</th>\n",
" <td>5324388</td>\n",
" <td>2018-07-31</td>\n",
" <td>bb6f7bff590af090df01c8dbb6999493</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>HOUSTON</td>\n",
" <td>TX</td>\n",
" <td>77034</td>\n",
" <td>4501.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5324397</th>\n",
" <td>5324397</td>\n",
" <td>2018-07-31</td>\n",
" <td>95cc7b712e19b561ee3c026fba9e165c</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>FORT MYERS</td>\n",
" <td>FL</td>\n",
" <td>33901</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5324414</th>\n",
" <td>5324414</td>\n",
" <td>2018-07-31</td>\n",
" <td>54d7fd33ec1384cf93a88ea81aa82da7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>HIRAM</td>\n",
" <td>GA</td>\n",
" <td>30141</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 33 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 me_dt loan_acct_sur_id \\\n",
"5324364 5324364 2018-07-31 a100e65b0c34b7aa86f62776059ec027 \n",
"5324380 5324380 2018-07-31 bd205cebcf4920a9a39643ff7c07f341 \n",
"5324388 5324388 2018-07-31 bb6f7bff590af090df01c8dbb6999493 \n",
"5324397 5324397 2018-07-31 95cc7b712e19b561ee3c026fba9e165c \n",
"5324414 5324414 2018-07-31 54d7fd33ec1384cf93a88ea81aa82da7 \n",
"\n",
" mthend_origtn_cd current_month_repo num_of_extnsns_cnt \\\n",
"5324364 0 0 0 \n",
"5324380 0 0 0 \n",
"5324388 0 0 0 \n",
"5324397 0 0 0 \n",
"5324414 0 0 0 \n",
"\n",
" day_pastdue_cnt days_30_past_in_l12m_cd days_60_past_in_l12m_cd \\\n",
"5324364 6.0 0.0 0.0 \n",
"5324380 0.0 0.0 0.0 \n",
"5324388 0.0 0.0 0.0 \n",
"5324397 0.0 0.0 0.0 \n",
"5324414 0.0 0.0 0.0 \n",
"\n",
" chrgof_me_dt ... dlr_phy_adr_city_nm dlr_phy_adr_st_cd \\\n",
"5324364 NaN ... GOLDEN VALLEY MN \n",
"5324380 NaN ... DALLAS TX \n",
"5324388 NaN ... HOUSTON TX \n",
"5324397 NaN ... FORT MYERS FL \n",
"5324414 NaN ... HIRAM GA \n",
"\n",
" dlr_phy_adr_zip_cd dlr_phy_adr_zip_4_cd \\\n",
"5324364 55426 NaN \n",
"5324380 75237 NaN \n",
"5324388 77034 4501.0 \n",
"5324397 33901 NaN \n",
"5324414 30141 NaN \n",
"\n",
" days_60_past_in_l12m_cd_label days_30_past_in_l12m_cd_label \\\n",
"5324364 0.0 0.0 \n",
"5324380 0.0 0.0 \n",
"5324388 0.0 0.0 \n",
"5324397 0.0 0.0 \n",
"5324414 0.0 0.0 \n",
"\n",
" day_pastdue_cnt_label dlr_phy_adr_st_cd_label \\\n",
"5324364 0.0 0.0 \n",
"5324380 0.0 0.0 \n",
"5324388 0.0 0.0 \n",
"5324397 0.0 0.0 \n",
"5324414 0.0 0.0 \n",
"\n",
" avg_fico_score_val_label predictions \n",
"5324364 0.0 1 \n",
"5324380 0.0 1 \n",
"5324388 0.0 1 \n",
"5324397 0.0 1 \n",
"5324414 0.0 1 \n",
"\n",
"[5 rows x 33 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testdf_clone.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment